In [None]:
%%capture
# Environment setup for this notebook; %%capture (which must stay on the first
# line of the cell) hides pip's output.
# Step 1: install Unsloth with its "colab-new" extra straight from the GitHub
# repository's default branch.
# NOTE(review): no tag or commit is pinned here, so a re-run may pull a
# different Unsloth version than the one that produced the saved outputs.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# Step 2: install the companion packages. --no-deps tells pip not to resolve
# their dependencies; xformers and trl are capped below the versions named in
# the specifiers, while peft, accelerate and bitsandbytes are left unpinned.
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes

In [None]:
from unsloth import FastLanguageModel
import torch
# Loader settings. These three names are read again by later cells
# (trainer construction and the optional adapter reload), so they stay global.
# NOTE(review): dtype=None presumably lets the loader pick a dtype itself - confirm.
max_seq_length, dtype, load_in_4bit = 2048, None, True

# Load the checkpoint (its name marks it as a bitsandbytes 4-bit build)
# together with the matching tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    load_in_4bit = load_in_4bit,
    dtype = dtype,
    max_seq_length = max_seq_length,
    model_name = "unsloth/llama-3-8b-bnb-4bit",
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.8: Fast Llama patching. Transformers = 4.43.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.26.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/172 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/464 [00:00<?, ?B/s]

In [None]:
from google.colab import drive
import pandas as pd

# Mount Google Drive at /content/drive; the dataset archive is read from
# /content/drive/MyDrive in the next step.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import zipfile

# Unpack the dataset archive stored on the mounted Drive into a local folder.
zip_path = '/content/drive/MyDrive/blog.zip'
with zipfile.ZipFile(zip_path, mode='r') as zip_ref:
    zip_ref.extractall(path='/content/extracted_files')

In [None]:
# Read the extracted CSV into a DataFrame for a quick inspection.
csv_path = '/content/extracted_files/blog.csv'
blog = pd.read_csv(filepath_or_buffer=csv_path)

In [None]:
# Display the (rows, columns) shape of the loaded frame as a sanity check.
blog.shape

(200140, 5)

In [None]:
# Wrap the loaded model with LoRA adapters on the listed projection modules.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    # Adapter hyper-parameters.
    r = 16,
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_rslora = False,
    loftq_config = None,
    # Memory / reproducibility settings.
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
)

Unsloth: Already have LoRA adapters! We shall skip this step.


In [None]:
# Training template: a fixed instruction followed by a title slot and a blog slot.
blog_prompt = """Write a comprehensive and lengthy blog with more than 1000 words about the title. Cover the following aspects:
- Key features
- Latest technologies involved

### Title:
{}

### Blog:
{}"""

def format_input_prompt(examples, eos_token=None):
    """Render one training string per (title, blog) pair in a batch.

    Args:
        examples: Batch mapping with parallel sequences under the keys
            'blog_title' and 'blog_content'.
        eos_token: End-of-sequence marker appended to every rendered text.
            When omitted, it is read from the notebook's global
            ``tokenizer.eos_token`` at call time.

    Returns:
        A dict ``{"text": [...]}`` holding one formatted string per input row.
    """
    # Without a trailing EOS marker the fine-tuned model is never shown where a
    # blog ends, so generation tends to run until the token limit.
    if eos_token is None:
        eos_token = tokenizer.eos_token

    texts = [
        blog_prompt.format(title, blog) + eos_token
        for title, blog in zip(examples['blog_title'], examples['blog_content'])
    ]
    return {"text": texts}

from datasets import load_dataset

# Read the extracted CSV with the 'csv' loader, exposing it as a 'train' split.
df = load_dataset('csv', data_files=dict(train='/content/extracted_files/blog.csv'))

# Show which columns the split provides.
print(df['train'].column_names)

# Run the prompt formatter over the data in batches; it returns the 'text' column.
formatted_dataset = df.map(format_input_prompt, batched=True)

# Print the resulting dataset summary.
print(formatted_dataset)

['ratings', 'blog_title', 'blog_content', 'topic', 'text']


Map:   0%|          | 0/200140 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['ratings', 'blog_title', 'blog_content', 'topic', 'text'],
        num_rows: 200140
    })
})


In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Probe bf16 support once; fp16 is used exactly when bf16 is unavailable.
use_bf16 = is_bfloat16_supported()

# Optimisation settings: 60 steps at an effective batch of 2 * 4 = 8 samples.
training_args = TrainingArguments(
    output_dir = "outputs",
    seed = 3407,
    # Batch geometry and run length.
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 4,
    max_steps = 60,
    # Optimiser and schedule.
    optim = "adamw_8bit",
    learning_rate = 2e-4,
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    warmup_steps = 5,
    # Precision and logging.
    bf16 = use_bf16,
    fp16 = not use_bf16,
    logging_steps = 1,
)

# Supervised fine-tuning on the 'text' column of the train split, without packing.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    args = training_args,
    train_dataset = formatted_dataset['train'],
    dataset_text_field = "text",
    dataset_num_proc = 2,
    max_seq_length = max_seq_length,
    packing = False,
)

Map (num_proc=2):   0%|          | 0/200140 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [None]:
# Run the fine-tuning loop and keep the returned summary for later inspection.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 200,140 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,2.0806
2,2.0731
3,2.0292
4,2.423
5,2.1902
6,2.0697
7,1.9149
8,2.1232
9,2.1468
10,2.4636


In [None]:
# Display the summary object returned by trainer.train().
trainer_stats

TrainOutput(global_step=60, training_loss=2.243802450100581, metrics={'train_runtime': 205.9117, 'train_samples_per_second': 2.331, 'train_steps_per_second': 0.291, 'total_flos': 2695952459022336.0, 'train_loss': 2.243802450100581, 'epoch': 0.0023983211751773758})

In [None]:
# Switch the fine-tuned model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# Inference template: only the title slot is filled; the model continues after
# "### Blog:".
# NOTE(review): this instruction omits the "with more than 1000 words" wording
# of the training template - confirm the mismatch is intentional.
blog_prompt = """Write a comprehensive and lengthy blog about the title. Cover the following aspects:
- Key features
- Latest technologies involved

### Title:
{}

### Blog:
"""

# A previous generate() call was removed from this cell. It ran on the
# unformatted template (literal "{}" as the title), forced at least 2000 tokens
# with max_length=5000 (beyond the 2048-token max_seq_length used at load time),
# and its result was overwritten below without being read.

# The template has a single placeholder, so only the title is passed.
inputs = tokenizer([blog_prompt.format("Sony Headphones")], return_tensors='pt').to('cuda')

# Generate up to 1000 new tokens, stopping early if the EOS token is produced.
outputs = model.generate(**inputs, max_new_tokens=1000, repetition_penalty=1.2, eos_token_id=tokenizer.eos_token_id)

# Decode the first (only) sequence, dropping special tokens from the text.
response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

print(response)

Write a comprehensive and lengthy blog about the title. Cover the following aspects:
- Key features
- Latest technologies involved

### Title:
Sony Headphones

### Blog:
The Sony WH1000XM5 is one of the best noise-canceling headphones on the market, but they’re not perfect. Here’s what we like — and don’t — about them. The Sony XM5s are some of our favorite wireless over-the-ear headphones you can buy right now… if you have $350 to spend. We’ve been using these cans for several months as part of an ongoing review process that began with their predecessor,…


In [None]:
import os
from getpass import getpass

# SECURITY: an access token used to be written literally in this cell. Treat
# that token as compromised and revoke it in the Hugging Face account settings.
# The token is now read from the HF_TOKEN environment variable, with an
# interactive hidden prompt as the fallback, so it never lands in the notebook.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face write token: ")

# Upload the model and the tokenizer to the same Hub repository.
model.push_to_hub("Darrinbright/Llama3FineTunedBlog", token = hf_token)
tokenizer.push_to_hub("Darrinbright/Llama3FineTunedBlog", token = hf_token)

README.md:   0%|          | 0.00/579 [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

Saved model to https://huggingface.co/Darrinbright/Llama3FineTunedBlog


In [None]:
import shutil
from google.colab import files

# Write the model files and the tokenizer files into the same local folder.
for artifact in (model, tokenizer):
    artifact.save_pretrained("Llama3FineTunedBlog")

# Zip that folder; make_archive appends ".zip" to the base name.
shutil.make_archive(base_name="Llama3FineTunedBlog", format='zip', root_dir='Llama3FineTunedBlog')

# Hand the archive to the browser as a download.
files.download("Llama3FineTunedBlog.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Set to True to reload the locally saved "Llama3FineTunedBlog" LoRA adapters
# for inference instead of reusing the model already held in memory.
LOAD_SAVED_ADAPTERS = False
if LOAD_SAVED_ADAPTERS:
    from unsloth import FastLanguageModel
    model, tokenizer = FastLanguageModel.from_pretrained(
        load_in_4bit = load_in_4bit,
        dtype = dtype,
        max_seq_length = max_seq_length,
        model_name = "Llama3FineTunedBlog",
    )
    FastLanguageModel.for_inference(model)

# Two-slot template: the title is filled in and the blog slot is left empty so
# the model writes the continuation.
blog_prompt = """Below is a title that describes the topic of a blog. Write a comprehensive and informative blog on the given title.

### Title:
{}

### Blog:
{}"""

prompt_text = blog_prompt.format("Samsung wireless earphones", "")
inputs = tokenizer([prompt_text], return_tensors='pt').to('cuda')

# Short sample: at most 100 new tokens; special tokens are kept in the decoded text.
outputs = model.generate(**inputs, max_new_tokens=100)
response = tokenizer.batch_decode(outputs)[0]
print(response)

<|begin_of_text|>Below is a title that describes the topic of a blog. Write a comprehensive and informative blog on the given title.

### Title:
Samsung wireless earphones

### Blog:
Samsung has been in the earphone industry for quite some time now. Their earphones are known for their durability and sound quality. The Samsung Galaxy Buds Pro are the latest addition to the Samsung earphone family. The earbuds have been designed to provide a comfortable and secure fit. The earbuds…<|end_of_text|>
