<a href="https://colab.research.google.com/github/iababio/LLMs/blob/main/Alpaca_%2B_TinyLlama.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%capture
# Install Unsloth, choosing the pip extra from the GPU's CUDA compute capability.
# NOTE(review): the package is installed from the repository's default branch with
# no pinned revision, so a re-run may pull different code.
import torch
major_version, minor_version = torch.cuda.get_device_capability()
if major_version >= 8:
    # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)
    !pip install "unsloth[colab_ampere] @ git+https://github.com/unslothai/unsloth.git"
else:
    # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
    !pip install "unsloth[colab] @ git+https://github.com/unslothai/unsloth.git"
pass

In [None]:
import os

from unsloth import FastLanguageModel
import torch
max_seq_length = 4096 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
# (Reference list only; the model actually loaded is named in from_pretrained below.)
fourbit_models = [
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
    "unsloth/llama-2-7b-bnb-4bit",
    "unsloth/llama-2-13b-bnb-4bit",
    "unsloth/codellama-34b-bnb-4bit",
    "unsloth/tinyllama-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit",
    "unsloth/gemma-7b-it-bnb-4bit", # Instruct version of Gemma 7b
    "unsloth/gemma-2b-bnb-4bit",
    "unsloth/gemma-2b-it-bnb-4bit", # Instruct version of Gemma 2b
] # More models at https://huggingface.co/unsloth

# Load the 4-bit Gemma 7b instruct checkpoint and its tokenizer.
# The Hub token is read from the HF_TOKEN environment variable so that no
# credential is stored in the notebook; it falls back to "" when unset.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gemma-7b-it-bnb-4bit", # Instruct version of Gemma 7b
    max_seq_length = max_seq_length,
    dtype = dtype,
    device_map="auto",
    load_in_4bit = load_in_4bit,
    token = os.environ.get("HF_TOKEN", ""),
)

ModuleNotFoundError: No module named 'unsloth'

In [None]:
# Load the "simplified" configuration of the go_emotions dataset and display its splits.
from datasets import Dataset, load_dataset
dataset = load_dataset("go_emotions", "simplified")
dataset

Downloading readme:   0%|          | 0.00/9.40k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.77M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/350k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/347k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/43410 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/5426 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5427 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 43410
    })
    validation: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 5426
    })
    test: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 5427
    })
})

In [None]:
from datasets import DatasetDict, concatenate_datasets
import json
import re
from pprint import pprint

In [None]:
# Hold out 40% of the official test split as the evaluation subset.
# A fixed seed makes the shuffle reproducible, so the reported metrics refer to
# the same examples on every run (3407 matches the seed used for training).
test_testvalid = dataset['test'].train_test_split(test_size=0.4, seed=3407)

dataset_T = DatasetDict({
    'test': test_testvalid['test']
})

In [None]:
# Map each integer label id to its name, using the names exposed by the
# "labels" feature of the train split, then display the mapping.
label_feature = dataset["train"].features["labels"].feature
labels = label_feature.names
emotion = dict(enumerate(labels))
emotion

{0: 'admiration',
 1: 'amusement',
 2: 'anger',
 3: 'annoyance',
 4: 'approval',
 5: 'caring',
 6: 'confusion',
 7: 'curiosity',
 8: 'desire',
 9: 'disappointment',
 10: 'disapproval',
 11: 'disgust',
 12: 'embarrassment',
 13: 'excitement',
 14: 'fear',
 15: 'gratitude',
 16: 'grief',
 17: 'joy',
 18: 'love',
 19: 'nervousness',
 20: 'optimism',
 21: 'pride',
 22: 'realization',
 23: 'relief',
 24: 'remorse',
 25: 'sadness',
 26: 'surprise',
 27: 'neutral'}

In [None]:
# Display the loaded splits again (train / validation / test).
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 43410
    })
    validation: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 5426
    })
    test: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 5427
    })
})

In [None]:
# Prompt template with three positional `{}` slots, filled in order via
# str.format: instruction, input text, response.
alpaca_prompt = """Below is a conversation between a human and an AI agent. write a response based on the input.

### Instruction:
{}

### Input:
{}

### Response:
{}
"""

# End-of-sequence token string of the loaded tokenizer; appended to every training example.
EOS_TOKEN = tokenizer.eos_token

def formatting_prompts_func(examples):
    """Format a batch of go_emotions rows into prompt strings for training.

    Args:
        examples: Batched mapping of column name to list, as passed by
            ``Dataset.map(..., batched=True)``. Reads ``examples["text"]``
            (list of str) and ``examples["labels"]`` (list of lists of label ids).

    Returns:
        ``{"text": [...]}`` with one formatted prompt per input row. Each prompt
        is ``alpaca_prompt`` filled with the instruction, the row's text and the
        comma-separated emotion names (prefixed by a single space), followed by
        ``EOS_TOKEN``.

    Unlike the previous version, every row of the batch is formatted (not only
    the first one), so the function is correct for any ``batch_size``; output
    for ``batch_size=1`` is unchanged. It no longer writes a ``new_emotions``
    module-level global.
    """
    # NOTE(review): the inference cells in this notebook prompt with a different
    # instruction string than this one; confirm that is intended.
    instruction = "predict emotion word"
    max_labels = 258  # cap on labels per example, kept from the original implementation

    texts = []
    for sentence, label_ids in zip(examples["text"], examples["labels"]):
        names = ", ".join(str(emotion[label]) for label in label_ids[:max_labels])
        response = " " + names
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        texts.append(alpaca_prompt.format(instruction, sentence, response) + EOS_TOKEN)
    return { "text" : texts }

# Build the training texts; with batched=True and batch_size=1 the formatter
# receives column lists that hold a single example per call.
dataset_N = dataset["train"].map(formatting_prompts_func, batched=True, batch_size=1)

Map:   0%|          | 0/43410 [00:00<?, ? examples/s]

In [None]:
# Build the validation texts the same way as the training texts (one example per call).
dataset_V = dataset["validation"].map(formatting_prompts_func, batched=True, batch_size=1)

Map:   0%|          | 0/5426 [00:00<?, ? examples/s]

In [None]:
# Wrap the loaded model with LoRA adapters (rank 16) on the attention and MLP
# projection modules listed in target_modules. Rebinds `model` to the wrapped model.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    use_gradient_checkpointing = True,
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2024.3 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [None]:


from trl import SFTTrainer
from transformers import TrainingArguments

# Query bf16 support once and derive both precision flags from it, so exactly
# one of fp16 / bf16 is enabled.
use_bf16 = torch.cuda.is_bf16_supported()

# Supervised fine-tuning on the formatted "text" column, evaluating on the
# formatted validation split.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset_N,
    eval_dataset = dataset_V,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        # Only warmup_steps is set: the previous config also passed
        # warmup_ratio=0.05, which warmup_steps overrides, so dropping it keeps
        # the effective schedule (5 warm-up steps) unchanged.
        warmup_steps = 5,
        num_train_epochs=6,
        evaluation_strategy="steps",
        eval_steps=0.2, # A value below 1 is a fraction of the total training steps.
        save_strategy="epoch",
        # max_steps = 100,
        learning_rate = 2e-4,
        fp16 = not use_bf16,
        bf16 = use_bf16,
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

Map (num_proc=2):   0%|          | 0/43410 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/5426 [00:00<?, ? examples/s]

In [None]:
# Run fine-tuning; the value returned by train() is kept in trainer_stats.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 43,410 | Num Epochs = 6
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 32,556
 "-____-"     Number of trainable parameters = 50,003,968


Step,Training Loss,Validation Loss


In [None]:
# Uses the alpaca_prompt template defined earlier in the notebook.
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# Fill the template; the response slot stays empty so the model generates it.
prompt = alpaca_prompt.format(
    "Suggest a word that describes the emotion of the statement", # instruction
    "This is too accurate 😂", # input
    "", # output - leave this blank for generation!
)
inputs = tokenizer([prompt], return_tensors = "pt").to("cuda")

# Generate at most 4 new tokens and show the decoded sequence (prompt included).
outputs = model.generate(**inputs, max_new_tokens = 4, use_cache = True)
tokenizer.batch_decode(outputs)

['<bos>Below is a conversation between a human and an AI agent. write a response based on the input.\n\n### Instruction:\nSuggest a word that describes the emotion of the statement\n\n### Input:\nThis is too accurate 😂\n\n### Response:\n\n amusement, ed8']

In [None]:
# Uses the alpaca_prompt template defined earlier in the notebook.
from transformers import TextStreamer

FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# Fill the template; the response slot stays empty so the model generates it.
prompt = alpaca_prompt.format(
    "Suggest a word that describes the emotion of the statement", # instruction
    "Thanks for giving advice to the people who need it 👌🙏", # input
    "", # output - leave this blank for generation!
)
inputs = tokenizer([prompt], return_tensors = "pt").to("cuda")

# Stream the generated tokens to the cell output as they are produced.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 4)

<bos>Below is a conversation between a human and an AI agent. write a response based on the input.

### Instruction:
Suggest a word that describes the emotion of the statement

### Input:
Thanks for giving advice to the people who need it 👌🙏

### Response:

 gratitude, ed8


In [None]:
import os

# Local saving. The tokenizer is saved next to the adapters so that the
# directory can be reloaded on its own (e.g. AutoTokenizer.from_pretrained("lora_model")).
model.save_pretrained("lora_model")
tokenizer.save_pretrained("lora_model")

# Online saving. The Hub token is read from the HF_TOKEN environment variable
# instead of being written into the notebook; falls back to "" when unset.
model.push_to_hub("ababio/gemma-7b-it_go_emotion_v2", token = os.environ.get("HF_TOKEN", ""))

README.md:   0%|          | 0.00/575 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/200M [00:00<?, ?B/s]

Saved model to https://huggingface.co/ababio/gemma-7b-it_go_emotion_v2


To load the LoRA adapters we just saved and use them for inference, change `False` to `True` in the cell below:

In [None]:
from transformers import TextStreamer

# Flip this guard to True to reload the adapters saved in "lora_model"
# instead of reusing the model already in memory.
if False:
    from unsloth import FastLanguageModel
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "lora_model", # YOUR MODEL YOU USED FOR TRAINING
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
    )
    FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# alpaca_prompt = You MUST copy from above!

# Fill the template; the response slot stays empty so the model generates it.
prompt = alpaca_prompt.format(
    "Suggest a word that describes the emotion of the statement", # instruction
    "I felt bad for laughing", # input
    "", # output - leave this blank for generation!
)
inputs = tokenizer([prompt], return_tensors = "pt").to("cuda")

# Stream the generated tokens to the cell output as they are produced.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 4)

<bos>Below is a conversation between a human and an AI agent. write a response based on the input.

### Instruction:
Suggest a word that describes the emotion of the statement

### Input:
I felt bad for laughing

### Response:

 remorse, ed8


In [None]:
# Alternative loading path through PEFT/Transformers instead of Unsloth
# (disabled by default; change False to True to use it).
if False:
    # I highly do NOT suggest - use Unsloth if possible
    # The PEFT auto class is named AutoPeftModelForCausalLM; the previous
    # spelling (AutoModelForPeftCausalLM) is not exported by peft and would
    # raise ImportError as soon as this branch is enabled.
    from peft import AutoPeftModelForCausalLM
    from transformers import AutoTokenizer
    model = AutoPeftModelForCausalLM.from_pretrained(
        "lora_model", # YOUR MODEL YOU USED FOR TRAINING
        load_in_4bit = load_in_4bit,
    )
    tokenizer = AutoTokenizer.from_pretrained("lora_model")

In [None]:
import os

# Optional export recipes; each line is disabled until its `False` is changed to `True`.
# The Hub token is read from the HF_TOKEN environment variable so that no
# credential is stored in the notebook; falls back to "" when unset.
hf_token = os.environ.get("HF_TOKEN", "")

# Merge to 16bit
if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",)
if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = hf_token)

# # Merge to 4bit
if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",)
if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = hf_token)

# Just LoRA adapters
if False: model.save_pretrained_merged("model", tokenizer, save_method = "lora",)
if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "lora", token = hf_token)

In [None]:
import os

# Optional GGUF export recipes; each line is disabled until its `False` is changed to `True`.
# SECURITY: a literal Hugging Face access token used to be embedded in two of
# these calls. It has been removed; that token must be treated as leaked and
# revoked. The token is now read from the HF_TOKEN environment variable
# (falls back to "" when unset).
hf_token = os.environ.get("HF_TOKEN", "")

# # Save to 8bit Q8_0
if False: model.save_pretrained_gguf("model", tokenizer,)
if False: model.push_to_hub_gguf("hf/model", tokenizer, token = hf_token)

# Save to 16bit GGUF
if False: model.save_pretrained_gguf("model", tokenizer, quantization_method = "f16")
if False: model.push_to_hub_gguf("hf/model", tokenizer, quantization_method = "f16", token = hf_token)

# # Save to q4_k_m GGUF
if False: model.save_pretrained_gguf("model", tokenizer, quantization_method = "q4_k_m")
if False: model.push_to_hub_gguf("hf/model", tokenizer, quantization_method = "q4_k_m", token = hf_token)

In [None]:
def predict_emotion(model, text, max_new_tokens=4):
    """Generate the model's emotion prediction for one piece of text.

    Args:
        model: Model exposing ``generate``; inputs are moved to ``"cuda"``.
        text: Statement to classify; inserted into the input slot of
            ``alpaca_prompt``.
        max_new_tokens: Maximum number of tokens to generate after the prompt.
            Defaults to 4, the budget the previous implementation expressed as
            ``max_length=input_length + 4``.

    Returns:
        The decoded continuation only (prompt tokens removed, special tokens
        skipped), as a string.

    Decoding is explicitly greedy (``do_sample=False``). The earlier
    ``temperature=0.0001`` and ``early_stopping=True`` arguments were dropped:
    temperature only applies when sampling and early stopping only applies to
    beam search, neither of which was enabled.
    """
    prompt = alpaca_prompt.format(
        "Suggest a word that describes the emotion of the statement", # instruction
        text, # input
        "", # output - leave this blank for generation!
    )
    inputs = tokenizer([prompt], return_tensors = "pt", padding=True, truncation=True).to("cuda")

    # Length of the encoded prompt, used below to slice off the echoed prompt tokens.
    input_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            num_beams=1,
            no_repeat_ngram_size=2,
        )

    # Keep only the newly generated ids; tolist() also copies them off the GPU.
    predicted_ids = outputs[0, input_length:].tolist()
    return tokenizer.decode(predicted_ids, skip_special_tokens=True)


In [None]:
def clean_text(text):
    """Strip URLs, @mentions and ^caret tokens from text and normalize whitespace.

    Args:
        text: Input string.

    Returns:
        The string with every ``http...`` token, ``@mention`` and ``^token``
        removed and every run of whitespace replaced by a single space.
        Leading/trailing whitespace is not stripped.

    Whitespace is collapsed last so that removing a token in the middle of a
    sentence cannot leave a doubled space behind (previously the caret removal
    ran after the collapse and did exactly that).
    """
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"@\S+", "", text)
    # \S+ stops at any whitespace, matching what "[^ ]+" matched on text that
    # had already been space-normalized.
    text = re.sub(r"\^\S+", "", text)
    return re.sub(r"\s+", " ", text)

def create_conversation_text(data_point):
    """Split records into parallel lists of texts and label strings.

    Args:
        data_point: Iterable of mapping-like records. Only records that have
            the keys "text", "labels" and "id", all non-empty, are used.

    Returns:
        ``(text, emotions)``: the texts of the kept records, and for each one a
        ", "-joined string of its emotion names (looked up in the module-level
        ``emotion`` mapping) followed by the record id.
    """
    required_keys = ("text", "labels", "id")
    text, emotions = [], []

    for item in data_point:
        # Skip records that lack any of the required fields.
        if not all(key in item for key in required_keys):
            continue
        # Skip records in which any required field is empty.
        is_complete = item["text"] != "" and item["labels"] != [] and item['id'] != ""
        if not is_complete:
            continue

        text.append(item["text"])
        parts = [emotion[label] for label in item["labels"]] + [item["id"]]
        emotions.append(", ".join(map(str, parts)))

    return text, emotions

In [None]:
def generate_text(data_point):
    """Convert records into a list of ``{"text", "emotions"}`` dicts.

    Args:
        data_point: Iterable of records, passed to ``create_conversation_text``.

    Returns:
        One dict per kept record, pairing its text with its label string.
    """
    conversation_text, emotions = create_conversation_text(data_point)

    # Ensure conversation_text and emotions are lists of strings
    both_lists = isinstance(conversation_text, list) and isinstance(emotions, list)
    assert both_lists, "conversation_text and emotions should be lists"

    return [
        {"text": sentence, "emotions": label_string}
        for sentence, label_string in zip(conversation_text, emotions)
    ]

In [None]:
# Display the held-out evaluation subset.
dataset_T

DatasetDict({
    test: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 2171
    })
})

In [None]:
# Convert the held-out subset into a list of {"text", "emotions"} dicts.
test_data = generate_text(dataset_T['test'])

In [None]:
# Preview the first ten converted records.
test_data[:10]

[{'text': 'Sorry. I am avoiding arguments today. Enjoy the video.',
  'emotions': 'remorse, eexdp15'},
 {'text': 'I don’t know ,I kinda find this funny',
  'emotions': 'amusement, confusion, ed2ooof'},
 {'text': 'I guess u were right since [NAME] just posted on ig the Dress made of tampons and pads lol',
  'emotions': 'amusement, edg5atb'},
 {'text': 'Become friends with people who work night shift. We are always up at crazy hours and need a distraction from our work.',
  'emotions': 'approval, joy, ede0xwo'},
 {'text': 'Concas! In vic park. Been closed down multiple times for health and safety violations lol',
  'emotions': 'neutral, ee3vdmt'},
 {'text': 'He looked so shlubby on that show, it was like he was the [NAME] version of [NAME].',
  'emotions': 'neutral, edgfvls'},
 {'text': 'Ugh. [NAME] [NAME] (another British celeb)',
  'emotions': 'neutral, ee17ydn'},
 {'text': 'aye they buggin for no reason im just saying good job keep it up.',
  'emotions': 'admiration, gratitude, optimi

In [None]:
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

In [None]:
def calculate_metrics(model, dataset):
    """Score the model's emotion predictions as a multi-label classification task.

    Args:
        model: Model passed through to ``predict_emotion``.
        dataset: Iterable of dicts with a "text" key (input statement) and an
            "emotions" key (comma-separated reference string, as produced by
            ``generate_text``).

    Returns:
        Dict with the keys "F1 Score", "Precision", "Recall" (macro-averaged
        over the emotion labels) and "Accuracy" (fraction of examples whose
        predicted label set equals the reference label set exactly).

    Raises:
        ValueError: If ``dataset`` yields no examples.

    Both the prediction and the reference are parsed into sets of known emotion
    names; comma-separated tokens that are not emotion names (such as the
    example id appended to the reference string, or stray generated tokens)
    are ignored. The previous implementation compared the raw strings, which
    contain the per-example id, and replaced the prediction with the reference
    string whenever the first predicted word matched, so its numbers did not
    measure label quality.
    """
    label_names = list(emotion.values())
    column_of = {name: col for col, name in enumerate(label_names)}

    def to_indicator(label_text):
        # One 0/1 entry per known emotion; unknown tokens are dropped.
        row = [0] * len(label_names)
        for token in label_text.split(","):
            col = column_of.get(token.strip().lower())
            if col is not None:
                row[col] = 1
        return row

    predictions = []
    references = []

    for example in dataset:
        predicted_emotion = predict_emotion(model, example["text"])
        predictions.append(to_indicator(predicted_emotion))
        references.append(to_indicator(example["emotions"]))

    if not references:
        raise ValueError("Cannot compute metrics on an empty dataset.")

    # Calculate metrics on the label-indicator rows. zero_division=0 scores
    # labels that are never predicted (or never present) as 0 instead of
    # emitting an undefined-metric warning.
    f1 = f1_score(references, predictions, average='macro', zero_division=0)
    precision = precision_score(references, predictions, average='macro', zero_division=0)
    recall = recall_score(references, predictions, average='macro', zero_division=0)
    accuracy = accuracy_score(references, predictions)

    return {
        "F1 Score": f1,
        "Precision": precision,
        "Recall": recall,
        "Accuracy": accuracy
    }

In [None]:
# Example usage: evaluate the model on the converted held-out records.
metrics = calculate_metrics(model, test_data)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Show the dict of metric values returned by calculate_metrics.
print(metrics)

{'F1 Score': 0.41850410722006054, 'Precision': 0.41850410722006054, 'Recall': 0.41850410722006054, 'Accuracy': 0.44587747581759557}
