In [None]:
# prompt: install bitsandbytes library

!pip install bitsandbytes
!pip install peft
!pip install evaluate
# !pip install pytorch
!pip install requests

In [3]:
from huggingface_hub import login

# Log in using your Hugging Face token.
# The token is read from the HF_TOKEN environment variable so it never has to be
# pasted into (and saved with) the notebook. When the variable is unset, `None`
# is passed and `login` falls back to asking for the token interactively.
import os

login(token=os.environ.get('HF_TOKEN'))

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [4]:
# Emotion class names, listed in the order of their integer class ids (0..5).
_EMOTION_NAMES = ("SADNESS", "JOY", "LOVE", "ANGER", "FEAR", "SURPRISE")

# Forward map: integer class id -> class name.
id2label = dict(enumerate(_EMOTION_NAMES))

# Reverse map: class name -> integer class id (exact inverse of id2label).
label2id = {name: class_id for class_id, name in id2label.items()}


In [None]:
# Imported in this cell because it sits above the notebook's main import cell;
# without it a fresh "Restart & Run All" stops here with a NameError.
from datasets import load_dataset

# Download (or load from the local cache) the "dair-ai/emotion" dataset.
ds = load_dataset("dair-ai/emotion")

In [5]:
import os
import random
import functools
import csv
import numpy as np
import torch
import torch.nn.functional as F
from sklearn.metrics import f1_score
from skmultilearn.model_selection import iterative_train_test_split
from datasets import load_dataset
from peft import (
    LoraConfig,
    prepare_model_for_kbit_training,
    get_peft_model,
    PeftModel
)
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
)
from sklearn.metrics import accuracy_score, f1_score

In [None]:
# define custom batch preprocessor
# The collator is built from its own tokenizer instance so this cell does not
# depend on the `tokenizer` variable, which is only created in a later cell
# (that dependency raises a NameError on a fresh "Restart & Run All").
# The settings mirror that later tokenizer: same checkpoint, and EOS reused as
# the padding token. Keep the checkpoint name in sync with `model_name`.
collator_tokenizer = AutoTokenizer.from_pretrained('mistralai/Mistral-7B-v0.1')
collator_tokenizer.pad_token = collator_tokenizer.eos_token
data_collator = DataCollatorWithPadding(tokenizer=collator_tokenizer)

# define which metrics to compute for evaluation
def compute_metrics(p):
    """Compute accuracy and micro/macro/weighted F1 for single-label classification.

    Args:
        p: object that unpacks into ``(predictions, labels)``. ``predictions`` is
            assumed to hold per-class scores with the classes on axis 1, and
            ``labels`` the integer class id of each sample.

    Returns:
        dict with the keys ``accuracy``, ``f1_micro``, ``f1_macro`` and
        ``f1_weighted``.
    """
    class_scores, labels = p
    # Highest-scoring class per sample -> predicted integer class id.
    predicted_ids = np.argmax(class_scores, axis=1)

    # The class ids are scored directly. Thresholding them with `> 0` (a
    # multi-label idiom) would merge every non-zero class into a single
    # "True" value and make the F1 scores meaningless for this task.
    return {
        'accuracy': accuracy_score(labels, predicted_ids),
        'f1_micro': f1_score(labels, predicted_ids, average='micro'),
        'f1_macro': f1_score(labels, predicted_ids, average='macro'),
        'f1_weighted': f1_score(labels, predicted_ids, average='weighted'),
    }

In [None]:
# Hub id of the base checkpoint that gets fine-tuned.
model_name = 'mistralai/Mistral-7B-v0.1'


def tokenize_examples(examples, tokenizer):
    """Tokenize a batch of examples and carry its labels along.

    Args:
        examples: mapping with a 'text' entry (passed to ``tokenizer``) and a
            'label' entry (copied through unchanged).
        tokenizer: callable applied to ``examples['text']``; its result must
            support item assignment.

    Returns:
        The tokenizer's result with ``examples['label']`` stored under 'label'.
    """
    encoded_batch = tokenizer(examples['text'])
    encoded_batch['label'] = examples['label']
    return encoded_batch

# Tokenizer of the base checkpoint; the EOS token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Bind the tokenizer once, tokenize every split in batches, then have the
# dataset return torch tensors.
tokenize_batch = functools.partial(tokenize_examples, tokenizer=tokenizer)
tokenized_ds = ds.map(tokenize_batch, batched=True).with_format('torch')

# quantization config
quantization_config = BitsAndBytesConfig(
    # store the base weights in 4-bit precision
    load_in_4bit=True,
    # NF4: 4-bit data type designed for normally distributed weights
    bnb_4bit_quant_type='nf4',
    # additionally quantize the quantization constants themselves
    bnb_4bit_use_double_quant=True,
    # run the actual computations in bfloat16
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# lora config
lora_config = LoraConfig(
    # rank (inner dimension) of the low-rank update matrices
    r=16,
    # scaling factor applied to the LoRA activations relative to the frozen weights
    lora_alpha=8,
    # attention projection layers that receive LoRA adapters
    target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj'],
    # dropout probability inside the LoRA layers
    lora_dropout=0.05,
    # do not train any bias weights
    bias='none',
    # sequence classification task
    task_type='SEQ_CLS',
)

# Load the quantized base checkpoint with a 6-class classification head.
base_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    num_labels=6,
    device_map="auto",
)

# Prepare the quantized model for k-bit training, then wrap it with the LoRA adapters.
model = get_peft_model(prepare_model_for_kbit_training(base_model), lora_config)

# Tell the model which token id is used for padding (same id as in the tokenizer).
model.config.pad_token_id = tokenizer.pad_token_id


In [None]:
# Hyper-parameters and bookkeeping options for the training run.
training_args = TrainingArguments(
    output_dir='ITSOLERA-4-MISTRAL-V1-TUNING',
    # optimisation
    learning_rate=1e-4,
    weight_decay=0.01,
    num_train_epochs=4,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    # evaluate and checkpoint once per epoch, then restore the best checkpoint
    evaluation_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
    # upload the result to the Hugging Face Hub
    push_to_hub=True,
    dataloader_num_workers=4,
)

# train
# Per-epoch evaluation and best-checkpoint selection (`load_best_model_at_end`
# in `training_args`) run on the validation split, so the test split is not used
# for model selection and stays available for an unbiased final evaluation.
# Assumes the loaded dataset provides a 'validation' split.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_ds['train'],
    eval_dataset=tokenized_ds['validation'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()


# **INFERENCE**

In [6]:
from transformers import AutoModelForSequenceClassification, BitsAndBytesConfig
import torch

# Load the trained model
model_path = "Muhammad10101/ITSOLERA-4-MISTRAL-V1-TUNING"  # fine-tuned LoRA adapter
model_path_base = "mistralai/Mistral-7B-v0.1"  # base checkpoint the adapter was trained on

# Quantize the base model with the same settings that were used for fine-tuning
# (NF4, double quantization, bfloat16 compute). The adapter was trained against
# base weights quantized this way; leaving these options at their defaults would
# quantize the base model differently from what the adapter saw during training.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
device_map = "auto"

# Load the base model in 4-bit precision with a 6-class classification head
# and the human-readable label names.
test_model = AutoModelForSequenceClassification.from_pretrained(
    model_path_base,
    num_labels=6,
    device_map=device_map,
    id2label=id2label,
    label2id=label2id,
    quantization_config=bnb_config,  # enables 4-bit quantization
)

# Attach the fine-tuned adapter and make sure dropout layers are disabled.
model = PeftModel.from_pretrained(test_model, model_path)
model.eval()

# Same tokenizer and padding setup as during training, so that padded batches
# are handled the same way at inference time.
tokenizer = AutoTokenizer.from_pretrained(model_path_base)
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of MistralForSequenceClassification were not initialized from the model checkpoint at mistralai/Mistral-7B-v0.1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
text = "I am broken today"
# Tokenize into PyTorch tensors and place them on the same device as the model,
# so the forward pass does not depend on inputs being moved implicitly.
inputs = tokenizer(text, return_tensors="pt").to(model.device)

In [11]:
# Forward pass without gradient tracking; keep only the classification logits.
with torch.no_grad():
    model_output = model(**inputs)
    logits = model_output.logits

In [12]:
# Position of the largest logit, looked up as a label name in the model config.
predicted_class_id = int(torch.argmax(logits))
model.config.id2label[predicted_class_id]

'SADNESS'