# Mistral for Sequence Classification

In [1]:
from datasets import load_dataset
from transformers import DataCollatorWithPadding, Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoTokenizer
import evaluate
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from pathlib import Path
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments,pipeline, logging
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model

In [5]:
model_id = "mistralai/Mistral-7B-v0.3"

# 4-bit NF4 quantization settings; compute dtype is bfloat16 and double
# quantization is disabled.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

# Load the quantized base model with a 4-class sequence-classification head.
model = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    num_labels=4,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse EOS as the padding token and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Keep the model config in sync with the tokenizer: the sequence-classification
# head uses config.pad_token_id to find the last non-padding token of each row.
# Without it, padded batches (batch size > 1) cannot be classified.
model.config.pad_token_id = tokenizer.pad_token_id

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Some weights of MistralForSequenceClassification were not initialized from the model checkpoint at mistralai/Mistral-7B-v0.3 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
# NOTE: this cell rebinds `model`; re-run the loading cell first if you need
# to execute it again, otherwise an already wrapped model gets wrapped twice.

# Ready the 4-bit quantized model for k-bit fine-tuning.
model = prepare_model_for_kbit_training(model)

# LoRA adapter settings for the sequence-classification task.
peft_config = LoraConfig(
    task_type="SEQ_CLS",
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
    ],
)

# Wrap the model with the adapters described by peft_config.
model = get_peft_model(model, peft_config)

In [7]:
# Textual verdict label -> integer class id.
label2id = {
    "Refuted": 0,
    "Supported": 1,
    "Not Enough Evidence": 2,
    "Conflicting Evidence/Cherrypicking": 3,
}

# NOTE(review): dotenv_path is not referenced by any visible cell - confirm it is still needed.
dotenv_path = Path("aic_averitec/.env")

DATASET_PATH = "/mnt/data/factcheck/averitec-data/data"

# Seed used when shuffling the training split.
SEED = 42  # Answer to the Ultimate Question of Life, the Universe, and Everything

# Batch collator built around the tokenizer.
data_collator = DataCollatorWithPadding(tokenizer)

# Load the "train" and "dev" splits from their JSON Lines files.
dataset = load_dataset(
    "json",
    data_files={
        split: os.path.join(DATASET_PATH, f"{split}_nli_4concat.jsonl")
        for split in ("train", "dev")
    },
)


#tokenize dataset
def tokenize_function(examples):
    """Tokenize a batch of claim/evidence pairs and encode their labels.

    Args:
        examples: Batch exposing the "claim", "evidence" and "label" columns.

    Returns:
        The tokenizer output for the (claim, evidence) pairs with an added
        "label" entry holding the integer class ids taken from ``label2id``.

    Raises:
        KeyError: If a label string is missing from ``label2id``.
    """
    # Each claim/evidence pair is truncated to at most 1024 tokens.
    encoded = tokenizer(
        examples["claim"],
        examples["evidence"],
        truncation=True,
        max_length=1024,
    )
    encoded["label"] = [label2id[name] for name in examples["label"]]
    return encoded

# Tokenize every split in batches, then drop the raw text columns.
tokenized_dataset = dataset.map(tokenize_function, batched=True).remove_columns(
    ["claim", "evidence"]
)

# NOTE(review): clf_metrics is not referenced by any visible cell (compute_metrics
# relies on the sklearn scorers instead) - confirm it is still needed.
clf_metrics = evaluate.combine(["accuracy", "f1", "precision", "recall"])

def compute_metrics(eval_pred):
    """Compute accuracy and macro-averaged precision/recall/F1 for one evaluation pass.

    Args:
        eval_pred: Object exposing ``label_ids`` (reference class ids) and
            ``predictions`` (per-class scores; the argmax over the last axis
            is taken as the predicted class).

    Returns:
        dict with the keys 'accuracy', 'precision', 'recall' and 'f1'.
    """
    labels = eval_pred.label_ids
    preds = eval_pred.predictions.argmax(-1)

    # A class that is never predicted (or never present) makes its per-class
    # score undefined. zero_division=0 scores it as 0 explicitly, which is the
    # value the default setting also uses, but without a warning on every
    # evaluation.
    macro_kwargs = {"average": "macro", "zero_division": 0}

    return {
        'accuracy': accuracy_score(labels, preds),
        'precision': precision_score(labels, preds, **macro_kwargs),
        'recall': recall_score(labels, preds, **macro_kwargs),
        'f1': f1_score(labels, preds, **macro_kwargs),
    }


training_args = TrainingArguments(
    # Plain string literal: the path has no placeholders, so no f-string is needed.
    output_dir="/home/mlynatom/models/averitec/nli_4concat/mistral-7B-v0.3",
    # Optimisation
    learning_rate=1e-5,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    num_train_epochs=5,
    weight_decay=0.01,
    warmup_ratio=0.06,
    optim="paged_adamw_32bit",
    bf16=True,
    gradient_checkpointing=True,
    # Evaluate and checkpoint once per epoch. "f1" is the macro-F1 key returned
    # by compute_metrics and decides which checkpoint is reloaded at the end.
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    save_total_limit=1,
    # Logging
    report_to="wandb",
    logging_steps=10,
    logging_strategy="steps",
    # Tie the trainer's seed to the notebook-wide SEED constant so the two
    # cannot drift apart if SEED is changed.
    seed=SEED,
)



Map:   0%|          | 0/3068 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [8]:
# Assemble the trainer: PEFT-wrapped model, tokenized splits, padding collator
# and the metric callback.
trainer = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    # The training split is shuffled once up front with the notebook seed.
    train_dataset=tokenized_dataset["train"].shuffle(seed=SEED),
    eval_dataset=tokenized_dataset["dev"],
)

# Run fine-tuning; the returned TrainOutput is displayed as the cell result.
trainer.train()

Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
[34m[1mwandb[0m: Currently logged in as: [33mmlynatom[0m. Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Epoch,Training Loss,Validation Loss


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


TrainOutput(global_step=15340, training_loss=1.067909813642191, metrics={'train_runtime': 7337.2368, 'train_samples_per_second': 2.091, 'train_steps_per_second': 2.091, 'total_flos': 1.3279237728976896e+17, 'train_loss': 1.067909813642191, 'epoch': 5.0})

In [9]:
# Persist the trained model to a dedicated "-final" directory next to the
# checkpoint directory used during training.
trainer.save_model(
    output_dir="/home/mlynatom/models/averitec/nli_4concat/mistral-7B-v0.3-final"
)