In [None]:
!huggingface-cli login --token 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

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
%%capture
# Install (or upgrade to the newest release of) the quantization, modelling and
# fine-tuning libraries used below; %%capture hides the installer output.
# NOTE(review): versions are not pinned, so a later re-run may pull releases
# with different APIs than the ones that produced the recorded outputs.
%pip install -U bitsandbytes
%pip install -U transformers
%pip install -U accelerate
%pip install -U peft
%pip install -U trl

In [None]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer
from trl import setup_chat_format
from transformers import (AutoModelForCausalLM,
                          AutoTokenizer,
                          BitsAndBytesConfig,
                          TrainingArguments,
                          pipeline,
                          logging)
from sklearn.metrics import (accuracy_score,
                             classification_report,
                             confusion_matrix)
from sklearn.model_selection import train_test_split

## Loading and processing the dataset

In [None]:
# Load the `ucirvine/sms_spam` dataset from the Hugging Face Hub and store its
# `train` split in a pandas DataFrame named `df`.

import pandas as pd
from datasets import load_dataset
dataset = load_dataset("ucirvine/sms_spam")
df = pd.DataFrame(dataset['train'])
# Replace the integer class ids with the label words used in the prompts
# (0 -> "normal", 1 -> "spam").
df.label = df.label.map({0: "normal", 1: "spam"})
# Preview the first rows.
df.head()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Unnamed: 0,sms,label
0,"Go until jurong point, crazy.. Available only ...",normal
1,Ok lar... Joking wif u oni...\n,normal
2,Free entry in 2 a wkly comp to win FA Cup fina...,spam
3,U dun say so early hor... U c already then say...,normal
4,"Nah I don't think he goes to usf, he lives aro...",normal


In [None]:
# Shuffle all rows with a fixed seed so the positional split below is reproducible.
# Note: `df` is overwritten, so re-running this cell reshuffles the already shuffled frame.
df = df.sample(frac=1, random_state=77).reset_index(drop=True)

# Split the DataFrame: fractions of the rows used for training and evaluation;
# every row after those two slices becomes the test set.
# NOTE(review): the outputs recorded further down report 3623 train / 278 eval /
# 1673 test examples, which is not what a 0.5 / 0.1 split of that many rows gives —
# they were probably produced with different fractions; confirm before comparing numbers.
train_size = 0.5
eval_size = 0.1

# Calculate sizes (row positions where the train and eval slices end)
train_end = int(train_size * len(df))
eval_end = train_end + int(eval_size * len(df))

# Split the data; each part gets a fresh 0-based index
X_train = df[:train_end].reset_index(drop=True)
X_eval = df[train_end:eval_end].reset_index(drop=True)
X_test = df[eval_end:].reset_index(drop=True)

# Define the prompt generation functions
def generate_prompt(data_point):
    """Build the training prompt for one example, answer included.

    Args:
        data_point: mapping-like row providing the keys ``"sms"`` (message
            text) and ``"label"`` (expected answer).

    Returns:
        str: instruction line, ``text:`` line and ``label:`` line joined by
        newlines, with surrounding whitespace stripped.
    """
    instruction = "Classify the text into Spam and normal, return only the label as spam or normal, no other category please."
    parts = [
        instruction,
        f"text: {data_point['sms']}",
        f"label: {data_point['label']}",
    ]
    # Strip last so trailing whitespace coming from the label is removed as well.
    return "\n".join(parts).strip()

def generate_test_prompt(data_point):
    """Build the inference prompt for one example, with the answer left open.

    Args:
        data_point: mapping-like row providing the key ``"sms"`` (message text).

    Returns:
        str: instruction line, ``text:`` line and an empty ``label:`` line
        joined by newlines. The final strip removes the space after
        ``label:``, so the prompt ends with ``"label:"``.
    """
    instruction = "Classify the text into Spam and normal, return only the label as spam or normal, no other category please."
    parts = [
        instruction,
        f"text: {data_point['sms']}",
        "label: ",
    ]
    return "\n".join(parts).strip()

# Generate prompts for training and evaluation data.
# The raw message in the `sms` column is overwritten by the full prompt (answer
# included), so this cell is not idempotent: running it twice wraps the prompt
# inside another prompt.
X_train.loc[:,'sms'] = X_train.apply(generate_prompt, axis=1)
X_eval.loc[:,'sms'] = X_eval.apply(generate_prompt, axis=1)

# Generate test prompts and extract true labels.
# `y_true` has to be taken first: the next line replaces `X_test` with a
# single-column frame of answer-free prompts, dropping the `label` column.
y_true = X_test.loc[:,'label']
X_test = pd.DataFrame(X_test.apply(generate_test_prompt, axis=1), columns=["sms"])

In [None]:
# Convert to Hugging Face datasets, keeping only the `sms` column (the full prompt text).
# NOTE(review): `Dataset` must be `datasets.Dataset` here; a later cell re-imports the
# name `Dataset` from `torch.utils.data`, so re-running this cell after that import
# will not find `from_pandas`.
train_data = Dataset.from_pandas(X_train[["sms"]])
eval_data = Dataset.from_pandas(X_eval[["sms"]])

In [None]:
train_data['sms'][3]

'Classify the text into Spam and normal, return only the label as spam or normal, no other category please.\ntext: PRIVATE! Your 2003 Account Statement for shows 800 un-redeemed S.I.M. points. Call 08718738001 Identifier Code: 49557 Expires 26/11/04\n\nlabel: spam'

## Loading the model and tokenizer

In [None]:
# Checkpoint to load; the commented alternative is the fine-tuned teacher on the Hub.
base_model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
# base_model_name = "arshiakarimian1/spam-llama3.1-8B-teacher-2"

# bitsandbytes settings: 4-bit NF4 weights, no double quantization, float16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)

# Load the causal LM with the quantization config above; device placement is
# left to `device_map="auto"`.
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype="float16",
    quantization_config=bnb_config,
)

# NOTE(review): presumably the cache is switched off because the training cell
# enables gradient checkpointing — confirm.
model.config.use_cache = False
# NOTE(review): presumably pins the tensor-parallel degree recorded in the config to 1 — confirm.
model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
# Load the tokenizer that belongs to the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Use the end-of-sequence token id as the padding token id.
tokenizer.pad_token_id = tokenizer.eos_token_id

tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

## Model evaluation before fine-tuning

In [None]:
def predict(test, model, tokenizer):
    """Generate a spam/normal prediction for every prompt in ``test``.

    Args:
        test: pandas DataFrame whose ``sms`` column holds the prompts; rows
            are read positionally through ``.iloc``.
        model: model handed to the ``text-generation`` pipeline.
        tokenizer: tokenizer handed to the same pipeline.

    Returns:
        list[str]: one entry per row, in row order: ``"normal"``, ``"spam"``,
        or ``"none"`` when neither word occurs in the generated answer.
    """
    categories = ["normal", "spam"]

    # The pipeline arguments do not depend on the row, so build it once
    # instead of once per example.
    pipe = pipeline(task="text-generation",
                    model=model,
                    tokenizer=tokenizer,
                    max_new_tokens=2,
                    temperature=0.1)

    y_pred = []
    for i in tqdm(range(len(test))):
        prompt = test.iloc[i]["sms"]
        result = pipe(prompt)
        # Whatever follows the last "label:" in the returned text is treated as the answer.
        answer = result[0]['generated_text'].split("label:")[-1].strip().lower()
        # First category (in list order) found inside the answer wins.
        y_pred.append(next((category for category in categories if category in answer), "none"))

    return y_pred

In [None]:
def evaluate(y_true, y_pred):
    """Print accuracy, per-label accuracy, a classification report and a confusion matrix.

    Args:
        y_true: iterable of true label strings (``"normal"`` / ``"spam"``).
        y_pred: iterable of predicted label strings, same length as ``y_true``;
            anything other than the two known labels (e.g. ``"none"``) is
            encoded as ``-1``.

    Returns:
        None. All results are printed.

    Raises:
        ValueError: if ``y_true`` or ``y_pred`` is empty.
    """
    labels = ["normal", "spam"]
    label_ids = list(range(len(labels)))
    mapping = dict(zip(labels, label_ids))

    def encode(values):
        # Unknown strings become -1 so they never count as a correct prediction.
        return np.array([mapping.get(value, -1) for value in values], dtype=int)

    y_true_mapped = encode(y_true)
    y_pred_mapped = encode(y_pred)
    if y_true_mapped.size == 0 or y_pred_mapped.size == 0:
        raise ValueError("evaluate() needs at least one true label and one prediction")

    accuracy = accuracy_score(y_true=y_true_mapped, y_pred=y_pred_mapped)
    print(f'Accuracy: {accuracy:.3f}')

    # Accuracy restricted to the examples of each true label, in ascending id order.
    for label_id in sorted(set(y_true_mapped.tolist())):
        selected = y_true_mapped == label_id
        label_accuracy = accuracy_score(y_true_mapped[selected], y_pred_mapped[selected])
        # -1 marks a true label outside `labels`; indexing labels[-1] would
        # silently report it under the last known label.
        label_name = labels[label_id] if label_id >= 0 else "unknown"
        print(f'Accuracy for label {label_name}: {label_accuracy:.3f}')

    class_report = classification_report(y_true=y_true_mapped, y_pred=y_pred_mapped, target_names=labels, labels=label_ids)
    print('\nClassification Report:')
    print(class_report)

    # Generate confusion matrix (rows: true label id, columns: predicted label id)
    conf_matrix = confusion_matrix(y_true=y_true_mapped, y_pred=y_pred_mapped, labels=label_ids)
    print('\nConfusion Matrix:')
    print(conf_matrix)

In [None]:
# Baseline: score the base model on the held-out test prompts before any fine-tuning.
y_pred = predict(X_test, model, tokenizer)
evaluate(y_true, y_pred)

## Extracting the linear modules names

In [None]:
import bitsandbytes as bnb

def find_all_linear_names(model):
    """Collect the leaf names of all 4-bit linear layers in ``model``.

    Args:
        model: object exposing ``named_modules()`` that yields
            ``(qualified_name, module)`` pairs.

    Returns:
        list[str]: the distinct last components of the qualified names of
        every ``bnb.nn.Linear4bit`` module, with ``'lm_head'`` excluded.
        The order of the list is not defined.
    """
    linear_cls = bnb.nn.Linear4bit
    leaf_names = {
        qualified_name.split('.')[-1]
        for qualified_name, submodule in model.named_modules()
        if isinstance(submodule, linear_cls)
    }
    # needed for 16 bit
    leaf_names.discard('lm_head')
    return list(leaf_names)

In [None]:
modules = find_all_linear_names(model)
modules

['gate_proj', 'up_proj', 'k_proj', 'down_proj', 'o_proj', 'v_proj', 'q_proj']

## Setting up the model

In [None]:
# Local output directory and Hub repository for the fine-tuned teacher.
output_dir="spam-llama3.1-8B-teacher"
hub_model_id = "arshiakarimian1/spam-llama3.1-8B-teacher-2"

# LoRA adapter settings; the adapters are attached to every module name
# collected in `modules` above.
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.25,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules #['q_proj', 'v_proj', 'o_proj'], # which modules to use
)

# One epoch, batch size 1 with 8 accumulation steps, fp16, cosine schedule.
# `eval_steps=0.2` is a fraction, not a step count (the recorded log shows an
# evaluation every 91 steps).
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    gradient_checkpointing=True,
    optim="paged_adamw_32bit",
    logging_steps=1,
    learning_rate=5e-5,
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=False,
    lr_scheduler_type="cosine",
    eval_strategy="steps",
    eval_steps=0.2,
    # Add these parameters for pushing to Hub
    push_to_hub=True,
    hub_model_id=hub_model_id,
    hub_strategy="every_save",
)

# Supervised fine-tuning on the prompt text stored in the `sms` column.
# NOTE(review): the prompts put the answer ("label: ...") at the very end, so
# confirm that `max_seq_length=64` is long enough that the label is not
# truncated away for longer messages.
# NOTE(review): the recorded output warns that passing these arguments directly
# to SFTTrainer is deprecated in favour of SFTConfig.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=train_data,
    eval_dataset=eval_data,
    peft_config=peft_config,
    dataset_text_field="sms",
    tokenizer=tokenizer,
    max_seq_length=64,
    packing=False,
    dataset_kwargs={
    "add_special_tokens": False,
    "append_concat_token": False,
    }
)

# Start fine-tuning.
trainer.train()


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/3623 [00:00<?, ? examples/s]

Map:   0%|          | 0/278 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss,Validation Loss
91,1.6576,1.805522
182,1.5242,1.728432
273,1.5695,1.688723
364,1.3364,1.674009


TrainOutput(global_step=452, training_loss=1.7332487012696478, metrics={'train_runtime': 2029.9576, 'train_samples_per_second': 1.785, 'train_steps_per_second': 0.223, 'total_flos': 8007809538834432.0, 'train_loss': 1.7332487012696478, 'epoch': 0.9980678995307756})

## Model Training

In [None]:
# wandb.finish()
# model.config.use_cache = True

## Saving the model and tokenizer

In [None]:
# Save the trained model and the tokenizer to `output_dir`
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

('spam-llama3.1-8B-teacher/tokenizer_config.json',
 'spam-llama3.1-8B-teacher/special_tokens_map.json',
 'spam-llama3.1-8B-teacher/tokenizer.json')

## Testing model after fine-tuning

In [None]:
########### Loading The Model #############
# Reload the fine-tuned teacher from the Hub repository; `model` is rebound to
# it, so every later cell that uses `model` refers to the fine-tuned teacher.
saved_model_name = "arshiakarimian1/spam-llama3.1-8B-teacher-2"

# Same 4-bit NF4 / float16 quantization settings as for the base model.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)

model = AutoModelForCausalLM.from_pretrained(
    saved_model_name,
    device_map="auto",
    torch_dtype="float16",
    quantization_config=bnb_config,
)

# Same config overrides as applied to the base model after loading.
model.config.use_cache = False
model.config.pretraining_tp = 1

adapter_config.json:   0%|          | 0.00/740 [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/671M [00:00<?, ?B/s]

In [None]:
# Score the reloaded fine-tuned teacher on the same held-out test prompts.
y_pred = predict(X_test, model, tokenizer)
evaluate(y_true, y_pred)

100%|██████████| 1673/1673 [07:51<00:00,  3.55it/s]

Accuracy: 0.987
Accuracy for label normal: 0.995
Accuracy for label spam: 0.927

Classification Report:
              precision    recall  f1-score   support

      normal       0.99      1.00      0.99      1468
        spam       0.96      0.93      0.95       205

    accuracy                           0.99      1673
   macro avg       0.98      0.96      0.97      1673
weighted avg       0.99      0.99      0.99      1673


Confusion Matrix:
[[1461    7]
 [  15  190]]





# Creating Student Model

In [None]:
# from transformers import AutoConfig, AutoModelForCausalLM

# # Define the student model configuration
# student_config = AutoConfig.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")
# student_config.num_hidden_layers = 8  # Reduce the number of layers
# # Create the student model
# student_model = AutoModelForCausalLM.from_config(student_config)

# # Move the student model to the same device as the teacher model
# student_model.to(model.device)

# Raw PyTorch

In [None]:
# import torch
# from torch.utils.data import Dataset, DataLoader

# class TeacherForcingDataset(Dataset):
#     def __init__(self, data, tokenizer, max_length):
#         self.data = data
#         self.tokenizer = tokenizer
#         self.max_length = max_length

#     def __len__(self):
#         return len(self.data)

#     def __getitem__(self, idx):
#         item = self.data.iloc[idx]
#         inputs = self.tokenizer(item['sms'], truncation=True, padding='max_length', max_length=self.max_length, return_tensors='pt')
#         return {key: val.squeeze(0) for key, val in inputs.items()}

# # Create datasets and dataloaders
# train_dataset = TeacherForcingDataset(X_train, tokenizer, max_length=512)
# eval_dataset = TeacherForcingDataset(X_eval, tokenizer, max_length=512)

# train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)
# eval_dataloader = DataLoader(eval_dataset, batch_size=8)
# import torch
# from torch.utils.data import Dataset, DataLoader

# class TeacherForcingDataset(Dataset):
#     def __init__(self, data, tokenizer, max_length):
#         self.data = data
#         self.tokenizer = tokenizer
#         self.max_length = max_length

#     def __len__(self):
#         return len(self.data)

#     def __getitem__(self, idx):
#         item = self.data.iloc[idx]
#         inputs = self.tokenizer(item['sms'], truncation=True, padding='max_length', max_length=self.max_length, return_tensors='pt')
#         return {key: val.squeeze(0) for key, val in inputs.items()}

# # Create datasets and dataloaders
# train_dataset = TeacherForcingDataset(X_train, tokenizer, max_length=512)
# eval_dataset = TeacherForcingDataset(X_eval, tokenizer, max_length=512)

# train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)
# eval_dataloader = DataLoader(eval_dataset, batch_size=8)
# import torch
# from torch.utils.data import Dataset, DataLoader

# class TeacherForcingDataset(Dataset):
#     def __init__(self, data, tokenizer, max_length):
#         self.data = data
#         self.tokenizer = tokenizer
#         self.max_length = max_length

#     def __len__(self):
#         return len(self.data)

#     def __getitem__(self, idx):
#         item = self.data.iloc[idx]
#         inputs = self.tokenizer(item['sms'], truncation=True, padding='max_length', max_length=self.max_length, return_tensors='pt')
#         return {key: val.squeeze(0) for key, val in inputs.items()}

# # Create datasets and dataloaders
# train_dataset = TeacherForcingDataset(X_train, tokenizer, max_length=256)
# eval_dataset = TeacherForcingDataset(X_eval, tokenizer, max_length=256)

# train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)
# eval_dataloader = DataLoader(eval_dataset, batch_size=8)

In [None]:
# import torch
# import torch.nn.functional as F
# from tqdm import tqdm
# from torch.cuda.amp import autocast, GradScaler
# import gc

# def train_student(student_model, teacher_model, train_dataloader, eval_dataloader, num_epochs, learning_rate):
#     optimizer = torch.optim.AdamW(student_model.parameters(), lr=learning_rate)
#     scaler = GradScaler()
#     accumulation_steps = 4  # Adjust as needed

#     for epoch in range(num_epochs):
#         student_model.train()
#         total_loss = 0
#         optimizer.zero_grad()

#         for batch_idx, batch in enumerate(tqdm(train_dataloader, desc=f"Epoch {epoch + 1}")):
#             input_ids = batch['input_ids'].to(student_model.device)
#             attention_mask = batch['attention_mask'].to(student_model.device)

#             with torch.no_grad():
#                 teacher_outputs = teacher_model(input_ids=input_ids, attention_mask=attention_mask)
#                 teacher_logits = teacher_outputs.logits

#             with autocast():
#                 student_outputs = student_model(input_ids=input_ids, attention_mask=attention_mask)
#                 student_logits = student_outputs.logits
#                 loss = F.mse_loss(student_logits, teacher_logits)
#                 loss = loss / accumulation_steps

#             scaler.scale(loss).backward()

#             if (batch_idx + 1) % accumulation_steps == 0:
#                 scaler.step(optimizer)
#                 scaler.update()
#                 optimizer.zero_grad()

#             total_loss += loss.item() * accumulation_steps

#             if batch_idx % 10 == 0:  # Clear cache every 10 batches
#                 torch.cuda.empty_cache()

#         avg_loss = total_loss / len(train_dataloader)
#         print(f"Epoch {epoch + 1}, Average Loss: {avg_loss:.4f}")

#         # Evaluation
#         student_model.eval()
#         eval_loss = 0

#         with torch.no_grad():
#             for batch in eval_dataloader:
#                 input_ids = batch['input_ids'].to(student_model.device)
#                 attention_mask = batch['attention_mask'].to(student_model.device)

#                 teacher_outputs = teacher_model(input_ids=input_ids, attention_mask=attention_mask)
#                 teacher_logits = teacher_outputs.logits

#                 student_outputs = student_model(input_ids=input_ids, attention_mask=attention_mask)
#                 student_logits = student_outputs.logits

#                 loss = F.mse_loss(student_logits, teacher_logits)
#                 eval_loss += loss.item()

#         avg_eval_loss = eval_loss / len(eval_dataloader)
#         print(f"Epoch {epoch + 1}, Evaluation Loss: {avg_eval_loss:.4f}")

#         torch.cuda.empty_cache()
#         gc.collect()

# # Train the student model
# num_epochs = 10
# learning_rate = 1e-4

# train_student(student_model, model, train_dataloader, eval_dataloader, num_epochs, learning_rate)

In [None]:
# def evaluate_student(student_model, test_data, tokenizer):
#     student_model.eval()
#     predictions = []

#     with torch.no_grad():
#         for _, row in test_data.iterrows():
#             inputs = tokenizer(row['sms'], return_tensors='pt', truncation=True, max_length=512)
#             inputs = {k: v.to(student_model.device) for k, v in inputs.items()}

#             outputs = student_model(**inputs)
#             logits = outputs.logits

#             # Get the predicted token (spam or normal)
#             predicted_token = tokenizer.decode(logits[0, -1, :].argmax())
#             predictions.append(predicted_token.strip())

#     return predictions

# # Evaluate the student model
# student_predictions = evaluate_student(student_model, X_test, tokenizer)

# # Calculate accuracy
# from sklearn.metrics import accuracy_score

# accuracy = accuracy_score(y_true, student_predictions)
# print(f"Student Model Accuracy: {accuracy:.4f}")

In [None]:
# st_repo_name = "arshiakarimian1/spam-student-4-256"
# student_model.push_to_hub(st_repo_name)

In [None]:
import gc
# Ask PyTorch to release its cached CUDA memory, then run Python's garbage
# collector; the number shown as cell output is the return value of gc.collect().
torch.cuda.empty_cache()
gc.collect()

51

# Model Visualization

In [None]:
# %%capture
# !pip install torchview

In [None]:
# from transformers import AutoModel, AutoTokenizer
# from torchview import draw_graph


# # model_for_viz = AutoModel.from_pretrained(
# #     'meta-llama/Meta-Llama-3.1-8B-Instruct',
# # )
# tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")
# inputs = tokenizer("Don't Miss Our 80% Sales On Amazon!", return_tensors="pt")

# model_graph = draw_graph(student_model, input_data=inputs)
# model_graph.visual_graph

# Using Hugging Face Instead

In [None]:
from transformers import Trainer, TrainingArguments
from transformers import AutoConfig, AutoModelForCausalLM
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
# Data collator
from transformers import DataCollatorWithPadding


In [None]:

class TeacherForcingDataset(Dataset):
    """Dataset that tokenizes the ``sms`` text of one row per requested index.

    Args:
        data: pandas DataFrame with an ``sms`` column; rows are read with ``.iloc``.
        tokenizer: callable invoked with the text and the keyword arguments
            ``truncation``, ``padding``, ``max_length`` and ``return_tensors``.
        max_length: length every example is truncated/padded to.
    """

    def __init__(self, data, tokenizer, max_length):
        self.data, self.tokenizer, self.max_length = data, tokenizer, max_length

    def __len__(self):
        """Number of rows in the wrapped frame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Tokenize row ``idx`` and return its tensors without the leading batch axis."""
        text = self.data.iloc[idx]['sms']
        encoded = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt',
        )
        sample = {}
        for field, tensor in encoded.items():
            # The tokenizer returns a batch of one; drop that first dimension.
            sample[field] = tensor.squeeze(0)
        return sample




In [None]:
# Create your student model: start from the teacher's base configuration and shrink it.
student_config = AutoConfig.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")
student_config.num_hidden_layers = 6  # Reduce the number of layers
student_config.hidden_size = 2048  # Reduce the hidden size

# Build the student from the modified config (`from_config`, not `from_pretrained`)
# and put it on the same device as `model`.
student_model = AutoModelForCausalLM.from_config(student_config)
student_model.to(model.device)

# Create datasets; every prompt is truncated/padded to 64 tokens.
train_dataset = TeacherForcingDataset(X_train, tokenizer, max_length=64)
eval_dataset = TeacherForcingDataset(X_eval, tokenizer, max_length=64)

In [None]:
class TeacherForcingDataCollator(DataCollatorWithPadding):
    """Padding collator that also attaches the teacher's logits to each batch.

    Args:
        tokenizer: tokenizer forwarded to ``DataCollatorWithPadding`` (with ``padding=True``).
        teacher_model: callable model whose output exposes ``.logits``.
    """

    def __init__(self, tokenizer, teacher_model):
        super().__init__(tokenizer, padding=True)
        self.teacher_model = teacher_model

    def __call__(self, features):
        """Collate ``features`` and add a ``"teacher_logits"`` entry to the batch."""
        padded = super().__call__(features)
        # The teacher is only queried for its predictions; no gradients are tracked.
        with torch.no_grad():
            teacher_logits = self.teacher_model(**padded).logits
        padded["teacher_logits"] = teacher_logits
        return padded

data_collator = TeacherForcingDataCollator(tokenizer, model)  # 'model' is your teacher model

In [None]:
# Hub repository of the student; also used as the prefix of the local output/log directories.
student_dir_name = "arshiakarimian1/spam-student-6-2048"

# Training arguments for the distillation run
training_args = TrainingArguments(
    output_dir=student_dir_name + "/results",
    num_train_epochs=10,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=4,
    warmup_steps=500,
    weight_decay=0.015,
    learning_rate=5e-3,  # 5e-5 is the default
    logging_dir=student_dir_name + "/logs",
    logging_steps=10,
    # `eval_strategy` is the current name of this option and is the spelling the
    # teacher's TrainingArguments in this notebook already use; the older
    # `evaluation_strategy` keyword is deprecated and rejected by recent
    # transformers releases, which the unpinned `-U` install pulls in.
    eval_strategy="no",
    eval_steps=500,
    save_steps=500,
    fp16=True,
    push_to_hub=True,
    hub_model_id=student_dir_name  # Replace with your desired model name
)

# Custom Trainer
class TeacherStudentTrainer(Trainer):
    """Trainer whose loss is the MSE between student logits and teacher logits.

    The teacher logits are expected under the ``"teacher_logits"`` key of each
    batch; they are removed from the batch before the student is called.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the distillation loss for one batch.

        Args:
            model: the student model.
            inputs: batch dict; ``"teacher_logits"`` is popped from it (the
                dict is modified in place), the rest is passed to ``model``.
            return_outputs: when true, return ``(loss, outputs)`` instead of
                only the loss.
            num_items_in_batch: accepted because recent ``Trainer`` versions
                pass this keyword to ``compute_loss``; it is not used here.

        Returns:
            The loss tensor, or ``(loss, outputs)`` if ``return_outputs`` is true.
        """
        teacher_logits = inputs.pop("teacher_logits", None)
        outputs = model(**inputs)
        if teacher_logits is not None:
            # Distillation: match the student's logits to the teacher's.
            # NOTE(review): every position contributes, padded ones included — confirm this is intended.
            loss = F.mse_loss(outputs.logits, teacher_logits)
        else:
            # No teacher logits in the batch: fall back to the model's own loss,
            # or a zero placeholder when the model did not compute one.
            loss = outputs.loss if outputs.loss is not None else torch.tensor(0.0).to(outputs.logits.device)
        return (loss, outputs) if return_outputs else loss


# Create and run the Trainer.
# `trainer` is rebound here: from this point on it refers to the student's
# trainer, not the SFTTrainer used for the teacher.
trainer = TeacherStudentTrainer(
    model=student_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
)

# Start the distillation run.
trainer.train()

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss
10,0.6483
20,0.6585
30,0.6131
40,0.6721
50,0.631
60,0.6076
70,0.6067
80,0.5845
90,0.5636
100,0.5616


TrainOutput(global_step=1130, training_loss=0.4351237451080727, metrics={'train_runtime': 2284.6302, 'train_samples_per_second': 15.858, 'train_steps_per_second': 0.495, 'total_flos': 1.1856488368963584e+16, 'train_loss': 0.4351237451080727, 'epoch': 9.977924944812361})

In [None]:
trainer.push_to_hub()

CommitInfo(commit_url='https://huggingface.co/arshiakarimian1/spam-student-6-2048/commit/c2adb2d93bae762461c9bf8a4503192e9ae5b7ca', commit_message='End of training', commit_description='', oid='c2adb2d93bae762461c9bf8a4503192e9ae5b7ca', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
def predict(test, model, tokenizer):
    """Generate a spam/normal prediction for every prompt and keep the raw text.

    NOTE: this definition replaces the ``predict`` defined earlier in the
    notebook and has a different return type (a tuple instead of a list), so
    earlier evaluation cells must not be re-run after this cell without
    adapting them.

    Args:
        test: pandas DataFrame whose ``sms`` column holds the prompts; rows
            are read positionally through ``.iloc``.
        model: model handed to the ``text-generation`` pipeline.
        tokenizer: tokenizer handed to the same pipeline.

    Returns:
        tuple[list[str], list[str]]: ``(y_pred, real_preds)`` where ``y_pred``
        holds ``"normal"``, ``"spam"`` or ``"none"`` per row and
        ``real_preds`` holds the full generated text per row.
    """
    categories = ["normal", "spam"]

    # The pipeline arguments do not depend on the row, so build it once
    # instead of once per example.
    pipe = pipeline(task="text-generation",
                    model=model,
                    tokenizer=tokenizer,
                    max_new_tokens=2,
                    temperature=0.1,
                    device=torch.device('cuda')
                    )

    y_pred = []
    real_preds = []
    for i in tqdm(range(len(test))):
        prompt = test.iloc[i]["sms"]
        result = pipe(prompt)
        full_answer = result[0]['generated_text']
        # Whatever follows the last "label:" in the returned text is treated as the answer.
        answer = full_answer.split("label:")[-1].strip().lower()
        # First category (in list order) found inside the answer wins.
        y_pred.append(next((category for category in categories if category in answer), "none"))
        real_preds.append(full_answer)

    return y_pred, real_preds

In [None]:
# Score the distilled student on the held-out test prompts; `real_preds` keeps
# the full generated texts for manual inspection.
y_pred, real_preds = predict(X_test, student_model, tokenizer)
evaluate(y_true, y_pred)

100%|██████████| 1673/1673 [00:44<00:00, 37.84it/s]

Accuracy: 0.979
Accuracy for label normal: 1.000
Accuracy for label spam: 0.829

Classification Report:
              precision    recall  f1-score   support

      normal       0.98      1.00      0.99      1468
        spam       1.00      0.83      0.91       205

    accuracy                           0.98      1673
   macro avg       0.99      0.91      0.95      1673
weighted avg       0.98      0.98      0.98      1673


Confusion Matrix:
[[1468    0]
 [  35  170]]



