In [1]:
import torch
from datasets import Dataset
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForCausalLM ,BitsAndBytesConfig
import pandas as pd
from sklearn.metrics import classification_report

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
MODEL_NAME = "microsoft/Phi-3-mini-128k-instruct"
DATA_PATH = "data/spam_or_not_spam.csv"
MAX_LENGTH = 512
hf_token = 

In [3]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True,token = hf_token)
tokenizer.pad_token = tokenizer.eos_token

In [4]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # ✅ evita il warning
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

In [5]:
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    token = hf_token
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.84s/it]


In [6]:
def format_chat(example):
    conversation = [
        {
            "role": "user",
            "content": f'Classify this email as spam (1) or not spam (0): "{example["email"]}"'
        },
        {
            "role": "assistant",
            "content": str(example["label"])
        }
    ]
    full_text = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=False
    )
    return {"text": full_text}

In [7]:
def tokenize(example):
    return tokenizer(example["email"], truncation=True, padding="max_length", max_length=MAX_LENGTH)

In [8]:
df = pd.read_csv("data/spam_or_not_spam.csv").dropna(subset=["email", "label"])
df = df.dropna(subset=['email', 'label'])

train_val_df, test_df = train_test_split(df, test_size=0.2, random_state=1)

train_df, val_df = train_test_split(train_val_df, test_size=0.2, random_state=1)

train = Dataset.from_pandas(train_df.reset_index(drop=True))
val = Dataset.from_pandas(val_df.reset_index(drop=True))
test = Dataset.from_pandas(test_df.reset_index(drop=True))

In [9]:
val = val.map(tokenize)
test = test.map(tokenize)

Map: 100%|██████████| 480/480 [00:00<00:00, 1149.57 examples/s]
Map: 100%|██████████| 600/600 [00:00<00:00, 1590.32 examples/s]


In [10]:
model.eval()
predictions, references = [], []

for example in val:
    input_ids = torch.tensor(example["input_ids"]).unsqueeze(0).to("cuda")
    attention_mask = torch.tensor(example["attention_mask"]).unsqueeze(0).to("cuda")

    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=2,
            pad_token_id=tokenizer.pad_token_id
        )

    decoded = tokenizer.decode(output[0], skip_special_tokens=True).strip()
    pred = 1 if "1" in decoded[-3:] else 0
    true = int(example["label"])

    predictions.append(pred)
    references.append(true)

print("== Classification Report on Validation Set ==")
print(classification_report(references, predictions, digits=4))

== Classification Report on Validation Set ==
              precision    recall  f1-score   support

           0     0.8378    0.6975    0.7613       400
           1     0.1769    0.3250    0.2291        80

    accuracy                         0.6354       480
   macro avg     0.5074    0.5112    0.4952       480
weighted avg     0.7277    0.6354    0.6726       480



In [11]:
model.eval()
predictions, references = [], []

for example in test:
    input_ids = torch.tensor(example["input_ids"]).unsqueeze(0).to("cuda")
    attention_mask = torch.tensor(example["attention_mask"]).unsqueeze(0).to("cuda")

    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=2,
            pad_token_id=tokenizer.pad_token_id
        )

    decoded = tokenizer.decode(output[0], skip_special_tokens=True).strip()
    pred = 1 if "1" in decoded[-3:] else 0
    true = int(example["label"])

    predictions.append(pred)
    references.append(true)

print("== Classification Report on Test Set ==")
print(classification_report(references, predictions, digits=4))

== Classification Report on Test Set ==
              precision    recall  f1-score   support

           0     0.8042    0.6931    0.7445       492
           1     0.1420    0.2315    0.1761       108

    accuracy                         0.6100       600
   macro avg     0.4731    0.4623    0.4603       600
weighted avg     0.6850    0.6100    0.6422       600

