In [9]:
import pandas as pd
import torch
import time
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load test data.
# Only the 'text' and 'label' columns are used; rows missing either are dropped.
df = pd.read_csv("Test_Dataset.csv", encoding='cp1252')
df = df[['text', 'label']].dropna()
# Cast to str first so the .str accessor also works if pandas parsed the
# column as a non-string dtype.
df['label'] = df['label'].astype(str).str.lower().str.strip()
label_map = {'fraud': 1, 'normal': 0}
df['label_id'] = df['label'].map(label_map)

# Labels that are not in label_map come back from .map() as NaN. Drop those
# rows (and say so) instead of letting NaN targets reach the metrics later.
unmapped_rows = df['label_id'].isna()
if unmapped_rows.any():
    print(
        f"Dropping {int(unmapped_rows.sum())} row(s) with unrecognised labels: "
        f"{sorted(df.loc[unmapped_rows, 'label'].unique())}"
    )
    df = df.loc[~unmapped_rows].copy()
df['label_id'] = df['label_id'].astype(int)

# Local locations of the Gemma 2B base weights and the fine-tuned LoRA checkpoint
base_model_path = r"C:\Users\HAN4COB\.conda\envs\test\AI_Enabled_Scam_Call_Detection\gemma-2b"
lora_adapter_path = r"C:\Users\HAN4COB\.conda\envs\test\AI_Enabled_Scam_Call_Detection\gemma2b-finetuned\kaggle\working\gemma2b-finetuned\checkpoint-2250"

# Base model (float32, every module mapped to the CPU) and its tokenizer,
# both read from local files only.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    local_files_only=True,
    device_map={"": "cpu"},
    torch_dtype=torch.float32,
)
tokenizer = AutoTokenizer.from_pretrained(base_model_path, local_files_only=True)

# Attach the LoRA adapter on top of the base model and switch to eval mode
model = PeftModel.from_pretrained(
    base_model,
    lora_adapter_path,
    local_files_only=True,
    device_map={"": "cpu"},
)
model.eval()

# Classify function
# Substrings that mark a generated label as fraud (1) or normal (0).
_FRAUD_KEYWORDS = ("fraud", "urgent", "loan", "kyc", "promotion", "limited", "alert", "job", "click", "investment")
_NORMAL_KEYWORDS = ("normal", "chat", "call", "personal", "fun", "home", "invitation", "friend", "plan")


def _label_line_to_class(label_line):
    """Return 1 (fraud), 0 (normal) or None for one lower-cased label line.

    Fraud keywords are checked first, so a line containing both kinds of
    keyword is treated as fraud.
    """
    if any(keyword in label_line for keyword in _FRAUD_KEYWORDS):
        return 1
    if any(keyword in label_line for keyword in _NORMAL_KEYWORDS):
        return 0
    return None


def classify(text):
    """Generate a label for ``text`` and map it to a binary class.

    Args:
        text: The call transcript to classify.

    Returns:
        A ``(pred, label)`` tuple. ``pred`` is 1 for fraud, 0 for normal, or
        ``None`` when the first generated line matches no keyword. ``label``
        is the stripped, lower-cased text the model generated after the
        prompt (possibly several lines).
    """
    prompt = f"Classify this call:\n{text}\nLabel:"
    # A single prompt needs no padding, and truncation=True without a
    # max_length is a no-op that only triggers a tokenizer warning, so
    # neither flag is passed.
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=5,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the newly generated tokens. Splitting the full decoded text
    # on "Label:" picks the wrong span if the model emits "Label:" again.
    completion = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
    label = completion.strip().lower()

    # The model keeps generating after the label (e.g. "urgent\ncall type:").
    # Match keywords on the first line only so that trailing text cannot
    # decide the class (e.g. "call" is itself a normal keyword).
    first_line = label.splitlines()[0].strip() if label else ""
    return _label_line_to_class(first_line), label

# Inference
# preds / labels / texts / kept_raw hold only the samples whose generated
# label could be mapped to a class, and stay index-aligned with each other.
# raw_preds keeps the raw output of every sample, including skipped ones.
preds, labels, texts, raw_preds = [], [], [], []
kept_raw = []
start_time = time.time()

for _, row in df.iterrows():
    pred, raw = classify(row['text'])
    raw_preds.append(raw)
    if pred is None:
        # Unmappable output: excluded from the metrics and the report.
        continue
    preds.append(pred)
    labels.append(row['label_id'])
    texts.append(row['text'])
    kept_raw.append(raw)

total_time = time.time() - start_time
# Averaged over every processed row (skipped ones included); guard against
# an empty dataset.
avg_time = total_time / len(df) if len(df) else 0.0

# Metrics
if preds:
    acc = accuracy_score(labels, preds)
    prec = precision_score(labels, preds, zero_division=0)
    rec = recall_score(labels, preds, zero_division=0)
    f1 = f1_score(labels, preds, zero_division=0)
else:
    acc = prec = rec = f1 = 0.0

# Print detailed results
# zip order is (text, true label, prediction, raw output). kept_raw is used
# instead of raw_preds[:len(preds)], which drifts out of alignment as soon
# as one sample is skipped.
for i, (text, true_label, pred_label, raw) in enumerate(zip(texts, labels, preds, kept_raw), start=1):
    result = "✅" if pred_label == true_label else "❌"
    print(f"[{i:03}] {result} True: {'fraud' if true_label else 'normal'} | Pred: {'fraud' if pred_label else 'normal'} | Raw: {raw}\n     Text: {text[:100]}...\n")

# Summary: classification scores over the samples that received a
# prediction, followed by the timing figures.
print("\n✅ Overall Evaluation Metrics (on", len(preds), "samples):")
print(
    f"Accuracy : {acc:.4f}",
    f"Precision: {prec:.4f}",
    f"Recall   : {rec:.4f}",
    f"F1 Score : {f1:.4f}",
    sep="\n",
)
print(
    f"\n⏱️ Total Inference Time   : {total_time:.2f} sec",
    f"⏱️ Avg Time per Sample     : {avg_time:.2f} sec",
    sep="\n",
)


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


[001] ✅ True: fraud | Pred: fraud | Raw: urgent
share: share
     Text: Nani, it’s your favorite grandson. I’m stranded in Goa with no money. Send Rs. 3,000 at http://trave...

[002] ✅ True: fraud | Pred: fraud | Raw: urgent
share: share
     Text: Uncle, it’s Aman. I’m in deep trouble in Ahmedabad. Need Rs. 9,000 to escape danger. Click http://sa...

[003] ✅ True: fraud | Pred: fraud | Raw: urgent
call type:
     Text: Mummy, I’ve been in a bike crash in Bangalore. I’m bleeding. Need Rs. 8,000 now. Click http://urgent...

[004] ✅ True: fraud | Pred: fraud | Raw: urgent
call type:
     Text: Dad, I’ve been in a bike accident in Gurgaon. Need Rs. 6,500 for hospital bills. Click http://gurgao...

[005] ✅ True: fraud | Pred: fraud | Raw: urgent! your parcel is
     Text: Hello madam, this is from the courier company. Your package is stuck due to unpaid customs duty. Ple...

[006] ✅ True: fraud | Pred: fraud | Raw: limited time offer
call
     Text: Congratulations! You've been chosen as o