In [None]:
# ==============================================================================
# CELL: GET PER-CLASS REPORT FOR WINNING MODEL
# ==============================================================================
import warnings
# Suppress every Python warning for the remainder of the session
# (presumably to keep the notebook output readable).
warnings.filterwarnings("ignore")

# --- Step 1: Install Libraries and Import ---
print("Installing necessary libraries...")
# IPython shell escape (notebook-only syntax, not valid in a plain .py file):
# installs the listed packages; --quiet reduces pip's console output.
!pip install transformers datasets scikit-learn pandas openpyxl --quiet

import pandas as pd
import numpy as np
from google.colab import drive
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, f1_score
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import torch
import gc
import re

# Pick the compute device: use CUDA when PyTorch reports a usable GPU,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# --- Step 2: Mount Google Drive ---
# force_remount=True is passed so the mount is redone even if the drive
# was already mounted earlier in this session.
print("\nMounting Google Drive...")
drive.mount('/content/drive', force_remount=True)

# --- Step 3: Define Helper Functions ---
def load_data_from_drive(filename="Copy of behaviour_simulation_train.xlsx"):
    """Load an Excel workbook from the mounted Drive root as a DataFrame.

    The columns 'dates' and 'inferred company' are renamed to 'date' and
    'company', and the 'date' column is converted with ``pd.to_datetime``.

    Args:
        filename: File name relative to ``/content/drive/MyDrive/``.

    Returns:
        The prepared DataFrame, or ``None`` if anything goes wrong while
        reading or transforming the file (the error is printed, not raised).
    """
    file_path = f'/content/drive/MyDrive/{filename}'
    frame = None
    try:
        print(f"\nAttempting to load full training data from: {file_path}")
        loaded = pd.read_excel(file_path)
        print(f"Data loaded successfully! Shape: {loaded.shape}")
        # Normalise the column names used by the rest of the notebook.
        loaded = loaded.rename(
            columns={'dates': 'date', 'inferred company': 'company'}
        )
        loaded['date'] = pd.to_datetime(loaded['date'])
        frame = loaded
    except Exception as exc:
        # Best-effort loader: report the problem and signal failure via None.
        print(f"An error occurred: {exc}")
    return frame

def create_buckets(likes):
    """Map a like count to its engagement bucket index.

    Upper bounds are inclusive: values up to 100 map to 0, up to 1000 to 1,
    up to 10000 to 2, and everything else to 3.
    """
    upper_bounds = (100, 1000, 10000)
    for bucket, bound in enumerate(upper_bounds):
        if likes <= bound:
            return bucket
    # Nothing matched: the value lies beyond the last bound.
    return len(upper_bounds)

def format_input_text(row):
    """Build the single-line model input string for one tweet record.

    Reads 'content', 'company', 'date' and 'media' from ``row`` and returns
    a string of the form
    ``Brand: <company> | Day: <weekday> | Hour: <hour> | Media: <yes/no> | Tweet: <text>``.
    'Media' is "yes" whenever the 'media' value is not null according to
    pandas.
    """
    tweet_text = str(row['content']).strip()
    company = str(row['company']).strip()
    posted_at = row['date']
    media_flag = "no" if pd.isna(row['media']) else "yes"
    fields = (
        f"Brand: {company}",
        f"Day: {posted_at.day_name()}",
        f"Hour: {posted_at.hour}",
        f"Media: {media_flag}",
        f"Tweet: {tweet_text}",
    )
    return " | ".join(fields)

# --- Step 4: Re-create the 60,000-sample Validation Set ---
train_df = load_data_from_drive()
if train_df is None:
    # load_data_from_drive() signals failure by returning None (after printing
    # the cause). Stop here with a clear message instead of failing on the
    # next line with an opaque "'NoneType' object is not subscriptable".
    raise RuntimeError(
        "Training data could not be loaded; see the error printed above."
    )

print("Applying transformations to the full dataset...")
# Bucket the raw like counts into class labels and build the model input text.
train_df['label'] = train_df['likes'].apply(create_buckets)
train_df['text'] = train_df.apply(format_input_text, axis=1)

print("Re-creating the identical 80/20 train/validation split...")
# A fixed random_state plus stratification on the label makes the split
# reproducible, so the same rows land in the validation set on every run
# (given the same input data).
df_train, df_val = train_test_split(
    train_df[['text', 'label']],
    test_size=0.2,
    random_state=42, # This ensures we get the *same* 60,000 samples
    stratify=train_df['label']
)
print(f"Validation set (`df_val`) created with {len(df_val)} samples.")

# --- Step 5: Load Your Winning Model and Tokenizer ---
# Directory on the mounted Drive from which both the tokenizer and the
# model are loaded.
MODEL_PATH = "/content/drive/MyDrive/my_best_ROBERTA_model3"
print(f"Loading winning model and tokenizer from {MODEL_PATH}...")

# Best-effort load: any exception is printed rather than raised, so the
# cell keeps running after a failure. In that case `tokenizer` and/or
# `model` are NOT assigned by this block (they stay unbound, or keep
# whatever value an earlier cell run left behind), so later steps need to
# verify that the names exist before using them.
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    # Load the sequence-classification model and move it to `device`.
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH).to(device)
    print("Model loaded successfully.")
except Exception as e:
    print(f"*** ERROR LOADING MODEL: {e} ***")
    print("Please check the path and folder contents.")

# --- Step 6: Tokenize the Validation Set ---
def tokenize(batch):
    """Tokenize the 'text' field of a batch with the module-level tokenizer.

    Every sequence is truncated and padded to exactly 256 tokens.
    Returns whatever the tokenizer call returns for the batch.
    """
    encode_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 256,
    }
    return tokenizer(batch['text'], **encode_options)

# Tokenization and evaluation both depend on the objects loaded in Step 5.
# That step only prints load errors, so confirm that BOTH names exist before
# using either one; otherwise a failed tokenizer load would surface here as
# a NameError instead of reaching the "skipping" message below.
if 'model' in locals() and 'tokenizer' in locals():
    print("Tokenizing the 60,000-sample validation set...")
    val_dataset = Dataset.from_pandas(df_val).map(tokenize, batched=True)
    # Expose only the tensors the model consumes (plus the label).
    val_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
    print("Validation set is tokenized and ready.")

    # --- Step 7: Run Evaluation and Print Per-Class Report ---
    print("\n--- Running Final Evaluation on 'my_best_ROBERTA_model3' ---")

    # Create a simple Trainer object just to run the evaluation.
    # report_to="none" disables experiment-tracking integrations, so a plain
    # evaluation run does not stop to ask for a Weights & Biases API key.
    eval_args = TrainingArguments(
        output_dir="/content/eval_tmp",
        report_to="none",
    )
    trainer = Trainer(model=model, args=eval_args)

    # Run the prediction
    predictions = trainer.predict(val_dataset)
    predicted_labels = np.argmax(predictions.predictions, axis=1)
    true_labels = predictions.label_ids

    # --- This is the Per-Class Report You Asked For ---
    print("\n--- Detailed Per-Class Classification Report ---")
    target_names = ["0: Low (0-100)", "1: Medium (101-1k)", "2: High (1k-10k)", "3: Viral (10k+)"]
    # Pass the label ids explicitly so the report always has one row per
    # bucket; without `labels`, a class missing from both the true and the
    # predicted labels makes `target_names` mismatch and raise.
    report_labels = list(range(len(target_names)))
    print(classification_report(
        true_labels,
        predicted_labels,
        labels=report_labels,
        target_names=target_names,
    ))
else:
    print("\nModel not loaded. Skipping evaluation.")

Installing necessary libraries...
Using device: cuda

Mounting Google Drive...
Mounted at /content/drive

Attempting to load full training data from: /content/drive/MyDrive/Copy of behaviour_simulation_train.xlsx
Data loaded successfully! Shape: (300000, 7)
Applying transformations to the full dataset...
Re-creating the identical 80/20 train/validation split...
Validation set (`df_val`) created with 60000 samples.
Loading winning model and tokenizer from /content/drive/MyDrive/my_best_ROBERTA_model3...
Model loaded successfully.
Tokenizing the 60,000-sample validation set...


Map:   0%|          | 0/60000 [00:00<?, ? examples/s]

Validation set is tokenized and ready.

--- Running Final Evaluation on 'my_best_ROBERTA_model3' ---


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmc240041012[0m ([33mmc240041012-iit-indore[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin



--- Detailed Per-Class Classification Report ---
                    precision    recall  f1-score   support

    0: Low (0-100)       0.90      0.88      0.89     32913
1: Medium (101-1k)       0.71      0.72      0.71     19632
  2: High (1k-10k)       0.57      0.62      0.60      6707
   3: Viral (10k+)       0.63      0.37      0.46       748

          accuracy                           0.79     60000
         macro avg       0.70      0.65      0.67     60000
      weighted avg       0.80      0.79      0.79     60000

