In [None]:
!pip install -q transformers datasets peft accelerate

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m32.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.9/183.9 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m125.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━

In [None]:
!pip install bitsandbytes



In [None]:
# Mount Google Drive at /content/drive; the model and CSV paths used later in this notebook live under it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
# Allocator setting for PyTorch's CUDA caching allocator. It is exported here,
# before the later cell that runs `import torch`, so it is in place when CUDA
# memory is first used.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

# Step 1: Mount Drive (no packages are installed in this cell)
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[0m

In [None]:
# ─── Cell: Emotion Analysis on YouTube Comments ─────────────────────────────

# (If you haven't already) Mount your Drive
from google.colab import drive
drive.mount('/content/drive')

# Step 1: Imports & HF login
import re
import pandas as pd
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    pipeline,
    BitsAndBytesConfig
)
from huggingface_hub import login

login()  # paste your HF_TOKEN

# Step 2: Locations on Drive (adjust if yours differ)
# Model snapshots cached on Drive.
cached_mistral_path, cached_llama_path = (
    "/content/drive/MyDrive/Emotion_Recognition/cached_models/mistral",
    "/content/drive/MyDrive/Emotion_Recognition/cached_models/llama3",
)
# Input comments and the CSV the predictions are written to.
data_path, output_path = (
    "/content/drive/MyDrive/Emotion_Recognition/data/youtube_comments.csv",
    "/content/drive/MyDrive/Emotion_Recognition/results/youtube_comments_emotions.csv",
)

# Step 3: 8-bit quantization settings, shared by both model loads below
_int8_settings = dict(load_in_8bit=True, llm_int8_threshold=6.0)
bnb_cfg = BitsAndBytesConfig(**_int8_settings)

# Steps 4-5: Load Mistral 7B and LLaMA 3 from Drive (8-bit + auto offload)
def _load_text_generator(model_dir):
    """Build a text-generation pipeline from the checkpoint stored in ``model_dir``.

    The tokenizer is loaded with ``local_files_only=True``; the model is loaded
    with the shared ``bnb_cfg`` quantization config and ``device_map="auto"``.
    (``local_files_only`` is deliberately not passed for the model, matching
    the original cell where it was commented out.)

    Returns
    -------
    tuple
        ``(tokenizer, model, generator)`` so the caller keeps a handle on every
        object that was created.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_dir, local_files_only=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_dir,
        quantization_config=bnb_cfg,
        device_map="auto",
    )
    generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device_map="auto",
        torch_dtype=torch.float16,
    )
    return tokenizer, model, generator


# Same load order as before: Mistral first, then LLaMA.
tokenizer_mistral, model_mistral, gen_mistral = _load_text_generator(cached_mistral_path)
tokenizer_llama, model_llama, gen_llama = _load_text_generator(cached_llama_path)

# Step 6: Load & clean your YouTube comments
# Only the first MAX_COMMENTS rows are analysed. Passing the limit as `nrows`
# makes the parser stop there instead of reading the whole file and slicing
# afterwards; set MAX_COMMENTS to None to read everything.
MAX_COMMENTS = 1000
df = pd.read_csv(data_path, nrows=MAX_COMMENTS)

def clean_text(s: str) -> str:
    """Strip URLs from a comment and trim surrounding whitespace.

    Missing values (``None`` or a float NaN, which is what pandas produces for
    an empty CSV cell) yield an empty string rather than the literal text
    ``"None"`` / ``"nan"``, so they are not classified as if they were real
    comments. Any other non-string value is converted with ``str()``.
    """
    # NaN is the only float that compares unequal to itself.
    if s is None or (isinstance(s, float) and s != s):
        return ""
    # Remove URLs only
    return re.sub(r'https?://\S+|www\.\S+', '', str(s)).strip()

# Store the URL-free text in a new column; the raw comment_text column is kept.
df["clean_comment"] = df["comment_text"].map(clean_text)

# Step 7: Build emotion‐classification prompts
def build_prompts(texts):
    """Wrap every text in the emotion-classification instruction.

    Returns a list with one prompt per input text, in the same order. Each
    prompt ends with ``"Emotion:"`` so the model's continuation is the label.
    """
    instruction = (
        "Task: Classify the emotion in the following text into one of these categories: "
        "sadness, joy, love, anger, fear, surprise.\n\n"
    )
    built = []
    for comment in texts:
        built.append(instruction + f"Text: {comment}\n\nEmotion:")
    return built
# One prompt per cleaned comment, in row order.
clean_comments = df["clean_comment"].tolist()
prompts = build_prompts(clean_comments)

# Step 8: Batched inference with error handling
def get_preds(gen, prompts, batch_size=8):
    """Run ``gen`` over ``prompts`` in chunks and return one emotion label per prompt.

    Parameters
    ----------
    gen : callable
        Called as ``gen(list_of_prompts, max_new_tokens=10, do_sample=False)``.
        For every prompt it must return a list whose first item is a dict with
        a ``"generated_text"`` entry.
    prompts : list of str
        Prompts to classify.
    batch_size : int
        Number of prompts handed to ``gen`` per call.

    Returns
    -------
    list of str
        The lower-cased, trimmed first line of each completion, or
        ``"unknown"`` when no label can be extracted.
    """
    out = []
    for i in range(0, len(prompts), batch_size):
        batch = prompts[i : i + batch_size]
        res = gen(batch, max_new_tokens=10, do_sample=False)
        for prompt, r in zip(batch, res):
            txt = r[0]["generated_text"]
            if txt.startswith(prompt):
                # The output echoes the prompt: everything after it is the
                # completion. Cutting at the prompt (rather than at the last
                # "Emotion:") keeps the right label even when the model goes on
                # to write another "Text: ... Emotion: ..." example.
                completion = txt[len(prompt):]
            elif "Emotion:" in txt:
                # Prompt not echoed verbatim: fall back to the marker.
                completion = txt.split("Emotion:")[-1]
            else:
                completion = ""
            first_line = completion.strip().split("\n")[0].strip().lower()
            # An empty completion is reported with the same sentinel as a
            # missing marker instead of an empty string.
            out.append(first_line or "unknown")
    return out

# Step 9: Run inference
# Create the results folder up front: if it cannot be created the cell fails
# here, before the long inference run, instead of at the final save when the
# predictions would be lost.
import os
_results_dir = os.path.dirname(output_path)
if _results_dir:
    os.makedirs(_results_dir, exist_ok=True)

df["mistral_emotion"] = get_preds(gen_mistral, prompts, batch_size=16)
df["llama_emotion"]   = get_preds(gen_llama,   prompts, batch_size=16)

# Step 10: Save results
df.to_csv(output_path, index=False)
print(f"Done! Results saved to:\n  {output_path}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Device set to use cuda:0


config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

Device set to use cuda:0
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Done! Results saved to:
  /content/drive/MyDrive/Emotion_Recognition/results/youtube_comments_emotions.csv


In [None]:
# Local copy of the results. index=False matches the copy saved to Drive, so
# both files have the same columns and no extra unnamed index column appears
# when either one is read back.
df.to_csv("inference_output.csv", index=False)

In [None]:
import pandas as pd
from statsmodels.stats.contingency_tables import mcnemar
from sklearn.metrics import accuracy_score
import numpy as np
# Read the predictions back from the CSV that was written to Drive.
results_csv_path = "/content/drive/MyDrive/Emotion_Recognition/results/youtube_comments_emotions.csv"
inference_results = pd.read_csv(results_csv_path)

In [None]:
# Preview the first rows: raw comment, cleaned comment and both models' labels.
inference_results.head()

Unnamed: 0,comment_text,clean_comment,mistral_emotion,llama_emotion
0,All products can be found on https://www.justi...,All products can be found on 🙌🏻😁,joy,joy
1,I would like to get that couch console.,I would like to get that couch console.,joy,joy
2,But it's really expensive.,But it's really expensive.,joy,anger
3,Link to products doesn't work,Link to products doesn't work,anger,anger
4,Uueyiduisu😊,Uueyiduisu😊,joy,joy


In [None]:
# Preview the last rows to confirm the frame is complete to the end.
inference_results.tail()

Unnamed: 0,comment_text,clean_comment,mistral_emotion,llama_emotion
995,Did anyone see the shrek,Did anyone see the shrek,fear,anger
996,wooow,wooow,joy,joy
997,"Unboxing collab, does anyone interested?","Unboxing collab, does anyone interested?",joy,joy
998,YOU PUT PLASTIC IN THE AIRFRIER,YOU PUT PLASTIC IN THE AIRFRIER,anger,anger
999,All of these things aren't.. needed tho..,All of these things aren't.. needed tho..,sadness,sadness


In [None]:
# Compare predictions row by row: True if both models gave the same label, False otherwise
mistral_labels = inference_results["mistral_emotion"]
llama_labels = inference_results["llama_emotion"]
inference_results["same_prediction"] = mistral_labels.eq(llama_labels)

In [None]:
# Create boolean masks for the rows where the two models disagree.
# There is no ground truth, so neither model can be marked "correct": whichever
# model is taken as the reference, the rows selected are the ones where the two
# labels differ. The previous first mask combined "labels differ" with "labels
# are equal" and was therefore always False.
# Rows whose reference label is missing (NaN) are excluded from that reference's mask.
_mistral = inference_results["mistral_emotion"]
_llama = inference_results["llama_emotion"]
_labels_differ = _mistral != _llama
disagree_mistral_correct = _labels_differ & _mistral.notna()
disagree_llama_correct = _labels_differ & _llama.notna()

In [None]:
# Construct 2x2 contingency table
# a: both correct (not applicable here as we don't have ground truth)
# b: Mistral correct, LLaMA wrong - we treat this as Mistral and LLaMA disagree (favoring Mistral)
# c: LLaMA correct, Mistral wrong - we treat this as Mistral and LLaMA disagree (favoring LLaMA)
# d: both wrong (not applicable here either)

In [None]:
# For McNemar's test, we just need counts of disagreements:
# b = Mistral ≠ LLaMA and LLaMA predicted wrong
# c = Mistral ≠ LLaMA and Mistral predicted wrong

In [None]:
# In our setup there is no ground truth, so "correct"/"wrong" cannot be used.
# Counting every disagreement for both b and c makes b == c by construction,
# which forces McNemar's statistic to ~0 regardless of the data.
# Instead, binarise the labels one-vs-rest for a reference emotion and count
# the two kinds of discordant pairs:
#   b = Mistral assigns REFERENCE_EMOTION, LLaMA does not
#   c = LLaMA assigns REFERENCE_EMOTION, Mistral does not
# McNemar's test on (b, c) then asks whether the two models assign that
# emotion at different rates.
REFERENCE_EMOTION = "joy"  # change to test another label
_mistral_hit = inference_results["mistral_emotion"] == REFERENCE_EMOTION
_llama_hit = inference_results["llama_emotion"] == REFERENCE_EMOTION
b = int((_mistral_hit & ~_llama_hit).sum())
c = int((~_mistral_hit & _llama_hit).sum())

In [None]:
# Create contingency table: the discordant counts sit off the diagonal,
# the diagonal cells are left at 0.
top_row = [0, b]
bottom_row = [c, 0]
table = [top_row, bottom_row]

In [None]:
# Perform McNemar's test on the discordant counts.
# With few discordant pairs the chi-square approximation is unreliable, so the
# exact binomial test is used below the usual rule-of-thumb threshold of 25;
# otherwise the chi-square statistic with continuity correction is used.
_n_discordant = table[0][1] + table[1][0]
result = mcnemar(table, exact=_n_discordant < 25, correction=True)

In [None]:
# Show the contingency table together with the test statistic and its p-value.
table, result.statistic, result.pvalue

([[0, 497], [497, 0]],
 np.float64(0.001006036217303823),
 np.float64(0.9746968811825852))