In [None]:
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
%pip install transformers
%pip install nltk
%pip install scikit-learn
%pip install pandas

import re
import pandas as pd
import numpy as np

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

from sklearn.metrics import classification_report

from transformers import (
    AutoTokenizer,
    AutoModelForTokenClassification,
    AutoModelForSequenceClassification,
    pipeline
)

# Download required NLTK data
nltk.download('vader_lexicon')

print("✅ Dependencies loaded.\n")


sent_topic_df = pd.read_csv('sentiment-topic-test.tsv', sep='\t', header=0)
print("=== sentiment-topic-test.tsv (Test set) ===")
print(f"Rows: {len(sent_topic_df)}, Columns: {sent_topic_df.columns.tolist()}\n")
print(sent_topic_df.head(3).to_string(index=False), "\n")

token_df = pd.read_csv('NER-test.tsv', sep='\t', header=0)
print("=== NER-test.tsv (Token-level Test set) ===")
print(f"Rows: {len(token_df)}, Columns: {token_df.columns.tolist()}\n")
print(token_df.head(3).to_string(index=False), "\n")

sentence_records = []
for sid, group in token_df.groupby('sentence_id'):
    group = group.sort_values('token_id')
    tokens = group['token'].tolist()
    full_sentence = " ".join(tokens)
    sentence_records.append({'sentence_id': sid, 'sentence': full_sentence})

ner_sent_df = pd.DataFrame(sentence_records)
print("=== Reconstructed NER Sentences (first 5) ===")
print(ner_sent_df.head(5).to_string(index=False), "\n")


model_checkpoint = "dslim/bert-base-NER"
tokenizer_ner = AutoTokenizer.from_pretrained(model_checkpoint)
model_ner = AutoModelForTokenClassification.from_pretrained(model_checkpoint)

ner_pipeline = pipeline(
    "ner",
    model=model_ner,
    tokenizer=tokenizer_ner,
    grouped_entities=True
)

def preprocess_text(text):
    if not isinstance(text, str):
        return ""
    text = re.sub(r'\[?\(?\d{2}:\d{2}:\d{2}\)?\]?', '', text)
    text = re.sub(r'^[A-Z ]{2,20}:', '', text, flags=re.MULTILINE)
    text = re.sub(r'\n+', ' ', text)
    text = re.sub(r'\s{2,}', ' ', text)
    return text.strip()

ner_sent_df['text_clean'] = ner_sent_df['sentence'].apply(preprocess_text)

def extract_entities(text):
    try:
        results = ner_pipeline(text)
        return [(ent['word'], ent['entity_group']) for ent in results]
    except:
        return []

ner_sent_df['predicted_entities'] = ner_sent_df['text_clean'].apply(extract_entities)
print("=== NER Predictions (first 5) ===")
print(ner_sent_df[['sentence_id', 'predicted_entities']].head(5).to_string(index=False), "\n")


# System A: VADER baseline
sia = SentimentIntensityAnalyzer()
def vader_baseline(text):
    scores = sia.polarity_scores(text)
    c = scores['compound']
    if c >= 0.05:
        return 'positive'
    elif c <= -0.05:
        return 'negative'
    else:
        return 'neutral'

sent_topic_df['vader_sentiment'] = sent_topic_df['sentence'].apply(vader_baseline)


# System B: Hybrid zero‐shot (“negative”/“neutral”/“positive”) + keyword fallback

# Load MNLI‐based zero‐shot classifier
zs_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-mnli")
zs_model = AutoModelForSequenceClassification.from_pretrained("facebook/bart-large-mnli")
zs_classifier = pipeline("zero-shot-classification", model=zs_model, tokenizer=zs_tokenizer)

# Define small domain‐specific sentiment keywords
positive_keywords = {
    "win", "incredible", "unbelievable", "favourite", "love", "enjoyed", "best", "liked", "amazing", "excited"
}
negative_keywords = {
    "dragged", "confusing", "fell", "apart", "disappointing", "disappointed", "hate", "bored", "worse", "boring"
}

def hybrid_sentiment(text, zero_shot_threshold=0.7):
    # 1) Zero-shot entailment step
    out = zs_classifier(text, candidate_labels=["negative","neutral","positive"])
    top_label = out["labels"][0]
    top_score = out["scores"][0]
    if top_score >= zero_shot_threshold:
        return top_label

    # 2) Keyword fallback if MNLI is uncertain
    tokens = {w.strip('.,!?;:"\'').lower() for w in text.split()}
    if tokens & positive_keywords:
        return "positive"
    if tokens & negative_keywords:
        return "negative"
    return "neutral"

sent_topic_df['zs_sentiment'] = sent_topic_df['sentence'].apply(hybrid_sentiment)

print("=== Sentiment: Gold vs. VADER vs. Hybrid Zero-Shot+KWD (first 5) ===")
print(sent_topic_df[['sentence','sentiment','vader_sentiment','zs_sentiment']]
      .head(5).to_string(index=False), "\n")

print("=== VADER Sentiment Classification Report (Test) ===")
print(classification_report(
    sent_topic_df['sentiment'],
    sent_topic_df['vader_sentiment'],
    labels=['negative','neutral','positive'],
    target_names=['negative','neutral','positive']
), "\n")

print("=== Hybrid Zero-Shot+KWD Sentiment Classification Report (Test) ===")
print(classification_report(
    sent_topic_df['sentiment'],
    sent_topic_df['zs_sentiment'],
    labels=['negative','neutral','positive'],
    target_names=['negative','neutral','positive']
), "\n")

# Show a few error‐analysis examples
mismatch_df = sent_topic_df[sent_topic_df['vader_sentiment'] != sent_topic_df['zs_sentiment']]
print("=== Examples where VADER vs. Hybrid differ (first 5) ===")
print(mismatch_df[['sentence','sentiment','vader_sentiment','zs_sentiment']].head(5).to_string(index=False), "\n")


candidate_labels = ["sports", "book", "movie"]

topic_keywords_fallback = {
    "sports": {"sport","game","goal","team","score","coach","match","stadium","league","win"},
    "book":   {"book","story","author","chapter","novel","read","fiction","page","literature","publish"},
    "movie":  {"movie","film","actor","actress","director","scene","watch","cinema","screen","dvd"}
}

def hybrid_topic_classifier(sentence: str, threshold: float = 0.5) -> str:
    out = zs_classifier(sentence, candidate_labels)
    top_label = out['labels'][0]
    top_score = out['scores'][0]
    if top_score >= threshold:
        return top_label
    tokens = {w.strip('.,!?;:"\'').lower() for w in sentence.split()}
    scores = {lbl: len(tokens & kws) for lbl, kws in topic_keywords_fallback.items()}
    best_lbl = max(scores, key=scores.get)
    return best_lbl if scores[best_lbl] > 0 else top_label

sent_topic_df['pred_topic'] = sent_topic_df['sentence'].apply(hybrid_topic_classifier)

print("=== Topic (Gold vs. Hybrid) on Test Set (first 5) ===")
print(sent_topic_df[['sentence','topic','pred_topic']].head(5).to_string(index=False), "\n")

print("=== Topic Classification Report (Hybrid) ===")
print(classification_report(
    sent_topic_df['topic'],
    sent_topic_df['pred_topic'],
    labels=['sports','book','movie'],
    target_names=['sports','book','movie']
), "\n")


sent_topic_df.to_csv("sentiment_topic_test_predictions.csv", index=False)
ner_sent_df.to_csv("ner_test_predictions.csv", index=False)

print("All predictions saved to CSV. ✅")




Looking in indexes: https://download.pytorch.org/whl/cpu
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
✅ Dependencies loaded.

=== sentiment-topic-test.tsv (Test set) ===
Rows: 18, Columns: ['sentence_id', 'sentence', 'sentiment', 'topic']

 sentence_id                                                                         sentence sentiment  topic
           0      The stadium was alive with the roar of the crowd after that incredible win.  positive sports
           1 That last-minute goal had me jumping out of my seat—what an unbelievable finish!  positive sports
           2                I couldn’t put the book down; it swept me into a whole new world.  positive   book 

=== NER-test.tsv (Token-level

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/rayanelmourabit/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use mps:0


=== NER Predictions (first 5) ===
 sentence_id                                                     predicted_entities
           0            [(Paris, LOC), (Lou, ORG), (##vre, LOC), (Mona Lisa, MISC)]
           1                            [(Amazon, ORG), (Google, ORG), (Meta, ORG)]
           2 [(Ph, PER), (##aro, PER), (##ah Sanders, PER), (Floating Points, ORG)]
           3                                                                     []
           4    [(Kevin, PER), (Succession, MISC), (Ki, PER), (##eran Culkin, PER)] 



Device set to use mps:0


=== Sentiment: Gold vs. VADER vs. Hybrid Zero-Shot+KWD (first 5) ===
                                                                             sentence sentiment vader_sentiment zs_sentiment
          The stadium was alive with the roar of the crowd after that incredible win.  positive        positive     positive
     That last-minute goal had me jumping out of my seat—what an unbelievable finish!  positive        positive     positive
                    I couldn’t put the book down; it swept me into a whole new world.  positive         neutral     positive
        The story had its moments, though some parts felt like they dragged on a bit.   neutral        positive     negative
I enjoyed the way the timelines shifted, even if it got a little confusing sometimes.   neutral        positive     positive 

=== VADER Sentiment Classification Report (Test) ===
              precision    recall  f1-score   support

    negative       1.00      0.17      0.29         6
     neutral     

In [5]:
from sklearn.metrics import classification_report
print(classification_report(sent_topic_df['sentiment'], sent_topic_df['predicted_sentiment']))


              precision    recall  f1-score   support

    negative       1.00      0.17      0.29         6
     neutral       0.20      0.17      0.18         6
    positive       0.25      0.50      0.33         6

    accuracy                           0.28        18
   macro avg       0.48      0.28      0.27        18
weighted avg       0.48      0.28      0.27        18

