In [None]:
!pip install textblob --quiet

import pandas as pd
from textblob import TextBlob

# -----------------------------
# STEP 1: Load dataset
# -----------------------------
df = pd.read_csv("/content/labelled.csv")   # contains ["Heading","Body","Category","URL"]

# Use Heading + Body as text input.
# Missing cells are replaced with "" before the string cast: astype(str) on its
# own turns NaN into the literal word "nan", which would then be scored as if
# it were part of the article. strip() drops the dangling space left behind
# when one of the two parts is empty.
df["text"] = (
    df["Heading"].fillna("").astype(str)
    + " "
    + df["Body"].fillna("").astype(str)
).str.strip()

# -----------------------------
# STEP 2: Sentiment using TextBlob
# -----------------------------
def get_sentiment(text):
    """Label ``text`` as "Positive", "Negative" or "Neutral" from its TextBlob polarity.

    The input is converted with ``str()`` before analysis, so non-string
    values are scored on their string form.

    Returns:
        "Positive" when the polarity is above zero, "Negative" when it is
        below zero, and "Neutral" otherwise.
    """
    score = TextBlob(str(text)).sentiment.polarity

    # Guard clauses: the sign of the polarity decides the label.
    if score > 0:
        return "Positive"
    if score < 0:
        return "Negative"
    return "Neutral"

# Score every row's combined text with the TextBlob classifier.
df["Sentiment"] = df["text"].map(get_sentiment)

# -----------------------------
# STEP 3: Save CSV
# -----------------------------
output_file = "labelled_with_textblob_sentiment.csv"
df.to_csv(path_or_buf=output_file, index=False)  # index=False: no row-index column

print(f"✅ Final CSV saved as {output_file}")
# Short preview of the headline next to its assigned label.
preview = df[["Heading", "Sentiment"]].head()
print(preview)


✅ Final CSV saved as labelled_with_textblob_sentiment.csv
                                             Heading Sentiment
0  free speech not hate speech madras high court ...  Positive
1  comment take context say us cop mock indian st...  Negative
2  first meeting one nation one election committe...  Positive
3  us airlines flight depressurize midair plummet...  Positive
4  terrorist kill security force foil infiltratio...  Positive
✅ Final CSV saved as labelled_with_textblob_sentiment.csv
                                             Heading Sentiment
0  free speech not hate speech madras high court ...  Positive
1  comment take context say us cop mock indian st...  Negative
2  first meeting one nation one election committe...  Positive
3  us airlines flight depressurize midair plummet...  Positive
4  terrorist kill security force foil infiltratio...  Positive


RoBERTa

In [None]:
!pip install transformers torch scipy --quiet

import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
import urllib.request, csv



In [None]:
# -----------------------------
# STEP 1: Load your dataset
# -----------------------------
df = pd.read_csv("/content/labelled.csv")   # should have Heading, Body, Category, URL

# Use Heading + Body as text input.
# Missing cells are replaced with "" before the string cast: astype(str) on its
# own turns NaN into the literal word "nan", which the tokenizer would treat as
# real input. strip() drops the dangling space left behind when one of the two
# parts is empty.
df["text"] = (
    df["Heading"].fillna("").astype(str)
    + " "
    + df["Body"].fillna("").astype(str)
).str.strip()



In [None]:
# -----------------------------
# STEP 2: Load RoBERTa sentiment model
# -----------------------------
task = "sentiment"
MODEL = f"cardiffnlp/twitter-roberta-base-{task}"

# Load tokenizer & model
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

# Download label mapping: the file is read as tab-separated rows and the second
# field of each row is taken as the label name, in file order.
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
# The timeout keeps a stalled connection from hanging the notebook indefinitely.
with urllib.request.urlopen(mapping_link, timeout=30) as f:
    mapping_lines = f.read().decode("utf-8").splitlines()
csvreader = csv.reader(mapping_lines, delimiter="\t")
labels = [row[1] for row in csvreader if len(row) > 1]

# Fail here, with a clear message, rather than with an IndexError deep inside
# the per-row prediction step if the download was empty or truncated.
if len(labels) != model.config.num_labels:
    raise ValueError(
        f"Label mapping from {mapping_link} has {len(labels)} entries, "
        f"but the model produces {model.config.num_labels} classes."
    )



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
# -----------------------------
# STEP 3: Define sentiment function
# -----------------------------
def sentiment(text, max_chars=1500, max_length=512):
    """Predict the sentiment label of ``text`` with the loaded classifier.

    Relies on the module-level ``tokenizer``, ``model`` and ``labels``.

    Args:
        text: Text to classify. Non-string values are converted with ``str()``.
        max_chars: Character cap applied before tokenization (cheap safeguard
            against very long inputs).
        max_length: Maximum number of tokens passed to the model; longer
            inputs are truncated by the tokenizer.

    Returns:
        The entry of ``labels`` whose index has the highest model score. For
        missing (None/NaN) or blank input the neutral label is returned
        without running the model.
    """
    # Use the neutral entry of `labels` as the default so the result column
    # does not mix spellings (e.g. "Neutral" vs "neutral"); fall back to the
    # historical "Neutral" if no such entry exists.
    fallback = next(
        (name for name in labels if name.lower() == "neutral"), "Neutral"
    )

    if not isinstance(text, str):
        # is_scalar guards pd.isna, which returns an array for list-like input.
        if pd.api.types.is_scalar(text) and pd.isna(text):
            return fallback
        text = str(text)
    if not text.strip():
        return fallback

    text = text[:max_chars]   # truncate manually (optional safeguard)

    # No padding: a single sequence has nothing to be aligned with, and
    # padding to max_length would make the model process max_length tokens
    # for every row regardless of the text's real length.
    encoded_input = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=max_length,
    )
    # Keep the inputs on the same device as the model weights.
    encoded_input = {
        key: value.to(model.device) for key, value in encoded_input.items()
    }

    with torch.no_grad():
        logits = model(**encoded_input).logits[0]

    # softmax is monotonic, so the argmax of the raw logits selects the same
    # class as the argmax of the probabilities.
    return labels[int(logits.argmax())]




In [None]:
# -----------------------------
# STEP 4: Apply sentiment analysis
# -----------------------------
# Pass each value straight to sentiment(): wrapping it in str() first would turn
# a missing value into the text "nan" and bypass the missing-value check that
# sentiment() performs on its argument.
df["Sentiment"] = df["text"].apply(sentiment)



In [None]:
# Write the labelled frame to disk (no row-index column), then confirm.
df.to_csv(path_or_buf="labelled_with_sentiment.csv", index=False)
print("Saved labelled_with_sentiment.csv successfully")
