<a href="https://colab.research.google.com/github/janz15/mcq/blob/master/Sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Install dependencies (one-time per session)
!pip -q install --upgrade pip
!pip -q install transformers>=4.45.0 datasets>=2.18.0 accelerate>=0.30 gradio>=4.19 scikit-learn>=1.4 matplotlib>=3.8

# Optional: torch install
# Colab usually already ships a compatible torch; this just guarantees availability.
# CPU-only:
!pip -q install torch --index-url https://download.pytorch.org/whl/cpu

# If you enabled GPU runtime and want CUDA builds (Colab default often has them):
# !pip -q install torch torchvision torchaudio

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.8/1.8 MB[0m [31m54.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m38.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [8]:
#@title Quick inference pipeline
from transformers import pipeline

sentiment = pipeline("sentiment-analysis")  # defaults to a small DistilBERT-based classifier
texts = [
    "I love how fast this app is!",
    "This is the worst service I’ve ever used.",
    "It’s okay, nothing special."
]

for t in texts:
    print(f"{t:50s} -> {sentiment(t)[0]}")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f.
Using a pipeline without specifying a model name and revision in production is not recommended.


Loading weights:   0%|          | 0/104 [00:00<?, ?it/s]

I love how fast this app is!                       -> {'label': 'POSITIVE', 'score': 0.9996739625930786}
This is the worst service I’ve ever used.          -> {'label': 'NEGATIVE', 'score': 0.9997960925102234}
It’s okay, nothing special.                        -> {'label': 'NEGATIVE', 'score': 0.8472709655761719}


In [9]:
#@title Helper function
def analyze_text(text: str):
    text = (text or "").strip()
    if not text:
        return {"label": "NEUTRAL", "score": 0.0}
    out = sentiment(text)[0]
    return {"label": out["label"], "score": float(out["score"])}

print(analyze_text("Absolutely fantastic experience!"))
print(analyze_text("Terrible quality and very disappointing."))

{'label': 'POSITIVE', 'score': 0.9998812675476074}
{'label': 'NEGATIVE', 'score': 0.9997851252555847}


In [10]:
#@title Load dataset and evaluate the pretrained pipeline
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

# SST2: binary sentiment (positive/negative)
dataset = load_dataset("glue", "sst2")
test_ds = dataset["validation"]  # 872 examples

# Map labels to strings to align with pipeline outputs
id2label = {0: "NEGATIVE", 1: "POSITIVE"}

preds = []
golds = []

for ex in test_ds:
    r = analyze_text(ex["sentence"])
    preds.append(r["label"])
    golds.append(id2label[ex["label"]])

print("Accuracy:", accuracy_score(golds, preds))
print(classification_report(golds, preds, digits=3))


README.md: 0.00B [00:00, ?B/s]

sst2/train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

sst2/validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

sst2/test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Accuracy: 0.9105504587155964
              precision    recall  f1-score   support

    NEGATIVE      0.925     0.890     0.907       428
    POSITIVE      0.898     0.930     0.914       444

    accuracy                          0.911       872
   macro avg      0.911     0.910     0.910       872
weighted avg      0.911     0.911     0.910       872



In [11]:
#@title Prepare data for fine-tuning
from transformers import AutoTokenizer
from datasets import load_dataset, DatasetDict
from sklearn.model_selection import train_test_split

base_model = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(base_model)

raw = load_dataset("glue", "sst2")
train_full = raw["train"]

# Use a small subset to keep training snappy in Colab
small_size = 5000  # adjust down if you want faster; SST2 train is ~67k
train_small = train_full.select(range(min(small_size, len(train_full))))

# Split into train/validation
train_texts, val_texts = train_test_split(
    list(range(len(train_small))),
    test_size=0.1,
    random_state=42,
    stratify=[x["label"] for x in train_small]
)
train_ds = train_small.select(train_texts)
val_ds = train_small.select(val_texts)

def tokenize(batch):
    return tokenizer(batch["sentence"], truncation=True, padding=False)

train_tok = train_ds.map(tokenize, batched=True)
val_tok = val_ds.map(tokenize, batched=True)

train_tok = train_tok.rename_column("label", "labels")
val_tok = val_tok.rename_column("label", "labels")

train_tok.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
val_tok.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

DatasetDict({"train": train_tok, "validation": val_tok})

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/4500 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['sentence', 'labels', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 4500
    })
    validation: Dataset({
        features: ['sentence', 'labels', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 500
    })
})

In [15]:
#@title Fine-tune the classifier (quick)
import torch
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer, DataCollatorWithPadding
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import numpy as np

num_labels = 2
model = AutoModelForSequenceClassification.from_pretrained(base_model, num_labels=num_labels)

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1);
    return {
        "accuracy": accuracy_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "f1": f1_score(labels, preds),
    }

args = TrainingArguments(
    output_dir="./sentiment-model",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    num_train_epochs=2,              # small for demo; bump to 3–5 for better results
    weight_decay=0.01,
    logging_steps=50,
    load_best_model_at_end=True,
    fp16=torch.cuda.is_available(),  # use mixed precision on GPU
    report_to="none"
)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_tok,
    eval_dataset=val_tok,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

trainer.train()

Loading weights:   0%|          | 0/100 [00:00<?, ?it/s]

DistilBertForSequenceClassification LOAD REPORT from: distilbert-base-uncased
Key                     | Status     | 
------------------------+------------+-
vocab_transform.weight  | UNEXPECTED | 
vocab_layer_norm.weight | UNEXPECTED | 
vocab_transform.bias    | UNEXPECTED | 
vocab_projector.bias    | UNEXPECTED | 
vocab_layer_norm.bias   | UNEXPECTED | 
classifier.weight       | MISSING    | 
pre_classifier.weight   | MISSING    | 
classifier.bias         | MISSING    | 
pre_classifier.bias     | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.340164,0.29029,0.892,0.914179,0.887681,0.900735
2,0.214545,0.340723,0.898,0.921348,0.891304,0.906077


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['distilbert.embeddings.LayerNorm.weight', 'distilbert.embeddings.LayerNorm.bias'].
There were unexpected keys in the checkpoint model loaded: ['distilbert.embeddings.LayerNorm.beta', 'distilbert.embeddings.LayerNorm.gamma'].


TrainOutput(global_step=564, training_loss=0.29427223636748945, metrics={'train_runtime': 33.4645, 'train_samples_per_second': 268.942, 'train_steps_per_second': 16.854, 'total_flos': 80713006942032.0, 'train_loss': 0.29427223636748945, 'epoch': 2.0})

In [16]:
#@title Evaluate the fine-tuned model on SST2 validation
from datasets import load_dataset
from transformers import TextClassificationPipeline

pipe_ft = TextClassificationPipeline(
    model=trainer.model,
    tokenizer=tokenizer,
    return_all_scores=False,
    function_to_apply="softmax",
    device=0 if torch.cuda.is_available() else -1
)

raw_val = val_ds  # our held-out subset
preds = []
golds = []

for ex in raw_val:
    lab = "POSITIVE" if ex["label"] == 1 else "NEGATIVE"
    r = pipe_ft(ex["sentence"])[0]["label"]
    # HF returns "LABEL_0"/"LABEL_1", map to text:
    r = "NEGATIVE" if r in ("LABEL_0", 0) else "POSITIVE"
    preds.append(r)
    golds.append(lab)

print("Accuracy:", accuracy_score(golds, preds))
print(classification_report(golds, preds, digits=3))

Accuracy: 0.892
              precision    recall  f1-score   support

    NEGATIVE      0.866     0.897     0.882       224
    POSITIVE      0.914     0.888     0.901       276

    accuracy                          0.892       500
   macro avg      0.890     0.893     0.891       500
weighted avg      0.893     0.892     0.892       500



In [17]:
#@title Toggle between pretrained vs fine-tuned model
USE_FINETUNED = True  # change to False to use the pretrained pipeline

if USE_FINETUNED:
    from transformers import TextClassificationPipeline
    import torch
    sentiment_fn = TextClassificationPipeline(
        model=trainer.model,
        tokenizer=tokenizer,
        return_all_scores=False,
        function_to_apply="softmax",
        device=0 if torch.cuda.is_available() else -1
    )
    def infer(text):
        if not text.strip():
            return {"label":"NEUTRAL","score":0.0}
        out = sentiment_fn(text)[0]
        # Map to POSITIVE/NEGATIVE strings
        label = "NEGATIVE" if out["label"] in ("LABEL_0", 0) else "POSITIVE"
        return {"label": label, "score": float(out["score"])}
else:
    # fall back to the earlier pipeline
    infer = analyze_text

print(infer("I absolutely love this!"))
print(infer("What a terrible experience."))

{'label': 'POSITIVE', 'score': 0.9521309733390808}
{'label': 'NEGATIVE', 'score': 0.9314625263214111}


In [18]:
#@title Launch a web UI (Gradio) for interactive demo
import gradio as gr

def predict_ui(text):
    r = infer(text)
    return f"{r['label']} (confidence: {r['score']:.3f})"

with gr.Blocks(title="Sentiment Mini") as demo:
    gr.Markdown("## Sentiment Analysis Demo")
    inp = gr.Textbox(label="Enter text")
    btn = gr.Button("Analyze")
    out = gr.Textbox(label="Result")

    btn.click(predict_ui, inputs=inp, outputs=out)

demo.launch(share=True)  # share=True gives you a public URL from Colab

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://bf657bce87cc06353e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


