In [15]:

!pip install textblob vaderSentiment scikit-learn pandas


import nltk
nltk.download('punkt')




[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [5]:

import pandas as pd
import numpy as np
import random, time
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, classification_report

from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


In [14]:
data = [
    ("I love this phone, the battery life is fantastic!", "positive"),
    ("Absolutely terrible service. I'm never coming back.", "negative"),
    ("The movie was okay, not great, not terrible.", "neutral"),
    ("What a wonderful experience, staff were so friendly!", "positive"),
    ("The product broke after two days. Very disappointing.", "negative"),
]

df = pd.DataFrame(data, columns=["text", "label"])


print("Dataset sample:")
print(df.head())

Dataset sample:
                                                text     label
0  I love this phone, the battery life is fantastic!  positive
1  Absolutely terrible service. I'm never coming ...  negative
2       The movie was okay, not great, not terrible.   neutral
3  What a wonderful experience, staff were so fri...  positive
4  The product broke after two days. Very disappo...  negative


In [13]:

X_train, X_test, y_train, y_test = train_test_split(df["text"], df["label"], test_size=0.3, random_state=42)

vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1,2))
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

model = LogisticRegression(max_iter=1000)
model.fit(X_train_vec, y_train)

y_pred = model.predict(X_test_vec)
print("📊 Logistic Regression Results:")
print(classification_report(y_test, y_pred))


📊 Logistic Regression Results:
              precision    recall  f1-score   support

    negative       0.00      0.00      0.00       2.0
    positive       0.00      0.00      0.00       0.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [8]:

def predict_textblob(texts):
    preds = []
    for t in texts:
        polarity = TextBlob(t).sentiment.polarity
        if polarity > 0.05:
            preds.append("positive")
        elif polarity < -0.05:
            preds.append("negative")
        else:
            preds.append("neutral")
    return preds

def predict_vader(texts):
    analyzer = SentimentIntensityAnalyzer()
    preds = []
    for t in texts:
        s = analyzer.polarity_scores(t)["compound"]
        if s > 0.05:
            preds.append("positive")
        elif s < -0.05:
            preds.append("negative")
        else:
            preds.append("neutral")
    return preds


tb_preds = predict_textblob(X_test)
print("TextBlob Results:")
print(classification_report(y_test, tb_preds))


vader_preds = predict_vader(X_test)
print("VADER Results:")
print(classification_report(y_test, vader_preds))


📊 TextBlob Results:
              precision    recall  f1-score   support

    negative       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

📊 VADER Results:
              precision    recall  f1-score   support

    negative       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



In [9]:

def timeit_predict(fn, texts, name):
    start = time.time()
    fn(texts)
    duration = time.time() - start
    print(f"{name} took {duration:.4f} sec for {len(texts)} samples ({duration/len(texts):.6f} sec per sample)")

timeit_predict(lambda t: model.predict(vectorizer.transform(t)), list(X_test), "Logistic Regression")
timeit_predict(predict_textblob, list(X_test), "TextBlob")
timeit_predict(predict_vader, list(X_test), "VADER")


Logistic Regression took 0.0019 sec for 2 samples (0.000959 sec per sample)
TextBlob took 0.0007 sec for 2 samples (0.000353 sec per sample)
VADER took 0.0162 sec for 2 samples (0.008107 sec per sample)


In [10]:

from collections import defaultdict, Counter

def tokenize(s): return s.split()

def build_ngram_model(texts, n=2):
    model = defaultdict(Counter)
    for doc in texts:
        tokens = tokenize(doc)
        for i in range(len(tokens)-n):
            context = tuple(tokens[i:i+n])
            nxt = tokens[i+n]
            model[context][nxt] += 1
    return model

def generate_text(model, n=2, max_tokens=20):
    context = random.choice(list(model.keys()))
    result = list(context)
    for _ in range(max_tokens):
        if context not in model: break
        next_word = random.choice(list(model[context].elements()))
        result.append(next_word)
        context = tuple(result[-n:])
    return " ".join(result)

ngram_model = build_ngram_model(df["text"], n=2)
for i in range(3):
    print(f"Generated {i+1}: {generate_text(ngram_model, n=2)}")


Generated 1: What a wonderful experience, staff were so friendly!
Generated 2: service. I'm never coming back.
Generated 3: phone, the battery life is fantastic!
