### 1. Imports

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC, LinearSVC
from sklearn.utils import shuffle
from tqdm import tqdm


### 2. Load & Prepare Data

In [2]:
# Location of the preprocessed reviews, relative to the notebook's working directory.
csv_file = '../../data/data_processed.csv'
df = pd.read_csv(csv_file)

# Features: the lemmatized review text. An empty text cell is read back from
# CSV as NaN, which TfidfVectorizer rejects, so coerce missing entries to
# empty strings and make sure every document is a plain str.
X = df['lemmatized'].fillna('').astype(str)

# Target: the sentiment label of each review.
y = df['sentiment']

df

Unnamed: 0.1,Unnamed: 0,review,sentiment,clean_text,no_stopwords,tokenized,lemmatized
0,0,"""Paula, I may be a bitch, but I'll never be a ...",negative,paula i may be a bitch but ill never be a butc...,paula may bitch ill never butch br br hilariou...,"['paula', 'may', 'bitch', 'ill', 'never', 'but...",paula may bitch ill never butch br br hilariou...
1,1,Many people here say that this show is for kid...,negative,many people here say that this show is for kid...,many people say show kids hm kid approximately...,"['many', 'people', 'say', 'show', 'kids', 'hm'...",many people say show kid hm kid approximately ...
2,2,This was a well written tale of the Making of ...,positive,this was a well written tale of the making of ...,well written tale making batman sitcom actuall...,"['well', 'written', 'tale', 'making', 'batman'...",well write tale make batman sitcom actually re...
3,3,I think this movie is absolutely beautiful. An...,positive,i think this movie is absolutely beautiful and...,think movie absolutely beautiful im not referr...,"['think', 'movie', 'absolutely', 'beautiful', ...",think movie absolutely beautiful im not refer ...
4,4,The film was very outstanding despite the NC-1...,positive,the film was very outstanding despite the nc17...,film outstanding despite nc17 rating disturbin...,"['film', 'outstanding', 'despite', 'nc17', 'ra...",film outstanding despite nc17 rating disturb s...
...,...,...,...,...,...,...,...
59995,59995,Nothing special.,neutral,nothing special,nothing special,"['nothing', 'special']",nothing special
59996,59996,Avoid this one! It is a terrible movie. So wha...,negative,avoid this one it is a terrible movie so what ...,avoid one terrible movie exciting pointless mu...,"['avoid', 'one', 'terrible', 'movie', 'excitin...",avoid one terrible movie excite pointless murd...
59997,59997,This production was quite a surprise for me. I...,positive,this production was quite a surprise for me i ...,production quite surprise absolutely love obsc...,"['production', 'quite', 'surprise', 'absolutel...",production quite surprise absolutely love obsc...
59998,59998,This is a decent movie. Although little bit sh...,positive,this is a decent movie although little bit sho...,decent movie although little bit short time pa...,"['decent', 'movie', 'although', 'little', 'bit...",decent movie although little bit short time pa...


### 3. Text to Numerical Features

In [3]:
# Upper bound on the TF-IDF vocabulary size.
MAX_FEATURES = 20000

# Build the vectorizer, then learn the vocabulary/IDF weights from X and
# encode X as a sparse TF-IDF matrix in a single pass.
vectorizer = TfidfVectorizer(max_features=MAX_FEATURES)
X_tfidf = vectorizer.fit_transform(X)


### 4. Train-Test Split

In [4]:
# Split the raw texts (80% train / 20% test, fixed seed) BEFORE computing
# TF-IDF features. Fitting the vectorizer on the whole corpus would let the
# vocabulary and IDF weights see the test reviews (data leakage) and make the
# evaluation optimistic.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Re-fit the vectorizer on the training texts only; the test texts are only
# transformed with the vocabulary/IDF learned from the training split.
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

# Re-encode the full corpus so X_tfidf stays consistent with the re-fitted
# vectorizer (its columns would otherwise refer to the old vocabulary).
X_tfidf = vectorizer.transform(X)

### 5. Train SVM

In [5]:
# Number of full passes over the training data.
N_EPOCHS = 10

# Linear SVM trained with SGD: hinge loss + L2 penalty.
# Epochs are driven by the loop below, because every `partial_fit` call
# performs exactly one pass over the data it is given; `max_iter` only
# affects `fit`, so it is not what limits training here.
svm = SGDClassifier(
    loss="hinge",
    penalty="l2",
    max_iter=1,
    tol=None,
    random_state=42
)

# `partial_fit` needs the complete label set, since a single call is not
# guaranteed to see every class.
classes = np.unique(y_train)

for epoch in tqdm(range(N_EPOCHS), desc="Training Progress"):
    # Reshuffle with an epoch-dependent seed so each pass visits the samples
    # in a different, but reproducible, order.
    X_shuff, y_shuff = shuffle(X_train, y_train, random_state=epoch)
    svm.partial_fit(X_shuff, y_shuff, classes=classes)


Training Progress:   0%|          | 0/10 [00:00<?, ?it/s]

Training Progress: 100%|██████████| 10/10 [00:02<00:00,  3.49it/s]


### 6. Evaluate

In [None]:
# Predict labels for the held-out split.
y_pred = svm.predict(X_test)

# Overall fraction of correctly classified test samples.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

# Per-class precision / recall / F1 summary.
report_text = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report_text)


### 7. Test

In [None]:
# Hand-written sanity-check sentences; the comment above each one is the
# sentiment a human reader would expect.
# NOTE(review): the model is trained on the 'lemmatized' column, while these
# sentences are raw text — confirm whether the same preprocessing should be
# applied before vectorizing.
test_sentences = [
    # positive
    "This phone is amazing, I love the battery life!",
    # negative
    "Worst customer service I have ever experienced.",
    # neutral
    "The product is okay, nothing special.",
    # positive
    "Absolutely wonderful, exceeded my expectations!",
    # negative
    "Completely useless, broke after one use.",
]

# Encode the sentences with the already-fitted vectorizer (transform only,
# no re-fitting) and classify them.
X_test_tfidf = vectorizer.transform(test_sentences)
predictions = svm.predict(X_test_tfidf)

# Report one line per sentence.
for sent, pred in zip(test_sentences, predictions):
    print(
        f"Text: {sent} --> Predicted sentiment: {pred}"
    )
