### 1. Imports

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt


### 2. Load & Prepare Data

In [None]:
# Read the preprocessed dataset from disk into a DataFrame.
csv_file = '../../data/data_processed.csv'
df = pd.read_csv(csv_file)

# Model input is the 'lemmatized' column; the target is the 'sentiment' column.
X, y = df['lemmatized'], df['sentiment']

# Bare expression as the last line of the cell so the notebook renders the frame.
df

### 3. Text to Numerical Features

In [None]:
# Fit the TF-IDF vocabulary and IDF weights on the training rows only, so that
# no statistics from the held-out rows leak into the features.
# Without `stratify`, train_test_split chooses rows from the sample count and
# random_state alone, so repeating the test_size/random_state used by the split
# in section 4 selects exactly the same training rows. Keep the two in sync.
X_fit, _ = train_test_split(X, test_size=0.2, random_state=42)

# max_features caps the vocabulary at the 20,000 most frequent terms.
vectorizer = TfidfVectorizer(max_features=20000)
vectorizer.fit(X_fit)

# Transform every row with the train-fitted vectorizer; the split cell then
# slices this matrix into train and test parts.
X_tfidf = vectorizer.transform(X)


### 4. Train-Test Split

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible between runs.
split_parts = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

### 5. Train SVM

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.utils import shuffle
from tqdm import tqdm
import numpy as np

# Linear SVM trained with SGD: hinge loss plus an L2 penalty. Training is
# driven manually through partial_fit below, one call per mini-batch.
svm = SGDClassifier(
    loss="hinge",
    penalty="l2",
    max_iter=1,
    tol=None,
    random_state=42
)

# partial_fit needs the complete label set up front, because a single
# mini-batch is not guaranteed to contain every class.
classes = np.unique(y_train)

# Plain ndarray of labels so the batch slicing below is positional no matter
# what index y_train carries after the split.
y_train_values = np.asarray(y_train)

n_epochs = 10
batch_size = 2048
n_train = X_train.shape[0]
steps_per_epoch = int(np.ceil(n_train / batch_size))
total_steps = n_epochs * steps_per_epoch

# The context manager closes the progress bar even if a batch raises.
with tqdm(total=total_steps, desc="Training Progress") as progress:
    for epoch in range(n_epochs):
        # Reshuffle every epoch; seeding with the epoch number keeps the
        # visiting order reproducible across runs.
        X_shuff, y_shuff = shuffle(X_train, y_train_values, random_state=epoch)

        for start in range(0, n_train, batch_size):
            stop = start + batch_size
            X_batch = X_shuff[start:stop]
            y_batch = y_shuff[start:stop]

            svm.partial_fit(X_batch, y_batch, classes=classes)
            progress.update(1)  # one tick per mini-batch


### 6. Evaluate

In [None]:
# Predict labels for the held-out rows.
y_pred = svm.predict(X_test)

# Compute both metrics first, then print them.
test_accuracy = accuracy_score(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", class_report)


### 7. Test

In [19]:
# Hand-written sentences for a quick sanity check; the trailing comment on each
# line is the label the author expects.
test_sentences = [
    "This phone is amazing, I love the battery life!",   # expected: positive
    "Worst customer service I have ever experienced.",   # expected: negative
    "The product is okay, nothing special.",             # expected: neutral
    "Absolutely wonderful, exceeded my expectations!",   # expected: positive
    "Completely useless, broke after one use.",          # expected: negative
]

# NOTE(review): the model was trained on the 'lemmatized' column, while these
# sentences are vectorized as raw text - confirm whether the same
# preprocessing should be applied here first.
X_test_tfidf = vectorizer.transform(test_sentences)
predictions = svm.predict(X_test_tfidf)

# Build one output line per sentence, then emit them together.
report_lines = [
    f"Text: {sent} --> Predicted sentiment: {pred}"
    for sent, pred in zip(test_sentences, predictions)
]
print("\n".join(report_lines))


Text: This phone is amazing, I love the battery life! --> Predicted sentiment: positive
Text: Worst customer service I have ever experienced. --> Predicted sentiment: negative
Text: The product is okay, nothing special. --> Predicted sentiment: neutral
Text: Absolutely wonderful, exceeded my expectations! --> Predicted sentiment: positive
Text: Completely useless, broke after one use. --> Predicted sentiment: negative


### 8. Save model

In [21]:
from pathlib import Path

import joblib

# joblib.dump does not create missing directories, so make sure the target
# folder exists before writing (no-op when it is already there).
Path("../Models").mkdir(parents=True, exist_ok=True)

# Persist the classifier and the fitted vectorizer; both are needed together
# to score new text.
joblib.dump(svm, "../Models/svm_sentiment_model.pkl")
joblib.dump(vectorizer, "../Models/svm_vectorizer.pkl")



['../Models/svm_vectorizer.pkl']