In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import joblib

In [13]:
# Read the preprocessed reviews CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer="Preprocess_Reviews.csv")

In [14]:
# Discard every row that has a missing value in any column
df = df.dropna(axis=0, how='any')

In [15]:
# For each sentiment label keep at most the first 20000 matching rows,
# in the order they appear in df (no shuffling or random sampling).
df_Positive = df.loc[df['sentiment'].eq('positive')].iloc[:20000]
df_Neutral = df.loc[df['sentiment'].eq('neutral')].iloc[:20000]
df_Negative = df.loc[df['sentiment'].eq('negative')].iloc[:20000]

In [16]:
# Stack the three per-class subsets row-wise (positive, then neutral, then
# negative); the rows keep their original index labels.
df = pd.concat(objs=[df_Positive, df_Neutral, df_Negative], axis='index')

In [17]:
# Turn each review into a TF-IDF feature vector, ignoring English stop words.
# NOTE(review): the vectorizer is fitted on every row of df, i.e. before the
# train/test split performed later, so its vocabulary and IDF weights also
# reflect the held-out reviews — consider fitting on the training portion only.
tfidf = TfidfVectorizer(stop_words='english')
X = tfidf.fit_transform(raw_documents=df['Reviews'])

In [18]:
# Encode the sentiment strings as integer class ids:
# negative -> 0, neutral -> 1, positive -> 2
sentiment_to_id = {'negative': 0, 'neutral': 1, 'positive': 2}
y = df['sentiment'].map(sentiment_to_id)

In [19]:
# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [20]:
# Train a support vector classifier with scikit-learn's default settings.
# fit() returns the estimator itself, so construction and training are chained.
svm_model = SVC().fit(X_train, y_train)

In [21]:
# Persist the trained SVM to disk with joblib.
joblib_file = "svm_model.joblib"
joblib.dump(svm_model, joblib_file)
# The SVM was trained on vectors produced by the fitted `tfidf` vectorizer, so
# the vectorizer is stored alongside it; without it the saved model cannot be
# applied to raw review text in a fresh session.
joblib.dump(tfidf, "tfidf_vectorizer.joblib")
print(f"Model saved to {joblib_file}")

Model saved to svm_model.joblib


In [22]:
# Restore the persisted classifier from the path it was just written to
loaded_model = joblib.load(filename=joblib_file)
print("Model loaded successfully")

Model loaded successfully


In [23]:
# Classify the held-out test vectors using the model restored from disk
y_pred = loaded_model.predict(X=X_test)

In [24]:
# Report overall accuracy and the per-class precision / recall / F1 table.
# The display names are listed in the order of the integer labels 0, 1, 2.
class_names = ['negative', 'neutral', 'positive']
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=class_names))

Accuracy: 0.8820833333333333
Classification Report:
              precision    recall  f1-score   support

    negative       0.90      0.89      0.89      4023
     neutral       0.84      0.85      0.84      4014
    positive       0.92      0.91      0.91      3963

    accuracy                           0.88     12000
   macro avg       0.88      0.88      0.88     12000
weighted avg       0.88      0.88      0.88     12000



### Hyperparameter tuning

In [10]:
# SVM hyperparameter search space.
# Expressed as a list of grids because `gamma` is only used by the 'rbf' and
# 'poly' kernels here: crossing it with the 'linear' kernel would just refit
# identical linear models once per gamma value and lengthen an already slow search.
param_grid = [
    {
        'kernel': ['linear'],
        'C': [0.1, 1, 10, 100],  # Regularization parameter
    },
    {
        'kernel': ['rbf', 'poly'],
        'C': [0.1, 1, 10, 100],  # Regularization parameter
        'gamma': ['scale', 'auto'],  # Kernel coefficient
    },
]

In [11]:
# Evaluate every combination in param_grid with 5-fold cross-validation,
# scoring by accuracy and running on all available CPU cores (n_jobs=-1).
grid_search = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)


KeyboardInterrupt



In [None]:
# Show the hyperparameter combination the grid search selected
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

In [None]:
# Best estimator: the model the fitted grid search exposes as `best_estimator_`.
# It is the object written to disk in the next cell.
best_model = grid_search.best_estimator_

In [None]:
# Persist the tuned SVM to disk with joblib.
joblib_file = "svm_model_tune.joblib"
joblib.dump(best_model, joblib_file)
# The tuned SVM was trained on vectors produced by the fitted `tfidf`
# vectorizer, so the vectorizer is stored alongside it; without it the saved
# model cannot be applied to raw review text in a fresh session.
joblib.dump(tfidf, "tfidf_vectorizer.joblib")
print(f"Model saved to {joblib_file}")

In [None]:
# Restore the tuned classifier from the path it was just written to
loaded_model = joblib.load(filename=joblib_file)
print("Model loaded successfully")

In [None]:
# Classify the held-out test vectors using the tuned model restored from disk
y_pred = loaded_model.predict(X=X_test)

In [None]:
# Report overall accuracy and the per-class precision / recall / F1 table.
# The display names are listed in the order of the integer labels 0, 1, 2.
class_names = ['negative', 'neutral', 'positive']
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=class_names))