In [3]:
%%time
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
from scipy.sparse import hstack

CPU times: total: 0 ns
Wall time: 0 ns


In [4]:
df = pd.read_csv("Sentiment_Class.csv", encoding='utf-8')

df = df.dropna(subset=['Clean_data', 'Label'])

X = df['Clean_data']
y = df['Label']

In [5]:
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [6]:
vectorizer = TfidfVectorizer(
    max_df=0.9,  
    min_df=2,   
    ngram_range=(1, 3),  
    max_features=5000  
)

In [8]:
X_vectorized = vectorizer.fit_transform(X)

compound_scores = np.array(df['Compound_Score']).reshape(-1, 1)

X_combined = hstack([X_vectorized, compound_scores])

X_train_combined, X_test_combined, y_train, y_test = train_test_split(X_combined, y_encoded, test_size=0.2, random_state=42)

param_grid = {
    'C': [1],
    'gamma': [0.1],
    'kernel': ['linear']  
}

grid_search = GridSearchCV(SVC(), param_grid, refit=True, verbose=1, cv=15)

grid_search.fit(X_train_combined, y_train)

print("Best Parameters found by Grid Search:", grid_search.best_params_)

Fitting 15 folds for each of 1 candidates, totalling 15 fits
Best Parameters found by Grid Search: {'C': 1, 'gamma': 0.1, 'kernel': 'linear'}


In [9]:
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test_combined)

In [10]:
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=label_encoder.classes_)

print(f'Accuracy (with tuning): {accuracy:.2f}')
print('Classification Report:')
print(report)

Accuracy (with tuning): 0.85
Classification Report:
              precision    recall  f1-score   support

    Negative       0.85      0.96      0.90       198
     Neutral       0.70      0.33      0.45        42
    Positive       0.89      0.82      0.85        57

    accuracy                           0.85       297
   macro avg       0.81      0.71      0.74       297
weighted avg       0.83      0.85      0.83       297

