In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report

# Sentiment classification of stock-related text: TF-IDF features feed a
# Multinomial Naive Bayes baseline and a grid-searched SVM, and both models are
# scored on the same held-out split.

# Settings a reader is most likely to tune, gathered in one place.
TEXT_COLUMN = 'Text'
LABEL_COLUMN = 'Sentiment'
TEST_SIZE = 0.2
RANDOM_STATE = 42
MAX_FEATURES = 5000
CV_FOLDS = 3


def report_results(title, y_true, y_pred):
    """Print the confusion matrix and classification report for one model.

    Parameters
    ----------
    title : str
        Header line printed before the metrics.
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Labels predicted by the model, aligned with ``y_true``.

    Returns
    -------
    The matrix produced by ``confusion_matrix(y_true, y_pred)`` (rows are true
    labels, columns are predicted labels), so callers can keep it for later use.
    """
    print(title)
    confusion = confusion_matrix(y_true, y_pred)
    print("Confusion Matrix:\n", confusion)
    print("Classification Report:\n", classification_report(y_true, y_pred))
    return confusion


# Load the dataset
file_path = 'stock_data.csv'
df = pd.read_csv(file_path)

# Validate the input right after loading so problems surface here with a
# readable message instead of later during vectorization or model fitting.
missing_columns = {TEXT_COLUMN, LABEL_COLUMN} - set(df.columns)
if missing_columns:
    raise KeyError(f"{file_path} is missing required column(s): {sorted(missing_columns)}")

missing_counts = df[[TEXT_COLUMN, LABEL_COLUMN]].isna().sum()
if missing_counts.any():
    raise ValueError(
        f"{file_path} has missing values: {missing_counts[missing_counts > 0].to_dict()}"
    )

# Extract features and labels
X = df[TEXT_COLUMN]
y = df[LABEL_COLUMN]

# Split the dataset into training and testing sets (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Vectorize the text data using TF-IDF. The vocabulary and IDF weights are
# learned from the training split only; the test split is merely transformed.
vectorizer = TfidfVectorizer(max_features=MAX_FEATURES)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Naive Bayes Classifier (baseline)
nb_classifier = MultinomialNB()
nb_classifier.fit(X_train_tfidf, y_train)
nb_predictions = nb_classifier.predict(X_test_tfidf)

# SVM Classifier with Grid Search over kernel type and regularisation strength C.
# NOTE(review): the TF-IDF vectorizer above is fitted on the whole training
# split before cross-validation, so every validation fold has already influenced
# the features. Wrapping vectorizer + SVC in a Pipeline would give cleaner CV
# scores; left as-is here so `grid_search` keeps operating on the TF-IDF matrices.
svm_classifier = SVC()
param_grid = {'kernel': ['linear', 'poly', 'rbf'],
              'C': [0.1, 0.5, 0.9]}
# The individual fits are independent, so run them in parallel on all cores;
# this only affects wall-clock time, not which parameters are selected.
grid_search = GridSearchCV(svm_classifier, param_grid, cv=CV_FOLDS, n_jobs=-1)
grid_search.fit(X_train_tfidf, y_train)
# Predictions come from the best estimator, refitted on the full training split.
svm_predictions = grid_search.predict(X_test_tfidf)

# Confusion Matrix and Classification Report for Naive Bayes
nb_confusion = report_results("Naive Bayes Results:", y_test, nb_predictions)

# Confusion Matrix and Classification Report for SVM
svm_confusion = report_results("\nSVM Results:", y_test, svm_predictions)

# Best parameters for SVM from the grid search
print("\nBest Parameters for SVM from Grid Search:", grid_search.best_params_)


Naive Bayes Results:
Confusion Matrix:
 [[153 274]
 [ 34 698]]
Classification Report:
               precision    recall  f1-score   support

          -1       0.82      0.36      0.50       427
           1       0.72      0.95      0.82       732

    accuracy                           0.73      1159
   macro avg       0.77      0.66      0.66      1159
weighted avg       0.75      0.73      0.70      1159


SVM Results:
Confusion Matrix:
 [[279 148]
 [ 77 655]]
Classification Report:
               precision    recall  f1-score   support

          -1       0.78      0.65      0.71       427
           1       0.82      0.89      0.85       732

    accuracy                           0.81      1159
   macro avg       0.80      0.77      0.78      1159
weighted avg       0.80      0.81      0.80      1159


Best Parameters for SVM from Grid Search: {'C': 0.9, 'kernel': 'linear'}
