In [None]:
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split, cross_val_score 
from sklearn.feature_extraction.text import TfidfVectorizer 
from sklearn.naive_bayes import MultinomialNB 
from sklearn.linear_model import LogisticRegression 
from sklearn.svm import SVC 
from sklearn.ensemble import RandomForestClassifier 
from sklearn.metrics import classification_report 


In [None]:
# Read the review CSV into a DataFrame. The path is relative to the
# notebook's working directory; later cells read the 'clean_review' and
# 'detailed_emotion' columns from this frame.
df = pd.read_csv(
    filepath_or_buffer='Automotive reviews for AI model.csv',
)

In [None]:
# Select the text feature (X) and the target label (y).
#
# Rows with a missing review or a missing label are dropped first:
#   * pandas reads empty CSV fields back as NaN, and TfidfVectorizer raises
#     a ValueError when it meets a NaN document;
#   * a row without a label cannot be used for supervised training.
# When neither column contains missing values this is a no-op.
labelled_reviews = df.dropna(subset=['clean_review', 'detailed_emotion'])
X = labelled_reviews['clean_review']
y = labelled_reviews['detailed_emotion']

In [None]:
# Fit a TF-IDF vectorizer (default settings) on every review and encode the
# corpus as a document-term matrix in a single pass.
#
# NOTE(review): the vectorizer is fitted on the full corpus before any
# train/test split or cross-validation below, so its IDF statistics include
# documents that are later held out. Consider fitting it inside a Pipeline;
# confirm whether this affects the reported scores.
vectorizer = TfidfVectorizer()
X_vectorized = vectorizer.fit_transform(raw_documents=X)

In [None]:
# One estimator per classifier family compared in this notebook.
NB_model = MultinomialNB()

# Generous iteration cap (presumably chosen to avoid solver convergence
# warnings on the TF-IDF features; the solver stops early once converged).
lr_model = LogisticRegression(max_iter=50000)

# Linear kernel, matching the SVM evaluation cell, which re-creates this
# model with kernel='linear' before it is ever fitted.
svm_model = SVC(kernel='linear')

# Random forests are stochastic; seed the forest with the same value used for
# train_test_split so its scores are reproducible across kernel restarts.
rf_model = RandomForestClassifier(random_state=42)

In [None]:
# Naive Bayes evaluation:
#   1. cross-validated accuracy on the full TF-IDF matrix;
#   2. a classification report on a fixed 80/20 hold-out split.
#
# cv=10 matches the fold count used for every other model in this notebook,
# so the mean CV accuracies are directly comparable between models.
cv_scores_NB = cross_val_score(NB_model, X_vectorized, y, cv=10, scoring='accuracy')

# Seeded split: the same random_state/test_size are used in the other
# evaluation cells, so every model is scored on the same hold-out rows.
X_train, X_test, y_train, y_test = train_test_split(
    X_vectorized, y, test_size=0.2, random_state=42
)
NB_model.fit(X_train, y_train)
y_pred_NB = NB_model.predict(X_test)

print("Naive Bayes Classification Report (Test Set):")
# zero_division=1: metrics that would divide by zero (e.g. a class that is
# never predicted) are reported as 1 instead of emitting a warning.
print(classification_report(y_test, y_pred_NB, zero_division=1))
print("\nNaive Bayes Cross-Validation Scores:", cv_scores_NB)
print("Naive Bayes Mean CV Accuracy:", cv_scores_NB.mean())

In [None]:
# Logistic Regression evaluation: 10-fold cross-validated accuracy on the
# full TF-IDF matrix, followed by a report on a seeded 80/20 hold-out split.
cv_scores_lr = cross_val_score(
    estimator=lr_model,
    X=X_vectorized,
    y=y,
    scoring='accuracy',
    cv=10,
)

X_train, X_test, y_train, y_test = train_test_split(
    X_vectorized,
    y,
    random_state=42,
    test_size=0.2,
)

# fit() returns the fitted estimator, so training and prediction can chain.
y_pred_lr = lr_model.fit(X_train, y_train).predict(X_test)

print("\nLogistic Regression Classification Report (Test Set):")
print(classification_report(y_true=y_test, y_pred=y_pred_lr, zero_division=1))
print("\nLogistic Regression Cross-Validation Scores:", cv_scores_lr)
print("Logistic Regression Mean CV Accuracy:", cv_scores_lr.mean())

In [None]:
# Random Forest evaluation: 10-fold cross-validated accuracy on the full
# TF-IDF matrix, followed by a report on a seeded 80/20 hold-out split.
cv_scores_rf = cross_val_score(
    estimator=rf_model,
    X=X_vectorized,
    y=y,
    scoring='accuracy',
    cv=10,
)

X_train, X_test, y_train, y_test = train_test_split(
    X_vectorized,
    y,
    random_state=42,
    test_size=0.2,
)

# fit() returns the fitted estimator, so training and prediction can chain.
y_pred_rf = rf_model.fit(X_train, y_train).predict(X_test)

print("\nRandom Forest Classification Report (Test Set):")
print(classification_report(y_true=y_test, y_pred=y_pred_rf, zero_division=1))
print("\nRandom Forest Cross-Validation Scores:", cv_scores_rf)
print("Random Forest Mean CV Accuracy:", cv_scores_rf.mean())

In [None]:
# SVM evaluation with a linear kernel: 10-fold cross-validated accuracy on
# the full TF-IDF matrix, followed by a report on a seeded 80/20 hold-out split.
svm_model = SVC(kernel='linear')

cv_scores_svm = cross_val_score(
    estimator=svm_model,
    X=X_vectorized,
    y=y,
    scoring='accuracy',
    cv=10,
)

X_train, X_test, y_train, y_test = train_test_split(
    X_vectorized,
    y,
    random_state=42,
    test_size=0.2,
)

# fit() returns the fitted estimator, so training and prediction can chain.
y_pred_svm = svm_model.fit(X_train, y_train).predict(X_test)

print("\nSVM Classification Report (Test Set):")
print(classification_report(y_true=y_test, y_pred=y_pred_svm, zero_division=1))
print("\nSVM Cross-Validation Scores:", cv_scores_svm)
print("SVM Mean CV Accuracy:", cv_scores_svm.mean())

In [None]:
from sklearn.ensemble import VotingClassifier

In [None]:
# Fresh, unfitted base estimators for the voting ensemble.
svm_model = SVC(kernel='linear')

# Use the same iteration cap as the stand-alone logistic regression defined
# earlier in the notebook (max_iter=50000), so the ensemble member is the same
# model configuration that was evaluated on its own and cannot stop short of
# convergence where the stand-alone model would not.
lr_model = LogisticRegression(max_iter=50000)

In [None]:
# Seeded 80/20 hold-out split of the TF-IDF matrix and labels, using the same
# test_size and random_state as the per-model evaluation cells.
X_train, X_test, y_train, y_test = train_test_split(
    X_vectorized,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Combine the linear SVM and the logistic regression into a hard-voting
# ensemble, i.e. the final class is chosen by a vote over the members'
# predicted labels.
ensemble_model = VotingClassifier(
    voting='hard',
    estimators=[
        ('svm', svm_model),
        ('lr', lr_model),
    ],
)

In [None]:
# 10-fold cross-validated accuracy of the ensemble on the full TF-IDF matrix.
cv_scores_ensemble = cross_val_score(
    estimator=ensemble_model,
    X=X_vectorized,
    y=y,
    scoring='accuracy',
    cv=10,
)

In [None]:
# Train the ensemble on the training portion of the hold-out split.
# Left as a bare expression so the cell still displays the fitted estimator.
ensemble_model.fit(X=X_train, y=y_train)

In [None]:
# Predict labels for the held-out test rows with the fitted ensemble.
y_pred_ensemble = ensemble_model.predict(X=X_test)

In [None]:
# Summarise the ensemble: per-class metrics on the hold-out split, then the
# individual and mean cross-validation accuracies.
print("\nEnsemble Classification Report (Test Set):")
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred_ensemble,
        zero_division=1,
    )
)
print("\nEnsemble Cross-Validation Scores:", cv_scores_ensemble)
print("Ensemble Mean CV Accuracy:", cv_scores_ensemble.mean())

In [None]:
import pickle

In [None]:
# Destination file for the serialized ensemble (relative to the working directory).
pickle_file_path = "ensemble_model.pkl"

In [None]:
# Serialize the fitted ensemble to disk in binary mode. The context manager
# closes the file even if pickling fails.
# Pickle files should only ever be loaded from trusted sources.
with open(pickle_file_path, 'wb') as model_file:
    pickle.dump(ensemble_model, model_file)

print("Ensemble model saved to", pickle_file_path)

In [None]:
# Persist the fitted TF-IDF vectorizer next to the model: new text must be
# transformed with this same vectorizer to land in the feature space the
# classifiers were trained on.
vectorizer_pickle_path = "vectorizer_final.pkl"

# Write the vectorizer to a pickle file (binary mode, closed automatically).
with open(vectorizer_pickle_path, 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

print("Vectorizer saved to", vectorizer_pickle_path)