In [1]:
import pandas as pd
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from keras.models import load_model
import joblib

In [2]:
# Load the five serialized base estimators (presumably trained elsewhere —
# their training is not shown in this notebook).
# NOTE: joblib.load unpickles the files, which can execute arbitrary code, so
# only load .pkl files that come from a trusted source.
LR_model, LSVC_model, RF_model, GB_model, MLP_model = map(
    joblib.load,
    (
        'LR_model.pkl',
        'LSVC_model.pkl',
        'RF_model.pkl',
        'GB_model.pkl',
        'MLP_model.pkl',
    ),
)

In [3]:
# Assemble the ensemble from the five loaded estimators.
# voting='hard' -> majority vote over predicted class labels;
# voting='soft' would instead average predicted class probabilities.
# NOTE: per the scikit-learn docs, VotingClassifier.fit() trains *clones* of
# the estimators passed here, so any weights stored in the loaded objects are
# not reused — only their configuration is.
voting_classifier = VotingClassifier(
    voting='hard',
    estimators=list(zip(
        ['model_1', 'model_2', 'model_3', 'model_4', 'model_5'],
        [LR_model, LSVC_model, RF_model, GB_model, MLP_model],
    )),
)

In [4]:
# Read the dataset into a DataFrame; the path is relative to the notebook's
# working directory. Later cells use its 'text' and 'result' columns.
all_texts = pd.read_csv(filepath_or_buffer="all_texts.csv")

In [5]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    all_texts['text'],    # model input
    all_texts['result'],  # target labels
    test_size=0.3,
    random_state=42,
)

In [6]:
# Turn the raw text into TF-IDF feature matrices, capping the vocabulary at
# 5000 terms. The vectorizer is fitted on the training split only and then
# applied unchanged to the test split, so no test-set statistics enter the
# features.
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
train_texts_tfidf = tfidf_vectorizer.fit_transform(raw_documents=train_texts)
test_texts_tfidf = tfidf_vectorizer.transform(raw_documents=test_texts)

In [7]:
# Train the ensemble on the TF-IDF training features and their labels.
# The call is the cell's last expression, so the notebook displays the
# fitted estimator it returns.
voting_classifier.fit(X=train_texts_tfidf, y=train_labels)



In [8]:
# Score the held-out split: predict labels with the ensemble, then compute the
# per-class precision/recall/F1 report and the overall accuracy.
voting_predictions = voting_classifier.predict(X=test_texts_tfidf)
voting_report = classification_report(y_true=test_labels, y_pred=voting_predictions)
voting_accuracy = accuracy_score(y_true=test_labels, y_pred=voting_predictions)

print(f"Voting Classifier Accuracy: {voting_accuracy}")
print("Voting Classifier Classification Report:\n", voting_report)

Voting Classifier Accuracy: 0.9383888888888889
Voting Classifier Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.94      0.94     44901
           1       0.94      0.94      0.94     45099

    accuracy                           0.94     90000
   macro avg       0.94      0.94      0.94     90000
weighted avg       0.94      0.94      0.94     90000



In [9]:
# Persist the fitted TF-IDF vectorizer alongside the ensemble. The classifier
# was trained on features produced by this exact vectorizer (its vocabulary
# and IDF weights), so it cannot score raw text later without it.
joblib.dump(tfidf_vectorizer, 'tfidf_vectorizer2.pkl')

# Save the trained classifier to a file. Kept as the cell's last expression so
# the notebook still displays the list of files written for the model.
joblib.dump(voting_classifier, 'voting_model2.pkl')

['voting_model2.pkl']