In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the filtered product reviews dataset.
# Rows missing the review text or the label are dropped up front:
# TfidfVectorizer rejects NaN documents, and an unlabeled row cannot be
# used for training or scoring.
df = pd.read_csv('product_reviews.csv').dropna(subset=['comment', 'sentiment'])

comments = df['comment']
y = df['sentiment']

# Split the RAW text before any fitting so the held-out reviews stay unseen.
comments_train, comments_test, y_train, y_test = train_test_split(
    comments, y, test_size=0.2, random_state=42
)

# Learn the vocabulary and IDF weights from the training reviews only.
# Fitting on the full dataset would leak test-set statistics into the
# features and make the evaluation below look better than it is.
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train = vectorizer.fit_transform(comments_train)
X_test = vectorizer.transform(comments_test)

# Full feature matrix, kept under its original name for any later cell that
# expects `X`; it is produced with the training-fitted vectorizer.
X = vectorizer.transform(comments)


In [None]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

# Build each base learner and fit it on the training split in one step.
# Both use random_state=42 so repeated runs give the same models.

# Support vector classifier, created with probability estimates enabled
svm_model = SVC(probability=True, random_state=42).fit(X_train, y_train)

# Random forest classifier
rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import accuracy_score, classification_report

# Base learners for the stack, given as (name, estimator) pairs
base_models = [('svm', svm_model), ('rf', rf_model)]

# Logistic regression serves as the final estimator on top of the base learners
meta_classifier = LogisticRegression()

# Assemble the stacked ensemble (5-fold cross-validation) and fit it on the
# training split
stacking_model = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_classifier,
    cv=5,
).fit(X_train, y_train)

# Predict labels for the held-out split
y_pred = stacking_model.predict(X_test)

# Report overall accuracy followed by the per-class metrics
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


In [None]:
import joblib

# Output paths for the two artifacts; the vectorizer is saved alongside the
# model because new text must go through it before prediction.
model_filename = 'stacking_model.pkl'
vectorizer_filename = 'tfidf_vectorizer.pkl'

# Write the stacking model first, then the TF-IDF vectorizer
joblib.dump(stacking_model, model_filename)
joblib.dump(vectorizer, vectorizer_filename)

# Confirm where each artifact was written
print(f"Model saved to {model_filename}")
print(f"Vectorizer saved to {vectorizer_filename}")
