In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder

In [18]:
# Load the dataset
df = pd.read_csv("Preprocess_Reviews.csv")

In [19]:
# Handle missing values
df.dropna(inplace=True)

In [20]:
# Encode the sentiment labels to numerical values
label_encoder = LabelEncoder()
df['sentiment'] = label_encoder.fit_transform(df['sentiment'])

In [21]:
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df['Reviews'], df['sentiment'], test_size=0.2, random_state=42)

In [22]:
# Convert text data into numerical feature vectors
vectorizer = TfidfVectorizer(max_features=1000)  # Adjust max_features as needed
X_train_vectors = vectorizer.fit_transform(X_train)
X_test_vectors = vectorizer.transform(X_test)

In [23]:
# Function to train, evaluate, and fine-tune models
def evaluate_model(model, param_grid=None):
    if param_grid:
        grid_search = GridSearchCV(model, param_grid, cv=5)
        grid_search.fit(X_train_vectors, y_train)
        best_model = grid_search.best_estimator_
        best_params = grid_search.best_params_
        print(f"Best Parameters: {best_params}")
    else:
        best_model = model
        best_model.fit(X_train_vectors, y_train)

    y_pred = best_model.predict(X_test_vectors)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    return best_model

In [24]:
# Logistic Regression
print("\nLogistic Regression Model")
lr_model = LogisticRegression(max_iter=1000)
lr_param_grid = {'C': [0.1, 1, 10]}
lr_best_model = evaluate_model(lr_model, lr_param_grid)


Logistic Regression Model
Best Parameters: {'C': 10}
Accuracy: 0.8436035008288623
Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.80      0.78     13358
           1       0.46      0.09      0.15      4561
           2       0.88      0.95      0.91     38182

    accuracy                           0.84     56101
   macro avg       0.70      0.61      0.61     56101
weighted avg       0.82      0.84      0.82     56101

