In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [2]:
data = pd.read_csv('/content/IMDB Dataset.csv')

In [3]:
data.isnull().sum()

Unnamed: 0,0
review,0
sentiment,0


#**TF IDF**

In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(data['review'])
y = data['sentiment']


#**Scaling**

In [8]:
from sklearn.preprocessing import StandardScaler

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert sparse matrix to dense array for StandardScaler
X_train_dense = X_train.toarray()
X_test_dense = X_test.toarray()

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_dense)
X_test_scaled = scaler.transform(X_test_dense)


#**Logistic Regression**

In [10]:
model = LogisticRegression(solver='liblinear', max_iter=1000)
model.fit(X_train_scaled, y_train)

y_pred = model.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")


Accuracy: 0.8691


#**Lasso, Ridge and ElasticNet**

In [12]:
from sklearn.linear_model import Lasso, Ridge, ElasticNet

y_train = y_train.map({'negative': 0, 'positive': 1})
y_test = y_test.map({'negative': 0, 'positive': 1})

# Lasso Regression
lasso_model = Lasso(alpha=0.1)
lasso_model.fit(X_train_scaled, y_train)
y_pred_lasso = lasso_model.predict(X_test_scaled)

# Convert predictions to binary (0 or 1) for accuracy calculation
y_pred_lasso_binary = [1 if pred > 0.5 else 0 for pred in y_pred_lasso]

accuracy_lasso = accuracy_score(y_test, y_pred_lasso_binary)
print(f"Lasso Accuracy: {accuracy_lasso}")


# Ridge Regression
ridge_model = Ridge(alpha=0.1)
ridge_model.fit(X_train_scaled, y_train)
y_pred_ridge = ridge_model.predict(X_test_scaled)

y_pred_ridge_binary = [1 if pred > 0.5 else 0 for pred in y_pred_ridge]

accuracy_ridge = accuracy_score(y_test, y_pred_ridge_binary)
print(f"Ridge Accuracy: {accuracy_ridge}")


# ElasticNet Regression
elasticnet_model = ElasticNet(alpha=0.1, l1_ratio=0.5)
elasticnet_model.fit(X_train_scaled, y_train)
y_pred_elasticnet = elasticnet_model.predict(X_test_scaled)

y_pred_elasticnet_binary = [1 if pred > 0.5 else 0 for pred in y_pred_elasticnet]

accuracy_elasticnet = accuracy_score(y_test, y_pred_elasticnet_binary)
print(f"ElasticNet Accuracy: {accuracy_elasticnet}")


Lasso Accuracy: 0.6548
Ridge Accuracy: 0.8767
ElasticNet Accuracy: 0.7533


#**Random Forest**

In [15]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)
y_pred_rf = rf_model.predict(X_test_scaled)

accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f"Random Forest Accuracy: {accuracy_rf}")


Random Forest Accuracy: 0.8459


#**Decision Tree**

In [16]:
from sklearn.tree import DecisionTreeClassifier

dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train_scaled, y_train)
y_pred_dt = dt_model.predict(X_test_scaled)

accuracy_dt = accuracy_score(y_test, y_pred_dt)
print(f"Decision Tree Accuracy: {accuracy_dt}")


Decision Tree Accuracy: 0.7135
