In [1]:
!pip install pandas numpy scikit-learn nltk spacy tensorflow torch transformers fastapi uvicorn
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.8.0
  Using cached https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[38;5;2m[+] Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [2]:
import pandas as pd
import numpy as np
import re
import nltk
import spacy
import tensorflow as tf
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, GRU, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from transformers import BertTokenizer, TFBertForSequenceClassification

# Read the IMDB reviews CSV from disk (decoded as ISO-8859-1).
DATA_PATH = "C:\\Users\\hp\\Desktop\\121ME0017\\IMDB Dataset.csv"
df = pd.read_csv(DATA_PATH, encoding="ISO-8859-1")

# Fetch the NLTK corpora, then create the spaCy pipeline and the WordNet lemmatizer.
for nltk_resource in ('stopwords', 'wordnet'):
    nltk.download(nltk_resource)
nlp = spacy.load("en_core_web_sm")
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    """Clean one raw review and return its lemmatised, stop-word-free form.

    The text is stripped of HTML tags, lower-cased, reduced to the letters
    a-z (everything else becomes a space), and passed through the module-level
    spaCy pipeline ``nlp``. The lemma of every token that is neither a stop
    word nor pure whitespace is kept.

    Args:
        text: Raw review text as a ``str``.

    Returns:
        str: The kept lemmas joined by single spaces.
    """
    # Replace tags with a space rather than '' so that words separated only by a
    # tag (e.g. "great<br />movie") are not fused into a single token.
    text = re.sub(r'<.*?>', ' ', text)
    text = re.sub(r'[^a-zA-Z]', ' ', text.lower())
    # The substitutions above leave runs of spaces; collapse them so spaCy does
    # not produce whitespace tokens for them.
    text = ' '.join(text.split())
    doc = nlp(text)
    # Whitespace tokens are not stop words, so they must be filtered explicitly.
    words = [
        token.lemma_
        for token in doc
        if not (token.is_stop or token.is_space)
    ]
    return ' '.join(words)

# Clean every review and encode the sentiment labels as integers.
# Note: both columns of `df` are overwritten in place.
SENTIMENT_TO_INT = {'negative': 0, 'positive': 1}
df['review'] = df['review'].apply(preprocess_text)
df['sentiment'] = df['sentiment'].map(SENTIMENT_TO_INT)

# Hold out 20% of the rows for testing; the seed fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    df['review'],
    df['sentiment'],
    test_size=0.2,
    random_state=42,
)

# Bag-of-words features: the vocabulary (capped at 7000 terms) and IDF weights
# are learned from the training reviews only, then applied to the test reviews.
TFIDF_MAX_FEATURES = 7000
vectorizer = TfidfVectorizer(max_features=TFIDF_MAX_FEATURES)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Linear-kernel support vector classifier trained on the TF-IDF features.
svm_model = SVC(C=1.0, kernel='linear', random_state=42)
svm_model.fit(X_train_tfidf, y_train)

# Score the classifier on the held-out reviews.
y_pred_svm = svm_model.predict(X_test_tfidf)
svm_accuracy = accuracy_score(y_test, y_pred_svm)
print(f"SVM Accuracy: {svm_accuracy:.2f}")

# Random forest with 200 trees trained on the same TF-IDF features.
rf_model = RandomForestClassifier(random_state=42, n_estimators=200)
rf_model.fit(X_train_tfidf, y_train)

# Score the forest on the held-out reviews.
y_pred_rf = rf_model.predict(X_test_tfidf)
rf_accuracy = accuracy_score(y_test, y_pred_rf)
print(f"Random Forest Accuracy: {rf_accuracy:.2f}")

# Integer-encode the reviews for the recurrent models.
# The word index is fitted on the training reviews only.
tokenizer = Tokenizer(num_words=7000)
tokenizer.fit_on_texts(X_train)

# Training split: words -> integer ids -> sequences of exactly 200 ids.
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_train_pad = pad_sequences(X_train_seq, maxlen=200)

# Test split: encoded with the same fitted tokenizer and the same length.
X_test_seq = tokenizer.texts_to_sequences(X_test)
X_test_pad = pad_sequences(X_test_seq, maxlen=200)

def build_lstm_model(vocab_size=7000, embedding_dim=128, sequence_length=200,
                     units=128, dropout=0.3, recurrent_dropout=0.3):
    """Build and compile a single-layer LSTM binary classifier.

    Architecture: Input -> Embedding -> LSTM -> Dense(1, sigmoid), compiled with
    binary cross-entropy loss, the Adam optimizer and an accuracy metric.

    Keras 3 deprecates the ``input_length`` argument of ``Embedding``, so the
    fixed sequence length is declared with an explicit ``Input`` instead.

    Args:
        vocab_size: Size of the embedding vocabulary; should match the
            ``num_words`` used by the tokenizer (default 7000).
        embedding_dim: Dimension of each embedding vector (default 128).
        sequence_length: Length of the padded input sequences; should match
            the ``maxlen`` used for padding (default 200).
        units: Number of LSTM units (default 128).
        dropout: Dropout rate passed to the LSTM layer (default 0.3).
        recurrent_dropout: Recurrent dropout rate passed to the LSTM layer
            (default 0.3).

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    model = Sequential([
        tf.keras.Input(shape=(sequence_length,)),
        Embedding(vocab_size, embedding_dim),
        LSTM(units, dropout=dropout, recurrent_dropout=recurrent_dropout),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Train the LSTM for four epochs. The test split doubles as the validation
# data, so the per-epoch val_* figures are computed on the same rows that are
# evaluated afterwards.
lstm_model = build_lstm_model()
lstm_model.fit(
    X_train_pad,
    y_train,
    epochs=4,
    batch_size=64,
    validation_data=(X_test_pad, y_test),
    verbose=2,
)

# evaluate() returns [loss, accuracy]; report the accuracy.
lstm_scores = lstm_model.evaluate(X_test_pad, y_test)
lstm_accuracy = lstm_scores[1]
print(f"LSTM Accuracy: {lstm_accuracy:.2f}")

def build_gru_model(vocab_size=7000, embedding_dim=128, sequence_length=200,
                    units=128, dropout=0.3, recurrent_dropout=0.3):
    """Build and compile a single-layer GRU binary classifier.

    Architecture: Input -> Embedding -> GRU -> Dense(1, sigmoid), compiled with
    binary cross-entropy loss, the Adam optimizer and an accuracy metric.

    Keras 3 deprecates the ``input_length`` argument of ``Embedding``, so the
    fixed sequence length is declared with an explicit ``Input`` instead.

    Args:
        vocab_size: Size of the embedding vocabulary; should match the
            ``num_words`` used by the tokenizer (default 7000).
        embedding_dim: Dimension of each embedding vector (default 128).
        sequence_length: Length of the padded input sequences; should match
            the ``maxlen`` used for padding (default 200).
        units: Number of GRU units (default 128).
        dropout: Dropout rate passed to the GRU layer (default 0.3).
        recurrent_dropout: Recurrent dropout rate passed to the GRU layer
            (default 0.3).

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    model = Sequential([
        tf.keras.Input(shape=(sequence_length,)),
        Embedding(vocab_size, embedding_dim),
        GRU(units, dropout=dropout, recurrent_dropout=recurrent_dropout),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Train the GRU for four epochs. The test split doubles as the validation
# data, so the per-epoch val_* figures are computed on the same rows that are
# evaluated afterwards.
gru_model = build_gru_model()
gru_model.fit(
    X_train_pad,
    y_train,
    epochs=4,
    batch_size=64,
    validation_data=(X_test_pad, y_test),
    verbose=2,
)

# evaluate() returns [loss, accuracy]; report the accuracy.
gru_scores = gru_model.evaluate(X_test_pad, y_test)
gru_accuracy = gru_scores[1]
print(f"GRU Accuracy: {gru_accuracy:.2f}")







[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


SVM Accuracy: 0.89
Random Forest Accuracy: 0.85
Epoch 1/4




625/625 - 627s - 1s/step - accuracy: 0.8267 - loss: 0.3985 - val_accuracy: 0.8642 - val_loss: 0.3267
Epoch 2/4
625/625 - 610s - 977ms/step - accuracy: 0.8827 - loss: 0.2940 - val_accuracy: 0.8609 - val_loss: 0.3230
Epoch 3/4
625/625 - 653s - 1s/step - accuracy: 0.9020 - loss: 0.2538 - val_accuracy: 0.8715 - val_loss: 0.3090
Epoch 4/4
625/625 - 677s - 1s/step - accuracy: 0.9137 - loss: 0.2222 - val_accuracy: 0.8726 - val_loss: 0.3219
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 48ms/step - accuracy: 0.8670 - loss: 0.3281
LSTM Accuracy: 0.87
Epoch 1/4
625/625 - 199s - 319ms/step - accuracy: 0.8080 - loss: 0.4256 - val_accuracy: 0.8719 - val_loss: 0.3125
Epoch 2/4
625/625 - 214s - 343ms/step - accuracy: 0.8815 - loss: 0.2958 - val_accuracy: 0.8822 - val_loss: 0.2818
Epoch 3/4
625/625 - 216s - 345ms/step - accuracy: 0.9112 - loss: 0.2289 - val_accuracy: 0.8822 - val_loss: 0.2860
Epoch 4/4
625/625 - 215s - 345ms/step - accuracy: 0.9307 - loss: 0.1861 - val_accuracy: 0.

In [3]:
!pip uninstall keras -y
!pip install keras==2.15.0

Found existing installation: keras 3.8.0
Uninstalling keras-3.8.0:
  Successfully uninstalled keras-3.8.0
Collecting keras==2.15.0
  Using cached keras-2.15.0-py3-none-any.whl.metadata (2.4 kB)
Using cached keras-2.15.0-py3-none-any.whl (1.7 MB)
Installing collected packages: keras
Successfully installed keras-2.15.0


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-intel 2.18.0 requires keras>=3.5.0, but you have keras 2.15.0 which is incompatible.
