In [1]:
pip install keras-tuner --upgrade

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from textblob.classifiers import NaiveBayesClassifier
from sklearn.metrics import accuracy_score
import nltk
nltk.download('punkt')
import keras_tuner as kt
from tensorflow.keras.optimizers import Adam


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [3]:
df = pd.read_csv('/content/amazon_alexa.tsv', sep='\t')

#PRE-PROCESSING

In [4]:
df['verified_reviews'] = df['verified_reviews'].astype(str).fillna('')

# Tokenization and padding
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(df['verified_reviews'])
X_seq = tokenizer.texts_to_sequences(df['verified_reviews'])
X_pad = pad_sequences(X_seq, maxlen=200)

# Label encoding for target
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(df['feedback'])

In [5]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_pad, y_encoded, test_size=0.2, random_state=42)

#LSTM MODEL

In [6]:
model = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=200),
    LSTM(units=128, return_sequences=True),
    Dropout(0.3),
    LSTM(units=64),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=32)

# Evaluate the model
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print(classification_report(y_test, y_pred))




Epoch 1/5
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 750ms/step - accuracy: 0.9216 - loss: 0.3635 - val_accuracy: 0.9079 - val_loss: 0.3211
Epoch 2/5
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 617ms/step - accuracy: 0.9263 - loss: 0.2653 - val_accuracy: 0.9079 - val_loss: 0.3062
Epoch 3/5
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 558ms/step - accuracy: 0.9184 - loss: 0.2865 - val_accuracy: 0.9079 - val_loss: 0.2921
Epoch 4/5
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 562ms/step - accuracy: 0.9216 - loss: 0.2435 - val_accuracy: 0.9079 - val_loss: 0.2090
Epoch 5/5
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 564ms/step - accuracy: 0.9383 - loss: 0.1333 - val_accuracy: 0.9222 - val_loss: 0.1966
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 149ms/step
              precision    recall  f1-score   support

           0       0.66      0.33      0.44        58
      

#HYPERPARAMETER TUINING

In [7]:
def build_model(hp):
    model = Sequential()

    # Embedding Layer
    model.add(Embedding(input_dim=5000, output_dim=128, input_length=200))

    # First LSTM Layer
    units1 = hp.Int('units1', min_value=32, max_value=128, step=16)
    model.add(LSTM(units=units1, return_sequences=True))

    # Dropout Layer
    dropout_rate = hp.Float('dropout_rate', min_value=0.2, max_value=0.5, step=0.1)
    model.add(Dropout(dropout_rate))

    # Second LSTM Layer
    units2 = hp.Int('units2', min_value=32, max_value=128, step=16)
    model.add(LSTM(units=units2))

    # Dense Layer
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))

    # Output Layer
    model.add(Dense(1, activation='sigmoid'))

    # Compile the model with a tunable learning rate
    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model


In [10]:
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',  # Maximize validation accuracy
    max_trials=10,  # Try 10 different sets of hyperparameters
    executions_per_trial=1,  # Number of models to train per trial
    directory='tuning_dir',  # Directory to save logs
    project_name='lstm_tuning'  # Name of the tuning project
)


In [11]:
tuner.search(X_train, y_train,
             validation_data=(X_test, y_test),
             epochs=5,
             batch_size=32)


Trial 10 Complete [00h 02m 51s]
val_accuracy: 0.9396825432777405

Best val_accuracy So Far: 0.9396825432777405
Total elapsed time: 00h 34m 11s


In [12]:
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Best Units1: {best_hps.get('units1')}")
print(f"Best Units2: {best_hps.get('units2')}")
print(f"Best Dropout Rate: {best_hps.get('dropout_rate')}")
print(f"Best Learning Rate: {best_hps.get('learning_rate')}")

# Build the best model
best_model = tuner.hypermodel.build(best_hps)

# Train the best model
history_best = best_model.fit(X_train, y_train,
                              validation_data=(X_test, y_test),
                              epochs=5,
                              batch_size=32)


Best Units1: 32
Best Units2: 64
Best Dropout Rate: 0.2
Best Learning Rate: 0.004737198185425148
Epoch 1/5
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 253ms/step - accuracy: 0.8818 - loss: 0.3339 - val_accuracy: 0.9143 - val_loss: 0.2614
Epoch 2/5
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 229ms/step - accuracy: 0.9564 - loss: 0.1318 - val_accuracy: 0.9206 - val_loss: 0.2112
Epoch 3/5
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 229ms/step - accuracy: 0.9725 - loss: 0.0708 - val_accuracy: 0.9302 - val_loss: 0.1692
Epoch 4/5
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 226ms/step - accuracy: 0.9902 - loss: 0.0298 - val_accuracy: 0.9317 - val_loss: 0.3231
Epoch 5/5
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 232ms/step - accuracy: 0.9909 - loss: 0.0264 - val_accuracy: 0.9222 - val_loss: 0.2507


In [13]:
y_pred_best = (best_model.predict(X_test) > 0.5).astype("int32")
print("After tuning with Keras Tuner:")
print(classification_report(y_test, y_pred_best))

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 102ms/step
After tuning with Keras Tuner:
              precision    recall  f1-score   support

           0       0.60      0.48      0.53        58
           1       0.95      0.97      0.96       572

    accuracy                           0.92       630
   macro avg       0.77      0.72      0.75       630
weighted avg       0.92      0.92      0.92       630



#TEXTBOB

In [14]:
# TextBlob requires text in a specific format
train_data = [(review, 'pos' if label == 1 else 'neg') for review, label in zip(df['verified_reviews'], df['feedback'])]
train_data, test_data = train_test_split(train_data, test_size=0.2, random_state=42)

# Train TextBlob classifier
textblob_classifier = NaiveBayesClassifier(train_data)

# Test the TextBlob classifier
textblob_pred = [textblob_classifier.classify(text) for text, label in test_data]
true_labels = [label for text, label in test_data]

# Convert 'pos'/'neg' back to 1/0 for comparison
y_test_blob = [1 if label == 'pos' else 0 for label in true_labels]
y_pred_blob = [1 if label == 'pos' else 0 for label in textblob_pred]

# Accuracy comparison
print("TextBlob Accuracy:", accuracy_score(y_test_blob, y_pred_blob))
print("LSTM Accuracy:", accuracy_score(y_test, y_pred))

TextBlob Accuracy: 0.8809523809523809
LSTM Accuracy: 0.9222222222222223


#USER-DEFINED REVIEW


In [19]:
import nltk
from textblob import TextBlob

# Download necessary resources for TextBlob
nltk.download('punkt')

# Function to classify a user-defined review using TextBlob
def classify_review_textblob(review):
    blob = TextBlob(review)
    sentiment = blob.sentiment.polarity
    if sentiment > 0:
        return "Positive"
    elif sentiment == 0:
        return "Neutral"
    else:
        return "Negative"

# Prompt the user for a review
user_review = input("Please enter a review: ")

# Classify using TextBlob
textblob_result = classify_review_textblob(user_review)
print(f"TextBlob classification result: {textblob_result}")

# Assuming you have trained your LSTM model and tokenizer
# Preprocess the input review for the LSTM model
def preprocess_review_lstm(review, tokenizer, max_len=200):
    review_seq = tokenizer.texts_to_sequences([review])
    review_pad = pad_sequences(review_seq, maxlen=max_len)
    return review_pad

# Classify the input review using your LSTM model
preprocessed_review = preprocess_review_lstm(user_review, tokenizer)
lstm_prediction = model.predict(preprocessed_review)

# Assuming your LSTM model outputs probabilities (softmax)
if lstm_prediction >= 0.5:
    lstm_result = "Positive"
else:
    lstm_result = "Negative"

print(f"LSTM classification result: {lstm_result}")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Please enter a review: This product exceeded my expectations! The quality is excellent, and the customer service was outstanding.
TextBlob classification result: Positive
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 139ms/step
LSTM classification result: Positive
