# BLSTM Sentiment Analysis on E-Commerce Reviews (Zepto vs Jiomart vs Blinkit)


In [18]:
## Imports

In [19]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from tensorflow.keras.preprocessing.text import Tokenizer # type: ignore
from keras.preprocessing.sequence import pad_sequences # type: ignore
from keras.models import Sequential # type: ignore
from keras.layers import Embedding, SpatialDropout1D, LSTM, Dense # type: ignore
from keras.callbacks import EarlyStopping # type: ignore
import plotly.graph_objects as go # type: ignore
from sklearn.metrics import classification_report# type: ignore
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.layers import Embedding, SpatialDropout1D, Bidirectional, LSTM, Dense # type: ignore



In [20]:
data = pd.read_csv(r'C:\Users\USER\OneDrive\Documents\Desktop\PROJECT_25\reviews.csv')
data

Unnamed: 0,rating,date,review,platform
0,2,30 December 2024,"I was using it for long time, but have to stop...",blinkit
1,1,4 November 2024,Loving the fast deliveries and mostly they are...,blinkit
2,1,31 October 2024,The customer support is very disappointing. I ...,blinkit
3,5,29 August 2024,"I've been using Blinkit for a while now, and i...",blinkit
4,2,31 December 2024,Blinkit was my go to app and it was rare that ...,blinkit
...,...,...,...,...
4615,1,27 November 2024,I have deposited ₹1300 to my zepto wallet to o...,zepto
4616,4,5 December 2024,Prices r competitive for certain items. Not fo...,zepto
4617,1,13 November 2024,Worst delivery ever. They delivered a broken p...,zepto
4618,5,17 November 2024,"I had a great experience with zepto, the quali...",zepto


In [21]:
data['rating_label'] = data['rating'].apply(lambda x: 'positive' if x >= 4 else 'neutral' if x == 3 else 'negative')
data

Unnamed: 0,rating,date,review,platform,rating_label
0,2,30 December 2024,"I was using it for long time, but have to stop...",blinkit,negative
1,1,4 November 2024,Loving the fast deliveries and mostly they are...,blinkit,negative
2,1,31 October 2024,The customer support is very disappointing. I ...,blinkit,negative
3,5,29 August 2024,"I've been using Blinkit for a while now, and i...",blinkit,positive
4,2,31 December 2024,Blinkit was my go to app and it was rare that ...,blinkit,negative
...,...,...,...,...,...
4615,1,27 November 2024,I have deposited ₹1300 to my zepto wallet to o...,zepto,negative
4616,4,5 December 2024,Prices r competitive for certain items. Not fo...,zepto,positive
4617,1,13 November 2024,Worst delivery ever. They delivered a broken p...,zepto,negative
4618,5,17 November 2024,"I had a great experience with zepto, the quali...",zepto,positive


In [22]:
label_encoder = LabelEncoder()

data['rating_label_encoded'] = label_encoder.fit_transform(data['rating_label'])

label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
print("\nLabel Mapping:")
print(label_mapping)

num_classes = data['rating_label_encoded'].nunique()
print(f"\nNumber of classes: {num_classes}")


Label Mapping:
{'negative': 0, 'neutral': 1, 'positive': 2}

Number of classes: 3


In [23]:
# Initialize Tokenizer
tokenizer = Tokenizer(split=' ')
tokenizer.fit_on_texts(data['review'].values)

# Convert texts to sequences
X = tokenizer.texts_to_sequences(data['review'].values)

# maximum sequence length
maxlen = 100  

# Pad sequences to ensure uniform length
X = pad_sequences(X, maxlen=maxlen)

print(f"\nShape of X: {X.shape}")

# One-hot encode the target labels
y = pd.get_dummies(data['rating_label_encoded']).values

print(f"Shape of y: {y.shape}")


Shape of X: (4620, 100)
Shape of y: (4620, 3)


In [24]:
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Second split: Validation and Test from Temp
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

print(f"\nTraining set shape: {X_train.shape}, {y_train.shape}")
print(f"Validation set shape: {X_val.shape}, {y_val.shape}")
print(f"Test set shape: {X_test.shape}, {y_test.shape}")


Training set shape: (3234, 100), (3234, 3)
Validation set shape: (693, 100), (693, 3)
Test set shape: (693, 100), (693, 3)


In [25]:
# Hyperparameters
embed_dim = 128        # Embedding dimensions
lstm_out = 196         # LSTM units
max_features = len(tokenizer.word_index) + 1  # Vocabulary size (+1 for padding)

print(f"\nVocabulary Size: {max_features}")

# Define the BLSTM Model
model = Sequential()
model.add(Embedding(input_dim=max_features, output_dim=embed_dim))  # Embedding Layer
model.add(SpatialDropout1D(0.4))  # Regularization with Dropout
model.add(Bidirectional(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2)))  # Bidirectional LSTM
model.add(Dense(num_classes, activation='softmax'))  # Fully Connected Layer for Multi-class

print("\nModel Summary:")
print(model.summary())

# Compile the Model
model.compile(
    loss='categorical_crossentropy',  # Loss function for multi-class classification
    optimizer='adam',                # Optimizer
    metrics=['accuracy']             # Metric to monitor
)



Vocabulary Size: 8945

Model Summary:


None


In [26]:
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)

In [27]:
batch_size = 64
epochs = 20  # Increased epochs with early stopping

history = model.fit(
    X_train, y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1)

Epoch 1/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 262ms/step - accuracy: 0.7290 - loss: 0.7555 - val_accuracy: 0.8052 - val_loss: 0.5012
Epoch 2/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 322ms/step - accuracy: 0.8422 - loss: 0.4437 - val_accuracy: 0.8903 - val_loss: 0.3531
Epoch 3/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 307ms/step - accuracy: 0.9102 - loss: 0.2991 - val_accuracy: 0.8947 - val_loss: 0.3859
Epoch 4/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 318ms/step - accuracy: 0.9177 - loss: 0.2794 - val_accuracy: 0.9004 - val_loss: 0.3214
Epoch 5/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 304ms/step - accuracy: 0.9423 - loss: 0.1939 - val_accuracy: 0.9033 - val_loss: 0.3099
Epoch 6/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 363ms/step - accuracy: 0.9367 - loss: 0.1874 - val_accuracy: 0.8874 - val_loss: 0.4023
Epoch 7/20
[1m51/51[

In [28]:
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f"\nTest Accuracy: {test_accuracy:.2f}")

[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step - accuracy: 0.9035 - loss: 0.3258

Test Accuracy: 0.89


In [29]:
# Predict probabilities
y_pred = model.predict(X_test)

y_pred_classes = y_pred.argmax(axis=1)
y_true = y_test.argmax(axis=1)

print(classification_report(y_true, y_pred_classes, target_names=label_encoder.classes_))


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 65ms/step
              precision    recall  f1-score   support

    negative       0.92      0.96      0.94       537
     neutral       0.00      0.00      0.00        29
    positive       0.77      0.80      0.78       127

    accuracy                           0.89       693
   macro avg       0.56      0.59      0.58       693
weighted avg       0.86      0.89      0.87       693




Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



In [30]:
def plot_training_history(history):
    fig = go.Figure()

    # Plot Training Accuracy
    fig.add_trace(go.Scatter(
        x=list(range(1, len(history.history['accuracy']) + 1)),
        y=history.history['accuracy'],
        mode='lines+markers',
        name='Training Accuracy'
    ))

    # Plot Validation Accuracy
    fig.add_trace(go.Scatter(
        x=list(range(1, len(history.history['val_accuracy']) + 1)),
        y=history.history['val_accuracy'],
        mode='lines+markers',
        name='Validation Accuracy'
    ))

    # Plot Training Loss
    fig.add_trace(go.Scatter(
        x=list(range(1, len(history.history['loss']) + 1)),
        y=history.history['loss'],
        mode='lines+markers',
        name='Training Loss'
    ))

    # Plot Validation Loss
    fig.add_trace(go.Scatter(
        x=list(range(1, len(history.history['val_loss']) + 1)),
        y=history.history['val_loss'],
        mode='lines+markers',
        name='Validation Loss'
    ))  
    fig.update_layout(
        title='Model Training History',
        xaxis_title='Epoch',
        yaxis_title='Metric Value',
        legend_title='Metrics',
        template='plotly_white'
    )

    fig.show()


plot_training_history(history)


In [31]:
def predict_review(review, model, tokenizer, label_encoder, maxlen=100):
    sequence = tokenizer.texts_to_sequences([review])
    padded_sequence = pad_sequences(sequence, maxlen=maxlen)
    prediction = model.predict(padded_sequence)

    class_idx = prediction.argmax(axis=1)[0]
    label = label_encoder.inverse_transform([class_idx])[0]

    confidence = prediction[0][class_idx]

    return f"Sentiment: {label}, Confidence: {confidence:.2f}"

In [35]:
example_review = "I love the fast delivery and great service!"
print(example_review)
print(f"{predict_review(example_review, model, tokenizer, label_encoder, maxlen)}\n")

example_review = "best and great service love this "
print(example_review)
print(f"{predict_review(example_review, model, tokenizer, label_encoder, maxlen)}\n")

example_review = "App is Bad."
print(example_review)
print(f"{predict_review(example_review, model, tokenizer, label_encoder, maxlen)}\n")

example_review = "Delivery can't be better."
print(example_review)
print(f"{predict_review(example_review, model, tokenizer, label_encoder, maxlen)}\n")

I love the fast delivery and great service!
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
Sentiment: positive, Confidence: 0.98

best and great service love this 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
Sentiment: positive, Confidence: 0.96

App is Bad.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
Sentiment: negative, Confidence: 0.45

Delivery can't be better.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
Sentiment: positive, Confidence: 0.52

