# Appliances Reviews LSTM Model

## 1. Import Libraries & Modules

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix 
from torch.utils.data import DataLoader, TensorDataset
from collections import Counter
import re
import warnings
warnings.filterwarnings("ignore")

# Run on the GPU when one is visible to PyTorch; otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

## 2. Load Data & Basic Cleaning

In [2]:
# Load the raw review table from the Kaggle input directory.
df = pd.read_csv('/kaggle/input/datasets/maulikgajera/appliances-reviews/Appliances_Reviews.csv')

# Binary label: ratings strictly above 3 are positive (1), everything else is 0.
df['sentiment'] = (df['overall'] > 3).astype('int64')

# Model input is the review body followed by its summary; missing values
# are treated as empty strings so the concatenation never yields NaN.
review_body = df['reviewText'].fillna('')
review_summary = df['summary'].fillna('')
df['text'] = review_body + ' ' + review_summary

def clean_text(text):
    """Normalise a raw review string into lowercase, space-separated words.

    Apostrophes are dropped so contractions stay a single token
    ("don't" -> "dont"). Every other run of non-letter characters
    (punctuation, digits, newlines, tabs, ...) is replaced by one space so
    that neighbouring words are not fused together
    ("good.Works" -> "good works" rather than "goodworks").

    Args:
        text: The raw review text (a ``str``).

    Returns:
        The cleaned text containing only ``a-z`` and single spaces, with no
        leading or trailing whitespace.
    """
    text = text.lower()
    # Remove apostrophes outright instead of turning them into a separator.
    text = re.sub(r"['’]", '', text)
    # Anything else that is not a letter acts as a word boundary.
    text = re.sub(r'[^a-z]+', ' ', text)
    return text.strip()
# Clean every review in place, one string at a time.
df['text'] = df['text'].map(clean_text)

### a. Build vocabulary

In [3]:
# Count how often each whitespace-separated token occurs in the cleaned corpus.
counter = Counter()
for text in df['text']:
    counter.update(text.split())

# Keep only the 5000 most frequent tokens (fewer if the corpus is smaller).
vocab = dict(counter.most_common(5000))
# Index 0 is reserved for padding, so real tokens are numbered from 1 in
# descending frequency order.
word2idx = {word: idx for idx, word in enumerate(vocab, start=1)}
# Size of the embedding table: one row per kept token plus the padding row.
# Derived from the actual vocabulary so it stays correct if the cut-off
# changes or the corpus has fewer distinct tokens than the cut-off.
vocab_size = len(word2idx) + 1

### b. Convert text to sequences

In [4]:
def text_to_sequence(text):
    """Translate a cleaned review into a list of vocabulary indices.

    The text is split on whitespace; each token is looked up in ``word2idx``
    and tokens that are not in the vocabulary are encoded as 0.
    """
    lookup = word2idx.get
    return [lookup(token, 0) for token in text.split()]


# Encode every review; the result is a Series of variable-length index lists.
sequences = df['text'].apply(text_to_sequence)

### c. Padding

In [5]:
# Every encoded review is forced to exactly this many token indices.
max_len = 100


def pad_sequence(seq):
    """Return ``seq`` cut or right-padded with zeros to ``max_len`` items.

    Sequences that already have ``max_len`` or more entries keep only their
    first ``max_len`` entries; shorter ones get trailing zeros appended.
    """
    if len(seq) >= max_len:
        return seq[:max_len]
    shortfall = max_len - len(seq)
    return seq + [0] * shortfall

# Stack the fixed-length index lists into a 2-D array (one row per review).
padded_rows = [pad_sequence(encoded) for encoded in sequences]
X = np.array(padded_rows)
# Binary sentiment labels, in the same row order as X.
y = df['sentiment'].values

## 3. Train-Test split

In [6]:

# Hold out 20% of the reviews for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Token indices must be integer (long) tensors for the embedding lookup.
X_train, X_test = (
    torch.tensor(features, dtype=torch.long) for features in (X_train, X_test)
)
# Labels are float32 because the loss compares them against float logits.
y_train, y_test = (
    torch.tensor(labels, dtype=torch.float32) for labels in (y_train, y_test)
)

# Pair features with labels so the loaders yield (inputs, targets) batches.
train_data, test_data = (
    TensorDataset(X_train, y_train),
    TensorDataset(X_test, y_test),
)

## 4. DataLoader

In [7]:
# Settings shared by both loaders: 64-sample batches, 4 worker processes,
# and pinned host memory.
loader_kwargs = dict(batch_size=64, num_workers=4, pin_memory=True)

# Only the training data is reshuffled each epoch; evaluation order is fixed.
train_loader = DataLoader(train_data, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_data, shuffle=False, **loader_kwargs)

## 5. LSTM Model

In [8]:
class SentimentModel(nn.Module):
    """Embedding -> single-layer LSTM -> linear head for binary sentiment.

    Args:
        vocab_size: Number of rows in the embedding table (largest token
            index + 1).
        embed_dim: Size of each token embedding.
        hidden_dim: Size of the LSTM hidden state.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Score a batch of token-index sequences.

        Args:
            x: Integer tensor of shape ``(batch, seq_len)``.

        Returns:
            Tensor of shape ``(batch,)`` with one raw score per sequence
            (no sigmoid is applied here).
        """
        x = self.embedding(x)
        lstm_out, _ = self.lstm(x)
        # Use the LSTM output at the final time step as the sequence summary.
        # NOTE(review): inputs in this file are right-padded with 0, so for
        # short reviews this step is a padding position - confirm intended.
        out = lstm_out[:, -1, :]
        out = self.fc(out)
        # Drop only the trailing size-1 feature dim. A bare squeeze() would
        # also remove the batch dim for a batch of one, producing a 0-d
        # tensor whose shape no longer matches the (1,) target.
        return out.squeeze(-1)

# Build the network (128-dim embeddings, 128-dim LSTM state) on the chosen device.
model = SentimentModel(vocab_size, 128, 128).to(device)

# Wrap the model with torch.compile before creating the optimizer.
model = torch.compile(model)
# The model outputs raw scores, so the sigmoid is folded into the loss;
# this is numerically more stable than a separate sigmoid + BCELoss.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Mixed precision: gradient scaling only applies on CUDA, so it is switched
# off explicitly when the run falls back to the CPU (scale/step/update then
# behave as plain pass-throughs).
scaler = torch.cuda.amp.GradScaler(enabled=device.type == "cuda")

## 6. Training Loop

In [9]:
epochs = 5

# Autocast is only enabled on CUDA; on a CPU fallback the context is a no-op
# so the same loop runs in full precision.
use_amp = device.type == "cuda"

for epoch in range(epochs):
    model.train()
    total_loss = 0.0

    for xb, yb in train_loader:
        xb = xb.to(device, non_blocking=True)
        yb = yb.to(device, non_blocking=True)

        optimizer.zero_grad()

        # Forward pass and loss under (optional) mixed precision.
        with torch.autocast(device_type=device.type, enabled=use_amp):
            outputs = model(xb)
            loss = criterion(outputs, yb)

        # Scale the loss before backward, then let the scaler drive the
        # optimizer step and adjust its scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()

    # Report the mean batch loss so training progress is visible per epoch.
    avg_loss = total_loss / max(len(train_loader), 1)
    print(f"Epoch {epoch + 1}/{epochs} - train loss: {avg_loss:.4f}")

## 7. Evaluation

In [10]:
# Switch to inference behaviour and count correct predictions on the test set.
model.eval()
correct, total = 0, 0

# Gradients are not needed for evaluation.
with torch.no_grad():
    for xb, yb in test_loader:
        xb, yb = xb.to(device, non_blocking=True), yb.to(device, non_blocking=True)

        outputs = model(xb)
        # Convert raw scores to probabilities, then threshold at 0.5.
        probabilities = torch.sigmoid(outputs)
        predictions = probabilities.gt(0.5).float()

        matches = predictions.eq(yb)
        correct += matches.sum().item()
        total += yb.shape[0]

# Fraction of test samples whose predicted label equals the true label.
accuracy = correct / total

## 8. Accuracy

In [11]:
# Report the test-set accuracy computed in the evaluation cell, to 4 decimals.
print(f'Accuracy: {accuracy:.4f}')

Accuracy: 0.9543
