# Ling406 Sentiment Analysis Term Project
## Youngjun Yu

**Baseline System**: Simple DNN

**Algorithm Selection**: LSTM / CNN / Transformer

**Feature Engineering Experiment**: Random Embedding / GloVe Embedding (frozen) / GloVe Embedding (fine‑tuning) / TF‑IDF weighted Average Embeddings

**Dataset**:
- movie review (Pang/Lee 2004 polarity v2.0)
- Yelp review (over 3.5 ⇒ positive, under ⇒ negative) (for extra-crdit)

In [31]:
import os, re, glob, tarfile
from collections import Counter
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [32]:
# Hyperparameters

BATCH_SIZE = 64
EPOCHS = 5
LR = 1e-3
EMBED_DIM = 100
HIDDEN_DIM = 128
VOCAB_SIZE = 10000
MAX_SEQ_LEN = 200
DROPOUT_PROB = 0.5

In [33]:
# Tokenizer and Vocabulary

# Simple whitespace tokenizer 
def tokenize(text):
    return text.lower().split()

# Build vocabulary from a list of texts
def build_vocab(texts, max_size, min_freq=1):
    counter = Counter()
    for txt in texts:
        counter.update(tokenize(txt))
    most_common = []
    for w, f in counter.items():
        if f >= min_freq:
            most_common.append(w)
    most_common = sorted(most_common, key=lambda w: -counter[w])[:max_size]
    itos = ["<PAD>","<UNK>"] + most_common
    stoi = {w:i for i,w in enumerate(itos)}
    return stoi, itos

# Convert a text string into a list of token indices based on stoi mapping
def encode(text, stoi):
    tokens = tokenize(text)
    idxs = []
    for t in tokens:
        idx = stoi.get(t, stoi["<UNK>"])
        idxs.append(idx)
    idxs = idxs[:MAX_SEQ_LEN]
    if len(idxs) < MAX_SEQ_LEN:
        idxs += [stoi["<PAD>"]] * (MAX_SEQ_LEN - len(idxs))
    return idxs

In [34]:
# Data Preporcessing- Load Pang Lee Movie Reviews Dataset

# Unzip the data file, read the text file, and return it along with the labels
def load_panglee(base_tar: str = "../Data/review_polarity.tar.gz",
                extract_dir: str = "../Data/review_polarity"):
    if not os.path.isdir(extract_dir) or not os.listdir(extract_dir):
        os.makedirs(extract_dir, exist_ok=True)
        with tarfile.open(base_tar, "r:gz") as tar:
            tar.extractall(path=extract_dir)

    txt_dir = None
    for root, dirs, files in os.walk(extract_dir):
        if "txt_sentoken" in dirs:
            txt_dir = os.path.join(root, "txt_sentoken")
            break

    texts, labels = [], []
    for label in ["pos", "neg"]:
        pattern = os.path.join(txt_dir, label, "*.txt")
        for fn in glob.glob(pattern):
            with open(fn, encoding="utf-8") as f:
                texts.append(f.read())
                labels.append(1 if label == "pos" else 0)

    return texts, labels

# Split the dataset into training and testing sets 
movie_texts, movie_labels = load_panglee()
train_texts, test_texts, train_labels, test_labels = train_test_split(
    movie_texts,
    movie_labels,
    test_size=0.2,
    random_state=42,
    stratify=movie_labels
)
print("Movie reviews:", len(movie_texts))

Movie reviews: 2000


In [35]:
# Data Preporcessing- Load Yelp Dataset

# Read the all_reviews.txt file inside the ExtraCredit folder and return the reviews and labels
def load_yelp(path="../Data/ExtraCredit/Yelp/all_reviews.txt"):
    with open(path, encoding="utf-8") as f:
        data = f.read()
    matches = re.compile(
        r"\{\{\{\s*(\d)star\s*\}\}\}\s*\[\[\[\s*(.*?)\s*\]\]\]", 
        re.DOTALL
    ).findall(data)

    texts = [text.strip() for (_, text) in matches]
    labels = [1 if int(r)>=4 else 0 for (r, _) in matches]

    return texts, labels

# Split the dataset into training and testing sets
yelp_texts, yelp_labels = load_yelp()
y_train_texts, y_test_texts, y_train_labels, y_test_labels = train_test_split(
    yelp_texts,
    yelp_labels,
    test_size=0.2,
    random_state=42,
    stratify=yelp_labels
)
print("Yelp reviews:", len(yelp_texts), "Positives:", sum(yelp_labels), "Negatives:", len(yelp_labels)-sum(yelp_labels))

Yelp reviews: 10391 Positives: 6066 Negatives: 4325


In [36]:
# Generate Vocabulary

stoi, itos = build_vocab(movie_texts, max_size=VOCAB_SIZE)
print("Vocab size:", len(itos))

Vocab size: 10002


In [37]:
# Define Dataset and DataLoader

# Dataset class for tokenized text inputs and corresponding labels
class TextDataset(Dataset):
    def __init__(self, texts, labels, stoi):
        self.texts = texts
        self.labels = labels
        self.stoi  = stoi
    def __len__(self):
        return len(self.texts)
    def __getitem__(self, idx):
        x = torch.tensor(encode(self.texts[idx], self.stoi), dtype=torch.long)
        y = torch.tensor(self.labels[idx], dtype=torch.long)
        return x,y

train_ds = TextDataset(train_texts, train_labels, stoi)
test_ds  = TextDataset(test_texts,  test_labels,  stoi)
y_train_ds = TextDataset(y_train_texts, y_train_labels, stoi)
y_test_ds  = TextDataset(y_test_texts,  y_test_labels,  stoi)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False)
y_train_loader = DataLoader(y_train_ds, batch_size=BATCH_SIZE, shuffle=True)
y_test_loader  = DataLoader(y_test_ds,  batch_size=BATCH_SIZE, shuffle=False)

In [38]:
# Define Models

# DNN model with average pooled embeddings
class DNN(nn.Module):
    def __init__(self, vocab_size, emb_dim, hidden_dim, dropout):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        self.fc1 = nn.Linear(emb_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, 2)
        self.drop = nn.Dropout(dropout)
    def forward(self, x):
        pooled = self.embed(x).mean(dim=1)
        out = self.drop(torch.relu(self.fc1(pooled)))
        return self.fc2(out)

# Bidirectional LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, vocab_size, emb_dim, hidden_dim, dropout):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        self.lstm = nn.LSTM(emb_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_dim*2, 2)
        self.drop = nn.Dropout(dropout)
    def forward(self, x):
        emb, _ = self.lstm(self.embed(x))
        # Concatenate last forward and first backward hidden states
        out = self.drop(torch.cat([emb[:, -1, :self.lstm.hidden_size],
                         emb[:,  0, self.lstm.hidden_size:]], dim=1))
        return self.fc(out)

# CNN Model with multiple filter sizes
class CNNModel(nn.Module):
    def __init__(self, vocab_size, emb_dim, hidden_dim, dropout, kernel_sizes=[3,4,5], num_filters=100):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        # Create convolution layers with varying kernel sizes
        self.convs = nn.ModuleList([
            nn.Conv2d(1, num_filters, (k, emb_dim)) for k in kernel_sizes
        ])
        self.fc = nn.Linear(num_filters*len(kernel_sizes), 2)
        self.drop = nn.Dropout(dropout)
    def forward(self, x):
        emb = self.embed(x).unsqueeze(1)  # [B, 1, L, E]
        outs = [torch.relu(conv(emb)).squeeze(3) for conv in self.convs]
        pools = [torch.max(o, dim=2)[0] for o in outs]
        cat = self.drop(torch.cat(pools, dim=1))
        return self.fc(cat)

# Positional encoding used in Transformer
class PositionalEncoding(nn.Module):
    def __init__(self, emb_dim, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, emb_dim)
        pos = torch.arange(0, max_len).unsqueeze(1)
        div = torch.exp(torch.arange(0, emb_dim, 2) * -(np.log(10000.0)/emb_dim))
        pe[:, 0::2] = torch.sin(pos*div)
        pe[:, 1::2] = torch.cos(pos*div)
        self.register_buffer('pe', pe.unsqueeze(0))
    def forward(self, x):
        return x + self.pe[:, :x.size(1)].to(x.device)

# Transformer Model for text classification
class TransformerModel(nn.Module):
    def __init__(self, vocab_size, emb_dim, hidden_dim, dropout, nhead=4, num_layers=2):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        self.pe = PositionalEncoding(emb_dim)
        # Define Encoder layer
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=emb_dim, nhead=nhead, dim_feedforward=hidden_dim, dropout=dropout, batch_first=True
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers)
        self.fc = nn.Linear(emb_dim, 2)
    def forward(self, x):
        emb = self.pe(self.embed(x))
        out = self.encoder(emb).mean(dim=1)
        return self.fc(out)

In [40]:
# Define Training and Evaluation

# Train the model for one epoch
def train_epoch(model, loader, optimizer, criterion):
    model.train()
    total_loss = 0
    # Iterate over batches from the DataLoader
    for x,y in loader:
        x,y = x.to(device), y.to(device)
        optimizer.zero_grad()
        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Return average loss for the epoch
    return total_loss/len(loader)

# Evaluate the model on test data
def eval_model(model, loader, criterion):
    model.eval()
    total_loss, correct = 0, 0
    # Disable gradient tracking during evaluation
    with torch.no_grad():
        for x,y in loader:
            x,y = x.to(device), y.to(device)
            logits = model(x)
            total_loss += criterion(logits, y).item()
            preds = logits.argmax(dim=1)
            correct += (preds==y).sum().item()
    # Return average loss and accuracy
    return total_loss/len(loader), correct/len(loader.dataset)

In [None]:
import time

# Train and Evaluate Models for Movie Reviews

# Define models
models = {
    "DNN": DNN(len(itos), EMBED_DIM, HIDDEN_DIM, DROPOUT_PROB),
    "LSTM": LSTMModel(len(itos), EMBED_DIM, HIDDEN_DIM, DROPOUT_PROB),
    "CNN": CNNModel(len(itos), EMBED_DIM, HIDDEN_DIM, DROPOUT_PROB),
    "Transformer": TransformerModel(len(itos), EMBED_DIM, HIDDEN_DIM, DROPOUT_PROB)
}

# Define the loss function for classification
criterion = nn.CrossEntropyLoss()

# Iterate over each model to train and evaluate
for name, model in models.items():
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=LR)
    start_time = time.time()
    # Train the model for EPOCHS
    for epoch in range(1, EPOCHS+1):
        train_loss = train_epoch(model, train_loader, optimizer, criterion)
    train_time = time.time() - start_time
    val_loss, val_acc = eval_model(model, test_loader, criterion)
    print(f"[PangLee] {name} — train_time: {train_time:.2f}s, loss: {val_loss:.3f}, acc: {val_acc:.3f}")

[PangLee] DNN — train_time: 2.10s, loss: 0.666, acc: 0.568
[PangLee] LSTM — train_time: 92.90s, loss: 0.807, acc: 0.542
[PangLee] CNN — train_time: 14.88s, loss: 0.676, acc: 0.570
[PangLee] Transformer — train_time: 73.59s, loss: 0.704, acc: 0.590


In [None]:
# Train and Evaluate Models for Yelp Reviews

# Iterate over each model to train and evaluate
for name, model in models.items():
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=LR)
    start_time = time.time()
    # Train the model for EPOCHS
    for epoch in range(1, EPOCHS+1):
        train_epoch(model, y_train_loader, optimizer, criterion)
    train_time = time.time() - start_time
    val_loss, val_acc = eval_model(model, y_test_loader, criterion)
    print(f"[Yelp] {name} — train_time: {train_time:.2f}s, loss={val_loss:.3f}, acc={val_acc:.3f}")

[Yelp] DNN — train_time: 5.43s, loss=0.431, acc=0.810
[Yelp] LSTM — train_time: 104.29s, loss=0.566, acc=0.767
[Yelp] CNN — train_time: 66.98s, loss=0.381, acc=0.830
[Yelp] Transformer — train_time: 364.74s, loss=0.461, acc=0.814


In [41]:
# Load pre-trained GloVe embeddings and initialize the embedding matrix for the model

import gensim.downloader as api
import torch

# Load 100-d GloVe vectors
wv = api.load("glove-wiki-gigaword-100")

vocab_size = len(itos)
emb_matrix = torch.randn(vocab_size, EMBED_DIM)
emb_matrix[0] = torch.zeros(EMBED_DIM)

# Replace random embeddings with GloVe vectors where available
for idx, token in enumerate(itos):
    if token in wv:
        emb_matrix[idx] = torch.tensor(wv[token])

model = DNN(vocab_size, EMBED_DIM, HIDDEN_DIM, DROPOUT_PROB).to(device)

In [42]:
# PangLee Movie Review Dataset Feature‐Engineering Experiments

from sklearn.feature_extraction.text import TfidfVectorizer

# Dataset class that stores TF-IDF features and labels as tensors
class FeatureDataset(Dataset):
    def __init__(self, features, labels):
        if not torch.is_tensor(features):
            features = torch.tensor(features, dtype=torch.float32)
        self.features = features
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return self.features.size(0)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

# DNN model that takes averaged document embeddings
class DNN_Avg(nn.Module):
    def __init__(self, emb_dim, hidden_dim, dropout):
        super().__init__()
        self.fc1  = nn.Linear(emb_dim, hidden_dim)
        self.drop = nn.Dropout(dropout)
        self.fc2  = nn.Linear(hidden_dim, 2)
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.drop(x)
        return self.fc2(x)

# Create a TF-IDF vectorizer and transform the training and test texts
vectorizer = TfidfVectorizer(vocabulary=itos)
tfidf_train = vectorizer.fit_transform(train_texts)
tfidf_test = vectorizer.transform(test_texts)
emb_matrix_np = emb_matrix.cpu().numpy()  

# Normalize and scale the embedding matrix
mu, sigma = emb_matrix.mean(), emb_matrix.std()
emb_matrix = (emb_matrix - mu) / (sigma + 1e-9)    
emb_matrix *= 0.01               
model.embed.weight.data.copy_(emb_matrix)

# Compute document embeddings
doc_emb_train = tfidf_train.dot(emb_matrix_np)
doc_emb_test = tfidf_test.dot(emb_matrix_np)

# Create datasets and dataloaders for TF-IDF features
tfidf_train_ds = FeatureDataset(doc_emb_train, train_labels)
tfidf_test_ds = FeatureDataset(doc_emb_test,  test_labels)
tfidf_train_loader = DataLoader(tfidf_train_ds, batch_size=BATCH_SIZE, shuffle=True)
tfidf_test_loader = DataLoader(tfidf_test_ds,  batch_size=BATCH_SIZE, shuffle=False)

results_feat = {}

configs = {
    "rand-emb": {"type":"emb","init":"random","freeze":False},
    "glove-fz": {"type":"emb","init":"glove100d","freeze":True},
    "glove-ft": {"type":"emb","init":"glove100d","freeze":False},
    "tfidf-avg":{"type":"tfidf-emb"}
}

# Run experiment for each configuration
for name, cfg in configs.items():
    print(f"\n=== PangLee Experiment: {name} ===")
    if cfg["type"] == "emb":
        model = DNN(len(itos), EMBED_DIM, HIDDEN_DIM, DROPOUT_PROB).to(device)
        if cfg["init"] == "glove100d":
            model.embed.weight.data.copy_(emb_matrix)
        if cfg["freeze"]:
            model.embed.weight.requires_grad = False
        else:
            model.embed.weight.requires_grad = True
        train_ld, test_ld = train_loader, test_loader
    else:
        model = DNN_Avg(EMBED_DIM, HIDDEN_DIM, DROPOUT_PROB).to(device)
        train_ld, test_ld = tfidf_train_loader, tfidf_test_loader

    optimizer = optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()), lr=LR
    )
    
    start = time.time()
    for epoch in range(1, EPOCHS+1):
        train_epoch(model, train_ld, optimizer, criterion)
    elapsed = time.time() - start

    loss, acc = eval_model(model, test_ld, criterion)
    print(f"{name}: train_time={elapsed:.2f}s, loss={loss:.3f}, acc={acc:.3f}")
    results_feat[name] = {"time": elapsed, "loss": loss, "acc": acc}

print("\nPangLee Feature‑Engineering Summary:")
for name, res in results_feat.items():
    print(f"{name}: time={res['time']:.2f}s, acc={res['acc']:.3f}")


=== PangLee Experiment: rand-emb ===
rand-emb: train_time=2.43s, loss=0.668, acc=0.600

=== PangLee Experiment: glove-fz ===
glove-fz: train_time=1.86s, loss=0.693, acc=0.500

=== PangLee Experiment: glove-ft ===
glove-ft: train_time=2.22s, loss=0.606, acc=0.693

=== PangLee Experiment: tfidf-avg ===
tfidf-avg: train_time=0.16s, loss=0.563, acc=0.708

PangLee Feature‑Engineering Summary:
rand-emb: time=2.43s, acc=0.600
glove-fz: time=1.86s, acc=0.500
glove-ft: time=2.22s, acc=0.693
tfidf-avg: time=0.16s, acc=0.708


In [None]:
# Yelp Dataset Feature‐Engineering Experiments

vectorizer_y = TfidfVectorizer(vocabulary=itos)
tfidf_y_train = vectorizer_y.fit_transform(y_train_texts)
tfidf_y_test = vectorizer_y.transform(y_test_texts)

emb_matrix_np = emb_matrix.cpu().numpy()

doc_emb_y_train = tfidf_y_train.dot(emb_matrix_np)
doc_emb_y_test = tfidf_y_test.dot(emb_matrix_np)

y_tfidf_train_ds = FeatureDataset(doc_emb_y_train, y_train_labels)
y_tfidf_test_ds = FeatureDataset(doc_emb_y_test,  y_test_labels)
y_tfidf_train_loader = DataLoader(y_tfidf_train_ds, batch_size=BATCH_SIZE, shuffle=True)
y_tfidf_test_loader = DataLoader(y_tfidf_test_ds,  batch_size=BATCH_SIZE, shuffle=False)

results_yelp = {}

for name, cfg in configs.items():
    print(f"\n=== Yelp Experiment: {name} ===")
    
    if cfg["type"] == "emb":
        model = DNN(len(itos), EMBED_DIM, HIDDEN_DIM, DROPOUT_PROB).to(device)
        if cfg["init"] == "glove100d":
            model.embed.weight.data.copy_(emb_matrix)
        if cfg["freeze"]:
            model.embed.weight.requires_grad = False
        else:
            model.embed.weight.requires_grad = True
        train_ld, test_ld = y_train_loader, y_test_loader
    else:
        model = DNN_Avg(EMBED_DIM, HIDDEN_DIM, DROPOUT_PROB).to(device)
        train_ld, test_ld = y_tfidf_train_loader, y_tfidf_test_loader

    optimizer = optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()), 
        lr=LR
    )
    
    start = time.time()
    for epoch in range(1, EPOCHS+1):
        train_epoch(model, train_ld, optimizer, criterion)
    elapsed = time.time() - start
    
    loss, acc = eval_model(model, test_ld, criterion)
    print(f"{name}: train_time={elapsed:.2f}s, loss={loss:.3f}, acc={acc:.3f}")
    results_yelp[name] = {"time": elapsed, "loss": loss, "acc": acc}

print("\nYelp Feature‑Engineering Summary:")
for name, res in results_yelp.items():
    print(f"{name}: time={res['time']:.2f}s, acc={res['acc']:.3f}")



=== Yelp Experiment: rand-emb ===
rand-emb: train_time=6.08s, loss=0.452, acc=0.792

=== Yelp Experiment: glove-fz ===
glove-fz: train_time=3.19s, loss=0.664, acc=0.605

=== Yelp Experiment: glove-ft ===
glove-ft: train_time=5.56s, loss=0.392, acc=0.837

=== Yelp Experiment: tfidf-avg ===
tfidf-avg: train_time=0.85s, loss=0.546, acc=0.729

Yelp Feature‑Engineering Summary:
rand-emb: time=6.08s, acc=0.792
glove-fz: time=3.19s, acc=0.605
glove-ft: time=5.56s, acc=0.837
tfidf-avg: time=0.85s, acc=0.729


In [44]:
# Improved System: Transformer + GloVe‑ft

# Initialize TransformerModel with GloVe‑fine-tuning
model = TransformerModel(
    vocab_size=len(itos),
    emb_dim=EMBED_DIM,
    hidden_dim=HIDDEN_DIM,
    dropout=DROPOUT_PROB,
    nhead=4,
    num_layers=2
).to(device)

# Copy precomputed GloVe emb_matrix and enable fine-tuning
model.embed.weight.data.copy_(emb_matrix)
model.embed.weight.requires_grad = True

# Optimizer and criterion
optimizer = optim.Adam(model.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss()

# Train and time on movie reviews and Evaluate
EPOCHS = 20
start_time = time.time()
for epoch in range(1, EPOCHS+1):
    train_loss = train_epoch(model, train_loader, optimizer, criterion)
train_time = time.time() - start_time
val_loss, val_acc = eval_model(model, test_loader, criterion)
print(f"Transformer+GloVe‑ft on Movie Reviews: "
      f"train_time={train_time:.2f}s, loss={val_loss:.3f}, acc={val_acc:.3f}")

# Repeat on Yelp Reviews
EPOCHS = 10
start_time = time.time()
for epoch in range(1, EPOCHS+1):
    train_epoch(model, y_train_loader, optimizer, criterion)
y_train_time = time.time() - start_time

y_loss, y_acc = eval_model(model, y_test_loader, criterion)
print(f"Transformer+GloVe‑ft on Yelp Reviews: "
      f"train_time={y_train_time:.2f}s, loss={y_loss:.3f}, acc={y_acc:.3f}")

Transformer+GloVe‑ft on Movie Reviews: train_time=306.12s, loss=2.092, acc=0.675
Transformer+GloVe‑ft on Yelp Reviews: train_time=724.93s, loss=0.644, acc=0.818


In [46]:
# Error Analysis on Movie Review: Find 5 Misclassified Examples

model.eval()
misclassified = []

with torch.no_grad():
    for i, text in enumerate(test_texts):
        # Encode and move to device
        x = torch.tensor(encode(text, stoi), dtype=torch.long).unsqueeze(0).to(device)
        logits = model(x)
        pred = logits.argmax(dim=1).item()
        true = test_labels[i]
        if pred != true:
            misclassified.append((i, text, true, pred))
        if len(misclassified) >= 5:
            break

# Print the first 5 misclassified examples
for idx, (i, text, true, pred) in enumerate(misclassified, 1):
    print(f"Example {idx}")
    print(f"Index: {i}")
    print(f"True label: {'POS' if true==1 else 'NEG'}")
    print(f"Predicted label: {'POS' if pred==1 else 'NEG'}")
    print("Text snippet:")
    print(text.replace("\n"," "), "...\n")

Example 1
Index: 1
True label: NEG
Predicted label: POS
Text snippet:
 " spawn " features good guys , bad guys , lots of fighting , bloody violence , a leather-clad machine gun chick , gooey , self-healing bullet holes , scatological humor and a man-eating monster .  it not only appears to have been tailor made for a swarm of 12- and 13-year-old boys , it appears to have been made by them .  in a classic example of telling and not showing , " spawn " opens with a truckload of mumbo jumbo about forces of darkness , forces of light and how " men are the ones who create evil on earth . "  so much for a message .  the movie then lurches forward into the plight of al simmons ( michael jai white ) , a government assassin/operative who is murdered by diabolical boss jason wynn ( martin sheen , who plays all of his scenes like an oscar clip ) while on a top secret mission in a north korean biological weapons plant .  simmons goes to hell and back , after making a deal with satan himself -- if 

In [28]:
# Error Analysis on Yelp Reviews: Find 5 Misclassified Examples

model.eval()
misclassified_yelp = []

with torch.no_grad():
    for i, text in enumerate(y_test_texts):
        # Encode text and move to device
        x = torch.tensor(encode(text, stoi), dtype=torch.long).unsqueeze(0).to(device)
        logits = model(x)
        pred = logits.argmax(dim=1).item()
        true = y_test_labels[i]
        if pred != true:
            misclassified_yelp.append((i, text, true, pred))
        if len(misclassified_yelp) >= 5:
            break

# Print the first 5 misclassified examples
for idx, (i, text, true, pred) in enumerate(misclassified_yelp, 1):
    print(f"Example {idx}")
    print(f"Index: {i}")
    print(f"True label: {'POSITIVE' if true==1 else 'NEGATIVE'}")
    print(f"Predicted label: {'POSITIVE' if pred==1 else 'NEGATIVE'}")
    print("Text snippet:")
    print(text.replace("\n"," "), "...\n")

Example 1
Index: 0
True label: NEGATIVE
Predicted label: POSITIVE
Text snippet:
Severely overrated place. Their sugar-coated corn pancake topped with green onions was the most confusing appetizer I'd ever had. Their side dishes lack variety and freshness, chicken cutlet from their bentos was awfully hard, and their dak-bokkum-tang was a disaster. I tried here because my favorite teacher was a friend of the manager, but sorry I'm not going there again. I get why people like it though; a good place to start if you have zero tolerance for the authentic savory Korean flavor and all you need is an instagram photo of 'exotic' food. ...

Example 2
Index: 6
True label: NEGATIVE
Predicted label: POSITIVE
Text snippet:
I thought this was going to be the Za's from a couple years back but it is not. They took a proven concept and have butchered it with subpar ingredients and the use of a microwave. They used to cook the pasta with the sauce along with the ingredients which led to a great product t