In [18]:
import torch
if torch.cuda.is_available():
    print("GPU is available!")
    device = torch.device("cuda")
else:
    print("GPU not detected. Check your CUDA installation.")

import os
import pandas as pd
import numpy as np
import bz2
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW, get_scheduler, TrainingArguments, Trainer, EarlyStoppingCallback, TrainerCallback, pipeline, set_seed, BertModel
from transformers import BertForSequenceClassification, BertTokenizer

from huggingface_hub.inference_api import InferenceApi
from datasets import load_dataset, Dataset

from torch.utils.data import DataLoader
import torch.nn as nn

import sqlite3 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.utils.class_weight import compute_class_weight
import optuna


from customhead import CustomClassificationHead
import tensorboardX
import gc

from transformers import AutoTokenizer, AutoModelForCausalLM

import matplotlib.pyplot as plt




GPU is available!


In [None]:
# API_TOKEN = 
# os.environ["HF_TOKEN"] = ""

In [20]:
#data loading block

df = pd.read_csv("movies.csv")

df = df.dropna()



def categorize_rating(rating):
    if rating <= 3:
        return 0  # Negative
    elif 4 <= rating <= 6:
        return 1  # Neutral
    else:
        return 2  # Positive
    
df['labels'] = df['RATING'].apply(categorize_rating)


#  Debug: Check label distribution
print("Unique labels in dataset:", df["labels"].unique())  # Should output [0,1,2]

# 

Unique labels in dataset: [1 2 0]


In [21]:
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=3)  # If labels are in range [0-9]


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [22]:
def freeze_bert_layers(model, freeze_percent=50):
    """
    Freezes the first 'freeze_percent' of BERT layers while leaving the rest trainable.
    
    Args:
    - model: Pretrained BERT model.
    - freeze_percent: Percentage of layers to freeze (0-100).
    """
    total_layers = len(model.bert.encoder.layer)  # Total transformer layers (12 for BERT-base)
    num_freeze = int((freeze_percent / 100) * total_layers)  # Number of layers to freeze

    # Freeze embeddings layer (always)
    for param in model.bert.embeddings.parameters():
        param.requires_grad = False
    
    # Freeze the first 'num_freeze' layers
    for layer in model.bert.encoder.layer[:num_freeze]:
        for param in layer.parameters():
            param.requires_grad = False
    
    print(f"Frozen {num_freeze}/{total_layers} encoder layers ({freeze_percent}%).")


class IMDbDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts = list(texts)  
        self.labels = [int(label) for label in labels]  # Ensure labels are integers
        self.tokenizer = tokenizer
        self.max_length = max_length

        # Debugging: Print first few samples
        print("[DEBUG] First 3 items in dataset:")
        for i in range(min(3, len(self.texts))):
            print(f"Text {i}: {self.texts[i]} | Label: {self.labels[i]} | Type: {type(self.labels[i])}")

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]

        # Debugging
        if idx < 5:
            print(f"[DEBUG] Fetching index {idx}: Text={text}, Label={label}, Type={type(label)}")

        encoding = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt"
        )

        return (
            encoding["input_ids"].squeeze(0),
            encoding["attention_mask"].squeeze(0),
            torch.tensor(label, dtype=torch.long)  # Ensure it's a `long` tensor
        )


** Using Only BERT for Classification: fine tunes the entire BERT model directly, with varying percentages of frozen layers, trained end to end using our dataset. This model already includes a classification head (a linear layer on top of BERT’s [CLS] token representation), and you fine-tune the entire model.**

In [None]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.utils.class_weight import compute_class_weight

# ✅ Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ✅ Load dataset and drop missing values
df = pd.read_csv("movies.csv").dropna()

# ✅ Convert RATING into 3-Class Labels
def categorize_rating(rating):
    if rating <= 3:
        return 0  # Negative
    elif 4 <= rating <= 6:
        return 1  # Neutral
    else:
        return 2  # Positive

df['labels'] = df['RATING'].apply(categorize_rating)

# ✅ Debug: Check label distribution
print("Unique labels in dataset:", df["labels"].unique())  # Should output [0,1,2]

# ✅ Split dataset into train and test sets
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['REVIEW'].tolist(), df['labels'].tolist(), test_size=0.2, random_state=42
)

# ✅ Load BERT tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# ✅ Define Dataset Class
class IMDbDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts = list(texts)  
        self.labels = [int(label) for label in labels]  # 🔥 Ensure labels are integers
        self.tokenizer = tokenizer
        self.max_length = max_length

        # ✅ Debugging: Print first few samples
        print("[DEBUG] First 3 items in dataset:")
        for i in range(min(3, len(self.texts))):
            print(f"Text {i}: {self.texts[i]} | Label: {self.labels[i]} | Type: {type(self.labels[i])}")

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]

        # ✅ Debugging
        if idx < 5:
            print(f"[DEBUG] Fetching index {idx}: Text={text}, Label={label}, Type={type(label)}")

        encoding = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt"
        )

        return (
            encoding["input_ids"].squeeze(0),
            encoding["attention_mask"].squeeze(0),
            torch.tensor(label, dtype=torch.long)  # 🔥 Ensure it's a `long` tensor
        )

# ✅ Create Dataset Instances
train_dataset = IMDbDataset(train_texts, train_labels, tokenizer)
test_dataset = IMDbDataset(test_texts, test_labels, tokenizer)

# ✅ Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# ✅ Function to Freeze BERT Layers
def freeze_bert_layers(model, freeze_percent=50):
    total_layers = len(model.bert.encoder.layer)  # Total transformer layers (12 for BERT-base)
    num_freeze = int((freeze_percent / 100) * total_layers)  # Number of layers to freeze

    # 🔒 Always freeze embeddings layer
    for param in model.bert.embeddings.parameters():
        param.requires_grad = False
    
    # 🔒 Freeze first 'num_freeze' layers
    for layer in model.bert.encoder.layer[:num_freeze]:
        for param in layer.parameters():
            param.requires_grad = False
    
    print(f"✅ Frozen {num_freeze}/{total_layers} encoder layers ({freeze_percent}%).")

# ✅ Freezing Percentages to Test
freeze_pcts = [0, 25, 50, 75, 100]
results = []  # Store results for comparison

# ✅ Loop Over Freezing Percentages
for pct in freeze_pcts:
    print(f"\n🔹 Training with {pct}% of BERT layers frozen.")

    # ✅ Load Fresh BERT Model for Each Experiment
    model = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased", 
        num_labels=3,  # ✅ Match it with 3-class classification
        hidden_dropout_prob=0.3,  
        attention_probs_dropout_prob=0.3
    ).to(device)

    # ✅ Freeze Layers Based on Percentage
    freeze_bert_layers(model, freeze_percent=pct)

    # ✅ Define Optimizer
    optimizer = AdamW(model.parameters(), lr=3e-5)

    # ✅ Compute Class Weights Only If Using 3-Class Labels
    class_weights = compute_class_weight('balanced', classes=np.unique(train_labels), y=train_labels)
    class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)

    # ✅ Training Loop
    num_epochs = 10
    best_accuracy = 0
    patience = 3  # Stop training if accuracy doesn't improve for 3 epochs
    counter = 0

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0

        for input_ids, attention_mask, labels in train_loader:
            input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device).long()

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask=attention_mask)
            loss = loss_fn(outputs.logits, labels)

            loss.backward()
            optimizer.step()

            total_loss += loss.item()
        
        avg_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")

        # ✅ Evaluate Every Epoch
        model.eval()
        predictions, true_labels = [], []
        with torch.no_grad():
            for input_ids, attention_mask, labels in test_loader:
                input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)

                outputs = model(input_ids, attention_mask=attention_mask)
                preds = torch.argmax(outputs.logits, dim=1)

                predictions.extend(preds.cpu().numpy())
                true_labels.extend(labels.cpu().numpy())

        accuracy = accuracy_score(true_labels, predictions)
        print(f"Test Accuracy: {accuracy:.4f}")

        # ✅ Early Stopping: Save Best Model
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            counter = 0
            torch.save(model.state_dict(), f"best_model_{pct}.pth")
        else:
            counter += 1
            if counter >= patience:
                print("Early stopping triggered.")
                break  

    # ✅ Store Results for Comparison
    results.append((pct, best_accuracy))

# ✅ Compare Results
print("\n📈 Final Results:")
for pct, acc in results:
    print(f"Frozen {pct}% Layers → Accuracy: {acc:.4f}")

# ✅ Plot Accuracy vs. Frozen Layers
freeze_pcts, accuracies = zip(*results)

plt.plot(freeze_pcts, accuracies, marker='o', linestyle='-')
plt.xlabel("Frozen Layers (%)")
plt.ylabel("Test Accuracy")
plt.title("Impact of Freezing BERT Layers on Accuracy")
plt.show()


Unique labels in dataset: [1 2 0]


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[DEBUG] First 3 items in dataset:
Text 0: I was expecting this movie to be bad, but in a good way, if you get me. But, boy, this is an embarassement. No fun, no real action. The thing is painfull. I d give it a ZERO if possible. Worst movie ever. | Label: 0 | Type: <class 'int'>
Text 1: Father Peter (Guy Pearce ) is an exorcist who smokes and swears. He drives a car that needs a muffler. Peter was trained by the late Father Louis (Keith David) who died on his last exorcism as well as the boy who was possessed. He is assigned Father Daniel as an intern exorcist who is immediately thrust into the lion's den. They are working on a kid named Charley (pencils are involved later) who once played with a Ouija Board at a skating rink.The film has a twist which Charley gives away. It was okay if you have never seen an exorcist film before.Guide: F-word. No sex or nudity. | Label: 1 | Type: <class 'int'>
Text 2: I never write reviews. This movie was so bad I had to say something. This year the a



[DEBUG] Fetching index 1: Text=Father Peter (Guy Pearce ) is an exorcist who smokes and swears. He drives a car that needs a muffler. Peter was trained by the late Father Louis (Keith David) who died on his last exorcism as well as the boy who was possessed. He is assigned Father Daniel as an intern exorcist who is immediately thrust into the lion's den. They are working on a kid named Charley (pencils are involved later) who once played with a Ouija Board at a skating rink.The film has a twist which Charley gives away. It was okay if you have never seen an exorcist film before.Guide: F-word. No sex or nudity., Label=1, Type=<class 'int'>
[DEBUG] Fetching index 3: Text=It's not a fast paced action film, it's a beautifully slow, gorgeous film that covers a real-life story. For anyone who likes history, or archaeology, this is absolutely fascinating. It's so nice to see films like this these days when most are action or thriller., Label=2, Type=<class 'int'>
[DEBUG] Fetching index 0: Tex

**Use BERT to Encode Reviews and Use an MLP on Top. Here, BERT is used as a feature extractor. Instead of training it end-to-end, we extract embeddings and pass them to an MLP for classification.**

- Use a pre-trained BERT model (without its classification head).
- Extract [CLS] token representations as features.
- Pass these embeddings to an MLP classifier.

In [None]:
class MLPClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_labels):
        super(MLPClassifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim // 2)  # Second hidden layer
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(hidden_dim // 2, num_labels)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        x = self.fc3(x)
        return x

    
def extract_features(text_list, tokenizer, model, batch_size=4):  # Reduce batch size
    model.eval()
    features = []

    for i in range(0, len(text_list), batch_size):
        batch_texts = text_list[i:i+batch_size]

        try:
            inputs = tokenizer(batch_texts, padding=True, truncation=True, return_tensors="pt")
            inputs = {key: val.to(device) for key, val in inputs.items()}

            with torch.no_grad():
                outputs = model(**inputs)

            batch_features = outputs.last_hidden_state[:, 0, :].to(device)
            features.append(batch_features)

        except RuntimeError as e:
            if "CUDA out of memory" in str(e):
                print(f"[ERROR] CUDA OOM at batch {i//batch_size + 1}, trying smaller batch size...")
                return None  # Return None so you can handle failure outside

            else:
                raise e  # Re-raise any other error

    return torch.cat(features, dim=0)


In [None]:
gc.collect()
torch.cuda.empty_cache()
print("Before model init:")
print("Memory allocated:", torch.cuda.memory_allocated())
print("Memory reserved:", torch.cuda.memory_reserved())


Before model init:
Memory allocated: 0
Memory reserved: 0


In [None]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from transformers import BertTokenizer, BertModel, AdamW
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.utils.class_weight import compute_class_weight
import collections

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load dataset and drop missing values
df = pd.read_csv("movies.csv").dropna()

# Convert RATING into 3-Class Labels
def categorize_rating(rating):
    if rating <= 3:
        return 0  # Negative
    elif 4 <= rating <= 6:
        return 1  # Neutral
    else:
        return 2  # Positive

df['labels'] = df['RATING'].apply(categorize_rating)

# Split dataset into train and test sets
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['REVIEW'].tolist(), df['labels'].tolist(), test_size=0.2, random_state=42
)

# label_counts = collections.Counter(train_labels.numpy())
# Load BERT tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Define MLP Classifier
class MLPClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_labels):
        super(MLPClassifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, num_labels)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# Function to Freeze BERT Layers
def freeze_bert_layers(model, freeze_percent=50):
    total_layers = len(model.encoder.layer)
    num_freeze = int((freeze_percent / 100) * total_layers)

    for param in model.embeddings.parameters():
        param.requires_grad = False

    for layer in model.encoder.layer[:num_freeze]:
        for param in layer.parameters():
            param.requires_grad = False

    print(f"Frozen {num_freeze}/{total_layers} encoder layers ({freeze_percent}%).")

# Function to Extract Features from BERT
def extract_features(text_list, tokenizer, model, batch_size=4):
    model.eval()
    features = []

    for i in range(0, len(text_list), batch_size):
        batch_texts = text_list[i:i+batch_size]

        inputs = tokenizer(batch_texts, padding=True, truncation=True, return_tensors="pt")
        inputs = {key: val.to(device) for key, val in inputs.items()}

        with torch.no_grad():
            outputs = model(**inputs)

        batch_features = outputs.last_hidden_state[:, 0, :].to(device)
        features.append(batch_features)

    return torch.cat(features, dim=0)

torch.cuda.empty_cache()
with torch.no_grad():
    bert = BertModel.from_pretrained("bert-base-uncased").to(device)  # Load BERT once
    train_features = extract_features(train_texts, tokenizer, bert)
    test_features = extract_features(test_texts, tokenizer, bert)

    train_labels = torch.tensor(train_labels, dtype=torch.long, device=device)
    test_labels = torch.tensor(test_labels, dtype=torch.long, device=device)

torch.cuda.empty_cache()  # Free up memory

# Freezing Percentages to Test
freeze_pcts = [0, 25, 50, 75, 100]
results = []  # Store results for comparison

# Loop Over Freezing Percentages
for pct in freeze_pcts:
    print(f"\nTraining with {pct}% of BERT layers frozen.")

    # Reload a fresh BERT model for each experiment
    bert = BertModel.from_pretrained("bert-base-uncased").to(device)
    freeze_bert_layers(bert, freeze_percent=pct)

    # Define a fresh MLP model
    mlp = MLPClassifier(input_dim=768, hidden_dim=256, num_labels=3).to(device)

    # Define Optimizer
    optimizer = AdamW(mlp.parameters(), lr=1e-5)

    # Compute Class Weights
    class_weights = compute_class_weight('balanced', classes=np.unique(train_labels.cpu().numpy()), y=train_labels.cpu().numpy())
    class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)

    # Training Loop
    num_epochs = 10
    best_accuracy = 0
    patience = 3
    counter = 0

    for epoch in range(num_epochs):
        mlp.train()
        optimizer.zero_grad()

        outputs = mlp(train_features)
        loss = loss_fn(outputs, train_labels)
        loss.backward()
        optimizer.step()

        print(f"Epoch {epoch+1}, Loss: {loss.item()}")

        # Evaluate Every Epoch
        with torch.no_grad():
            mlp.eval()
            predictions = mlp(test_features)
            preds = torch.argmax(predictions, dim=1)
            accuracy = accuracy_score(test_labels.cpu().numpy(), preds.cpu().numpy())

        print(f"Test Accuracy: {accuracy:.4f}")

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            counter = 0
            torch.save(mlp.state_dict(), f"best_model_{pct}.pth")
        else:
            counter += 1
            if counter >= patience:
                print("Early stopping triggered.")
                break  

    results.append((pct, best_accuracy))

print("\nFinal Results:", results)



Training with 0% of BERT layers frozen.
Frozen 0/12 encoder layers (0%).
Epoch 1, Loss: 1.1053599119186401
Test Accuracy: 0.3796
Epoch 2, Loss: 1.1046231985092163
Test Accuracy: 0.3805
Epoch 3, Loss: 1.1038827896118164
Test Accuracy: 0.3824
Epoch 4, Loss: 1.103144645690918
Test Accuracy: 0.3927
Epoch 5, Loss: 1.1024106740951538
Test Accuracy: 0.3964
Epoch 6, Loss: 1.1016817092895508
Test Accuracy: 0.3936
Epoch 7, Loss: 1.1009575128555298
Test Accuracy: 0.3955
Epoch 8, Loss: 1.1002378463745117
Test Accuracy: 0.3955
Early stopping triggered.

Training with 25% of BERT layers frozen.




Frozen 3/12 encoder layers (25%).
Epoch 1, Loss: 1.1165156364440918
Test Accuracy: 0.4142
Epoch 2, Loss: 1.1152135133743286
Test Accuracy: 0.4142
Epoch 3, Loss: 1.1139253377914429
Test Accuracy: 0.4142
Epoch 4, Loss: 1.1126583814620972
Test Accuracy: 0.4133
Early stopping triggered.

Training with 50% of BERT layers frozen.




Frozen 6/12 encoder layers (50%).
Epoch 1, Loss: 1.1032973527908325
Test Accuracy: 0.3768
Epoch 2, Loss: 1.1023502349853516
Test Accuracy: 0.3758
Epoch 3, Loss: 1.1014057397842407
Test Accuracy: 0.3758
Epoch 4, Loss: 1.1004703044891357
Test Accuracy: 0.3777
Epoch 5, Loss: 1.0995451211929321
Test Accuracy: 0.3768
Epoch 6, Loss: 1.098631739616394
Test Accuracy: 0.3739
Epoch 7, Loss: 1.0977303981781006
Test Accuracy: 0.3777
Early stopping triggered.

Training with 75% of BERT layers frozen.




Frozen 9/12 encoder layers (75%).
Epoch 1, Loss: 1.118080973625183
Test Accuracy: 0.3543
Epoch 2, Loss: 1.1169896125793457
Test Accuracy: 0.3552
Epoch 3, Loss: 1.115904450416565
Test Accuracy: 0.3552
Epoch 4, Loss: 1.1148324012756348
Test Accuracy: 0.3552
Epoch 5, Loss: 1.1137744188308716
Test Accuracy: 0.3543
Early stopping triggered.

Training with 100% of BERT layers frozen.




Frozen 12/12 encoder layers (100%).
Epoch 1, Loss: 1.109156847000122
Test Accuracy: 0.2502
Epoch 2, Loss: 1.1084381341934204
Test Accuracy: 0.2512
Epoch 3, Loss: 1.1077128648757935
Test Accuracy: 0.2530
Epoch 4, Loss: 1.1069871187210083
Test Accuracy: 0.2568
Epoch 5, Loss: 1.1062629222869873
Test Accuracy: 0.2587
Epoch 6, Loss: 1.1055408716201782
Test Accuracy: 0.2596
Epoch 7, Loss: 1.1048216819763184
Test Accuracy: 0.2615
Epoch 8, Loss: 1.1041057109832764
Test Accuracy: 0.2727
Epoch 9, Loss: 1.1033928394317627
Test Accuracy: 0.2802
Epoch 10, Loss: 1.1026840209960938
Test Accuracy: 0.2849

Final Results: [(0, 0.3964386129334583), (25, 0.41424554826616683), (50, 0.3776944704779756), (75, 0.35520149953139646), (100, 0.2849109653233365)]




In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# TF-IDF Vectorizer
vectorizer = TfidfVectorizer(max_features=5000)  # Adjust max features
X_train_tfidf = vectorizer.fit_transform(train_texts)
X_test_tfidf = vectorizer.transform(test_texts)

# MLP Classifier
mlp = MLPClassifier(hidden_layer_sizes=(256, 128), max_iter=10, activation="relu", solver="adam", random_state=42)
mlp.fit(X_train_tfidf, train_labels.cpu().numpy())

# Evaluate
test_acc = mlp.score(X_test_tfidf, test_labels.cpu().numpy())
print(f"MLP Accuracy: {test_acc:.4f}")


MLP Accuracy: 0.7413


