In [2]:
import pandas as pd
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics import f1_score, accuracy_score
from tqdm.auto import tqdm

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MODEL_NAME = "bert-base-uncased"
MAX_LEN = 64        # lyrics truncated/padded to 256 tokens
NUM_CLASSES = 2      # low / mid / high valence

ModuleNotFoundError: No module named 'torch'

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
df = pd.read_csv("/content/drive/MyDrive/muse_d.csv")

df["lyrics"] = df["lyrics"].astype(str)
df["valence_tags"] = pd.to_numeric(df["valence_tags"], errors="coerce")
df=df[["lyrics","valence_tags"]]

def valence_to_label(v):
    if v <= 5:   return int(0)          # low‐valence
    elif v > 5:  return int(1)          # high

df["label"] = df["valence_tags"].apply(valence_to_label)
df.dropna(inplace=True)
df.head(10)

  df = pd.read_csv("/content/drive/MyDrive/muse_d.csv")


Unnamed: 0,lyrics,valence_tags,label
0,Id stoop to that Sure I would Id stoop to that...,5.043333,1.0
1,Every breath you take and every move you make ...,5.14,1.0
2,I'm just an American boy Raised on MTV And I'v...,4.625,0.0
3,"Load up all your friends man, I'm tripping lik...",3.936667,0.0
4,"Summer comes, winter fades Here we are, just t...",5.2325,1.0
5,Spastic gyrations And abbreviated bathing suit...,4.865,0.0
6,"Cut him up, boy You've got to cut him up, boy ...",4.99,0.0
7,I don't believe in Jesus I don't believe in Hi...,4.22,0.0
8,"Scene One, Curtain up See the couple, coffee s...",5.086,1.0
9,Whatever you became Blame it on my fame Always...,4.115862,0.0


In [None]:
import re
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

def clean_lyrics(text):
    text = str(text)
    text = re.sub(r"\s+", " ", text)  # Collapse whitespace
    text = re.sub(r"’", "'", text)
    text = re.sub(r"[^a-zA-Z0-9',.!?()\s]", "", text)  # Keep basic punctuation
    return text.strip().lower()

def preprocess_train_val(df, val_size=0.2, random_state=42):
    df = df.dropna(subset=["lyrics", "label"]).copy()
    df["lyrics"] = df["lyrics"].apply(clean_lyrics)

    train_df, val_df = train_test_split(
        df,
        test_size=val_size,
        stratify=df["label"],
        random_state=random_state
    )

    return train_df, val_df

train_df, val_df= preprocess_train_val(df, val_size=0.1)

In [None]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

class LyricsDataset(Dataset):
    def __init__(self, df):
        self.texts  = df["lyrics"].tolist()
        self.labels = df["label"].tolist()

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        encoding = tokenizer(
            self.texts[idx],
            truncation=True,
            max_length=MAX_LEN,
            padding="max_length",
            return_tensors="pt"
        )
        return {
            "input_ids":      encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "labels":         torch.tensor(self.labels[idx], dtype=torch.long)
        }



train_ds = LyricsDataset(train_df.reset_index(drop=True))
val_ds   = LyricsDataset(val_df.reset_index(drop=True))

In [None]:
BATCH_SIZE = 32

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False)

In [None]:
import torch
import torch.nn as nn
from transformers import AutoModel

class AttentionPooling(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.attn = nn.Sequential(
            nn.Linear(dim, 128),
            nn.Tanh(),
            nn.Linear(128, 1)
        )

    def forward(self, x):  # x: (B, L, D)
        weights = self.attn(x).squeeze(-1)             # (B, L)
        weights = torch.softmax(weights, dim=1)        # (B, L)
        pooled = torch.sum(x * weights.unsqueeze(-1), dim=1)  # (B, D)
        return pooled

class BertLyricsClassifier(nn.Module):
    def __init__(self, num_labels=NUM_CLASSES, model_name=MODEL_NAME, freeze_bert=False):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        if freeze_bert:
            for p in self.bert.parameters():
                p.requires_grad = False

        self.pool = AttentionPooling(768)
        self.norm = nn.LayerNorm(768)
        self.head = nn.Sequential(
            nn.Linear(768, 256),
            nn.GELU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_labels)
        )

    def forward(self, input_ids, attention_mask):
        out = self.bert(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state  # (B, L, 768)
        pooled = self.pool(out)       # (B, 768)
        pooled = self.norm(pooled)
        return self.head(pooled)      # logits


In [None]:
'''import torch
import torch.nn as nn
from transformers import AutoModel

class BertLyricsClassifier(nn.Module):
    def __init__(self, num_labels=NUM_CLASSES, model_name=MODEL_NAME, freeze_bert=False):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        if freeze_bert:
            for p in self.bert.parameters():
                p.requires_grad = False

        self.pool = nn.AdaptiveAvgPool1d(1)
        self.norm = nn.LayerNorm(768)
        self.head = nn.Sequential(
            nn.Linear(768, 256),
            nn.GELU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_labels)
        )

    def forward(self, input_ids, attention_mask):
        out = self.bert(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state  # (B, L, 768)
        out = out.permute(0, 2, 1)             # (B, 768, L)
        pooled = self.pool(out).squeeze(-1)    # (B, 768)
        pooled = self.norm(pooled)
        return self.head(pooled)               # logits'''

'import torch\nimport torch.nn as nn\nfrom transformers import AutoModel\n\nclass BertLyricsClassifier(nn.Module):\n    def __init__(self, num_labels=NUM_CLASSES, model_name=MODEL_NAME, freeze_bert=False):\n        super().__init__()\n        self.bert = AutoModel.from_pretrained(model_name)\n        if freeze_bert:\n            for p in self.bert.parameters():\n                p.requires_grad = False\n\n        self.pool = nn.AdaptiveAvgPool1d(1)\n        self.norm = nn.LayerNorm(768)\n        self.head = nn.Sequential(\n            nn.Linear(768, 256),\n            nn.GELU(),\n            nn.Dropout(0.3),\n            nn.Linear(256, num_labels)\n        )\n\n    def forward(self, input_ids, attention_mask):\n        out = self.bert(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state  # (B, L, 768)\n        out = out.permute(0, 2, 1)             # (B, 768, L)\n        pooled = self.pool(out).squeeze(-1)    # (B, 768)\n        pooled = self.norm(pooled)\n        retur

In [None]:
'''class BertLyricsClassifier(nn.Module):
    def __init__(self, num_labels=NUM_CLASSES, freeze_bert=False):
        super().__init__()
        self.bert = AutoModel.from_pretrained(MODEL_NAME)
        if freeze_bert:
            for p in self.bert.parameters():
                p.requires_grad = False

        self.enc = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=768, nhead=12, dim_feedforward=2048, dropout=0.1
            ),
            num_layers=2
        )
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.norm = nn.LayerNorm(768)
        self.head = nn.Sequential(
            nn.Linear(768, 256),
            nn.GELU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_labels)
        )

    def forward(self, input_ids, attention_mask):
        out = self.bert(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state
        out = self.enc(out)                     # (B, L, 768)
        out = out.permute(0, 2, 1)             # to (B, 768, L)
        pooled = self.pool(out).squeeze(-1)    # (B, 768)
        pooled = self.norm(pooled)
        return self.head(pooled)               # logits'''

'class BertLyricsClassifier(nn.Module):\n    def __init__(self, num_labels=NUM_CLASSES, freeze_bert=False):\n        super().__init__()\n        self.bert = AutoModel.from_pretrained(MODEL_NAME)\n        if freeze_bert:\n            for p in self.bert.parameters():\n                p.requires_grad = False\n\n        self.enc = nn.TransformerEncoder(\n            nn.TransformerEncoderLayer(\n                d_model=768, nhead=12, dim_feedforward=2048, dropout=0.1\n            ),\n            num_layers=2\n        )\n        self.pool = nn.AdaptiveAvgPool1d(1)\n        self.norm = nn.LayerNorm(768)\n        self.head = nn.Sequential(\n            nn.Linear(768, 256),\n            nn.GELU(),\n            nn.Dropout(0.3),\n            nn.Linear(256, num_labels)\n        )\n\n    def forward(self, input_ids, attention_mask):\n        out = self.bert(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state\n        out = self.enc(out)                     # (B, L, 768)\n        o

In [None]:
model = BertLyricsClassifier(freeze_bert=False).to(DEVICE)

In [None]:
EPOCHS = 3

# Discriminative LRs: tiny for BERT, bigger for new layers
base_params   = [p for n,p in model.named_parameters() if n.startswith("bert")]
head_params   = [p for n,p in model.named_parameters() if not n.startswith("bert")]

optimizer = torch.optim.AdamW([
    {"params": base_params, "lr": 1e-5},
    {"params": head_params, "lr": 3e-5}
], weight_decay=1e-2)

criterion = nn.CrossEntropyLoss()
scheduler = torch.optim.lr_scheduler.LinearLR(
    optimizer, start_factor=1.0, end_factor=0.1, total_iters=EPOCHS*len(train_loader)
)

In [None]:
def evaluate():
    model.eval()
    val_loss, preds, truths = 0, [], []
    with torch.no_grad():
        for batch in val_loader:
            ids  = batch["input_ids"].to(DEVICE)
            mask = batch["attention_mask"].to(DEVICE)
            y    = batch["labels"].to(DEVICE)

            logits = model(ids, mask)
            loss   = criterion(logits, y)
            val_loss += loss.item() * ids.size(0)

            preds.extend(torch.argmax(logits, dim=1).cpu().tolist())
            truths.extend(y.cpu().tolist())

    val_loss /= len(val_loader.dataset)
    acc  = accuracy_score(truths, preds)
    f1   = f1_score(truths, preds, average="macro")
    return val_loss, acc, f1

for epoch in range(1, EPOCHS+1):
    model.train()
    epoch_loss = 0
    pbar = tqdm(train_loader, desc=f"Epoch {epoch}")
    for batch in pbar:
        ids  = batch["input_ids"].to(DEVICE)
        mask = batch["attention_mask"].to(DEVICE)
        y    = batch["labels"].to(DEVICE)

        logits = model(ids, mask)
        loss   = criterion(logits, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        epoch_loss += loss.item() * ids.size(0)
        pbar.set_postfix({"train_loss": f"{loss.item():.4f}"})

    epoch_loss /= len(train_loader.dataset)
    val_loss, acc, f1 = evaluate()
    print(f"Epoch {epoch:02d} | "
          f"train_loss={epoch_loss:.4f} | val_loss={val_loss:.4f} "
          f"| val_acc={acc:.3f} | val_f1={f1:.3f}")


Epoch 1:   0%|          | 0/590 [00:00<?, ?it/s]

Epoch 01 | train_loss=0.6696 | val_loss=0.6464 | val_acc=0.635 | val_f1=0.629


Epoch 2:   0%|          | 0/590 [00:00<?, ?it/s]

Epoch 02 | train_loss=0.6202 | val_loss=0.6506 | val_acc=0.627 | val_f1=0.626


Epoch 3:   0%|          | 0/590 [00:00<?, ?it/s]

Epoch 03 | train_loss=0.5480 | val_loss=0.6531 | val_acc=0.644 | val_f1=0.640
