In [16]:
import pandas as pd
# Location of the raw CSV; kept in one named constant so it is easy to repoint.
DATA_PATH = "/home/kandarpa-sarkar/Downloads/text.csv"
df = pd.read_csv(DATA_PATH)

In [17]:
# Preview the first five rows of the raw frame.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,text,label
0,0,i just feel really helpless and heavy hearted,4
1,1,ive enjoyed being able to slouch about relax a...,0
2,2,i gave up my internship with the dmrg and am f...,4
3,3,i dont know i feel so lost,0
4,4,i am a kindergarten teacher and i am thoroughl...,4


In [18]:
# Plain Python list of the raw sentences, in row order, as input for the tokenizer.
texts = df['text'].tolist()

In [19]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained('albert-base-v1')
# `max_length` expects an integer token budget. The previous `max_length=True` passed a
# boolean, which behaves as the integer 1 and would cut every text down to (almost) nothing.
# With `max_length` omitted, `truncation=True` truncates to the maximum input length the
# model accepts and `padding=True` pads each sequence to the longest one in the batch.
tokens = tokenizer(texts, truncation=True, padding=True)

In [20]:
# Class ids as a plain Python list, aligned row-for-row with `texts`.
labels = df['label'].tolist()

In [21]:
def train_test_val(encodings, labels):
    """Split encodings and labels sequentially into train / test / validation parts.

    The first 80% of the samples (rounded down) form the training part. The
    remainder is halved: the first half (rounded down) is the test part and
    whatever is left is the validation part. No shuffling is performed.

    Args:
        encodings: Mapping of field name to a sliceable sequence with one
            entry per sample.
        labels: Sliceable sequence of labels, one per sample.

    Returns:
        Three ``(encodings_dict, labels)`` pairs in the order
        train, test, validation.
    """
    total = len(labels)
    n_train = int(total * 0.8)
    n_test = (total - n_train) // 2

    # Contiguous index windows for the three partitions, in return order.
    windows = (
        slice(None, n_train),
        slice(n_train, n_train + n_test),
        slice(n_train + n_test, None),
    )

    def _take(window):
        # Apply the same window to every encoding field and to the labels.
        subset = {field: values[window] for field, values in encodings.items()}
        return subset, labels[window]

    train_part, test_part, val_part = (_take(window) for window in windows)
    return train_part, test_part, val_part

# Split once, then unpack each (encodings, labels) pair into its own names.
train_split, test_split, val_split = train_test_val(tokens, labels)
train_enc, train_lbl = train_split
test_enc, test_lbl = test_split
val_enc, val_lbl = val_split


In [22]:
import torch

class dataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with their labels.

    Indexing returns a dict holding one tensor per encoding field plus a
    ``'labels'`` tensor built from the label at the same index.
    """

    def __init__(self, encodings, labels):
        """Store the encodings mapping and the label sequence as given."""
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Number of samples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Build the tensor dict for the sample at position ``idx``."""
        sample = {}
        for field, column in self.encodings.items():
            sample[field] = torch.tensor(column[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

In [23]:
# Wrap the train and validation splits in Dataset objects for the DataLoaders.
train_data = dataset(encodings=train_enc, labels=train_lbl)
val_data = dataset(encodings=val_enc, labels=val_lbl)

In [24]:
from transformers import AlbertForSequenceClassification

# Size the classification head from the data instead of hard-coding it: every label id must
# lie in [0, num_labels), otherwise the loss fails on out-of-range targets.
# Assumes label ids are non-negative integers starting at 0 - confirm against the CSV.
num_labels = int(df['label'].max()) + 1
model = AlbertForSequenceClassification.from_pretrained("albert-base-v1", num_labels=num_labels)

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [25]:
# Freeze every parameter of the model; the trailing ';' keeps the returned module from
# being echoed as the cell output.
model.requires_grad_(False);

In [26]:
# Display the module tree of the loaded model.
model

AlbertForSequenceClassification(
  (albert): AlbertModel(
    (embeddings): AlbertEmbeddings(
      (word_embeddings): Embedding(30000, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): AlbertTransformer(
      (embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)
      (albert_layer_groups): ModuleList(
        (0): AlbertLayerGroup(
          (albert_layers): ModuleList(
            (0): AlbertLayer(
              (full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (attention): AlbertSdpaAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_feature

In [27]:
# Re-enable gradients for the classification head only, so it is the sole trainable part.
model.classifier.requires_grad_(True);

In [28]:
import torch
# Default to the CPU and switch to the GPU only when CUDA is available.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
model.to(device)

AlbertForSequenceClassification(
  (albert): AlbertModel(
    (embeddings): AlbertEmbeddings(
      (word_embeddings): Embedding(30000, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): AlbertTransformer(
      (embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)
      (albert_layer_groups): ModuleList(
        (0): AlbertLayerGroup(
          (albert_layers): ModuleList(
            (0): AlbertLayer(
              (full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (attention): AlbertSdpaAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_feature

In [29]:
# List each parameter that still requires gradients and count its elements in the same pass.
total = 0
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    print(name, param.shape)
    total += param.numel()
print(f"Trainable parameters: {total}")

classifier.weight torch.Size([5, 768])
classifier.bias torch.Size([5])
Trainable parameters: 3845


In [30]:
from torch.utils.data import DataLoader

# Training batches are reshuffled every epoch; validation batches keep dataset order.
train_loader = DataLoader(
    dataset=train_data,
    batch_size=16,
    shuffle=True,
    num_workers=2,
)
val_loader = DataLoader(
    dataset=val_data,
    batch_size=16,
    shuffle=False,
    num_workers=2,
)

In [31]:
import torch.optim as optim
import torch.nn.functional as F
import sys
import os
# Set the TOKENIZERS_PARALLELISM environment variable to "false" for this process
# (presumably to stop the tokenizers library clashing with DataLoader workers - confirm).
os.environ.update({"TOKENIZERS_PARALLELISM": "false"})

def train(epochs, lr,
          weights_dir="/home/kandarpa-sarkar/Desktop/albertv1/model weights/albert-emotion-model",
          tokenizer_dir="/home/kandarpa-sarkar/Desktop/albertv1/model tokenizer/albert-emotion-model"):
    """Train the global ``model`` on ``train_loader`` and evaluate it on ``val_loader``.

    Each epoch runs one pass over ``train_loader`` with an AdamW update per
    batch, then one gradient-free pass over ``val_loader``. A running per-step
    loss is written to stdout during training and an epoch summary (mean
    loss and accuracy for both splits) is printed afterwards. After the last
    epoch the model and the global ``tokenizer`` are saved.

    Args:
        epochs: Number of full passes over ``train_loader``.
        lr: Learning rate passed to AdamW.
        weights_dir: Directory handed to ``model.save_pretrained``. Defaults to
            the location this notebook originally used.
        tokenizer_dir: Directory handed to ``tokenizer.save_pretrained``.
            Defaults to the location this notebook originally used.

    Relies on the notebook globals ``model``, ``tokenizer``, ``device``,
    ``train_loader`` and ``val_loader``.
    """
    # Hand the optimizer only the parameters that are actually trainable; frozen
    # parameters never receive gradients, so there is nothing for it to update there.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.AdamW(trainable_params, lr=lr)

    for epoch in range(epochs):
        # The validation pass below switches to eval mode, so re-enter train mode each epoch.
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        val_loss = 0.0
        val_correct = 0
        val_total = 0

        print(f"\nEpoch {epoch + 1}/{epochs}")

        # === TRAINING LOOP ===
        for step, batch in enumerate(train_loader, 1):
            txt = batch['input_ids'].to(device)
            labels = batch['labels'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            # Passing `labels` makes the model return the loss alongside the logits.
            outputs = model(input_ids=txt, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            logits = outputs.logits

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            train_loss += loss.item()

            preds = torch.argmax(logits, dim=1)
            train_correct += (preds == labels).sum().item()
            train_total += labels.size(0)

            # '\r' rewrites the same console line so the progress readout stays on one row.
            sys.stdout.write(
                f"\r[Step {step}/{len(train_loader)}] Train Loss: {loss.item():.4f}"
            )
            sys.stdout.flush()

        # === VALIDATION LOOP ===
        model.eval()
        with torch.no_grad():
            for batch_ in val_loader:
                txt_ = batch_['input_ids'].to(device)
                labels_ = batch_['labels'].to(device)
                attention_mask_ = batch_['attention_mask'].to(device)

                val_outputs = model(input_ids=txt_, attention_mask=attention_mask_, labels=labels_)
                val_loss += val_outputs.loss.item()

                val_preds = torch.argmax(val_outputs.logits, dim=1)
                val_correct += (val_preds == labels_).sum().item()
                val_total += labels_.size(0)

        # max(..., 1) keeps the summary from raising ZeroDivisionError when a loader is empty.
        avg_train_loss = train_loss / max(len(train_loader), 1)
        avg_val_loss = val_loss / max(len(val_loader), 1)

        train_acc = train_correct / max(train_total, 1)
        val_acc = val_correct / max(val_total, 1)

        print(f"\nEpoch Summary — Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | "
              f"Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}")

    # Persist the trained weights and the tokenizer once all epochs have finished.
    model.save_pretrained(weights_dir)
    tokenizer.save_pretrained(tokenizer_dir)


In [32]:
# Single epoch at a small learning rate.
train(epochs=1, lr=1e-5)


Epoch 1/1
[Step 2/20841] Train Loss: 1.6126 

KeyboardInterrupt: 