In [11]:
import numpy as np
import pandas as pd
from transformers import AutoModel, AutoTokenizer
from sentence_transformers import SentenceTransformer
from imblearn.over_sampling import ADASYN
import torch
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cuda


In [12]:
model_name = "sentence-transformers/LaBSE"
tokenizer1 = AutoTokenizer.from_pretrained(model_name)
model1 = AutoModel.from_pretrained(model_name)

model1.to(device)

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(501153, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=Fals

In [5]:
data  = pd.read_csv('./Data/cleaned_PS_train.csv')
le = LabelEncoder()
data['label'] = le.fit_transform(data['label'])

val = pd.read_csv('./Data/merged_test.csv')
val['label'] = le.transform(val['label'])

print("Class mapping (Original label -> Encoded integer):")
for i, class_name in enumerate(le.classes_):
    print(f"{class_name} -> {i}")

# Print the classes array
print("\nOriginal classes:")
print(le.classes_)
 

Class mapping (Original label -> Encoded integer):
Negative -> 0
Neutral -> 1
None of the above -> 2
Opinionated -> 3
Positive -> 4
Sarcastic -> 5
Substantiated -> 6

Original classes:
['Negative' 'Neutral' 'None of the above' 'Opinionated' 'Positive'
 'Sarcastic' 'Substantiated']


In [13]:
model_name = "ai4bharat/indic-bert"
tokenizer2 = AutoTokenizer.from_pretrained(model_name)
model2 = AutoModel.from_pretrained(model_name)

model2.to(device)

AlbertModel(
  (embeddings): AlbertEmbeddings(
    (word_embeddings): Embedding(200000, 128, padding_idx=0)
    (position_embeddings): Embedding(512, 128)
    (token_type_embeddings): Embedding(2, 128)
    (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0, inplace=False)
  )
  (encoder): AlbertTransformer(
    (embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)
    (albert_layer_groups): ModuleList(
      (0): AlbertLayerGroup(
        (albert_layers): ModuleList(
          (0): AlbertLayer(
            (full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (attention): AlbertSdpaAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (attention_dropout): Dropout(p=0, inplace=False)
 

Error during conversion: ChunkedEncodingError(ProtocolError('Response ended prematurely'))


In [14]:
def extract_embeddings(model, tokenizer, texts, device = "cuda" if torch.cuda.is_available() else "cpu", max_length=256):
    model.eval()
    embeddings = []
    with torch.no_grad():
        for text in tqdm(texts, desc="Extracting Embeddings"):
            encoding = tokenizer(
                text,
                add_special_tokens=True,
                max_length=max_length,
                padding='max_length',
                truncation=True,
                return_attention_mask=True,
                return_tensors='pt'
            ).to(device)

            # Extract embeddings
            outputs = model(**encoding, output_hidden_states=True)
            hidden_states = outputs.hidden_states[-1]  # Last hidden layer
            embedding = hidden_states.mean(dim=1).squeeze(0)  # Mean pooling
            embeddings.append(embedding.cpu().numpy())

    return torch.tensor(embeddings)


In [15]:
data.shape, val.shape

((4896, 2), (544, 3))

In [16]:
print("Extracting embeddings for training data...")

Extracting embeddings for training data...


In [18]:
X_embeddings1 = extract_embeddings(model1,tokenizer1,data["content"])
X_embeddings2 = extract_embeddings(model2,tokenizer2,data["content"])

val_embeddings1 = extract_embeddings(model1,tokenizer1,val["content"])
val_embeddings2 = extract_embeddings(model2,tokenizer2,val["content"])

Extracting Embeddings: 100%|██████████| 4896/4896 [01:40<00:00, 48.55it/s]
  return torch.tensor(embeddings)
Extracting Embeddings: 100%|██████████| 4896/4896 [01:16<00:00, 63.95it/s]
Extracting Embeddings: 100%|██████████| 544/544 [00:09<00:00, 54.76it/s]
Extracting Embeddings: 100%|██████████| 544/544 [00:08<00:00, 63.07it/s]


In [19]:
print(X_embeddings1.shape, X_embeddings2.shape, val_embeddings1.shape, val_embeddings2.shape)
X = np.concatenate((X_embeddings1, X_embeddings2), axis=1)
val_X = np.concatenate((val_embeddings1, val_embeddings2), axis=1)
y = data['label'].values
y_val = val['label'].values
print(X.shape, y.shape, val_X.shape, y_val.shape)

torch.Size([4896, 768]) torch.Size([4896, 768]) torch.Size([544, 768]) torch.Size([544, 768])
(4896, 1536) (4896,) (544, 1536) (544,)


In [20]:
from sklearn.feature_extraction.text import TfidfVectorizer


tfidf_vectorizer = TfidfVectorizer()
X_tfidf = tfidf_vectorizer.fit_transform(data["content"])

# Create TF-IDF vectors for validation data using the same vocabulary
val_tfidf = tfidf_vectorizer.transform(val["content"])

# Convert to dense arrays for concatenation with embeddings
X_tfidf_dense = X_tfidf.toarray()
val_tfidf_dense = val_tfidf.toarray()

print(f"TF-IDF features shape: {X_tfidf_dense.shape}, {val_tfidf_dense.shape}")

# Create combined feature vectors with embeddings and TF-IDF
X_combined = np.hstack((X, X_tfidf_dense))
val_X_combined = np.hstack((val_X, val_tfidf_dense))

print(f"Combined features shape: {X_combined.shape}, {val_X_combined.shape}")

TF-IDF features shape: (4896, 2540), (544, 2540)
Combined features shape: (4896, 4076), (544, 4076)


In [29]:
import lightgbm as lgb
from sklearn.model_selection import cross_val_score

In [30]:
print("Training XGBoost model...")
xgb_model = XGBClassifier(
    n_estimators=200,
    learning_rate=0.1,
    use_label_encoder=False,
    eval_metric='mlogloss'
)
xgb_model.fit(X_combined, y)

# Train LightGBM model
print("Training LightGBM model...")
lgb_model = lgb.LGBMClassifier(
    n_estimators=200,
    learning_rate=0.1,
)
lgb_model.fit(X_combined, y)

# Evaluate models on validation set
xgb_pred = xgb_model.predict(val_X_combined)
lgb_pred = lgb_model.predict(val_X_combined)

# Print results
print("\nXGBoost results:")
print(f"Accuracy: {accuracy_score(y_val, xgb_pred):.4f}")
print(classification_report(y_val, xgb_pred))

print("\nLightGBM results:")
print(f"Accuracy: {accuracy_score(y_val, lgb_pred):.4f}")
print(classification_report(y_val, lgb_pred))

# Perform cross-validation on training data
print("\nPerforming 5-fold cross-validation:")
xgb_cv = cross_val_score(xgb_model, X_combined, y, cv=5, scoring='accuracy')
lgb_cv = cross_val_score(lgb_model, X_combined, y, cv=5, scoring='accuracy')

print(f"XGBoost CV accuracy: {xgb_cv.mean():.4f} ± {xgb_cv.std():.4f}")
print(f"LightGBM CV accuracy: {lgb_cv.mean():.4f} ± {lgb_cv.std():.4f}")

Training XGBoost model...


Parameters: { "use_label_encoder" } are not used.



Training LightGBM model...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.076068 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 406050
[LightGBM] [Info] Number of data points in the train set: 4896, number of used features: 1923
[LightGBM] [Info] Start training from score -2.371490
[LightGBM] [Info] Start training from score -1.915535
[LightGBM] [Info] Start training from score -3.243900
[LightGBM] [Info] Start training from score -1.173663
[LightGBM] [Info] Start training from score -2.028475
[LightGBM] [Info] Start training from score -1.688239
[LightGBM] [Info] Start training from score -2.356289





XGBoost results:
Accuracy: 0.3934
              precision    recall  f1-score   support

           0       0.12      0.02      0.04        46
           1       0.26      0.11      0.16        70
           2       0.96      0.92      0.94        25
           3       0.39      0.78      0.52       171
           4       0.32      0.15      0.20        75
           5       0.41      0.33      0.36       106
           6       0.14      0.04      0.06        51

    accuracy                           0.39       544
   macro avg       0.37      0.34      0.33       544
weighted avg       0.35      0.39      0.33       544


LightGBM results:
Accuracy: 0.4007
              precision    recall  f1-score   support

           0       0.20      0.02      0.04        46
           1       0.28      0.16      0.20        70
           2       0.92      0.92      0.92        25
           3       0.38      0.76      0.51       171
           4       0.45      0.19      0.26        75
       

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.057767 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 403125
[LightGBM] [Info] Number of data points in the train set: 3916, number of used features: 1872
[LightGBM] [Info] Start training from score -2.370193
[LightGBM] [Info] Start training from score -1.916718
[LightGBM] [Info] Start training from score -3.248945
[LightGBM] [Info] Start training from score -1.173624
[LightGBM] [Info] Start training from score -2.028659
[LightGBM] [Info] Start training from score -1.688035
[LightGBM] [Info] Start training from score -2.353932




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.063666 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 403318
[LightGBM] [Info] Number of data points in the train set: 3917, number of used features: 1869
[LightGBM] [Info] Start training from score -2.370448
[LightGBM] [Info] Start training from score -1.915239
[LightGBM] [Info] Start training from score -3.242643
[LightGBM] [Info] Start training from score -1.173880
[LightGBM] [Info] Start training from score -2.028914
[LightGBM] [Info] Start training from score -1.688290
[LightGBM] [Info] Start training from score -2.356879




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.055814 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 403218
[LightGBM] [Info] Number of data points in the train set: 3917, number of used features: 1872
[LightGBM] [Info] Start training from score -2.370448
[LightGBM] [Info] Start training from score -1.915239
[LightGBM] [Info] Start training from score -3.242643
[LightGBM] [Info] Start training from score -1.173880
[LightGBM] [Info] Start training from score -2.028914
[LightGBM] [Info] Start training from score -1.688290
[LightGBM] [Info] Start training from score -2.356879




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059306 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 403006
[LightGBM] [Info] Number of data points in the train set: 3917, number of used features: 1865
[LightGBM] [Info] Start training from score -2.373184
[LightGBM] [Info] Start training from score -1.915239
[LightGBM] [Info] Start training from score -3.242643
[LightGBM] [Info] Start training from score -1.173880
[LightGBM] [Info] Start training from score -2.026975
[LightGBM] [Info] Start training from score -1.688290
[LightGBM] [Info] Start training from score -2.356879




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.076716 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 403244
[LightGBM] [Info] Number of data points in the train set: 3917, number of used features: 1870
[LightGBM] [Info] Start training from score -2.373184
[LightGBM] [Info] Start training from score -1.915239
[LightGBM] [Info] Start training from score -3.242643
[LightGBM] [Info] Start training from score -1.173054
[LightGBM] [Info] Start training from score -2.028914
[LightGBM] [Info] Start training from score -1.688290
[LightGBM] [Info] Start training from score -2.356879
XGBoost CV accuracy: 0.3685 ± 0.0059
LightGBM CV accuracy: 0.3617 ± 0.0088




In [24]:
import torch
import torch.nn as nn

class ResidualAttentionBlock(nn.Module):
    def __init__(self, embed_dim, num_heads):
        super(ResidualAttentionBlock, self).__init__()
        self.attn = nn.MultiheadAttention(embed_dim=embed_dim, num_heads=num_heads, batch_first=True)
        self.norm1 = nn.LayerNorm(embed_dim)

        self.ffn = nn.Sequential(
            nn.Linear(embed_dim, embed_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(embed_dim, embed_dim),
            nn.ReLU(),
            nn.Dropout(0.3)
        )
        self.norm2 = nn.LayerNorm(embed_dim)

    def forward(self, x):
        attn_out, _ = self.attn(x, x, x)
        x = self.norm1(x + attn_out)

        ffn_out = self.ffn(x)
        x = self.norm2(x + ffn_out)
        return x

class AttentionClassifier(nn.Module):
    def __init__(self, input_dim=4076, embed_dim=2048, num_heads=16, num_classes=7, num_blocks=8):
        super(AttentionClassifier, self).__init__()

        self.input_proj = nn.Linear(input_dim, embed_dim)
        self.blocks = nn.Sequential(
            *[ResidualAttentionBlock(embed_dim, num_heads) for _ in range(num_blocks)]
        )

        self.pool = nn.AdaptiveAvgPool1d(1)

        self.classifier = nn.Sequential(
            nn.Linear(embed_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.input_proj(x)
        x = x.unsqueeze(1)    

        x = self.blocks(x)             

        x = x.transpose(1, 2)          
        x = self.pool(x).squeeze(-1)   

        return self.classifier(x)


In [28]:
X_train = torch.tensor(X_combined, dtype=torch.float32)
y_train = torch.tensor(y, dtype=torch.long)
X_val = torch.tensor(val_X_combined, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.long)

print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")

train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

class_counts = torch.bincount(y_train)
class_weights = 1.0 / class_counts.float()
class_weights = class_weights / class_weights.sum()  # Normalize (optional)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AttentionClassifier().to(device)
criterion = nn.CrossEntropyLoss(weight=class_weights.to(device))
optimizer = optim.Adam(model.parameters(), lr=1e-5)


  y_val = torch.tensor(y_val, dtype=torch.long)


X_train shape: torch.Size([4896, 4076]), y_train shape: torch.Size([4896])
X_val shape: torch.Size([544, 4076]), y_val shape: torch.Size([544])


In [29]:
# class FocalLoss(nn.Module):
#     def __init__(self, gamma=2.0, weight=None):
#         super().__init__()
#         self.gamma = gamma
#         self.ce = nn.CrossEntropyLoss(weight=weight)

#     def forward(self, input, target):
#         logp = self.ce(input, target)
#         p = torch.exp(-logp)
#         loss = (1 - p) ** self.gamma * logp
#         return loss.mean(
    
# class_counts = torch.bincount(y_train)
# class_weights = 1.0 / class_counts.float()
# class_weights = class_weights / class_weights.sum()  # Normalize (optional)
# criterion = FocalLoss(gamma=2.0, weight=class_weights.to(device))

In [30]:
from sklearn.metrics import f1_score
num_epochs = 100

for epoch in range(1, num_epochs + 1):
    model.train()
    total_loss = 0
    train_loader_tqdm = tqdm(train_loader, desc=f"Epoch {epoch:02d}")

    for x_batch, y_batch in train_loader_tqdm:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        logits = model(x_batch)
        loss = criterion(logits, y_batch)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        train_loader_tqdm.set_postfix(loss=loss.item())

    # Validation
    model.eval()
    correct, total = 0, 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for x_batch, y_batch in val_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            logits = model(x_batch)
            preds = torch.argmax(logits, dim=1)

            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(y_batch.cpu().tolist())

            correct += (preds == y_batch).sum().item()
            total += y_batch.size(0)

    acc = correct / total * 100
    macro_f1 = f1_score(all_labels, all_preds, average='macro')
    weighted_f1 = f1_score(all_labels, all_preds, average='weighted')
    micro_f1 = f1_score(all_labels, all_preds, average='micro')
    print(f"Epoch {epoch:02d} | Train Loss: {total_loss:.4f} | Val Acc: {acc:.2f}% | Macro F1: {macro_f1:.4f} | Weighted F1: {weighted_f1:.4f} | Micro F1: {micro_f1:.4f}")

print("\n📊 Classification Report on Validation Set:")
print(classification_report(all_labels, all_preds, digits=4))

Epoch 01: 100%|██████████| 153/153 [00:47<00:00,  3.20it/s, loss=1.82]


Epoch 01 | Train Loss: 270.2272 | Val Acc: 14.34% | Macro F1: 0.1692 | Weighted F1: 0.0747 | Micro F1: 0.1434


Epoch 02: 100%|██████████| 153/153 [00:45<00:00,  3.33it/s, loss=1.45]


Epoch 02 | Train Loss: 253.3594 | Val Acc: 30.88% | Macro F1: 0.2457 | Weighted F1: 0.2605 | Micro F1: 0.3088


Epoch 03: 100%|██████████| 153/153 [00:45<00:00,  3.36it/s, loss=1.62]


Epoch 03 | Train Loss: 248.6413 | Val Acc: 21.88% | Macro F1: 0.2658 | Weighted F1: 0.1807 | Micro F1: 0.2188


Epoch 04: 100%|██████████| 153/153 [00:45<00:00,  3.38it/s, loss=1.72]


Epoch 04 | Train Loss: 243.0827 | Val Acc: 31.80% | Macro F1: 0.3291 | Weighted F1: 0.3093 | Micro F1: 0.3180


Epoch 05: 100%|██████████| 153/153 [00:46<00:00,  3.32it/s, loss=1.54]


Epoch 05 | Train Loss: 241.0861 | Val Acc: 25.74% | Macro F1: 0.2811 | Weighted F1: 0.2351 | Micro F1: 0.2574


Epoch 06: 100%|██████████| 153/153 [00:45<00:00,  3.33it/s, loss=1.83] 


Epoch 06 | Train Loss: 234.9593 | Val Acc: 26.29% | Macro F1: 0.3156 | Weighted F1: 0.2689 | Micro F1: 0.2629


Epoch 07: 100%|██████████| 153/153 [00:45<00:00,  3.34it/s, loss=1.36]


Epoch 07 | Train Loss: 230.3950 | Val Acc: 27.57% | Macro F1: 0.3182 | Weighted F1: 0.2804 | Micro F1: 0.2757


Epoch 08: 100%|██████████| 153/153 [00:45<00:00,  3.37it/s, loss=1.53]


Epoch 08 | Train Loss: 228.4694 | Val Acc: 28.31% | Macro F1: 0.3148 | Weighted F1: 0.2765 | Micro F1: 0.2831


Epoch 09: 100%|██████████| 153/153 [00:45<00:00,  3.35it/s, loss=1.3] 


Epoch 09 | Train Loss: 226.2617 | Val Acc: 26.29% | Macro F1: 0.3248 | Weighted F1: 0.2739 | Micro F1: 0.2629


Epoch 10: 100%|██████████| 153/153 [00:46<00:00,  3.26it/s, loss=1.42]


Epoch 10 | Train Loss: 219.6361 | Val Acc: 26.84% | Macro F1: 0.3240 | Weighted F1: 0.2794 | Micro F1: 0.2684


Epoch 11: 100%|██████████| 153/153 [00:47<00:00,  3.23it/s, loss=1.14] 


Epoch 11 | Train Loss: 216.2816 | Val Acc: 27.94% | Macro F1: 0.3391 | Weighted F1: 0.2921 | Micro F1: 0.2794


Epoch 12: 100%|██████████| 153/153 [00:46<00:00,  3.28it/s, loss=1.42] 


Epoch 12 | Train Loss: 213.1016 | Val Acc: 27.39% | Macro F1: 0.3231 | Weighted F1: 0.2828 | Micro F1: 0.2739


Epoch 13: 100%|██████████| 153/153 [00:48<00:00,  3.13it/s, loss=1.42] 


Epoch 13 | Train Loss: 207.6548 | Val Acc: 29.96% | Macro F1: 0.3333 | Weighted F1: 0.2969 | Micro F1: 0.2996


Epoch 14: 100%|██████████| 153/153 [00:44<00:00,  3.41it/s, loss=1.05] 


Epoch 14 | Train Loss: 202.5686 | Val Acc: 29.04% | Macro F1: 0.3344 | Weighted F1: 0.2983 | Micro F1: 0.2904


Epoch 15: 100%|██████████| 153/153 [00:45<00:00,  3.33it/s, loss=1.16] 


Epoch 15 | Train Loss: 197.8411 | Val Acc: 27.21% | Macro F1: 0.3055 | Weighted F1: 0.2579 | Micro F1: 0.2721


Epoch 16: 100%|██████████| 153/153 [00:45<00:00,  3.39it/s, loss=1.33] 


Epoch 16 | Train Loss: 191.7177 | Val Acc: 24.82% | Macro F1: 0.2970 | Weighted F1: 0.2302 | Micro F1: 0.2482


Epoch 17: 100%|██████████| 153/153 [00:45<00:00,  3.36it/s, loss=1.35] 


Epoch 17 | Train Loss: 186.3753 | Val Acc: 28.86% | Macro F1: 0.3190 | Weighted F1: 0.2883 | Micro F1: 0.2886


Epoch 18: 100%|██████████| 153/153 [00:46<00:00,  3.32it/s, loss=1.49] 


Epoch 18 | Train Loss: 179.3837 | Val Acc: 31.80% | Macro F1: 0.3464 | Weighted F1: 0.3257 | Micro F1: 0.3180


Epoch 19: 100%|██████████| 153/153 [00:45<00:00,  3.33it/s, loss=1.33] 


Epoch 19 | Train Loss: 171.5350 | Val Acc: 28.31% | Macro F1: 0.3306 | Weighted F1: 0.2959 | Micro F1: 0.2831


Epoch 20: 100%|██████████| 153/153 [00:46<00:00,  3.30it/s, loss=0.884]


Epoch 20 | Train Loss: 165.6290 | Val Acc: 34.01% | Macro F1: 0.3658 | Weighted F1: 0.3500 | Micro F1: 0.3401


Epoch 21: 100%|██████████| 153/153 [00:46<00:00,  3.30it/s, loss=0.902]


Epoch 21 | Train Loss: 154.5306 | Val Acc: 28.49% | Macro F1: 0.3220 | Weighted F1: 0.2964 | Micro F1: 0.2849


Epoch 22: 100%|██████████| 153/153 [00:45<00:00,  3.38it/s, loss=0.752]


Epoch 22 | Train Loss: 146.0263 | Val Acc: 27.94% | Macro F1: 0.3204 | Weighted F1: 0.2903 | Micro F1: 0.2794


Epoch 23: 100%|██████████| 153/153 [00:48<00:00,  3.18it/s, loss=1.11] 


Epoch 23 | Train Loss: 138.6976 | Val Acc: 33.82% | Macro F1: 0.3683 | Weighted F1: 0.3428 | Micro F1: 0.3382


Epoch 24: 100%|██████████| 153/153 [00:45<00:00,  3.36it/s, loss=0.959]


Epoch 24 | Train Loss: 129.3907 | Val Acc: 34.74% | Macro F1: 0.3554 | Weighted F1: 0.3406 | Micro F1: 0.3474


Epoch 25: 100%|██████████| 153/153 [00:47<00:00,  3.23it/s, loss=0.736]


Epoch 25 | Train Loss: 119.0068 | Val Acc: 33.27% | Macro F1: 0.3497 | Weighted F1: 0.3428 | Micro F1: 0.3327


Epoch 26: 100%|██████████| 153/153 [00:45<00:00,  3.34it/s, loss=0.596]


Epoch 26 | Train Loss: 112.3408 | Val Acc: 33.46% | Macro F1: 0.3483 | Weighted F1: 0.3356 | Micro F1: 0.3346


Epoch 27: 100%|██████████| 153/153 [00:46<00:00,  3.29it/s, loss=0.715]


Epoch 27 | Train Loss: 105.6690 | Val Acc: 31.80% | Macro F1: 0.3456 | Weighted F1: 0.3260 | Micro F1: 0.3180


Epoch 28: 100%|██████████| 153/153 [00:52<00:00,  2.92it/s, loss=0.889]


Epoch 28 | Train Loss: 96.4322 | Val Acc: 29.96% | Macro F1: 0.3229 | Weighted F1: 0.3009 | Micro F1: 0.2996


Epoch 29: 100%|██████████| 153/153 [00:45<00:00,  3.38it/s, loss=0.837]


Epoch 29 | Train Loss: 91.1300 | Val Acc: 33.46% | Macro F1: 0.3501 | Weighted F1: 0.3440 | Micro F1: 0.3346


Epoch 30: 100%|██████████| 153/153 [00:46<00:00,  3.28it/s, loss=0.532]


Epoch 30 | Train Loss: 89.0608 | Val Acc: 29.60% | Macro F1: 0.3403 | Weighted F1: 0.3057 | Micro F1: 0.2960


Epoch 31: 100%|██████████| 153/153 [00:45<00:00,  3.37it/s, loss=0.655]


Epoch 31 | Train Loss: 83.4843 | Val Acc: 34.01% | Macro F1: 0.3557 | Weighted F1: 0.3310 | Micro F1: 0.3401


Epoch 32: 100%|██████████| 153/153 [00:49<00:00,  3.12it/s, loss=0.241]


Epoch 32 | Train Loss: 80.2493 | Val Acc: 32.17% | Macro F1: 0.3536 | Weighted F1: 0.3296 | Micro F1: 0.3217


Epoch 33: 100%|██████████| 153/153 [00:45<00:00,  3.36it/s, loss=0.253]


Epoch 33 | Train Loss: 69.1266 | Val Acc: 31.99% | Macro F1: 0.3421 | Weighted F1: 0.3224 | Micro F1: 0.3199


Epoch 34: 100%|██████████| 153/153 [00:44<00:00,  3.43it/s, loss=0.354]


Epoch 34 | Train Loss: 70.1352 | Val Acc: 32.17% | Macro F1: 0.3397 | Weighted F1: 0.3288 | Micro F1: 0.3217


Epoch 35: 100%|██████████| 153/153 [00:44<00:00,  3.40it/s, loss=1.07] 


Epoch 35 | Train Loss: 63.7056 | Val Acc: 33.27% | Macro F1: 0.3385 | Weighted F1: 0.3276 | Micro F1: 0.3327


Epoch 36: 100%|██████████| 153/153 [00:45<00:00,  3.37it/s, loss=0.31] 


Epoch 36 | Train Loss: 62.4704 | Val Acc: 33.27% | Macro F1: 0.3373 | Weighted F1: 0.3267 | Micro F1: 0.3327


Epoch 37: 100%|██████████| 153/153 [00:44<00:00,  3.43it/s, loss=0.184]


Epoch 37 | Train Loss: 60.0180 | Val Acc: 34.56% | Macro F1: 0.3549 | Weighted F1: 0.3505 | Micro F1: 0.3456


Epoch 38: 100%|██████████| 153/153 [00:44<00:00,  3.42it/s, loss=0.45] 


Epoch 38 | Train Loss: 58.9053 | Val Acc: 32.35% | Macro F1: 0.3520 | Weighted F1: 0.3298 | Micro F1: 0.3235


Epoch 39: 100%|██████████| 153/153 [00:44<00:00,  3.40it/s, loss=0.749]


Epoch 39 | Train Loss: 55.3545 | Val Acc: 35.48% | Macro F1: 0.3738 | Weighted F1: 0.3551 | Micro F1: 0.3548


Epoch 40: 100%|██████████| 153/153 [00:44<00:00,  3.42it/s, loss=0.33] 


Epoch 40 | Train Loss: 53.5792 | Val Acc: 33.82% | Macro F1: 0.3580 | Weighted F1: 0.3396 | Micro F1: 0.3382


Epoch 41: 100%|██████████| 153/153 [00:44<00:00,  3.41it/s, loss=0.151]


Epoch 41 | Train Loss: 52.9881 | Val Acc: 34.38% | Macro F1: 0.3444 | Weighted F1: 0.3363 | Micro F1: 0.3438


Epoch 42: 100%|██████████| 153/153 [00:46<00:00,  3.32it/s, loss=0.232]


Epoch 42 | Train Loss: 50.4627 | Val Acc: 33.27% | Macro F1: 0.3437 | Weighted F1: 0.3324 | Micro F1: 0.3327


Epoch 43: 100%|██████████| 153/153 [00:44<00:00,  3.44it/s, loss=0.414]


Epoch 43 | Train Loss: 52.6302 | Val Acc: 33.46% | Macro F1: 0.3490 | Weighted F1: 0.3334 | Micro F1: 0.3346


Epoch 44: 100%|██████████| 153/153 [00:45<00:00,  3.35it/s, loss=0.156]


Epoch 44 | Train Loss: 47.6817 | Val Acc: 33.09% | Macro F1: 0.3471 | Weighted F1: 0.3339 | Micro F1: 0.3309


Epoch 45: 100%|██████████| 153/153 [00:44<00:00,  3.41it/s, loss=0.0616]


Epoch 45 | Train Loss: 44.1890 | Val Acc: 34.38% | Macro F1: 0.3622 | Weighted F1: 0.3425 | Micro F1: 0.3438


Epoch 46: 100%|██████████| 153/153 [00:44<00:00,  3.45it/s, loss=0.0841]


Epoch 46 | Train Loss: 47.6132 | Val Acc: 34.74% | Macro F1: 0.3488 | Weighted F1: 0.3421 | Micro F1: 0.3474


Epoch 47: 100%|██████████| 153/153 [00:47<00:00,  3.20it/s, loss=0.282]


Epoch 47 | Train Loss: 44.5600 | Val Acc: 33.64% | Macro F1: 0.3541 | Weighted F1: 0.3386 | Micro F1: 0.3364


Epoch 48: 100%|██████████| 153/153 [00:46<00:00,  3.28it/s, loss=0.392] 


Epoch 48 | Train Loss: 44.9358 | Val Acc: 32.17% | Macro F1: 0.3418 | Weighted F1: 0.3252 | Micro F1: 0.3217


Epoch 49: 100%|██████████| 153/153 [00:44<00:00,  3.40it/s, loss=0.182]


Epoch 49 | Train Loss: 44.7291 | Val Acc: 34.38% | Macro F1: 0.3437 | Weighted F1: 0.3376 | Micro F1: 0.3438


Epoch 50: 100%|██████████| 153/153 [00:44<00:00,  3.46it/s, loss=0.196] 


Epoch 50 | Train Loss: 41.4266 | Val Acc: 35.48% | Macro F1: 0.3647 | Weighted F1: 0.3534 | Micro F1: 0.3548


Epoch 51: 100%|██████████| 153/153 [00:47<00:00,  3.22it/s, loss=0.306] 


Epoch 51 | Train Loss: 38.6295 | Val Acc: 33.64% | Macro F1: 0.3634 | Weighted F1: 0.3483 | Micro F1: 0.3364


Epoch 52: 100%|██████████| 153/153 [00:48<00:00,  3.13it/s, loss=0.264] 


Epoch 52 | Train Loss: 39.4858 | Val Acc: 34.19% | Macro F1: 0.3522 | Weighted F1: 0.3410 | Micro F1: 0.3419


Epoch 53: 100%|██████████| 153/153 [00:53<00:00,  2.87it/s, loss=0.56]  


Epoch 53 | Train Loss: 38.6785 | Val Acc: 34.56% | Macro F1: 0.3525 | Weighted F1: 0.3410 | Micro F1: 0.3456


Epoch 54: 100%|██████████| 153/153 [00:57<00:00,  2.68it/s, loss=0.567] 


Epoch 54 | Train Loss: 43.3054 | Val Acc: 33.27% | Macro F1: 0.3393 | Weighted F1: 0.3288 | Micro F1: 0.3327


Epoch 55: 100%|██████████| 153/153 [00:56<00:00,  2.72it/s, loss=0.294] 


Epoch 55 | Train Loss: 36.0473 | Val Acc: 29.41% | Macro F1: 0.3257 | Weighted F1: 0.2999 | Micro F1: 0.2941


Epoch 56: 100%|██████████| 153/153 [00:54<00:00,  2.81it/s, loss=0.501] 


Epoch 56 | Train Loss: 37.5422 | Val Acc: 33.82% | Macro F1: 0.3332 | Weighted F1: 0.3253 | Micro F1: 0.3382


Epoch 57: 100%|██████████| 153/153 [00:53<00:00,  2.85it/s, loss=0.518] 


Epoch 57 | Train Loss: 42.3188 | Val Acc: 34.01% | Macro F1: 0.3647 | Weighted F1: 0.3416 | Micro F1: 0.3401


Epoch 58: 100%|██████████| 153/153 [00:55<00:00,  2.76it/s, loss=0.0497]


Epoch 58 | Train Loss: 36.0574 | Val Acc: 35.48% | Macro F1: 0.3550 | Weighted F1: 0.3452 | Micro F1: 0.3548


Epoch 59: 100%|██████████| 153/153 [00:59<00:00,  2.58it/s, loss=0.155] 


Epoch 59 | Train Loss: 35.9582 | Val Acc: 35.48% | Macro F1: 0.3656 | Weighted F1: 0.3492 | Micro F1: 0.3548


Epoch 60: 100%|██████████| 153/153 [01:21<00:00,  1.88it/s, loss=0.237] 


Epoch 60 | Train Loss: 36.8198 | Val Acc: 33.82% | Macro F1: 0.3477 | Weighted F1: 0.3337 | Micro F1: 0.3382


Epoch 61: 100%|██████████| 153/153 [00:49<00:00,  3.08it/s, loss=0.315] 


Epoch 61 | Train Loss: 35.4634 | Val Acc: 35.85% | Macro F1: 0.3556 | Weighted F1: 0.3430 | Micro F1: 0.3585


Epoch 62: 100%|██████████| 153/153 [00:50<00:00,  3.00it/s, loss=0.161] 


Epoch 62 | Train Loss: 36.6385 | Val Acc: 33.64% | Macro F1: 0.3450 | Weighted F1: 0.3314 | Micro F1: 0.3364


Epoch 63: 100%|██████████| 153/153 [00:53<00:00,  2.86it/s, loss=0.218] 


Epoch 63 | Train Loss: 32.5303 | Val Acc: 32.72% | Macro F1: 0.3459 | Weighted F1: 0.3282 | Micro F1: 0.3272


Epoch 64: 100%|██████████| 153/153 [00:58<00:00,  2.63it/s, loss=0.218] 


Epoch 64 | Train Loss: 32.0268 | Val Acc: 36.21% | Macro F1: 0.3481 | Weighted F1: 0.3461 | Micro F1: 0.3621


Epoch 65: 100%|██████████| 153/153 [00:50<00:00,  3.01it/s, loss=0.243] 


Epoch 65 | Train Loss: 34.5622 | Val Acc: 34.01% | Macro F1: 0.3481 | Weighted F1: 0.3334 | Micro F1: 0.3401


Epoch 66: 100%|██████████| 153/153 [00:49<00:00,  3.10it/s, loss=0.325] 


Epoch 66 | Train Loss: 33.0457 | Val Acc: 34.01% | Macro F1: 0.3677 | Weighted F1: 0.3496 | Micro F1: 0.3401


Epoch 67: 100%|██████████| 153/153 [00:48<00:00,  3.16it/s, loss=0.0643]


Epoch 67 | Train Loss: 33.3990 | Val Acc: 35.11% | Macro F1: 0.3701 | Weighted F1: 0.3541 | Micro F1: 0.3511


Epoch 68: 100%|██████████| 153/153 [00:51<00:00,  2.99it/s, loss=0.186] 


Epoch 68 | Train Loss: 31.6165 | Val Acc: 33.64% | Macro F1: 0.3550 | Weighted F1: 0.3407 | Micro F1: 0.3364


Epoch 69: 100%|██████████| 153/153 [00:48<00:00,  3.15it/s, loss=0.0976]


Epoch 69 | Train Loss: 28.6452 | Val Acc: 36.76% | Macro F1: 0.3643 | Weighted F1: 0.3554 | Micro F1: 0.3676


Epoch 70: 100%|██████████| 153/153 [00:48<00:00,  3.17it/s, loss=0.211] 


Epoch 70 | Train Loss: 30.4919 | Val Acc: 36.03% | Macro F1: 0.3648 | Weighted F1: 0.3530 | Micro F1: 0.3603


Epoch 71: 100%|██████████| 153/153 [00:48<00:00,  3.13it/s, loss=0.404] 


Epoch 71 | Train Loss: 30.4264 | Val Acc: 34.19% | Macro F1: 0.3508 | Weighted F1: 0.3376 | Micro F1: 0.3419


Epoch 72: 100%|██████████| 153/153 [00:48<00:00,  3.18it/s, loss=0.2]   


Epoch 72 | Train Loss: 36.6004 | Val Acc: 34.56% | Macro F1: 0.3647 | Weighted F1: 0.3531 | Micro F1: 0.3456


Epoch 73: 100%|██████████| 153/153 [00:47<00:00,  3.22it/s, loss=0.127] 


Epoch 73 | Train Loss: 34.0139 | Val Acc: 31.80% | Macro F1: 0.3561 | Weighted F1: 0.3315 | Micro F1: 0.3180


Epoch 74: 100%|██████████| 153/153 [00:42<00:00,  3.59it/s, loss=0.193] 


Epoch 74 | Train Loss: 34.0752 | Val Acc: 33.27% | Macro F1: 0.3457 | Weighted F1: 0.3308 | Micro F1: 0.3327


Epoch 75: 100%|██████████| 153/153 [00:42<00:00,  3.61it/s, loss=0.119]  


Epoch 75 | Train Loss: 26.9817 | Val Acc: 29.96% | Macro F1: 0.3295 | Weighted F1: 0.3084 | Micro F1: 0.2996


Epoch 76: 100%|██████████| 153/153 [00:46<00:00,  3.30it/s, loss=0.146] 


Epoch 76 | Train Loss: 30.3195 | Val Acc: 34.01% | Macro F1: 0.3693 | Weighted F1: 0.3521 | Micro F1: 0.3401


Epoch 77: 100%|██████████| 153/153 [00:48<00:00,  3.18it/s, loss=0.139] 


Epoch 77 | Train Loss: 27.9152 | Val Acc: 36.58% | Macro F1: 0.3597 | Weighted F1: 0.3512 | Micro F1: 0.3658


Epoch 78: 100%|██████████| 153/153 [00:47<00:00,  3.24it/s, loss=0.106] 


Epoch 78 | Train Loss: 27.3608 | Val Acc: 35.85% | Macro F1: 0.3523 | Weighted F1: 0.3442 | Micro F1: 0.3585


Epoch 79: 100%|██████████| 153/153 [00:47<00:00,  3.25it/s, loss=0.176] 


Epoch 79 | Train Loss: 28.0815 | Val Acc: 32.17% | Macro F1: 0.3367 | Weighted F1: 0.3200 | Micro F1: 0.3217


Epoch 80: 100%|██████████| 153/153 [00:46<00:00,  3.27it/s, loss=0.176] 


Epoch 80 | Train Loss: 33.5670 | Val Acc: 36.95% | Macro F1: 0.3565 | Weighted F1: 0.3516 | Micro F1: 0.3695


Epoch 81: 100%|██████████| 153/153 [00:47<00:00,  3.22it/s, loss=0.268] 


Epoch 81 | Train Loss: 30.3365 | Val Acc: 35.29% | Macro F1: 0.3602 | Weighted F1: 0.3467 | Micro F1: 0.3529


Epoch 82: 100%|██████████| 153/153 [00:46<00:00,  3.27it/s, loss=0.116] 


Epoch 82 | Train Loss: 27.5081 | Val Acc: 36.03% | Macro F1: 0.3706 | Weighted F1: 0.3565 | Micro F1: 0.3603


Epoch 83: 100%|██████████| 153/153 [00:46<00:00,  3.28it/s, loss=0.084] 


Epoch 83 | Train Loss: 26.0569 | Val Acc: 33.64% | Macro F1: 0.3520 | Weighted F1: 0.3346 | Micro F1: 0.3364


Epoch 84: 100%|██████████| 153/153 [00:47<00:00,  3.23it/s, loss=0.0648]


Epoch 84 | Train Loss: 27.8736 | Val Acc: 37.13% | Macro F1: 0.3666 | Weighted F1: 0.3599 | Micro F1: 0.3713


Epoch 85: 100%|██████████| 153/153 [00:46<00:00,  3.29it/s, loss=0.277] 


Epoch 85 | Train Loss: 24.9131 | Val Acc: 34.74% | Macro F1: 0.3672 | Weighted F1: 0.3472 | Micro F1: 0.3474


Epoch 86: 100%|██████████| 153/153 [00:47<00:00,  3.25it/s, loss=0.28]  


Epoch 86 | Train Loss: 29.2309 | Val Acc: 36.03% | Macro F1: 0.3465 | Weighted F1: 0.3376 | Micro F1: 0.3603


Epoch 87: 100%|██████████| 153/153 [00:47<00:00,  3.25it/s, loss=0.2]   


Epoch 87 | Train Loss: 30.0988 | Val Acc: 36.21% | Macro F1: 0.3626 | Weighted F1: 0.3491 | Micro F1: 0.3621


Epoch 88: 100%|██████████| 153/153 [00:47<00:00,  3.25it/s, loss=0.284] 


Epoch 88 | Train Loss: 25.8593 | Val Acc: 34.74% | Macro F1: 0.3532 | Weighted F1: 0.3423 | Micro F1: 0.3474


Epoch 89: 100%|██████████| 153/153 [00:47<00:00,  3.22it/s, loss=0.235] 


Epoch 89 | Train Loss: 29.2105 | Val Acc: 34.93% | Macro F1: 0.3487 | Weighted F1: 0.3405 | Micro F1: 0.3493


Epoch 90: 100%|██████████| 153/153 [00:47<00:00,  3.23it/s, loss=0.138] 


Epoch 90 | Train Loss: 28.0952 | Val Acc: 33.27% | Macro F1: 0.3530 | Weighted F1: 0.3398 | Micro F1: 0.3327


Epoch 91: 100%|██████████| 153/153 [00:46<00:00,  3.31it/s, loss=0.118] 


Epoch 91 | Train Loss: 26.0652 | Val Acc: 34.01% | Macro F1: 0.3559 | Weighted F1: 0.3426 | Micro F1: 0.3401


Epoch 92: 100%|██████████| 153/153 [00:46<00:00,  3.28it/s, loss=0.173] 


Epoch 92 | Train Loss: 26.5692 | Val Acc: 33.64% | Macro F1: 0.3480 | Weighted F1: 0.3321 | Micro F1: 0.3364


Epoch 93: 100%|██████████| 153/153 [00:47<00:00,  3.25it/s, loss=0.107] 


Epoch 93 | Train Loss: 23.1636 | Val Acc: 35.48% | Macro F1: 0.3537 | Weighted F1: 0.3347 | Micro F1: 0.3548


Epoch 94: 100%|██████████| 153/153 [00:47<00:00,  3.23it/s, loss=0.246] 


Epoch 94 | Train Loss: 26.5312 | Val Acc: 33.64% | Macro F1: 0.3682 | Weighted F1: 0.3408 | Micro F1: 0.3364


Epoch 95: 100%|██████████| 153/153 [00:47<00:00,  3.25it/s, loss=0.0394]


Epoch 95 | Train Loss: 28.9197 | Val Acc: 33.82% | Macro F1: 0.3501 | Weighted F1: 0.3366 | Micro F1: 0.3382


Epoch 96: 100%|██████████| 153/153 [00:46<00:00,  3.32it/s, loss=0.109]  


Epoch 96 | Train Loss: 25.4722 | Val Acc: 37.50% | Macro F1: 0.3562 | Weighted F1: 0.3451 | Micro F1: 0.3750


Epoch 97: 100%|██████████| 153/153 [00:46<00:00,  3.27it/s, loss=0.173]  


Epoch 97 | Train Loss: 29.6572 | Val Acc: 32.90% | Macro F1: 0.3378 | Weighted F1: 0.3251 | Micro F1: 0.3290


Epoch 98: 100%|██████████| 153/153 [00:46<00:00,  3.28it/s, loss=0.097] 


Epoch 98 | Train Loss: 24.8375 | Val Acc: 32.90% | Macro F1: 0.3530 | Weighted F1: 0.3335 | Micro F1: 0.3290


Epoch 99: 100%|██████████| 153/153 [00:47<00:00,  3.25it/s, loss=0.177] 


Epoch 99 | Train Loss: 26.0676 | Val Acc: 31.43% | Macro F1: 0.3296 | Weighted F1: 0.3179 | Micro F1: 0.3143


Epoch 100: 100%|██████████| 153/153 [00:46<00:00,  3.30it/s, loss=0.13]  


Epoch 100 | Train Loss: 34.1714 | Val Acc: 35.66% | Macro F1: 0.3596 | Weighted F1: 0.3504 | Micro F1: 0.3566

📊 Classification Report on Validation Set:
              precision    recall  f1-score   support

           0     0.0784    0.0870    0.0825        46
           1     0.2609    0.2571    0.2590        70
           2     0.7931    0.9200    0.8519        25
           3     0.4241    0.4737    0.4475       171
           4     0.3556    0.2133    0.2667        75
           5     0.3361    0.3868    0.3596       106
           6     0.2973    0.2157    0.2500        51

    accuracy                         0.3566       544
   macro avg     0.3636    0.3648    0.3596       544
weighted avg     0.3523    0.3566    0.3504       544

