In [9]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/biblio-management/Bibliometrix data.csv
/kaggle/input/cleaned-train/processed_train.csv
/kaggle/input/cleaned-test/processed_test.csv


In [10]:
train_data = pd.read_csv ("/kaggle/input/cleaned-train/processed_train.csv")
test_data = pd.read_csv ("/kaggle/input/cleaned-test/processed_test.csv")

In [11]:
train_data.info()
test_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 520 entries, 0 to 519
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   ID      520 non-null    object
 1   SO      520 non-null    object
dtypes: object(2)
memory usage: 8.2+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 130 entries, 0 to 129
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   ID      130 non-null    object
 1   SO      130 non-null    object
dtypes: object(2)
memory usage: 2.2+ KB


In [12]:
import random
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from tqdm.notebook import tqdm
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup
from sklearn.metrics import f1_score, accuracy_score
from sklearn.model_selection import train_test_split

# Set seeds for reproducibility
seed_val = 808
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)
torch.backends.cudnn.deterministic = True

# Assume training_set DataFrame is loaded and ready
X_train_full = train_data['ID']
y_train_full = train_data['SO']

# Split the training_set into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.2, random_state=808)

# Assume test_set DataFrame is loaded and ready
X_test = test_data['ID']
y_test = test_data['SO']

# Encode Labels
possible_labels = y_train_full.unique()
label_dict = {}
for index, possible_label in enumerate(possible_labels):
    label_dict[possible_label] = index

y_train = y_train.replace(label_dict)
y_val = y_val.replace(label_dict)
y_test = y_test.replace(label_dict)

# Tokenization
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
# Encoding Data
encoded_data_train = tokenizer.batch_encode_plus(X_train.values, add_special_tokens=True, return_attention_mask=True, 
                                                 padding=True, max_length=60, return_tensors='pt')
encoded_data_val = tokenizer.batch_encode_plus(X_val.values, add_special_tokens=True, return_attention_mask=True, 
                                               padding=True, max_length=60, return_tensors='pt')
encoded_data_test = tokenizer.batch_encode_plus(X_test.values, add_special_tokens=True, return_attention_mask=True, 
                                                padding=True, max_length=60, return_tensors='pt')
# Separate Input IDs, Attention Masks, Labels 
input_ids_train = encoded_data_train['input_ids']
attention_masks_train = encoded_data_train['attention_mask']
labels_train = torch.tensor(y_train.values)
input_ids_val = encoded_data_val['input_ids']
attention_masks_val = encoded_data_val['attention_mask']
labels_val = torch.tensor(y_val.values)
input_ids_test = encoded_data_test['input_ids']
attention_masks_test = encoded_data_test['attention_mask']
labels_test = torch.tensor(y_test.values)

# Create Tensor Dataset
dataset_train = TensorDataset(input_ids_train, attention_masks_train, labels_train)
dataset_val = TensorDataset(input_ids_val, attention_masks_val, labels_val)
dataset_test = TensorDataset(input_ids_test, attention_masks_test, labels_test)

# Define the batch size
batch_size = 16

# Create DataLoaders
dataloader_train = DataLoader(dataset_train, sampler=RandomSampler(dataset_train), batch_size=batch_size)
dataloader_val = DataLoader(dataset_val, sampler=SequentialSampler(dataset_val), batch_size=batch_size)
dataloader_test = DataLoader(dataset_test, sampler=SequentialSampler(dataset_test), batch_size=batch_size)

# Load the Pretrained BERT model
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=len(label_dict), output_attentions=False, 
                                                      output_hidden_states=False)

# Create the optimizer
optimizer = AdamW(model.parameters(), lr=1e-5, eps=1e-8)

# Number of training epochs 
epochs = 40

# Total number of training steps
total_steps = len(dataloader_train) * epochs

# Set up the learning rate scheduler
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Defining the function to evaluate the model
def evaluate(dataloader):
    model.eval()
    loss_val_total = 0
    predictions, true_vals = [], []
    for batch in dataloader:
        batch = tuple(b.to(device) for b in batch)
        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2],}
        with torch.no_grad():        
            outputs = model(**inputs)
        loss = outputs[0]
        logits = outputs[1]
        loss_val_total += loss.item()
        logits = logits.detach().cpu().numpy()
        label_ids = inputs['labels'].cpu().numpy()
        predictions.append(logits)
        true_vals.append(label_ids)

    loss_val_avg = loss_val_total/len(dataloader)
    predictions = np.concatenate(predictions, axis=0)
    true_vals = np.concatenate(true_vals, axis=0)
    return loss_val_avg, predictions, true_vals

best_val_loss = float('inf')
best_val_f1 = -float('inf')
best_val_accuracy = -float('inf')
epochs_no_improve = 0
early_stopping_patience = 5  

# Training loop with early stopping
for epoch in tqdm(range(1, epochs+1)):
    model.train()
    loss_train_total = 0
    progress_bar = tqdm(dataloader_train, desc='Epoch {:1d}'.format(epoch), leave=False, disable=False)

    for batch in progress_bar:
        model.zero_grad()
        batch = tuple(b.to(device) for b in batch)
        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2],}       

        outputs = model(**inputs)
        loss = outputs[0]
        torch.cuda.empty_cache()
        loss_train_total += loss.item()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
        progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item()/len(batch))})
        
    tqdm.write(f'\nEpoch {epoch}')
    loss_train_avg = loss_train_total/len(dataloader_train)            
    tqdm.write(f'Training loss: {loss_train_avg}')

    val_loss, predictions, true_vals = evaluate(dataloader_val)
    val_f1 = f1_score(true_vals, np.argmax(predictions, axis=1), average='macro')
    val_accuracy = accuracy_score(true_vals, np.argmax(predictions, axis=1))

    tqdm.write(f'Validation Loss: {val_loss}')
    tqdm.write(f'F1 Score (macro): {val_f1}')
    tqdm.write(f'Validation Accuracy: {val_accuracy}')
    improvement = False
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_valid_loss_weights = model.state_dict().copy()
        torch.save(model.state_dict(), 'best_val_loss_model.pt')
        improvement = True
    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        best_f1_weights = model.state_dict().copy()
        torch.save(model.state_dict(), 'best_val_f1_model.pt')
        improvement = True
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        best_accuracy_weights = model.state_dict().copy()
        torch.save(model.state_dict(), 'best_val_accuracy_model.pt')
        improvement = True
    if improvement:
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1
    if epochs_no_improve >= early_stopping_patience:
        print("Early stopping triggered.")
        break
        
# Evaluation stage on test data for the best model weights based on validation loss, F1 Score, and accuracy
model.load_state_dict(best_valid_loss_weights)
val_loss, predictions_val, true_vals_val = evaluate(dataloader_test)
test_f1_val = f1_score(predictions_val.argmax(axis=1), true_vals_val, average='macro')
test_accuracy_val = accuracy_score(predictions_val.argmax(axis=1), true_vals_val)
print("Model with best validation loss:")
print("Test F1 Score (macro):", test_f1_val)

model.load_state_dict(best_f1_weights)
val_loss, predictions_f1, true_vals_f1 = evaluate(dataloader_test)
test_f1_f1 = f1_score(predictions_f1.argmax(axis=1), true_vals_f1, average='macro')
test_accuracy_f1 = accuracy_score(predictions_f1.argmax(axis=1), true_vals_f1)
print("Model with best F1 Score:")
print("Test F1 Score (macro):", test_f1_f1)

model.load_state_dict(best_accuracy_weights)
val_loss, predictions_accuracy, true_vals_accuracy = evaluate(dataloader_test)
test_f1_accuracy = f1_score(predictions_accuracy.argmax(axis=1), true_vals_accuracy, average='macro')
test_accuracy_accuracy = accuracy_score(predictions_accuracy.argmax(axis=1), true_vals_accuracy)
print("Model with best accuracy:")
print("Test F1 Score (macro):", test_f1_accuracy)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

  0%|          | 0/40 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 1
Training loss: 4.7917390419886665
Validation Loss: 4.630096026829311
F1 Score (macro): 0.006371816649952586
Validation Accuracy: 0.14423076923076922


Epoch 2:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 2
Training loss: 4.5623228549957275
Validation Loss: 4.474183286939349
F1 Score (macro): 0.007495694733979878
Validation Accuracy: 0.15384615384615385


Epoch 3:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 3
Training loss: 4.413364758858314
Validation Loss: 4.392590182168143
F1 Score (macro): 0.006637806637806638
Validation Accuracy: 0.125


Epoch 4:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 4
Training loss: 4.305730984761165
Validation Loss: 4.3373101098196845
F1 Score (macro): 0.006953498478922207
Validation Accuracy: 0.14423076923076922


Epoch 5:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 5
Training loss: 4.219302672606248
Validation Loss: 4.301808016640799
F1 Score (macro): 0.008330652398449009
Validation Accuracy: 0.16346153846153846


Epoch 6:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 6
Training loss: 4.141314607400161
Validation Loss: 4.293782642909458
F1 Score (macro): 0.008793803380193465
Validation Accuracy: 0.16346153846153846


Epoch 7:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 7
Training loss: 4.05183219909668
Validation Loss: 4.250149829047067
F1 Score (macro): 0.00871578811951243
Validation Accuracy: 0.16346153846153846


Epoch 8:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 8
Training loss: 4.01509690284729
Validation Loss: 4.234695775168283
F1 Score (macro): 0.009229616922575113
Validation Accuracy: 0.17307692307692307


Epoch 9:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 9
Training loss: 3.962422114152175
Validation Loss: 4.197130611964634
F1 Score (macro): 0.009514026463179006
Validation Accuracy: 0.17307692307692307


Epoch 10:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 10
Training loss: 3.8819489662463846
Validation Loss: 4.202595744814191
F1 Score (macro): 0.009000415780076796
Validation Accuracy: 0.16346153846153846


Epoch 11:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 11
Training loss: 3.823839517740103
Validation Loss: 4.189733266830444
F1 Score (macro): 0.00965364775239499
Validation Accuracy: 0.17307692307692307


Epoch 12:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 12
Training loss: 3.771931153077346
Validation Loss: 4.191723108291626
F1 Score (macro): 0.015314519633819797
Validation Accuracy: 0.18269230769230768


Epoch 13:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 13
Training loss: 3.7096719650121837
Validation Loss: 4.187514577593122
F1 Score (macro): 0.016417414423396476
Validation Accuracy: 0.21153846153846154


Epoch 14:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 14
Training loss: 3.6849987598565908
Validation Loss: 4.185874700546265
F1 Score (macro): 0.013275613275613277
Validation Accuracy: 0.18269230769230768


Epoch 15:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 15
Training loss: 3.6178146050526547
Validation Loss: 4.17342403956822
F1 Score (macro): 0.013681776106366741
Validation Accuracy: 0.18269230769230768


Epoch 16:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 16
Training loss: 3.549061500109159
Validation Loss: 4.178245340074811
F1 Score (macro): 0.016340069353853785
Validation Accuracy: 0.20192307692307693


Epoch 17:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 17
Training loss: 3.5227262515288134
Validation Loss: 4.189133984701974
F1 Score (macro): 0.016310579172567292
Validation Accuracy: 0.19230769230769232


Epoch 18:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 18
Training loss: 3.473017903474661
Validation Loss: 4.168622936521258
F1 Score (macro): 0.015002876941315359
Validation Accuracy: 0.20192307692307693


Epoch 19:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 19
Training loss: 3.4220586006457987
Validation Loss: 4.1614640440259665
F1 Score (macro): 0.014527845036319613
Validation Accuracy: 0.20192307692307693


Epoch 20:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 20
Training loss: 3.411489560053899
Validation Loss: 4.186472858701434
F1 Score (macro): 0.014145713320935203
Validation Accuracy: 0.18269230769230768


Epoch 21:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 21
Training loss: 3.35160454419943
Validation Loss: 4.191263096673148
F1 Score (macro): 0.01493182249882401
Validation Accuracy: 0.19230769230769232


Epoch 22:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 22
Training loss: 3.318536254075857
Validation Loss: 4.176703350884574
F1 Score (macro): 0.01596045197740113
Validation Accuracy: 0.21153846153846154


Epoch 23:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 23
Training loss: 3.286200312467722
Validation Loss: 4.179150138582502
F1 Score (macro): 0.015266081871345028
Validation Accuracy: 0.20192307692307693


Epoch 24:   0%|          | 0/26 [00:00<?, ?it/s]


Epoch 24
Training loss: 3.2458109122056227
Validation Loss: 4.1627539566584995
F1 Score (macro): 0.015673400673400676
Validation Accuracy: 0.21153846153846154
Early stopping triggered.
Model with best validation loss:
Test Accuracy: 0.2
Test F1 Score (macro): 0.011822876346854763
Model with best F1 Score:
Test Accuracy: 0.2
Test F1 Score (macro): 0.011822876346854763
Model with best accuracy:
Test Accuracy: 0.2
Test F1 Score (macro): 0.011822876346854763
