In [1]:
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import random
import optuna
from optuna.samplers import TPESampler
from sklearn.model_selection import train_test_split
from transformers import BertModel, AutoModel, BertTokenizerFast, AdamW, BertModel, AutoModelForSequenceClassification

import functionality
import importlib
importlib.reload(functionality)
from functionality import *

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
random_seed = 42
random.seed(random_seed)

torch.manual_seed(random_seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(random_seed)

In [3]:
etl = ETL()

In [None]:
data = etl.loading('./data/etl_data.parquet')
preprocessed_data = Data_Preparation(data).data_transformation()

In [4]:
preprocessed_data = etl.loading('./data/preprocessed_data.parquet')

#Data split
train_data, test_data = train_test_split(preprocessed_data, test_size=0.20, random_state=42, stratify=preprocessed_data['category_encoded'])

train_data, val_data = train_test_split(train_data, test_size=0.2, random_state=42, stratify=train_data['category_encoded'])

In [5]:
train_dataset = CustomTextDataset(train_data['processed_text'], train_data['category_encoded'], tokenizer, max_length=150)
val_dataset = CustomTextDataset(val_data['processed_text'], val_data['category_encoded'], tokenizer, max_length=150)
test_dataset = CustomTextDataset(test_data['processed_text'], test_data['category_encoded'], tokenizer, max_length=150)

**Model Comparison**

In [8]:
train_sampler = sampler(train_data)
train_loader = DataLoader(
    train_dataset, 
    batch_size=32, 
    sampler=train_sampler
)

val_sampler = sampler(val_data)
val_loader = DataLoader(
    val_dataset,
    batch_size = 32,
    sampler = val_sampler
)

test_sampler = sampler(test_data)
test_loader = DataLoader(
    test_dataset, 
    batch_size=32, 
    sampler=test_sampler
)

In [11]:
run = neptune.init_run(
    project="yatskopolina1/News",
    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJiNTYzMDJkMi00YThkLTRiYWYtOGU5ZC02MGFiOGEzNjkzYTIifQ==",
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
bert_model = BertModel.from_pretrained("bert-base-uncased")
bert_model.to(device)

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

callbacks = [
    ModelCheckpointCallback(checkpoint_path="model/best_model.pth"),
    EarlyStoppingCallBack(patience=3),
    LoggingCallBack(run=run)
]

criterion = nn.CrossEntropyLoss().to(device)



[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/yatskopolina1/News/e/NEW-32


In [9]:
import os 
bert_embeddings_train, train_labels = [], []
bert_embeddings_test, test_labels = [], []

bert_embeddings_train, train_labels = extract_embeddings(bert_model, train_loader, device)
bert_embeddings_test, test_labels = extract_embeddings(bert_model, test_loader, device)

X_train = bert_embeddings_train.numpy()
y_train = train_labels.numpy()
X_test = bert_embeddings_test.numpy()
y_test = test_labels.numpy()

if not os.path.exists('./data'):
    os.makedirs('./data')

np.save('./data/X_train.npy', X_train)
np.save('./data/y_train.npy', y_train)
np.save('./data/X_test.npy', X_test)
np.save('./data/y_test.npy', y_test)

In [24]:
del bert_embeddings_train
del train_labels
del test_labels
del bert_embeddings_test

**a. K-Nearest Neighbors (KNN)**

In [12]:
X_train = np.load('data/X_train.npy')
y_train = np.load('data/y_train.npy')
X_test = np.load('data/X_test.npy')
y_test = np.load('data/y_test.npy')

In [17]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score

# KNN Classifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# Evaluate on test set
y_pred = knn.predict(X_test[:100])
print(f'KNN Classifier\nF1 score: {f1_score(y_test[:100], y_pred, average="macro"):.2f}')

KNN Classifier
F1 score: 0.24


**XGBoost**

In [14]:
from xgboost import XGBClassifier


xgb = XGBClassifier(n_jobs=-1)
xgb.fit(X_train, y_train)


y_pred = xgb.predict(X_test)
print(f'XGBoost Classifier\nF1 score: {f1_score(y_test, y_pred, average="macro"):.2f}')

XGBoost Classifier
F1 score: 0.46


In [19]:
del X_test
del X_train
del y_test
del y_train

**Recurrent Neural Network (RNN)**

In [39]:
rnn_model = RNNClassifier(input_dim =768, hidden_dim=128, output_dim=27)
optimizer = optim.AdamW(rnn_model.parameters(), lr=2e-5)

rnn_trained = train_model_with_callbacks(
    model=rnn_model,
    bert_model = bert_model,
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=3,
    optimizer=optimizer,
    criterion = criterion,
    device=device,
    callbacks=callbacks,
    use_bert_embeddings = True
)

Epoch 1/3


Training Epoch: 100%|██████████| 4191/4191 [19:25<00:00,  3.60it/s]


Train Loss: 2.0007, Train F1: 0.5103
Epoch 2/3


Training Epoch: 100%|██████████| 4191/4191 [19:08<00:00,  3.65it/s]


Train Loss: 1.3378, Train F1: 0.6229


Evaluation: 100%|██████████| 1048/1048 [04:36<00:00,  3.79it/s]


Val Loss: 1.4087, Val F1: 0.5913, Val accuracy: 0.5954
Epoch 3/3


Training Epoch: 100%|██████████| 4191/4191 [18:58<00:00,  3.68it/s]


Train Loss: 1.2260, Train F1: 0.6440


In [40]:
avg_loss, f1, accuracy = eval(rnn_trained, bert_model, test_loader, optimizer, criterion, device, use_bert_embeddings=True)
print(f'RNN Classifier\nF1 score: {f1:.2f}\nAverage Loss: {avg_loss:.2f}\nAccuracy: {accuracy:.2f}')

Evaluation: 100%|██████████| 1310/1310 [05:45<00:00,  3.79it/s]

RNN Classifier
F1 score: 0.59
Average Loss: 1.38
Accuracy: 0.60





**Long Short-Term Memory (LSTM)**

In [41]:
lstm_model = LSTM(input_dim=768, hidden_dim=128, output_dim=27)
optimizer = optim.AdamW(lstm_model.parameters(), lr=2e-5)

lstm_trained = train_model_with_callbacks(
    model=lstm_model,
    bert_model = bert_model,
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=3,
    optimizer=optimizer,
    criterion = criterion,
    device=device,
    callbacks=callbacks,
    use_bert_embeddings = True
)

Epoch 1/3


Training Epoch: 100%|██████████| 4191/4191 [18:48<00:00,  3.71it/s]


Train Loss: 1.9481, Train F1: 0.5400
Epoch 2/3


Training Epoch: 100%|██████████| 4191/4191 [18:53<00:00,  3.70it/s]


Train Loss: 1.3341, Train F1: 0.6285


Evaluation: 100%|██████████| 1048/1048 [04:39<00:00,  3.75it/s]


Val Loss: 1.4639, Val F1: 0.5814, Val accuracy: 0.5862
Epoch 3/3


Training Epoch: 100%|██████████| 4191/4191 [19:18<00:00,  3.62it/s]


Train Loss: 1.2409, Train F1: 0.6456


In [42]:
avg_loss, f1, accuracy = eval(lstm_trained, bert_model, test_loader, optimizer, criterion, device, use_bert_embeddings=True)
print(f'LSTM Classifier\nF1 score: {f1:.2f}\nAverage Loss: {avg_loss:.2f}\nAccuracy: {accuracy:.2f}')

Evaluation: 100%|██████████| 1310/1310 [06:08<00:00,  3.56it/s]


LSTM Classifier
F1 score: 0.59
Average Loss: 1.42
Accuracy: 0.59


**LSTM + Attention**

In [44]:
att_lstm_model = AttentionLSTM(input_dim=768, hidden_dim=128, output_dim=27)
optimizer = optim.AdamW(att_lstm_model.parameters(), lr=2e-5) 

att_lstm_trained = train_model_with_callbacks(
    model=att_lstm_model,
    bert_model = bert_model,
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=3,
    optimizer=optimizer,
    criterion = criterion,
    device=device,
    callbacks=callbacks,
    use_bert_embeddings = True
)

Epoch 1/3


Training Epoch:   0%|          | 0/4191 [00:00<?, ?it/s]

Training Epoch: 100%|██████████| 4191/4191 [19:54<00:00,  3.51it/s]


Train Loss: 1.9125, Train F1: 0.5422
Epoch 2/3


Training Epoch: 100%|██████████| 4191/4191 [19:48<00:00,  3.53it/s]


Train Loss: 1.3100, Train F1: 0.6319


Evaluation: 100%|██████████| 1048/1048 [07:24<00:00,  2.36it/s]

Val Loss: 1.4401, Val F1: 0.5869, Val accuracy: 0.5928
Early stopping triggered!





In [45]:
avg_loss, f1, accuracy = eval(att_lstm_trained, bert_model, test_loader, optimizer, criterion, device, use_bert_embeddings=True)
print(f'LSTM Classifier + Attention\nF1 score: {f1:.2f}\nAverage Loss: {avg_loss:.2f}\nAccuracy: {accuracy:.2f}')

Evaluation: 100%|██████████| 1310/1310 [08:57<00:00,  2.44it/s]

LSTM Classifier + Attention
F1 score: 0.59
Average Loss: 1.44
Accuracy: 0.59





**BERT 2-layers for update**

In [19]:
torch.cuda.empty_cache()

In [31]:
bert_classifier = BERT(bert_model)

optimizer = AdamW(bert_classifier.parameters(), lr=2e-5)

bert_classifier_trained = train_model_with_callbacks(
    model=bert_classifier,
    bert_model = bert_model,
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=3,
    optimizer=optimizer,
    criterion = criterion,
    device=device,
    callbacks=callbacks,
    use_bert_embeddings = False
)




Epoch 1/3


Training Epoch: 100%|██████████| 4191/4191 [29:43<00:00,  2.35it/s]


Train Loss: 1.5753, Train F1: 0.5488
Epoch 2/3


Training Epoch: 100%|██████████| 4191/4191 [34:20<00:00,  2.03it/s]  


Train Loss: 1.2433, Train F1: 0.6316


Evaluation: 100%|██████████| 1048/1048 [04:53<00:00,  3.57it/s]


Val Loss: 1.2771, Val F1: 0.6173
Model saved at epoch 2
Epoch 3/3


Training Epoch: 100%|██████████| 4191/4191 [26:03<00:00,  2.68it/s]


Train Loss: 1.1123, Train F1: 0.6655


In [54]:
avg_loss, f1, accuracy = eval(bert_classifier_trained, bert_model, test_loader, optimizer, criterion, device, use_bert_embeddings=False)
print(f'BERT Classifier\nF1 score: {f1:.2f}\nAverage Loss: {avg_loss:.2f}\nAccuracy: {accuracy:.2f}')

Evaluation: 100%|██████████| 1310/1310 [08:49<00:00,  2.47it/s]


BERT Classifier
F1 score: 0.63
Average Loss: 1.29
Accuracy: 0.63


In [38]:
print(f'BERT Classifier\nF1 score: {f1:.2f}\nAverage Loss: {avg_loss:.2f}\nAccuracy: {accuracy:.2f}')

BERT Classifier
F1 score: 0.63
Average Loss: 1.28
Accuracy: 0.64


**BERT 1-layer for update**

In [53]:
bert_classifier = BERT(bert_model)
bert_classifier.set_trainable(model.bert.encoder.layer[23], True)

optimizer = AdamW(bert_classifier.parameters(), lr=2e-5)

bert_classifier_trained = train_model_with_callbacks(
    model=bert_classifier,
    bert_model = bert_model,
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=3,
    optimizer=optimizer,
    criterion = criterion,
    device=device,
    callbacks=callbacks,
    use_bert_embeddings = False
)



Epoch 1/3


Training Epoch: 100%|██████████| 4191/4191 [29:08<00:00,  2.40it/s]


Train Loss: 1.2055, Train F1: 0.6615
Epoch 2/3


Training Epoch: 100%|██████████| 4191/4191 [26:42<00:00,  2.62it/s] 


Train Loss: 0.9989, Train F1: 0.6982


Evaluation: 100%|██████████| 1048/1048 [07:15<00:00,  2.41it/s]

Val Loss: 1.2902, Val F1: 0.6328, Val accuracy: 0.6335
Early stopping triggered!





In [55]:
avg_loss, f1, accuracy = eval(bert_classifier_trained, bert_model, test_loader, optimizer, criterion, device, use_bert_embeddings=False)
print(f'BERT Classifier\nF1 score: {f1:.2f}\nAverage Loss: {avg_loss:.2f}\nAccuracy: {accuracy:.2f}')

Evaluation: 100%|██████████| 1310/1310 [06:10<00:00,  3.54it/s]


BERT Classifier
F1 score: 0.63
Average Loss: 1.29
Accuracy: 0.63


**BERT frozen**

In [69]:
bert_classifier = BERT(bert_model)

optimizer = AdamW(bert_classifier.parameters(), lr=2e-5)

bert_classifier_trained = train_model_with_callbacks(
    model=bert_classifier,
    bert_model = bert_model,
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=3,
    optimizer=optimizer,
    criterion = criterion,
    device=device,
    callbacks=callbacks,
    use_bert_embeddings = False
)

Epoch 1/3


Training Epoch: 100%|██████████| 4191/4191 [28:25<00:00,  2.46it/s]


Train Loss: 3.2469, Train F1: 0.0749
Epoch 2/3


Training Epoch:   1%|          | 49/4191 [00:14<19:50,  3.48it/s]


KeyboardInterrupt: 

**BERT 1-layer + bigger fine-tunning stack**

In [78]:
bert_classifier = BERT_bigger(bert_model)
bert_classifier.unfreeze(start_layer=11, end_layer=11)

callbacks = [
    ModelCheckpointCallback(checkpoint_path="model/best_model.pth"),
    EarlyStoppingCallBack(patience=3),
    LoggingCallBack(run=run)
]

criterion = nn.CrossEntropyLoss().to(device)

optimizer = AdamW(bert_classifier.parameters(), lr=2e-5)

bert_classifier_trained = train_model_with_callbacks(
    model=bert_classifier,
    bert_model = bert_model,
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=1,
    optimizer=optimizer,
    criterion = criterion,
    device=device,
    callbacks=callbacks,
    use_bert_embeddings = False
)



Epoch 1/1


Training Epoch: 100%|██████████| 4191/4191 [34:58<00:00,  2.00it/s]


Train Loss: 2.3501, Train F1: 0.3226


**OPTUNA**

In [6]:
torch.cuda.empty_cache()

In [9]:
study = optuna.create_study(direction="minimize", sampler=TPESampler())
study.optimize(lambda trial: optuna_objective(trial, train_loader, val_loader), n_trials=10)

print(f"Best trial: {study.best_trial.params}")

[I 2025-01-18 12:54:48,065] A new study created in memory with name: no-name-cbf5a811-2455-4b47-ab76-cc226ca4947b


[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/yatskopolina1/News/e/NEW-36
Epoch 1/1


Training Epoch: 100%|██████████| 4191/4191 [58:58<00:00,  1.18it/s]


Train Loss: 2.6548, Train F1: 0.2938


Evaluation: 100%|██████████| 1048/1048 [04:47<00:00,  3.64it/s]
[I 2025-01-18 13:58:38,877] Trial 0 finished with value: 1.9342077596269491 and parameters: {'learning_rate': 1.0778278690714714e-06, 'weight_decay': 0.03765380143498119, 'dropout_rate': 0.433869668575926}. Best is trial 0 with value: 1.9342077596269491.


[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/yatskopolina1/News/e/NEW-37
Epoch 1/1


Training Epoch: 100%|██████████| 4191/4191 [59:24<00:00,  1.18it/s]


Train Loss: 2.4761, Train F1: 0.3629


Evaluation: 100%|██████████| 1048/1048 [04:44<00:00,  3.68it/s]
[I 2025-01-18 15:02:50,854] Trial 1 finished with value: 1.703476466181624 and parameters: {'learning_rate': 1.5040812226764134e-06, 'weight_decay': 0.003130782205994873, 'dropout_rate': 0.411590195045326}. Best is trial 1 with value: 1.703476466181624.


[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/yatskopolina1/News/e/NEW-38
Epoch 1/1


Training Epoch: 100%|██████████| 4191/4191 [59:20<00:00,  1.18it/s]


Train Loss: 1.1188, Train F1: 0.6870


Evaluation: 100%|██████████| 1048/1048 [05:16<00:00,  3.31it/s]
[I 2025-01-18 16:07:30,384] Trial 2 finished with value: 1.2590044013700867 and parameters: {'learning_rate': 3.6665508640094245e-05, 'weight_decay': 0.00024244866042080518, 'dropout_rate': 0.11166825472311373}. Best is trial 2 with value: 1.2590044013700867.


[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/yatskopolina1/News/e/NEW-39
Epoch 1/1


Training Epoch: 100%|██████████| 4191/4191 [59:17<00:00,  1.18it/s]


Train Loss: 1.1758, Train F1: 0.6741


Evaluation: 100%|██████████| 1048/1048 [04:37<00:00,  3.77it/s]
[I 2025-01-18 17:11:29,347] Trial 3 finished with value: 1.270233296597277 and parameters: {'learning_rate': 3.211362714444366e-05, 'weight_decay': 0.021029752068793862, 'dropout_rate': 0.4149551475758383}. Best is trial 2 with value: 1.2590044013700867.


[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/yatskopolina1/News/e/NEW-40
Epoch 1/1


Training Epoch: 100%|██████████| 4191/4191 [58:15<00:00,  1.20it/s]  


Train Loss: 1.1690, Train F1: 0.6760


Evaluation: 100%|██████████| 1048/1048 [05:18<00:00,  3.29it/s]
[I 2025-01-18 18:15:07,016] Trial 4 finished with value: 1.2624036491997825 and parameters: {'learning_rate': 3.057552191742919e-05, 'weight_decay': 0.07971362978433515, 'dropout_rate': 0.41212995838583477}. Best is trial 2 with value: 1.2590044013700867.


[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/yatskopolina1/News/e/NEW-41
Epoch 1/1


Training Epoch: 100%|██████████| 4191/4191 [1:00:44<00:00,  1.15it/s]


Train Loss: 1.4062, Train F1: 0.6159


Evaluation: 100%|██████████| 1048/1048 [06:01<00:00,  2.90it/s]
[I 2025-01-18 19:21:55,786] Trial 5 finished with value: 1.2456107381873458 and parameters: {'learning_rate': 1.2089835241469762e-05, 'weight_decay': 1.4888046850151236e-05, 'dropout_rate': 0.46104245247032394}. Best is trial 5 with value: 1.2456107381873458.


[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/yatskopolina1/News/e/NEW-42
Epoch 1/1


Training Epoch:   1%|          | 33/4191 [00:36<1:14:56,  1.08s/it]



Training Epoch:   1%|          | 34/4191 [45:16<929:21:08, 804.83s/it]



Training Epoch:   1%|          | 36/4191 [45:23<457:06:03, 396.04s/it]

[neptune] [info   ] Communication with Neptune restored!
[neptune] [info   ] Communication with Neptune restored!
[neptune] [info   ] Communication with Neptune restored!
[neptune] [info   ] Communication with Neptune restored!
[neptune] [info   ] Communication with Neptune restored!
[neptune] [info   ] Communication with Neptune restored!


Training Epoch: 100%|██████████| 4191/4191 [1:45:33<00:00,  1.51s/it] 


Train Loss: 2.0431, Train F1: 0.4681


Evaluation: 100%|██████████| 1048/1048 [04:36<00:00,  3.79it/s]
[I 2025-01-18 21:12:08,094] Trial 6 finished with value: 1.4315670489241148 and parameters: {'learning_rate': 2.3599931967444015e-06, 'weight_decay': 0.06762422374072875, 'dropout_rate': 0.22689442529323536}. Best is trial 5 with value: 1.2456107381873458.


[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/yatskopolina1/News/e/NEW-43
Epoch 1/1


Training Epoch: 100%|██████████| 4191/4191 [57:24<00:00,  1.22it/s]


Train Loss: 1.1159, Train F1: 0.6873


Evaluation: 100%|██████████| 1048/1048 [04:36<00:00,  3.79it/s]
[I 2025-01-18 22:14:12,842] Trial 7 finished with value: 1.3310520941860804 and parameters: {'learning_rate': 6.992891811529071e-05, 'weight_decay': 5.4497477349196754e-05, 'dropout_rate': 0.28831782250404064}. Best is trial 5 with value: 1.2456107381873458.


[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/yatskopolina1/News/e/NEW-44
Epoch 1/1


Training Epoch: 100%|██████████| 4191/4191 [57:26<00:00,  1.22it/s]


Train Loss: 1.1362, Train F1: 0.6808


Evaluation: 100%|██████████| 1048/1048 [04:36<00:00,  3.78it/s]
[I 2025-01-18 23:16:19,063] Trial 8 finished with value: 1.3484378137433801 and parameters: {'learning_rate': 8.828357528153784e-05, 'weight_decay': 0.0001866218001581195, 'dropout_rate': 0.17996625435672559}. Best is trial 5 with value: 1.2456107381873458.


[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/yatskopolina1/News/e/NEW-45
Epoch 1/1


Training Epoch: 100%|██████████| 4191/4191 [57:28<00:00,  1.22it/s]


Train Loss: 2.2090, Train F1: 0.4320


Evaluation: 100%|██████████| 1048/1048 [04:38<00:00,  3.76it/s]
[I 2025-01-19 00:18:28,322] Trial 9 finished with value: 1.5182210913368763 and parameters: {'learning_rate': 1.9225309651194768e-06, 'weight_decay': 3.5292661192855804e-05, 'dropout_rate': 0.230778092498943}. Best is trial 5 with value: 1.2456107381873458.


Best trial: {'learning_rate': 1.2089835241469762e-05, 'weight_decay': 1.4888046850151236e-05, 'dropout_rate': 0.46104245247032394}


In [None]:
learning_rate = 1.2089835241469762e-05
weight_decay = 1.4888046850151236e-05
dropout_rate = 0.46104245247032394
EPOCHS = 4
BETAS = (0.9, 0.999)
EPS = 1e-8

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
bert_model = BertModel.from_pretrained("bert-base-uncased")
model = BERT(bert_model, num_classes=27, dropout_rate=dropout_rate).to(device)

bert_identifiers = ['embedding', 'encoder', 'pooler']
no_weight_decay_identifiers = ['bias', 'LayerNorm.weight']

grouped_model_parameters = [

    {'params': [param for name, param in model.named_parameters()
                if any(identifier in name for identifier in bert_identifiers) and
                not any(identifier_ in name for identifier_ in no_weight_decay_identifiers)],
        'lr': learning_rate,
        'betas': BETAS,
        'weight_decay': weight_decay,
        'eps': EPS},

    {'params': [param for name, param in model.named_parameters()
                if any(identifier in name for identifier in bert_identifiers) and
                any(identifier_ in name for identifier_ in no_weight_decay_identifiers)],
        'lr': learning_rate,
        'betas': BETAS,
        'weight_decay': 0.0,
        'eps': EPS},

    {'params': [param for name, param in model.named_parameters()
                if not any(identifier in name for identifier in bert_identifiers)],
        'lr': learning_rate,
        'betas': BETAS,
        'weight_decay': 0.0,
        'eps': EPS}
]

optimizer = torch.optim.AdamW(grouped_model_parameters)

total_steps = len(train_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps
)


criterion = nn.CrossEntropyLoss().to(device)

run = neptune.init_run(
    project="yatskopolina1/News",
    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJiNTYzMDJkMi00YThkLTRiYWYtOGU5ZC02MGFiOGEzNjkzYTIifQ==",
)

callbacks = [
    ModelCheckpointCallback(checkpoint_path="model/best_model.pth"),
    EarlyStoppingCallBack(patience=3),
    LoggingCallBack(run=run)
]

trained_model = train_model_with_callbacks(
        model=model,
        bert_model=bert_model,
        train_loader=train_loader,
        val_loader=val_loader,
        epochs=EPOCHS,
        optimizer=optimizer,
        criterion=criterion,
        device=device,
        callbacks=callbacks,
        use_bert_embeddings=False
    )

[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/yatskopolina1/News/e/NEW-46
Epoch 1/4


Training Epoch:   4%|▎         | 154/4191 [02:06<55:17,  1.22it/s]

In [None]:
avg_loss, f1, accuracy = eval(trained_model, bert_model, test_loader, optimizer, criterion, device, use_bert_embeddings=False)
print(f'BERT Classifier\nF1 score: {f1:.2f}\nAverage Loss: {avg_loss:.2f}\nAccuracy: {accuracy:.2f}')

In [None]:
learning_rate = 8.828357528153784e-05
weight_decay = 0.0001866218001581195
dropout_rate = 0.17996625435672559
EPOCHS = 4
BETAS = (0.9, 0.999)
EPS = 1e-8

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
bert_model = BertModel.from_pretrained("bert-base-uncased")
model = BERT(bert_model, num_classes=27, dropout_rate=dropout_rate).to(device)

bert_identifiers = ['embedding', 'encoder', 'pooler']
no_weight_decay_identifiers = ['bias', 'LayerNorm.weight']

grouped_model_parameters = [

    {'params': [param for name, param in model.named_parameters()
                if any(identifier in name for identifier in bert_identifiers) and
                not any(identifier_ in name for identifier_ in no_weight_decay_identifiers)],
        'lr': learning_rate,
        'betas': BETAS,
        'weight_decay': weight_decay,
        'eps': EPS},

    {'params': [param for name, param in model.named_parameters()
                if any(identifier in name for identifier in bert_identifiers) and
                any(identifier_ in name for identifier_ in no_weight_decay_identifiers)],
        'lr': learning_rate,
        'betas': BETAS,
        'weight_decay': 0.0,
        'eps': EPS},

    {'params': [param for name, param in model.named_parameters()
                if not any(identifier in name for identifier in bert_identifiers)],
        'lr': learning_rate,
        'betas': BETAS,
        'weight_decay': 0.0,
        'eps': EPS}
]

optimizer = torch.optim.AdamW(grouped_model_parameters)

total_steps = len(train_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps
)


criterion = nn.CrossEntropyLoss().to(device)

run = neptune.init_run(
    project="yatskopolina1/News",
    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJiNTYzMDJkMi00YThkLTRiYWYtOGU5ZC02MGFiOGEzNjkzYTIifQ==",
)

callbacks = [
    ModelCheckpointCallback(checkpoint_path="model/best_model.pth"),
    EarlyStoppingCallBack(patience=3),
    LoggingCallBack(run=run)
]

trained_model = train_model_with_callbacks(
        model=model,
        bert_model=bert_model,
        train_loader=train_loader,
        val_loader=val_loader,
        epochs=EPOCHS,
        optimizer=optimizer,
        criterion=criterion,
        device=device,
        callbacks=callbacks,
        use_bert_embeddings=False
    )

In [None]:
avg_loss, f1, accuracy = eval(trained_model, bert_model, test_loader, optimizer, criterion, device, use_bert_embeddings=False)
print(f'BERT Classifier\nF1 score: {f1:.2f}\nAverage Loss: {avg_loss:.2f}\nAccuracy: {accuracy:.2f}')