In [1]:
import torch 
import json
import numpy as np
import pandas as pd
import custom_utils

device = "cuda" if torch.cuda.is_available() else "cpu"
torch.set_default_device("cuda")

In [2]:
from sklearn.model_selection import train_test_split

# read
sentences, speakers, labels = custom_utils.read_data("training", "training_labels.json")

# split
df = pd.DataFrame({"sentences" : sentences, "speakers" : speakers, "labels" : labels})

train, test = train_test_split(df, test_size=0.2, random_state=69, stratify=df.labels)

train, valid = train_test_split(train, test_size=0.3, random_state=69, stratify=train.labels)


print(f"Train: {len(train)}\nTest: {len(test)}\nValid: {len(valid)}")
train.head()

Train: 40668
Test: 14525
Valid: 17430


Unnamed: 0,sentences,speakers,labels
70575,Yeah I think it should be a little distinct fr...,PM,0
19197,"Okay , my turn .",ID,0
32883,but when we want a scrolling wheel w we also n...,PM,1
66445,Yeah .,UI,0
67299,and it still looks very fancy .,PM,0


In [3]:
from sentence_transformers import SentenceTransformer

# hot encoder for speakers
switcher = {
    "PM" : [1,0,0,0],
    "ME" : [0,1,0,0],
    "UI" : [0,0,1,0],
    "ID" : [0,0,0,1]
}

# embed
bert = SentenceTransformer('all-MiniLM-L6-v2')

train_sentences = bert.encode(train['sentences'].to_numpy(), convert_to_tensor=True, show_progress_bar=True).to(device)
train_speaker = torch.Tensor([switcher[el] for el in train['speakers']]).to(device)
train_len = torch.Tensor([[len(sentence.split())] for sentence in train['sentences']]).to(device)
train_X = torch.cat((train_sentences, train_speaker, train_len), dim=1)
train_y = torch.tensor(train['labels'].to_numpy())

valid_sentences = bert.encode(valid['sentences'].to_numpy(), convert_to_tensor=True, show_progress_bar=True).to(device)
valid_speaker = torch.Tensor([switcher[el] for el in valid['speakers']]).to(device)
valid_len = torch.Tensor([[len(sentence.split())] for sentence in valid['sentences']]).to(device)
valid_X = torch.cat((valid_sentences, valid_speaker, valid_len), dim=1)
valid_y = torch.tensor(valid['labels'].to_numpy())

test_sentences = bert.encode(test['sentences'].to_numpy(), convert_to_tensor=True, show_progress_bar=True).to(device)
test_speaker = torch.Tensor([switcher[el] for el in test['speakers']]).to(device)
test_len = torch.Tensor([[len(sentence.split())] for sentence in test['sentences']]).to(device)
test_X = torch.cat((test_sentences, test_speaker, test_len), dim=1)
test_y = torch.tensor(test['labels'].to_numpy())

  from .autonotebook import tqdm as notebook_tqdm
Batches: 100%|██████████| 1271/1271 [00:29<00:00, 42.92it/s]
Batches: 100%|██████████| 545/545 [00:11<00:00, 49.38it/s]
Batches: 100%|██████████| 454/454 [00:19<00:00, 22.86it/s]


In [13]:
from torch.utils.data import TensorDataset, DataLoader

def data_loader(batch_size):
    # create tensor datasets
    trainset = TensorDataset((train_X).to(device), (train_y).to(device))
    validset = TensorDataset((valid_X).to(device), (valid_y).to(device))
    testset = TensorDataset((test_X).to(device), (test_y).to(device))

    # create dataloaders
    train_loader = DataLoader(trainset, shuffle=True, batch_size=batch_size, generator=torch.Generator(device=device))
    valid_loader = DataLoader(validset, shuffle=True, batch_size=batch_size, generator=torch.Generator(device=device))
    test_loader = DataLoader(testset, shuffle=True, batch_size=batch_size, generator=torch.Generator(device=device))
    
    return train_loader, valid_loader, test_loader

In [14]:
def MLP(params):
    # Model
    n_layers = params['n_layers']
    layers = []

    in_features = params['input_size']
    for i in range(n_layers):
        out_features = params[f'n_{i}_size']
        layers.append(torch.nn.Linear(in_features, out_features))
        layers.append(torch.nn.ReLU())
        
        # suggest dropout
        p = params['n_p']
        layers.append(torch.nn.Dropout(p))

        # updating next layer size
        in_features = out_features
        
    layers.append(torch.nn.Linear(in_features, params['output_size']))
    model = torch.nn.Sequential(*layers)
    return model

In [12]:
# from sklearn.utils.class_weight import compute_class_weight

# class_weights = compute_class_weight('balanced', classes=np.unique(train['labels'].to_numpy()), y=train['labels'].to_numpy())

# criterion = torch.nn.CrossEntropyLoss(weight=torch.tensor(class_weights).float()) 


In [6]:
import optuna
from torchmetrics.classification import F1Score
from sklearn.utils.class_weight import compute_class_weight

def train_MLP(trial):
    
    params = {
        "n_layers" : trial.suggest_int("n_layers", 3, 7),
        "input_size" : int(train_X.shape[1]),
        "output_size" : 2,
        "n_p" : trial.suggest_float("n_p", 0.4, 0.8),
        "lr" : trial.suggest_float("lr", 1e-4, 1e-3),
        "weight_decay" : trial.suggest_float("weight_decay", 1e-5, 1e-4),
        "batch_size" : trial.suggest_int("batch_size", 400, 600)
    }
    for i in range(trial.params["n_layers"]):
        params[f"n_{i}_size"] = trial.suggest_int(f"n_{i}_size", 200, 800)

    
    model = MLP(params)
    class_weights = compute_class_weight('balanced', classes=np.unique(train['labels'].to_numpy()), y=train['labels'].to_numpy())
    criterion = torch.nn.CrossEntropyLoss(weight=torch.tensor(class_weights).float()) 
    # criterion = torch.nn.CrossEntropyLoss() 
    optimizer = torch.optim.Adam(model.parameters(), lr=params["lr"], weight_decay=params["weight_decay"])
    f1 = F1Score(task='binary', num_classes=params['output_size']).to(device)

    train_loader, valid_loader, test_loader = data_loader(params["batch_size"])
    
    n_epochs = 20
    it = 0
    hst_train_loss = [] 
    hst_valid_loss = []
    hst_f1_score = []

    best_valid_loss = float("inf")
    patience = 10
    for epoch in range(n_epochs):
        if patience == 0: break
        for samples, labels in train_loader:
            if patience == 0: break
            it += 1

            # train step
            model.train()
            optimizer.zero_grad()
            out = model(samples)
            loss = criterion(out, labels)
            loss.backward()
            optimizer.step()
            

            if it % 100 == 0:
                model.eval()

                train_loss = loss.cpu().detach().numpy() / 1
                valid_loss = 0
                f1_score = 0
                for samples, labels in valid_loader:
                    out = model(samples)
                    loss = criterion(out, labels)
                    valid_loss += loss.cpu().detach().numpy() / len(valid_loader)
                    f1_score += f1(labels, out.argmax(dim=1)).cpu().detach().numpy() / len(valid_loader)
                
                # early stopping
                if valid_loss < best_valid_loss:
                    best_valid_loss = valid_loss
                    best_weights = model.state_dict()
                    patience = 10
                else:
                    patience -= 1 
                
                hst_train_loss.append(train_loss)
                hst_valid_loss.append(valid_loss)
                hst_f1_score.append(f1_score)

                print('Iter: {} | Train Loss: {} | Val Loss: {} | F1-score: {}'.format(it, train_loss, valid_loss, f1_score))
    
    # objective function criterion
    combined = sorted(zip(hst_valid_loss, hst_f1_score), key=lambda x : x[0])
    _, scores = zip(*combined)
    qtd = 3
    final_score = sum(scores[:qtd]) / qtd

    torch.save(best_weights, f"models/mlp_{trial.number}.pt")
    results = {
        "score" : final_score,
        "params" : params,
        "valid_loss" : hst_valid_loss,
        "train_loss" : hst_train_loss,
        "f1_score" : hst_f1_score, 
    }
    json.dump(results, open(f"models/mlp_results_{trial.number}.json", "w"))

    return final_score

In [7]:
study = optuna.create_study(direction='maximize')
study.optimize(train_MLP, n_trials=500)

[I 2023-12-04 04:20:51,893] A new study created in memory with name: no-name-ef86ba72-dd20-4c8c-9e04-48d7bed8f249


Iter: 100 | Train Loss: 0.36012235283851624 | Val Loss: 0.3319387937846937 | F1-score: 0.43481223206771036
Iter: 200 | Train Loss: 0.3128753900527954 | Val Loss: 0.3253439374660191 | F1-score: 0.46063435940366043
Iter: 300 | Train Loss: 0.29471731185913086 | Val Loss: 0.32281238860205597 | F1-score: 0.4495595034800079
Iter: 400 | Train Loss: 0.28942471742630005 | Val Loss: 0.32024350291804266 | F1-score: 0.5261493419346057
Iter: 500 | Train Loss: 0.3100268244743347 | Val Loss: 0.32028789504578237 | F1-score: 0.5242662116100913
Iter: 600 | Train Loss: 0.3141099214553833 | Val Loss: 0.31767960833875764 | F1-score: 0.48578170804600956
Iter: 700 | Train Loss: 0.3051414489746094 | Val Loss: 0.31737125233600005 | F1-score: 0.5308804135573537
Iter: 800 | Train Loss: 0.3383527398109436 | Val Loss: 0.31764967818009215 | F1-score: 0.5351550751610806
Iter: 900 | Train Loss: 0.29451027512550354 | Val Loss: 0.3179136436236532 | F1-score: 0.48060188167973583
Iter: 1000 | Train Loss: 0.30503761768341

[I 2023-12-04 04:21:13,925] Trial 0 finished with value: 0.526072474948147 and parameters: {'n_layers': 4, 'n_p': 0.6130724483887233, 'lr': 0.0005326806903942525, 'weight_decay': 1.9472376819179503e-05, 'batch_size': 465, 'n_0_size': 262, 'n_1_size': 476, 'n_2_size': 734, 'n_3_size': 406}. Best is trial 0 with value: 0.526072474948147.


Iter: 100 | Train Loss: 0.3177231252193451 | Val Loss: 0.3285485692322254 | F1-score: 0.3658629674464464
Iter: 200 | Train Loss: 0.3238745927810669 | Val Loss: 0.3248340729624033 | F1-score: 0.46205563098192215
Iter: 300 | Train Loss: 0.3223522901535034 | Val Loss: 0.3208871800452471 | F1-score: 0.4348042318597436
Iter: 400 | Train Loss: 0.29421064257621765 | Val Loss: 0.3199586998671293 | F1-score: 0.41932765301316977
Iter: 500 | Train Loss: 0.31475672125816345 | Val Loss: 0.3177069555968046 | F1-score: 0.4992232220247388
Iter: 600 | Train Loss: 0.34188562631607056 | Val Loss: 0.317690746858716 | F1-score: 0.5278472304344177
Iter: 700 | Train Loss: 0.2924685776233673 | Val Loss: 0.3164311954751611 | F1-score: 0.519834122620523
Iter: 800 | Train Loss: 0.3139229714870453 | Val Loss: 0.3164797741919756 | F1-score: 0.5457660825923085
Iter: 900 | Train Loss: 0.33876001834869385 | Val Loss: 0.3178023360669613 | F1-score: 0.556452208198607
Iter: 1000 | Train Loss: 0.2849796712398529 | Val Lo

[I 2023-12-04 04:21:39,015] Trial 1 finished with value: 0.5311491452157497 and parameters: {'n_layers': 5, 'n_p': 0.47246377406868323, 'lr': 0.0004275749981812202, 'weight_decay': 6.54390352066903e-05, 'batch_size': 546, 'n_0_size': 644, 'n_1_size': 292, 'n_2_size': 770, 'n_3_size': 504, 'n_4_size': 629}. Best is trial 1 with value: 0.5311491452157497.


Iter: 1500 | Train Loss: 0.27124524116516113 | Val Loss: 0.32667103223502636 | F1-score: 0.5370489470660686
Iter: 100 | Train Loss: 0.3056701421737671 | Val Loss: 0.3290070174513636 | F1-score: 0.44943404439333323
Iter: 200 | Train Loss: 0.3406127095222473 | Val Loss: 0.3244889314110215 | F1-score: 0.4157447106129414
Iter: 300 | Train Loss: 0.3658142685890198 | Val Loss: 0.3219162217668586 | F1-score: 0.45671174332902253
Iter: 400 | Train Loss: 0.31700652837753296 | Val Loss: 0.32027697724265025 | F1-score: 0.5262584919865066
Iter: 500 | Train Loss: 0.33127617835998535 | Val Loss: 0.31942362479261455 | F1-score: 0.5252802436416214
Iter: 600 | Train Loss: 0.3290652930736542 | Val Loss: 0.3185450708543931 | F1-score: 0.5462843977116251
Iter: 700 | Train Loss: 0.28788450360298157 | Val Loss: 0.3173508853525728 | F1-score: 0.5381506794207804
Iter: 800 | Train Loss: 0.27533718943595886 | Val Loss: 0.31709613993361196 | F1-score: 0.5229228445001551
Iter: 900 | Train Loss: 0.31597191095352173

[I 2023-12-04 04:22:07,345] Trial 2 finished with value: 0.5220779610109759 and parameters: {'n_layers': 4, 'n_p': 0.5639652928042218, 'lr': 0.0004571559304121832, 'weight_decay': 9.835686880557039e-05, 'batch_size': 476, 'n_0_size': 564, 'n_1_size': 344, 'n_2_size': 702, 'n_3_size': 234}. Best is trial 1 with value: 0.5311491452157497.


Iter: 1700 | Train Loss: 0.2792925238609314 | Val Loss: 0.3259388506412507 | F1-score: 0.4254783230858882
Iter: 100 | Train Loss: 0.38197845220565796 | Val Loss: 0.33137338490862583 | F1-score: 0.31765683073746537
Iter: 200 | Train Loss: 0.3190828561782837 | Val Loss: 0.32542076079468985 | F1-score: 0.4398550171601146
Iter: 300 | Train Loss: 0.3536498546600342 | Val Loss: 0.3240880087802286 | F1-score: 0.4681240822139539
Iter: 400 | Train Loss: 0.2960451543331146 | Val Loss: 0.32018251089673294 | F1-score: 0.5250719848432039
Iter: 500 | Train Loss: 0.3480081260204315 | Val Loss: 0.3191748975138915 | F1-score: 0.46058803796768183
Iter: 600 | Train Loss: 0.32987678050994873 | Val Loss: 0.3242669082001636 | F1-score: 0.3529970571398734
Iter: 700 | Train Loss: 0.32844996452331543 | Val Loss: 0.31863063103274303 | F1-score: 0.46874354858147466
Iter: 800 | Train Loss: 0.2845161259174347 | Val Loss: 0.3174322059279994 | F1-score: 0.4686481560531414
Iter: 900 | Train Loss: 0.30749571323394775 

[I 2023-12-04 04:22:32,576] Trial 3 finished with value: 0.5065753211577734 and parameters: {'n_layers': 6, 'n_p': 0.5170328312049456, 'lr': 0.0005525788700125892, 'weight_decay': 3.8381533529929835e-05, 'batch_size': 459, 'n_0_size': 200, 'n_1_size': 518, 'n_2_size': 305, 'n_3_size': 362, 'n_4_size': 492, 'n_5_size': 738}. Best is trial 1 with value: 0.5311491452157497.


Iter: 100 | Train Loss: 0.313728392124176 | Val Loss: 0.32763051535144 | F1-score: 0.42209292541850696
Iter: 200 | Train Loss: 0.32531455159187317 | Val Loss: 0.3272092784896042 | F1-score: 0.3232720458146297
Iter: 300 | Train Loss: 0.3225151300430298 | Val Loss: 0.3269605817216815 | F1-score: 0.3116848152695279
Iter: 400 | Train Loss: 0.27113303542137146 | Val Loss: 0.3304395865310323 | F1-score: 0.35516858281511243
Iter: 500 | Train Loss: 0.32022762298583984 | Val Loss: 0.3207049776207316 | F1-score: 0.4518768751260006
Iter: 600 | Train Loss: 0.31971603631973267 | Val Loss: 0.3174919061588519 | F1-score: 0.5477107176274966
Iter: 700 | Train Loss: 0.2989861071109772 | Val Loss: 0.3185915513472123 | F1-score: 0.49918922420704004
Iter: 800 | Train Loss: 0.33260512351989746 | Val Loss: 0.3197030921777089 | F1-score: 0.5025034519759092
Iter: 900 | Train Loss: 0.34826067090034485 | Val Loss: 0.3150547778967655 | F1-score: 0.5572954154375829
Iter: 1000 | Train Loss: 0.30290088057518005 | Va

[I 2023-12-04 04:23:00,034] Trial 4 finished with value: 0.5347317857573731 and parameters: {'n_layers': 7, 'n_p': 0.5453180354632154, 'lr': 0.0008978117009177701, 'weight_decay': 4.65490842053666e-05, 'batch_size': 544, 'n_0_size': 246, 'n_1_size': 368, 'n_2_size': 292, 'n_3_size': 739, 'n_4_size': 481, 'n_5_size': 499, 'n_6_size': 435}. Best is trial 4 with value: 0.5347317857573731.


Iter: 1500 | Train Loss: 0.2752302289009094 | Val Loss: 0.31894408030943433 | F1-score: 0.5610274004213738
Iter: 100 | Train Loss: 0.37394794821739197 | Val Loss: 0.3426791039796977 | F1-score: 0.007915139580384279
Iter: 200 | Train Loss: 0.3251515328884125 | Val Loss: 0.33218287504636324 | F1-score: 0.4475400409637353
Iter: 300 | Train Loss: 0.3579639792442322 | Val Loss: 0.3289546958911113 | F1-score: 0.45501208152526484
Iter: 400 | Train Loss: 0.3528248071670532 | Val Loss: 0.3274355194507501 | F1-score: 0.46185328333805775
Iter: 500 | Train Loss: 0.3370353579521179 | Val Loss: 0.3265674679707258 | F1-score: 0.5162761494135247
Iter: 600 | Train Loss: 0.3509226441383362 | Val Loss: 0.3239764578831501 | F1-score: 0.5086703109435546
Iter: 700 | Train Loss: 0.37057268619537354 | Val Loss: 0.3238557806381812 | F1-score: 0.522389680147171
Iter: 800 | Train Loss: 0.30169278383255005 | Val Loss: 0.32242449965232467 | F1-score: 0.5089870851773483
Iter: 900 | Train Loss: 0.3638790547847748 | 

[I 2023-12-04 04:23:28,088] Trial 5 finished with value: 0.5065378561998025 and parameters: {'n_layers': 5, 'n_p': 0.6025620934064149, 'lr': 0.00013837783407924937, 'weight_decay': 4.5684200423156565e-05, 'batch_size': 452, 'n_0_size': 792, 'n_1_size': 589, 'n_2_size': 206, 'n_3_size': 652, 'n_4_size': 209}. Best is trial 4 with value: 0.5347317857573731.


Iter: 1800 | Train Loss: 0.2944789528846741 | Val Loss: 0.31794334680606157 | F1-score: 0.5459363934321283
Iter: 100 | Train Loss: 0.3511278033256531 | Val Loss: 0.3288526296615601 | F1-score: 0.3731308003266653
Iter: 200 | Train Loss: 0.3265257179737091 | Val Loss: 0.3219054788351059 | F1-score: 0.4978406806786856
Iter: 300 | Train Loss: 0.3259563744068146 | Val Loss: 0.32375970979531604 | F1-score: 0.42099516689777383
Iter: 400 | Train Loss: 0.3151002526283264 | Val Loss: 0.32037148674329124 | F1-score: 0.511775878071785
Iter: 500 | Train Loss: 0.2977781295776367 | Val Loss: 0.32023540039857235 | F1-score: 0.5508445660273233
Iter: 600 | Train Loss: 0.31873321533203125 | Val Loss: 0.31684859196345005 | F1-score: 0.5232881118853887
Iter: 700 | Train Loss: 0.3402663767337799 | Val Loss: 0.3160031865040462 | F1-score: 0.5193877458572388
Iter: 800 | Train Loss: 0.3312075734138489 | Val Loss: 0.3177418172359466 | F1-score: 0.5561690221230188
Iter: 900 | Train Loss: 0.29906758666038513 | Va

[I 2023-12-04 04:23:56,496] Trial 6 finished with value: 0.5281327585379283 and parameters: {'n_layers': 4, 'n_p': 0.6584242644117335, 'lr': 0.0007901233341788291, 'weight_decay': 5.1638869876167445e-05, 'batch_size': 593, 'n_0_size': 533, 'n_1_size': 486, 'n_2_size': 699, 'n_3_size': 684}. Best is trial 4 with value: 0.5347317857573731.


Iter: 100 | Train Loss: 0.3505965769290924 | Val Loss: 0.32809797269957414 | F1-score: 0.23491416743823465
Iter: 200 | Train Loss: 0.3724530041217804 | Val Loss: 0.3238323560782842 | F1-score: 0.44709636058126173
Iter: 300 | Train Loss: 0.3227882981300354 | Val Loss: 0.3215429987226214 | F1-score: 0.5213304604802812
Iter: 400 | Train Loss: 0.33775994181632996 | Val Loss: 0.31920358708926616 | F1-score: 0.5231357157230377
Iter: 500 | Train Loss: 0.3222169280052185 | Val Loss: 0.31975000585828506 | F1-score: 0.5382101391042982
Iter: 600 | Train Loss: 0.2843838632106781 | Val Loss: 0.3182240699018752 | F1-score: 0.5197438197476524
Iter: 700 | Train Loss: 0.33718064427375793 | Val Loss: 0.3175196383680616 | F1-score: 0.4911918171814511
Iter: 800 | Train Loss: 0.3161628544330597 | Val Loss: 0.319003176689148 | F1-score: 0.5273530857903617
Iter: 900 | Train Loss: 0.32652604579925537 | Val Loss: 0.3178353582109724 | F1-score: 0.5404601582459041
Iter: 1000 | Train Loss: 0.2999943792819977 | Va

[I 2023-12-04 04:24:25,106] Trial 7 finished with value: 0.5231527271724883 and parameters: {'n_layers': 5, 'n_p': 0.624336368246498, 'lr': 0.0007808208607142803, 'weight_decay': 4.179932673722938e-05, 'batch_size': 505, 'n_0_size': 491, 'n_1_size': 326, 'n_2_size': 446, 'n_3_size': 277, 'n_4_size': 710}. Best is trial 4 with value: 0.5347317857573731.


Iter: 100 | Train Loss: 0.34748050570487976 | Val Loss: 0.32665016551812487 | F1-score: 0.45576997399330144
Iter: 200 | Train Loss: 0.31743553280830383 | Val Loss: 0.32351248264312743 | F1-score: 0.4789157330989837
Iter: 300 | Train Loss: 0.3316381871700287 | Val Loss: 0.32144168814023333 | F1-score: 0.48898784617582963
Iter: 400 | Train Loss: 0.33803310990333557 | Val Loss: 0.31936063269774123 | F1-score: 0.4566892474889756
Iter: 500 | Train Loss: 0.29527679085731506 | Val Loss: 0.32111096978187564 | F1-score: 0.4562829256057739
Iter: 600 | Train Loss: 0.3017221987247467 | Val Loss: 0.31841055949529007 | F1-score: 0.4367248664299648
Iter: 700 | Train Loss: 0.32889190316200256 | Val Loss: 0.32041359345118203 | F1-score: 0.43827892045180017
Iter: 800 | Train Loss: 0.31712424755096436 | Val Loss: 0.3179402858018874 | F1-score: 0.5511074086030325
Iter: 900 | Train Loss: 0.32799825072288513 | Val Loss: 0.31727478106816615 | F1-score: 0.5371186435222626
Iter: 1000 | Train Loss: 0.2926348149

[I 2023-12-04 04:24:45,225] Trial 8 finished with value: 0.529269567463133 and parameters: {'n_layers': 3, 'n_p': 0.646332752242356, 'lr': 0.0007919213513706081, 'weight_decay': 9.896085629494261e-05, 'batch_size': 589, 'n_0_size': 364, 'n_1_size': 642, 'n_2_size': 298}. Best is trial 4 with value: 0.5347317857573731.


Iter: 1400 | Train Loss: 0.40938758850097656 | Val Loss: 0.3158190160989761 | F1-score: 0.5206839323043825
Iter: 100 | Train Loss: 0.35067248344421387 | Val Loss: 0.33345776895682017 | F1-score: 0.4666208346684774
Iter: 200 | Train Loss: 0.32906556129455566 | Val Loss: 0.32567177414894105 | F1-score: 0.5103862226009369
Iter: 300 | Train Loss: 0.3258075714111328 | Val Loss: 0.3225881397724151 | F1-score: 0.40417966941992445
Iter: 400 | Train Loss: 0.32479214668273926 | Val Loss: 0.31967292726039886 | F1-score: 0.49074116547902424
Iter: 500 | Train Loss: 0.30603352189064026 | Val Loss: 0.3220033099253972 | F1-score: 0.5634892582893372
Iter: 600 | Train Loss: 0.3239508867263794 | Val Loss: 0.316189788778623 | F1-score: 0.5103289047876993
Iter: 700 | Train Loss: 0.34030601382255554 | Val Loss: 0.3155164976914724 | F1-score: 0.5069824963808061
Iter: 800 | Train Loss: 0.33923104405403137 | Val Loss: 0.31686960061391195 | F1-score: 0.5527546097834904
Iter: 900 | Train Loss: 0.2996575832366943

[I 2023-12-04 04:25:18,129] Trial 9 finished with value: 0.5233553369839986 and parameters: {'n_layers': 5, 'n_p': 0.4450453545316276, 'lr': 0.0003363035451889863, 'weight_decay': 8.652679875176544e-05, 'batch_size': 594, 'n_0_size': 654, 'n_1_size': 604, 'n_2_size': 454, 'n_3_size': 505, 'n_4_size': 330}. Best is trial 4 with value: 0.5347317857573731.


Iter: 100 | Train Loss: 0.34425756335258484 | Val Loss: 0.34590639219139563 | F1-score: 0.0
Iter: 200 | Train Loss: 0.3701247274875641 | Val Loss: 0.3467673234867327 | F1-score: 0.0
Iter: 300 | Train Loss: 0.33843284845352173 | Val Loss: 0.33745975566632824 | F1-score: 0.0
Iter: 400 | Train Loss: 0.35012558102607727 | Val Loss: 0.33107390096693334 | F1-score: 0.5037695873867382
Iter: 500 | Train Loss: 0.2950485348701477 | Val Loss: 0.323929210503896 | F1-score: 0.5554339741215562
Iter: 600 | Train Loss: 0.31393396854400635 | Val Loss: 0.3249242305755616 | F1-score: 0.5596899119290439
Iter: 700 | Train Loss: 0.3512522876262665 | Val Loss: 0.34214715795083483 | F1-score: 0.557665208975474
Iter: 800 | Train Loss: 0.30516424775123596 | Val Loss: 0.33853680888811755 | F1-score: 0.5710861231341507
Iter: 900 | Train Loss: 0.30384665727615356 | Val Loss: 0.3257936537265777 | F1-score: 0.5255215402805444
Iter: 1000 | Train Loss: 0.28805026412010193 | Val Loss: 0.3299011434569504 | F1-score: 0.5

[I 2023-12-04 04:25:59,286] Trial 10 finished with value: 0.5207667489244481 and parameters: {'n_layers': 7, 'n_p': 0.7323519637834079, 'lr': 0.0009822010520626038, 'weight_decay': 1.0107130333640998e-05, 'batch_size': 530, 'n_0_size': 368, 'n_1_size': 202, 'n_2_size': 561, 'n_3_size': 794, 'n_4_size': 482, 'n_5_size': 331, 'n_6_size': 423}. Best is trial 4 with value: 0.5347317857573731.


Iter: 100 | Train Loss: 0.2893630266189575 | Val Loss: 0.3318834196437489 | F1-score: 0.33463144844228576
Iter: 200 | Train Loss: 0.32275494933128357 | Val Loss: 0.3274495285568815 | F1-score: 0.5401156698212479
Iter: 300 | Train Loss: 0.31240934133529663 | Val Loss: 0.3196313959179503 | F1-score: 0.5036724092382374
Iter: 400 | Train Loss: 0.2651783227920532 | Val Loss: 0.32887907371376507 | F1-score: 0.43610690398649743
Iter: 500 | Train Loss: 0.3147849440574646 | Val Loss: 0.31886555660854676 | F1-score: 0.5126744391340197
Iter: 600 | Train Loss: 0.32829129695892334 | Val Loss: 0.3181834527940461 | F1-score: 0.55546507600582
Iter: 700 | Train Loss: 0.30847689509391785 | Val Loss: 0.3208862868222323 | F1-score: 0.5130933116782794
Iter: 800 | Train Loss: 0.32464709877967834 | Val Loss: 0.32076359156406287 | F1-score: 0.5691053298386661
Iter: 900 | Train Loss: 0.3434131443500519 | Val Loss: 0.31881829780159576 | F1-score: 0.5470492045084636
Iter: 1000 | Train Loss: 0.30720585584640503 |

[I 2023-12-04 04:26:47,402] Trial 11 finished with value: 0.5383962398827677 and parameters: {'n_layers': 7, 'n_p': 0.40670915618380676, 'lr': 0.0009653082312080798, 'weight_decay': 6.59511714896153e-05, 'batch_size': 544, 'n_0_size': 737, 'n_1_size': 319, 'n_2_size': 585, 'n_3_size': 549, 'n_4_size': 702, 'n_5_size': 546, 'n_6_size': 739}. Best is trial 11 with value: 0.5383962398827677.


Iter: 1500 | Train Loss: 0.25583675503730774 | Val Loss: 0.3515882130825158 | F1-score: 0.5293688250310493
Iter: 100 | Train Loss: 0.3844708800315857 | Val Loss: 0.32427364867180586 | F1-score: 0.411692351102829
Iter: 200 | Train Loss: 0.3491833209991455 | Val Loss: 0.32116068340837955 | F1-score: 0.5218834057450294
Iter: 300 | Train Loss: 0.32820573449134827 | Val Loss: 0.3199938340112567 | F1-score: 0.5320815891027451
Iter: 400 | Train Loss: 0.2950335741043091 | Val Loss: 0.3213671799749136 | F1-score: 0.5593882761895657
Iter: 500 | Train Loss: 0.28905758261680603 | Val Loss: 0.31732874643057585 | F1-score: 0.5057059396058321
Iter: 600 | Train Loss: 0.28704747557640076 | Val Loss: 0.3173232451081276 | F1-score: 0.5459356280043721
Iter: 700 | Train Loss: 0.28718987107276917 | Val Loss: 0.320583269931376 | F1-score: 0.5386479729786515
Iter: 800 | Train Loss: 0.30966490507125854 | Val Loss: 0.3190654693171382 | F1-score: 0.49738137051463127
Iter: 900 | Train Loss: 0.2950204610824585 | V

[I 2023-12-04 04:27:30,913] Trial 12 finished with value: 0.5163409793749452 and parameters: {'n_layers': 7, 'n_p': 0.4042275332531814, 'lr': 0.0009754243680967522, 'weight_decay': 6.795246004931467e-05, 'batch_size': 550, 'n_0_size': 785, 'n_1_size': 357, 'n_2_size': 589, 'n_3_size': 627, 'n_4_size': 763, 'n_5_size': 538, 'n_6_size': 764}. Best is trial 11 with value: 0.5383962398827677.


Iter: 100 | Train Loss: 0.34673944115638733 | Val Loss: 0.32599765547486237 | F1-score: 0.2974002004362816
Iter: 200 | Train Loss: 0.31937190890312195 | Val Loss: 0.32339253675105956 | F1-score: 0.49971635743629095
Iter: 300 | Train Loss: 0.28722015023231506 | Val Loss: 0.3238562712835713 | F1-score: 0.4783375921637514
Iter: 400 | Train Loss: 0.3753952383995056 | Val Loss: 0.3206509926984477 | F1-score: 0.5393027399861535
Iter: 500 | Train Loss: 0.29914867877960205 | Val Loss: 0.3200455370337465 | F1-score: 0.5501037302405335
Iter: 600 | Train Loss: 0.2945184111595154 | Val Loss: 0.3187893033027648 | F1-score: 0.4971228131028109
Iter: 700 | Train Loss: 0.31165677309036255 | Val Loss: 0.31871788515601046 | F1-score: 0.5495023872963217
Iter: 800 | Train Loss: 0.31641271710395813 | Val Loss: 0.31766843310622284 | F1-score: 0.5446318880070088
Iter: 900 | Train Loss: 0.3335558772087097 | Val Loss: 0.3221820495849433 | F1-score: 0.572797011497409
Iter: 1000 | Train Loss: 0.29747599363327026 

[I 2023-12-04 04:28:20,155] Trial 13 finished with value: 0.5304190294687138 and parameters: {'n_layers': 7, 'n_p': 0.5185082843765887, 'lr': 0.0008988013449612456, 'weight_decay': 6.429838367419304e-05, 'batch_size': 413, 'n_0_size': 402, 'n_1_size': 799, 'n_2_size': 381, 'n_3_size': 772, 'n_4_size': 589, 'n_5_size': 517, 'n_6_size': 643}. Best is trial 11 with value: 0.5383962398827677.


Iter: 1800 | Train Loss: 0.2567504346370697 | Val Loss: 0.32996877750685044 | F1-score: 0.5586120985275091
Iter: 100 | Train Loss: 0.30375775694847107 | Val Loss: 0.3274324623977437 | F1-score: 0.48630378789761497
Iter: 200 | Train Loss: 0.34546592831611633 | Val Loss: 0.3221020698547363 | F1-score: 0.37567037957556104
Iter: 300 | Train Loss: 0.3447607457637787 | Val Loss: 0.3237462411908543 | F1-score: 0.5394167829962339
Iter: 400 | Train Loss: 0.30546918511390686 | Val Loss: 0.31808746211669015 | F1-score: 0.508367345613592
Iter: 500 | Train Loss: 0.30257630348205566 | Val Loss: 0.3173532643738915 | F1-score: 0.5391222992364098
Iter: 600 | Train Loss: 0.2893749177455902 | Val Loss: 0.3173109994215123 | F1-score: 0.49309216352070084
Iter: 700 | Train Loss: 0.35124489665031433 | Val Loss: 0.3185389120789134 | F1-score: 0.5491680301287595
Iter: 800 | Train Loss: 0.31514984369277954 | Val Loss: 0.32247425002210284 | F1-score: 0.5435613034402623
Iter: 900 | Train Loss: 0.2921733856201172 

[I 2023-12-04 04:28:49,596] Trial 14 finished with value: 0.5135272694569009 and parameters: {'n_layers': 6, 'n_p': 0.4094009709427492, 'lr': 0.0006885666435631375, 'weight_decay': 3.205632036030055e-05, 'batch_size': 517, 'n_0_size': 680, 'n_1_size': 217, 'n_2_size': 587, 'n_3_size': 573, 'n_4_size': 402, 'n_5_size': 611}. Best is trial 11 with value: 0.5383962398827677.


Iter: 100 | Train Loss: 0.29940491914749146 | Val Loss: 0.326404282823205 | F1-score: 0.3772595366463065
Iter: 200 | Train Loss: 0.3358238935470581 | Val Loss: 0.3279374521225691 | F1-score: 0.45249700732529163
Iter: 300 | Train Loss: 0.3086170554161072 | Val Loss: 0.3176973797380924 | F1-score: 0.5468104332685471
Iter: 400 | Train Loss: 0.32948482036590576 | Val Loss: 0.32764352578669786 | F1-score: 0.3994960505515337
Iter: 500 | Train Loss: 0.295259565114975 | Val Loss: 0.31635855324566364 | F1-score: 0.5338243767619133
Iter: 600 | Train Loss: 0.30069589614868164 | Val Loss: 0.31915686186403036 | F1-score: 0.5091995298862457
Iter: 700 | Train Loss: 0.2986066937446594 | Val Loss: 0.31795551581308246 | F1-score: 0.37673268653452396
Iter: 800 | Train Loss: 0.32080838084220886 | Val Loss: 0.30934899765998125 | F1-score: 0.5174997057765722
Iter: 900 | Train Loss: 0.2985498309135437 | Val Loss: 0.3174650901928544 | F1-score: 0.5644718129187822
Iter: 1000 | Train Loss: 0.29912397265434265 |

[I 2023-12-04 04:29:12,302] Trial 15 finished with value: 0.5385986318190893 and parameters: {'n_layers': 6, 'n_p': 0.4954359101550222, 'lr': 0.0009994663726591757, 'weight_decay': 5.628689161004867e-05, 'batch_size': 562, 'n_0_size': 288, 'n_1_size': 401, 'n_2_size': 202, 'n_3_size': 393, 'n_4_size': 603, 'n_5_size': 380}. Best is trial 15 with value: 0.5385986318190893.


Iter: 100 | Train Loss: 0.35113078355789185 | Val Loss: 0.3255263634266391 | F1-score: 0.33425183834568134
Iter: 200 | Train Loss: 0.29335662722587585 | Val Loss: 0.32091712951660156 | F1-score: 0.4497422672087146
Iter: 300 | Train Loss: 0.29528331756591797 | Val Loss: 0.32229738370064764 | F1-score: 0.5258149120115465
Iter: 400 | Train Loss: 0.3280136287212372 | Val Loss: 0.3180208475359025 | F1-score: 0.45872538897299
Iter: 500 | Train Loss: 0.3168405294418335 | Val Loss: 0.32488791211958856 | F1-score: 0.5677244096033035
Iter: 600 | Train Loss: 0.3048000931739807 | Val Loss: 0.31792938132439885 | F1-score: 0.4885446410025319
Iter: 700 | Train Loss: 0.35841864347457886 | Val Loss: 0.317327194636868 | F1-score: 0.5435549376472351
Iter: 800 | Train Loss: 0.30768632888793945 | Val Loss: 0.3230556739914803 | F1-score: 0.5629175420730343
Iter: 900 | Train Loss: 0.30766886472702026 | Val Loss: 0.32061427735513254 | F1-score: 0.5528096450913337
Iter: 1000 | Train Loss: 0.28482288122177124 |

[I 2023-12-04 04:29:37,974] Trial 16 finished with value: 0.4969416558742523 and parameters: {'n_layers': 6, 'n_p': 0.48510455551905557, 'lr': 0.0009786130179793425, 'weight_decay': 7.758355916114671e-05, 'batch_size': 565, 'n_0_size': 448, 'n_1_size': 422, 'n_2_size': 652, 'n_3_size': 401, 'n_4_size': 642, 'n_5_size': 308}. Best is trial 15 with value: 0.5385986318190893.


Iter: 100 | Train Loss: 0.3532312512397766 | Val Loss: 0.3261871318663319 | F1-score: 0.43940281291161815
Iter: 200 | Train Loss: 0.29598817229270935 | Val Loss: 0.32092101343216434 | F1-score: 0.4945772895889897
Iter: 300 | Train Loss: 0.287603497505188 | Val Loss: 0.3240248426314324 | F1-score: 0.5198654705478298
Iter: 400 | Train Loss: 0.3231489658355713 | Val Loss: 0.31865115800211513 | F1-score: 0.4506959895933828
Iter: 500 | Train Loss: 0.30427786707878113 | Val Loss: 0.31882293474289675 | F1-score: 0.5388843147985397
Iter: 600 | Train Loss: 0.310683012008667 | Val Loss: 0.31724202921313627 | F1-score: 0.4880958968593228
Iter: 700 | Train Loss: 0.3640676438808441 | Val Loss: 0.3160062980267309 | F1-score: 0.5230674013014762
Iter: 800 | Train Loss: 0.30195921659469604 | Val Loss: 0.32030575506148806 | F1-score: 0.5342472080261474
Iter: 900 | Train Loss: 0.2976703345775604 | Val Loss: 0.31990492151629546 | F1-score: 0.5609013765088973
Iter: 1000 | Train Loss: 0.2822583019733429 | V

[I 2023-12-04 04:30:02,587] Trial 17 finished with value: 0.4872864292513939 and parameters: {'n_layers': 6, 'n_p': 0.43712899780016135, 'lr': 0.0007061542662678891, 'weight_decay': 5.6388809033703694e-05, 'batch_size': 567, 'n_0_size': 321, 'n_1_size': 286, 'n_2_size': 518, 'n_3_size': 345, 'n_4_size': 785, 'n_5_size': 200}. Best is trial 15 with value: 0.5385986318190893.


Iter: 100 | Train Loss: 0.3483676016330719 | Val Loss: 0.32726851105689997 | F1-score: 0.3177953085137738
Iter: 200 | Train Loss: 0.40340569615364075 | Val Loss: 0.32053145600689786 | F1-score: 0.5131091326475145
Iter: 300 | Train Loss: 0.2971815764904022 | Val Loss: 0.3200489208102226 | F1-score: 0.4422950951589478
Iter: 400 | Train Loss: 0.330337256193161 | Val Loss: 0.31858787602848476 | F1-score: 0.4426128185457654
Iter: 500 | Train Loss: 0.26659727096557617 | Val Loss: 0.32494540595346016 | F1-score: 0.3482134569850233
Iter: 600 | Train Loss: 0.36722928285598755 | Val Loss: 0.3176554292440415 | F1-score: 0.5489701959821912
Iter: 700 | Train Loss: 0.3299413025379181 | Val Loss: 0.31863628658983434 | F1-score: 0.5400580565134684
Iter: 800 | Train Loss: 0.28833600878715515 | Val Loss: 0.3413440129823156 | F1-score: 0.5237792366080815
Iter: 900 | Train Loss: 0.3176054358482361 | Val Loss: 0.318166747689247 | F1-score: 0.5143757727411058
Iter: 1000 | Train Loss: 0.31801411509513855 | V

[I 2023-12-04 04:30:28,079] Trial 18 finished with value: 0.5019862624230208 and parameters: {'n_layers': 6, 'n_p': 0.4712305485090429, 'lr': 0.0009148524477490723, 'weight_decay': 5.680072279333931e-05, 'batch_size': 492, 'n_0_size': 706, 'n_1_size': 414, 'n_2_size': 385, 'n_3_size': 566, 'n_4_size': 578, 'n_5_size': 401}. Best is trial 15 with value: 0.5385986318190893.


Iter: 1600 | Train Loss: 0.22975625097751617 | Val Loss: 0.33021521568298345 | F1-score: 0.522006252573596
Iter: 100 | Train Loss: 0.3364828824996948 | Val Loss: 0.32877762856022014 | F1-score: 0.27785160560761735
Iter: 200 | Train Loss: 0.2883179187774658 | Val Loss: 0.3216477815181978 | F1-score: 0.4254936720094374
Iter: 300 | Train Loss: 0.28766512870788574 | Val Loss: 0.32113080736129523 | F1-score: 0.516929191927756
Iter: 400 | Train Loss: 0.3064804971218109 | Val Loss: 0.3180778468808819 | F1-score: 0.5011974505839809
Iter: 500 | Train Loss: 0.30540335178375244 | Val Loss: 0.32746578128107134 | F1-score: 0.55586471672981
Iter: 600 | Train Loss: 0.29421693086624146 | Val Loss: 0.3177872328988968 | F1-score: 0.5185925297198757
Iter: 700 | Train Loss: 0.35785403847694397 | Val Loss: 0.3172996688273646 | F1-score: 0.5449748577610137
Iter: 800 | Train Loss: 0.30998319387435913 | Val Loss: 0.3207017673600105 | F1-score: 0.5308448403112349
Iter: 900 | Train Loss: 0.28459441661834717 | V

[I 2023-12-04 04:30:56,202] Trial 19 finished with value: 0.5215882793549568 and parameters: {'n_layers': 7, 'n_p': 0.40059655714605513, 'lr': 0.000863313988691958, 'weight_decay': 7.477139889122756e-05, 'batch_size': 570, 'n_0_size': 577, 'n_1_size': 262, 'n_2_size': 234, 'n_3_size': 447, 'n_4_size': 686, 'n_5_size': 668, 'n_6_size': 262}. Best is trial 15 with value: 0.5385986318190893.


Iter: 100 | Train Loss: 0.34840887784957886 | Val Loss: 0.32775777841315545 | F1-score: 0.5053217691533706
Iter: 200 | Train Loss: 0.3255932927131653 | Val Loss: 0.32516601769363185 | F1-score: 0.3362052545827978
Iter: 300 | Train Loss: 0.3275311589241028 | Val Loss: 0.32334093223599825 | F1-score: 0.5472879295839984
Iter: 400 | Train Loss: 0.32606732845306396 | Val Loss: 0.3204589153037352 | F1-score: 0.5376412728253533
Iter: 500 | Train Loss: 0.34170985221862793 | Val Loss: 0.3175303804523804 | F1-score: 0.49435300862087933
Iter: 600 | Train Loss: 0.29848799109458923 | Val Loss: 0.3198565560228685 | F1-score: 0.5538248463588603
Iter: 700 | Train Loss: 0.3079957962036133 | Val Loss: 0.3154650654862909 | F1-score: 0.4984509576769437
Iter: 800 | Train Loss: 0.3408004641532898 | Val Loss: 0.31848078878486863 | F1-score: 0.5427973095108483
Iter: 900 | Train Loss: 0.3106962740421295 | Val Loss: 0.32572856808409967 | F1-score: 0.5736225703183343
Iter: 1000 | Train Loss: 0.31251105666160583 

[I 2023-12-04 04:31:33,166] Trial 20 finished with value: 0.5118670919362237 and parameters: {'n_layers': 6, 'n_p': 0.44607284733157776, 'lr': 0.0006678544116320948, 'weight_decay': 5.378201247499214e-05, 'batch_size': 524, 'n_0_size': 732, 'n_1_size': 407, 'n_2_size': 504, 'n_3_size': 558, 'n_4_size': 544, 'n_5_size': 436}. Best is trial 15 with value: 0.5385986318190893.


Iter: 100 | Train Loss: 0.29685309529304504 | Val Loss: 0.3261263813033248 | F1-score: 0.4419108403451515
Iter: 200 | Train Loss: 0.36719831824302673 | Val Loss: 0.32446747837644646 | F1-score: 0.3993357460607182
Iter: 300 | Train Loss: 0.33977729082107544 | Val Loss: 0.323425062678077 | F1-score: 0.37100654614694195
Iter: 400 | Train Loss: 0.27398693561553955 | Val Loss: 0.32727789336984814 | F1-score: 0.3864252373124614
Iter: 500 | Train Loss: 0.34651631116867065 | Val Loss: 0.3211113837632265 | F1-score: 0.4343234445109511
Iter: 600 | Train Loss: 0.32099995017051697 | Val Loss: 0.32040204965707036 | F1-score: 0.561877842202331
Iter: 700 | Train Loss: 0.30512064695358276 | Val Loss: 0.31791700738849066 | F1-score: 0.506516956018679
Iter: 800 | Train Loss: 0.3386886715888977 | Val Loss: 0.3156667125947548 | F1-score: 0.5380363039898148
Iter: 900 | Train Loss: 0.341646283864975 | Val Loss: 0.31524058002414124 | F1-score: 0.5662391872117013
Iter: 1000 | Train Loss: 0.28492334485054016 |

[I 2023-12-04 04:32:00,021] Trial 21 finished with value: 0.5541569220297263 and parameters: {'n_layers': 7, 'n_p': 0.5527523424130752, 'lr': 0.000990715723353531, 'weight_decay': 4.680827704901506e-05, 'batch_size': 543, 'n_0_size': 276, 'n_1_size': 365, 'n_2_size': 284, 'n_3_size': 729, 'n_4_size': 418, 'n_5_size': 470, 'n_6_size': 483}. Best is trial 21 with value: 0.5541569220297263.


Iter: 1500 | Train Loss: 0.30434882640838623 | Val Loss: 0.3196881016095479 | F1-score: 0.5368295248710747
Iter: 100 | Train Loss: 0.3380526304244995 | Val Loss: 0.3284001883232232 | F1-score: 0.4384218761415193
Iter: 200 | Train Loss: 0.3659990131855011 | Val Loss: 0.3223878172310916 | F1-score: 0.3840836349761847
Iter: 300 | Train Loss: 0.3056815564632416 | Val Loss: 0.3210319768298757 | F1-score: 0.45552724238597986
Iter: 400 | Train Loss: 0.3290694057941437 | Val Loss: 0.3203299388741002 | F1-score: 0.4449732077844215
Iter: 500 | Train Loss: 0.28578850626945496 | Val Loss: 0.31902861685463874 | F1-score: 0.4947875602678819
Iter: 600 | Train Loss: 0.30991071462631226 | Val Loss: 0.31800113392598706 | F1-score: 0.4569636556235226
Iter: 700 | Train Loss: 0.3452291786670685 | Val Loss: 0.3167224932800641 | F1-score: 0.532166159514225
Iter: 800 | Train Loss: 0.3557697832584381 | Val Loss: 0.31654421127203736 | F1-score: 0.5454061428705853
Iter: 900 | Train Loss: 0.34891560673713684 | Va

[I 2023-12-04 04:32:29,212] Trial 22 finished with value: 0.545169672279647 and parameters: {'n_layers': 7, 'n_p': 0.5671581365312748, 'lr': 0.0009745327633390806, 'weight_decay': 5.960318813946822e-05, 'batch_size': 538, 'n_0_size': 304, 'n_1_size': 450, 'n_2_size': 255, 'n_3_size': 721, 'n_4_size': 381, 'n_5_size': 406, 'n_6_size': 560}. Best is trial 21 with value: 0.5541569220297263.


Iter: 100 | Train Loss: 0.2883281409740448 | Val Loss: 0.32722328266789835 | F1-score: 0.25546921164758746
Iter: 200 | Train Loss: 0.3119112253189087 | Val Loss: 0.3209892434458579 | F1-score: 0.43695792459672494
Iter: 300 | Train Loss: 0.33613675832748413 | Val Loss: 0.322852183734217 | F1-score: 0.4390075677825558
Iter: 400 | Train Loss: 0.3319854438304901 | Val Loss: 0.31766541061862824 | F1-score: 0.5040416198392068
Iter: 500 | Train Loss: 0.32106688618659973 | Val Loss: 0.31940829080920063 | F1-score: 0.47608529656164117
Iter: 600 | Train Loss: 0.3242288827896118 | Val Loss: 0.3207703790357036 | F1-score: 0.5310812313710489
Iter: 700 | Train Loss: 0.32209861278533936 | Val Loss: 0.3174079473941557 | F1-score: 0.5363087394545154
Iter: 800 | Train Loss: 0.2854755222797394 | Val Loss: 0.3176196082945793 | F1-score: 0.5086246017486818
Iter: 900 | Train Loss: 0.3160615861415863 | Val Loss: 0.3183236468222834 | F1-score: 0.4873476595647874
Iter: 1000 | Train Loss: 0.30069252848625183 | 

[W 2023-12-04 04:32:58,421] Trial 23 failed with parameters: {'n_layers': 7, 'n_p': 0.5552792636373135, 'lr': 0.0009938124997151596, 'weight_decay': 3.563732372793427e-05, 'batch_size': 575, 'n_0_size': 309, 'n_1_size': 531, 'n_2_size': 252, 'n_3_size': 734, 'n_4_size': 379, 'n_5_size': 376, 'n_6_size': 556} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\astus\code\extractive-summarization\.venv\Lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\astus\AppData\Local\Temp\ipykernel_10164\356372157.py", line 89, in train_MLP
    json.dump(results, open(f"models/mlp_results_{trial.number}.json", "w"))
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\astus\code\extractive-summarization\.venv\Lib\site-packages\IPython\core\interactiveshell.py", line 310, in _modified_open
    return io_ope

KeyboardInterrupt: 

In [None]:
from pathlib import Path

score = {}
for item in Path("models").iterdir():
    if not item.suffix == ".json" : continue
    
    id = item.stem[-1]
    score[id] = json.load(open(item, "r"))["score"]

sorted(score.items(), key=lambda x : x[1])
