In [1]:
import argparse

import torch 
import torch.nn as nn
import torch.nn.functional as F

from Haruki_Dataset import Corpus
from model import TransformerNet
from sklearn.model_selection import train_test_split
device= torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
import optuna

a=Corpus()

b=a.get_data("C:\\Python\\Pytorch\\Transformer related\\Project Haruki Murakami\\data\\*.txt")

print(b.size()) #(8, 84352)

674816
torch.Size([8, 84352])


In [3]:
print(len(b)) #8
num_vocab=len(a.dictionary.word2idx)
print(num_vocab) #14307


8
14307


In [40]:
def parse_args():
    parser = argparse.ArgumentParser(description="Japanese text generation based on novels of Haruki Murakami")
    parser.add_argument("--corpus", type=str, default="C:\\Python\Pytorch\\Transformer related\\Project Haruki Murakami\\data\\*.txt", help="traing corpus files")
    parser.add_argument("--output_model", type=str, default="C:\\Python\Pytorch\\Transformer related\\Project Haruki Murakami\\output_model.pt", help="output model file")
    parser.add_argument("--seq-length", type=int, default=50, help="input sequence length (default: 50)")
    parser.add_argument('--batch-size', type=int, default=8, help='training batch size (default: 8)')
    parser.add_argument('--embedding-dim', type=int, default=256, help='embedding dimension for characters in corpus (default: 256)')
    parser.add_argument('--hidden-dim', type=int, default=256, help='hidden state dimension (default: 256)')
    parser.add_argument('--lr', type=float, default=0.0001, help='learning rate (default: 0.0001)')
    parser.add_argument('--dropout', type=float, default=0.2, help='dropout rate (default: 0.2)')
    parser.add_argument('--epochs', type=int, default=10, help='number of epochs to train (default: 10)')
    parser.add_argument('--log-interval', type=int, default=1200, help='number of batches to wait before logging status (default: 100)')
    return parser.parse_args(args=[])

In [43]:
def build_model(params, args):
    
    return TransformerNet(n_vocab=num_vocab, embedding_dim=args.embedding_dim, hidden_dim=args.hidden_dim)

def test(model, data, args):
    model.eval()
    

    total_loss = 0
    with torch.no_grad():
        for num in range(0, data.size(1)-args.seq_length, args.seq_length):
            count=len(range(0, data.size(1)-args.seq_length, args.seq_length))
            inputs=data[:, num:num+args.seq_length].to(device)
            targets=data[:, num+1:num+1+args.seq_length].to(device)
            output = model(inputs)
            loss = F.cross_entropy(output.view(-1, output.shape[-1]), targets.t().reshape(-1))
            total_loss += loss.item()

    avg_loss = total_loss / count
    print('Test Loss: {:.6f}'.format(avg_loss))


# Train and evaluate the accuarcy of neural network model
def train_and_evaluate(param, model, args):
    
    model=model.to(device)
    dataset=b.reshape(-1)
    train_set, test_set = train_test_split(dataset, test_size=0.25, shuffle=True)
    train_dataloader = train_set.view(args.batch_size, -1)
    test_dataloader = test_set.view(args.batch_size, -1)
    
    optimizer= getattr(torch.optim, param['optimizer'])(model.parameters(), lr= param['lr'])

    #Train
    model.train()
    for epoch in range(args.epochs):
        
        i=0
        for num in range(0, train_dataloader.size(1)-args.seq_length, args.seq_length):
            count=len(range(0, train_dataloader.size(1)-args.seq_length, args.seq_length))
            inputs=train_dataloader[:, num:num+args.seq_length].to(device)
            targets=train_dataloader[:, num+1:num+1+args.seq_length].to(device)
            #train
            optimizer.zero_grad()
            outputs=model(inputs) # seq_len x batch_size x |V|
            loss = F.cross_entropy(outputs.view(-1, outputs.shape[-1]), targets.t().reshape(-1))
            loss.backward()
            optimizer.step()
            
            i += 1
            
            if i%args.log_interval == 0:
                print(f'Epoch [{epoch+1}/{args.epochs}], Step [{i}], Loss: {loss.item():.4f}')

    # Test
    model.eval()
    

    total_loss = 0
    with torch.no_grad():
        for num in range(0, test_dataloader.size(1)-args.seq_length, args.seq_length):
            count=len(range(0, test_dataloader.size(1)-args.seq_length, args.seq_length))
            inputs=test_dataloader[:, num:num+args.seq_length].to(device)
            targets=test_dataloader[:, num+1:num+1+args.seq_length].to(device)
            output = model(inputs)
            loss = F.cross_entropy(output.view(-1, output.shape[-1]), targets.t().reshape(-1))
            total_loss += loss.item()

    avg_loss = total_loss / count
    return avg_loss



def objective(trial):
    params = {
              'lr': trial.suggest_loguniform('lr', 1e-6, 1e-2),
              'optimizer': trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "AdamW"])
              }
    args=parse_args()
    model = build_model(params, args)
    
    avg_loss = train_and_evaluate(params, model, args)

    return avg_loss

In [45]:
study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler())
study.optimize(objective, n_trials=15)

[32m[I 2022-11-12 18:02:53,345][0m A new study created in memory with name: no-name-b16db53e-199b-4796-b7bc-f0695ef78131[0m
  'lr': trial.suggest_loguniform('lr', 1e-6, 1e-2),


Epoch [1/10], Step [1200], Loss: 4.4368
Epoch [2/10], Step [1200], Loss: 4.4027
Epoch [3/10], Step [1200], Loss: 4.4111
Epoch [4/10], Step [1200], Loss: 4.3952
Epoch [5/10], Step [1200], Loss: 4.3966
Epoch [6/10], Step [1200], Loss: 4.3996
Epoch [7/10], Step [1200], Loss: 4.4076
Epoch [8/10], Step [1200], Loss: 4.3975
Epoch [9/10], Step [1200], Loss: 4.4009
Epoch [10/10], Step [1200], Loss: 4.3927


[32m[I 2022-11-12 18:05:50,747][0m Trial 0 finished with value: 4.498561649594341 and parameters: {'lr': 3.707667232005652e-05, 'optimizer': 'Adam'}. Best is trial 0 with value: 4.498561649594341.[0m


Epoch [1/10], Step [1200], Loss: 4.2962
Epoch [2/10], Step [1200], Loss: 4.2651
Epoch [3/10], Step [1200], Loss: 4.2584
Epoch [4/10], Step [1200], Loss: 4.2457
Epoch [5/10], Step [1200], Loss: 4.2428
Epoch [6/10], Step [1200], Loss: 4.2378
Epoch [7/10], Step [1200], Loss: 4.2470
Epoch [8/10], Step [1200], Loss: 4.2418
Epoch [9/10], Step [1200], Loss: 4.2306
Epoch [10/10], Step [1200], Loss: 4.2321


[32m[I 2022-11-12 18:08:14,493][0m Trial 1 finished with value: 4.501998526466714 and parameters: {'lr': 4.548526709905517e-05, 'optimizer': 'RMSprop'}. Best is trial 0 with value: 4.498561649594341.[0m


Epoch [1/10], Step [1200], Loss: 4.4572
Epoch [2/10], Step [1200], Loss: 4.3985
Epoch [3/10], Step [1200], Loss: 4.3881
Epoch [4/10], Step [1200], Loss: 4.3850
Epoch [5/10], Step [1200], Loss: 4.3868
Epoch [6/10], Step [1200], Loss: 4.3811
Epoch [7/10], Step [1200], Loss: 4.3702
Epoch [8/10], Step [1200], Loss: 4.3717
Epoch [9/10], Step [1200], Loss: 4.3753
Epoch [10/10], Step [1200], Loss: 4.3669


[32m[I 2022-11-12 18:11:11,954][0m Trial 2 finished with value: 4.474703687386954 and parameters: {'lr': 1.9586754566641234e-05, 'optimizer': 'Adam'}. Best is trial 2 with value: 4.474703687386954.[0m


Epoch [1/10], Step [1200], Loss: 6.3456
Epoch [2/10], Step [1200], Loss: 5.7773
Epoch [3/10], Step [1200], Loss: 5.4542
Epoch [4/10], Step [1200], Loss: 5.1810
Epoch [5/10], Step [1200], Loss: 4.9377
Epoch [6/10], Step [1200], Loss: 4.7321
Epoch [7/10], Step [1200], Loss: 4.5529
Epoch [8/10], Step [1200], Loss: 4.4180
Epoch [9/10], Step [1200], Loss: 4.3207
Epoch [10/10], Step [1200], Loss: 4.2425


[32m[I 2022-11-12 18:14:09,526][0m Trial 3 finished with value: 4.580766836424622 and parameters: {'lr': 1.4508223817510297e-06, 'optimizer': 'Adam'}. Best is trial 2 with value: 4.474703687386954.[0m


Epoch [1/10], Step [1200], Loss: 4.7512
Epoch [2/10], Step [1200], Loss: 4.5018
Epoch [3/10], Step [1200], Loss: 4.4768
Epoch [4/10], Step [1200], Loss: 4.4745
Epoch [5/10], Step [1200], Loss: 4.4761
Epoch [6/10], Step [1200], Loss: 4.4725
Epoch [7/10], Step [1200], Loss: 4.4746
Epoch [8/10], Step [1200], Loss: 4.4636
Epoch [9/10], Step [1200], Loss: 4.4648
Epoch [10/10], Step [1200], Loss: 4.4798


[32m[I 2022-11-12 18:17:06,981][0m Trial 4 finished with value: 4.483467226640062 and parameters: {'lr': 1.2925592771196815e-05, 'optimizer': 'Adam'}. Best is trial 2 with value: 4.474703687386954.[0m


Epoch [1/10], Step [1200], Loss: 4.5896
Epoch [2/10], Step [1200], Loss: 4.2993
Epoch [3/10], Step [1200], Loss: 4.2761
Epoch [4/10], Step [1200], Loss: 4.2721
Epoch [5/10], Step [1200], Loss: 4.2597
Epoch [6/10], Step [1200], Loss: 4.2619
Epoch [7/10], Step [1200], Loss: 4.2617
Epoch [8/10], Step [1200], Loss: 4.2611
Epoch [9/10], Step [1200], Loss: 4.2639
Epoch [10/10], Step [1200], Loss: 4.2589


[32m[I 2022-11-12 18:20:08,080][0m Trial 5 finished with value: 4.482287464685508 and parameters: {'lr': 1.0555860086584539e-05, 'optimizer': 'AdamW'}. Best is trial 2 with value: 4.474703687386954.[0m


Epoch [1/10], Step [1200], Loss: 4.6540
Epoch [2/10], Step [1200], Loss: 4.6365
Epoch [3/10], Step [1200], Loss: 4.6303
Epoch [4/10], Step [1200], Loss: 4.6293
Epoch [5/10], Step [1200], Loss: 4.6291
Epoch [6/10], Step [1200], Loss: 4.6338
Epoch [7/10], Step [1200], Loss: 4.6358
Epoch [8/10], Step [1200], Loss: 4.6225
Epoch [9/10], Step [1200], Loss: 4.6138
Epoch [10/10], Step [1200], Loss: 4.6169


[32m[I 2022-11-12 18:23:05,423][0m Trial 6 finished with value: 4.492865732899754 and parameters: {'lr': 4.723312814070713e-05, 'optimizer': 'Adam'}. Best is trial 2 with value: 4.474703687386954.[0m


Epoch [1/10], Step [1200], Loss: 4.4664
Epoch [2/10], Step [1200], Loss: 4.4546
Epoch [3/10], Step [1200], Loss: 4.4506
Epoch [4/10], Step [1200], Loss: 4.4439
Epoch [5/10], Step [1200], Loss: 4.4437
Epoch [6/10], Step [1200], Loss: 4.4285
Epoch [7/10], Step [1200], Loss: 4.4305
Epoch [8/10], Step [1200], Loss: 4.4162
Epoch [9/10], Step [1200], Loss: 4.4302
Epoch [10/10], Step [1200], Loss: 4.4108


[32m[I 2022-11-12 18:26:06,468][0m Trial 7 finished with value: 4.5052875351169615 and parameters: {'lr': 8.246991385141191e-05, 'optimizer': 'AdamW'}. Best is trial 2 with value: 4.474703687386954.[0m


Epoch [1/10], Step [1200], Loss: 4.5489
Epoch [2/10], Step [1200], Loss: 4.4888
Epoch [3/10], Step [1200], Loss: 4.4625
Epoch [4/10], Step [1200], Loss: 4.4405
Epoch [5/10], Step [1200], Loss: 4.4310
Epoch [6/10], Step [1200], Loss: 4.4232
Epoch [7/10], Step [1200], Loss: 4.4502
Epoch [8/10], Step [1200], Loss: 4.4129
Epoch [9/10], Step [1200], Loss: 4.4227
Epoch [10/10], Step [1200], Loss: 4.3951


[32m[I 2022-11-12 18:28:31,777][0m Trial 8 finished with value: 4.511264488985873 and parameters: {'lr': 0.0014766069238589885, 'optimizer': 'RMSprop'}. Best is trial 2 with value: 4.474703687386954.[0m


Epoch [1/10], Step [1200], Loss: 4.8178
Epoch [2/10], Step [1200], Loss: 4.7789
Epoch [3/10], Step [1200], Loss: 4.7201
Epoch [4/10], Step [1200], Loss: 4.6665
Epoch [5/10], Step [1200], Loss: 4.6889
Epoch [6/10], Step [1200], Loss: 4.6555
Epoch [7/10], Step [1200], Loss: 4.6831
Epoch [8/10], Step [1200], Loss: 4.6589
Epoch [9/10], Step [1200], Loss: 4.6619
Epoch [10/10], Step [1200], Loss: 4.6527


[32m[I 2022-11-12 18:31:29,172][0m Trial 9 finished with value: 4.6401567436453846 and parameters: {'lr': 0.000883664330194345, 'optimizer': 'Adam'}. Best is trial 2 with value: 4.474703687386954.[0m


Epoch [1/10], Step [1200], Loss: 4.5836
Epoch [2/10], Step [1200], Loss: 4.5100
Epoch [3/10], Step [1200], Loss: 4.4409
Epoch [4/10], Step [1200], Loss: 4.4073
Epoch [5/10], Step [1200], Loss: 4.3963
Epoch [6/10], Step [1200], Loss: 4.3774
Epoch [7/10], Step [1200], Loss: 4.3770
Epoch [8/10], Step [1200], Loss: 4.3670
Epoch [9/10], Step [1200], Loss: 4.3558
Epoch [10/10], Step [1200], Loss: 4.3680


[32m[I 2022-11-12 18:33:54,866][0m Trial 10 finished with value: 4.702213033644433 and parameters: {'lr': 0.009899960539808402, 'optimizer': 'RMSprop'}. Best is trial 2 with value: 4.474703687386954.[0m


Epoch [1/10], Step [1200], Loss: 6.2461
Epoch [2/10], Step [1200], Loss: 5.6337
Epoch [3/10], Step [1200], Loss: 5.2039
Epoch [4/10], Step [1200], Loss: 4.9494
Epoch [5/10], Step [1200], Loss: 4.7924
Epoch [6/10], Step [1200], Loss: 4.7313
Epoch [7/10], Step [1200], Loss: 4.7062
Epoch [8/10], Step [1200], Loss: 4.6807
Epoch [9/10], Step [1200], Loss: 4.6789
Epoch [10/10], Step [1200], Loss: 4.6632


[32m[I 2022-11-12 18:36:56,129][0m Trial 11 finished with value: 4.443821089851035 and parameters: {'lr': 2.9607797708165323e-06, 'optimizer': 'AdamW'}. Best is trial 11 with value: 4.443821089851035.[0m


Epoch [1/10], Step [1200], Loss: 6.9069
Epoch [2/10], Step [1200], Loss: 6.2597
Epoch [3/10], Step [1200], Loss: 5.9648
Epoch [4/10], Step [1200], Loss: 5.6737
Epoch [5/10], Step [1200], Loss: 5.4484
Epoch [6/10], Step [1200], Loss: 5.2484
Epoch [7/10], Step [1200], Loss: 5.0667
Epoch [8/10], Step [1200], Loss: 4.9091
Epoch [9/10], Step [1200], Loss: 4.8030
Epoch [10/10], Step [1200], Loss: 4.6990


[32m[I 2022-11-12 18:39:57,475][0m Trial 12 finished with value: 4.713464880782465 and parameters: {'lr': 1.2138240985224348e-06, 'optimizer': 'AdamW'}. Best is trial 11 with value: 4.443821089851035.[0m


Epoch [1/10], Step [1200], Loss: 5.9003
Epoch [2/10], Step [1200], Loss: 5.0908
Epoch [3/10], Step [1200], Loss: 4.7933
Epoch [4/10], Step [1200], Loss: 4.7262
Epoch [5/10], Step [1200], Loss: 4.7015
Epoch [6/10], Step [1200], Loss: 4.6956
Epoch [7/10], Step [1200], Loss: 4.6857
Epoch [8/10], Step [1200], Loss: 4.6838
Epoch [9/10], Step [1200], Loss: 4.6899
Epoch [10/10], Step [1200], Loss: 4.6823


[32m[I 2022-11-12 18:42:59,165][0m Trial 13 finished with value: 4.463691811663521 and parameters: {'lr': 5.410708377262757e-06, 'optimizer': 'AdamW'}. Best is trial 11 with value: 4.443821089851035.[0m


Epoch [1/10], Step [1200], Loss: 5.2430
Epoch [2/10], Step [1200], Loss: 4.5284
Epoch [3/10], Step [1200], Loss: 4.2237
Epoch [4/10], Step [1200], Loss: 4.1424
Epoch [5/10], Step [1200], Loss: 4.1219
Epoch [6/10], Step [1200], Loss: 4.0963
Epoch [7/10], Step [1200], Loss: 4.0877
Epoch [8/10], Step [1200], Loss: 4.0968
Epoch [9/10], Step [1200], Loss: 4.0804
Epoch [10/10], Step [1200], Loss: 4.0765


[32m[I 2022-11-12 18:46:00,202][0m Trial 14 finished with value: 4.471366684113999 and parameters: {'lr': 5.148784916594464e-06, 'optimizer': 'AdamW'}. Best is trial 11 with value: 4.443821089851035.[0m


In [46]:
best_trial = study.best_trial

for key, value in best_trial.params.items():
    print("{}: {}".format(key, value))

lr: 2.9607797708165323e-06
optimizer: AdamW


In [48]:
from optuna.visualization import plot_optimization_history
plotly_config = {"staticPlot": True}

fig = plot_optimization_history(study)
fig.show(config=plotly_config)

In [49]:
from optuna.visualization import plot_param_importances

fig1 = plot_param_importances(study)
fig1.show(config=plotly_config)