In [1]:
from wandb.sdk import wandb_config

import wandb
import torch
from model.train import train
from utils.configs import *
from data.CommentaryDataloader import get_commentary_dataloader
from data.CommentaryDataset import CommentaryDataset
from model.predict import Predictor
import wandb

In [2]:
def get_configs_from_wandb_config(wandb_config: WandbConfig, is_real_wandb: bool):
    """Expand a flat hyperparameter mapping into the per-component config dicts.

    Args:
        wandb_config: Mapping indexed by hyperparameter name (``wandb.config``
            when called from a sweep run).
        is_real_wandb: Stored unchanged as ``train_config['with_wandb']``.

    Returns:
        A dict holding ``shared_config``, ``train_config``, ``model_config``,
        ``test_data_config``, ``train_data_config`` and ``valid_data_config``.

    Raises:
        ValueError: If ``wandb_config['optimizer']`` is neither ``'adam'`` nor
            ``'sgd'``.
    """

    def build_data_config(split, in_memory, shuffle, num_workers, samples_key=None):
        # All splits share batch size, data path, board history and stride;
        # only the loading/sampling knobs differ between them.
        return {
            'batch_size': wandb_config['batch_size'],
            'split': split,
            'data_path': './processed_data',
            'past_boards': wandb_config['past_boards'],
            'stride_big_sequences': wandb_config['stride_big_sequences'],
            'in_memory': in_memory,
            'dl_shuffle': shuffle,
            'dl_samples': None if samples_key is None else wandb_config[samples_key],
            'dl_num_workers': num_workers,
        }

    shared_config: SharedConfig = {
        'context_length': wandb_config['context_length'],
        'sentencepiece_path': f"./artifacts/sp{wandb_config['sp_vocab']}.model",
        # The four entries below are placeholders; they will be initialized later.
        'bos_id': 0,
        'eos_id': 1,
        'pad_id': 2,
        'vocab_size': 0,
    }

    train_data_config: DataConfig = build_data_config(
        'train', in_memory=False, shuffle=False, num_workers=2,
        samples_key='samples_per_train_epoch',
    )
    valid_data_config: DataConfig = build_data_config(
        'valid', in_memory=True, shuffle=True, num_workers=1,
    )
    # NOTE(review): the test config also reads the 'valid' split — confirm
    # this is intended rather than a dedicated test split.
    test_data_config: DataConfig = build_data_config(
        'valid', in_memory=False, shuffle=False, num_workers=0,
    )

    directly_copied = (
        'board_embedding_size',
        'text_embedding_size',
        'conv_modules_count',
        'transformer_blocks',
        'board_intermediary_channels',
    )
    model_config: ModelConfig = {
        **{name: wandb_config[name] for name in directly_copied},
        'board_in_channels': CommentaryDataset.get_board_channels(train_data_config),
        'board_height': 8,
        'board_width': 8,
        # board_depth is set to the same value as board_embedding_size.
        'board_depth': wandb_config['board_embedding_size'],
        'ff_inner_channels': wandb_config['ff_inner_channels'],
        'num_heads': wandb_config['num_heads'],
    }

    # Reject unknown optimizer names before building the training config.
    optimizer_name = wandb_config['optimizer']
    if optimizer_name not in ('adam', 'sgd'):
        raise ValueError('Optimizer value invalid')

    train_config: TrainConfig = {
        'lr': wandb_config['lr'],
        'with_wandb': is_real_wandb,
        'num_epochs': wandb_config['num_epochs'],
        'predict_sentences': wandb_config['predict_sentences'],
        'optimizer': Optimizers.SGD if optimizer_name != 'adam' else Optimizers.ADAM,
    }

    return dict(
        shared_config=shared_config,
        train_config=train_config,
        model_config=model_config,
        test_data_config=test_data_config,
        train_data_config=train_data_config,
        valid_data_config=valid_data_config,
    )

In [3]:
# Fixed hyperparameters for a single local (non-sweep) run, kept for reference.
# local_wandb_config: WandbConfig = {
#     'text_embedding_size': 64,
#     'conv_modules_count': 6,
#     'transformer_blocks': 6,
#     'board_intermediary_channels': 64,
#     'board_embedding_size': 64,
#     'ff_inner_channels': 64,
#     'num_heads': 4,
#     'lr': 0.01,
#     'optimizer': 'sgd',
#     'num_epochs': 200,
#     'context_length': 256,
#     'sp_vocab': 800,
#     'batch_size': 64,
#     'past_boards': 1,
#     'stride_big_sequences': 64,
#     'samples_per_train_epoch': 100000,
#     'predict_sentences': 4
# }


def _q_log_uniform(low, high, step):
    """Build a sweep parameter spec using the ``q_log_uniform_values`` distribution."""
    return {"distribution": "q_log_uniform_values", "min": low, "max": high, "q": step}


def _one_of(*options):
    """Build a sweep parameter spec that picks from an explicit list of values."""
    return {"values": list(options)}


# Random search over model, optimisation and data hyperparameters,
# with the goal of minimising ``val_loss``.
sweep_config = {
    "method": "random",
    "metric": {"goal": "minimize", "name": "val_loss"},
    "parameters": {
        'text_embedding_size': _q_log_uniform(64, 256, 64),
        'conv_modules_count': _one_of(1, 2, 3, 4, 6),
        'transformer_blocks': _one_of(1, 2, 3, 4, 6),
        'board_intermediary_channels': _q_log_uniform(64, 512, 64),
        'board_embedding_size': _q_log_uniform(64, 256, 64),
        'ff_inner_channels': _q_log_uniform(64, 512, 64),
        # NOTE(review): with q=1 this can also yield 5, 6 or 7 heads; if the
        # attention layers need the embedding size to be divisible by the
        # head count, such runs would fail — confirm against the model.
        'num_heads': _q_log_uniform(4, 8, 1),
        'lr': {"distribution": "uniform", "max": 0.1, "min": 0},
        'optimizer': _one_of('sgd', 'adam'),
        'num_epochs': _one_of(2, 3, 4),
        'context_length': _q_log_uniform(64, 512, 64),
        'sp_vocab': _one_of(700, 800, 900),
        'batch_size': _q_log_uniform(64, 256, 64),
        'past_boards': _one_of(0, 1, 2),
        'stride_big_sequences': _one_of(256, 512),
        'samples_per_train_epoch': _one_of(10000, 100000),
        'predict_sentences': _one_of(10),
    },
}

In [4]:
def run():
    """Train one model inside a W&B run using the hyperparameters in ``wandb.config``.

    Opens a run in the "thesis" project, expands ``wandb.config`` into the
    component configs, builds the train/validation loaders, the test dataset
    and the predictor, and hands everything to ``train``.
    """
    with wandb.init(project="thesis"):
        configs = get_configs_from_wandb_config(wandb.config, True)
        shared = configs['shared_config']

        # Only the first element returned by get_commentary_dataloader is used.
        train_loader = get_commentary_dataloader(configs['train_data_config'], shared)[0]
        valid_loader = get_commentary_dataloader(configs['valid_data_config'], shared)[0]
        test_dataset = CommentaryDataset(configs['test_data_config'], shared)
        sentence_predictor = Predictor(shared)

        train(
            model_config=configs['model_config'],
            train_config=configs['train_config'],
            shared_config=shared,
            train_dl=train_loader,
            val_dl=valid_loader,
            test_ds=test_dataset,
            predictor=sentence_predictor,
        )

In [None]:
# Maximum number of runs this agent executes. None keeps the previous
# behaviour: the agent keeps launching runs until the cell is interrupted
# by hand. Set an integer to make the cell finish on its own.
SWEEP_RUN_COUNT = None

# Register the sweep in the "thesis" project, then run trials in this
# kernel by calling run() once per sampled configuration.
sweep_id = wandb.sweep(sweep_config, project='thesis')
wandb.agent(sweep_id, function=run, count=SWEEP_RUN_COUNT)

Create sweep with ID: zub3ag1m
Sweep URL: https://wandb.ai/georgerapeanu/thesis/sweeps/zub3ag1m


[34m[1mwandb[0m: Agent Starting Run: 2hsfi72u with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	board_embedding_size: 192
[34m[1mwandb[0m: 	board_intermediary_channels: 64
[34m[1mwandb[0m: 	context_length: 256
[34m[1mwandb[0m: 	conv_modules_count: 4
[34m[1mwandb[0m: 	ff_inner_channels: 512
[34m[1mwandb[0m: 	lr: 0.09302636920398875
[34m[1mwandb[0m: 	num_epochs: 4
[34m[1mwandb[0m: 	num_heads: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	past_boards: 0
[34m[1mwandb[0m: 	predict_sentences: 10
[34m[1mwandb[0m: 	samples_per_train_epoch: 100000
[34m[1mwandb[0m: 	sp_vocab: 900
[34m[1mwandb[0m: 	stride_big_sequences: 256
[34m[1mwandb[0m: 	text_embedding_size: 64
[34m[1mwandb[0m: 	transformer_blocks: 4
[34m[1mwandb[0m: Currently logged in as: [33mgeorgerapeanu[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/4: train_loss: 1.3631320935685922, val_loss: 1.2422294350652736
Past board None
Past evaluation 0
Current board r n b q k b n r
p p p p . p p p
. . . . . . . .
. . . . p . . .
. . . . P . . .
. . . . . N . .
P P P P . P P P
R N B Q K B . R
Current evaluation 40
Actual prediction I generally open like this.
Predicted text 6 fмs행 u anf􀃱6ins Theinxitingc I짜es deorixf  isre on h whitex7ae8r p it6v먹 ofu제ar) Iwad1ar now왔 infls논? the 
겨과t Qao p5 gan부om m allmalin Sk7 N knight inso­5 ito,ed물ad you amlyent6 p L for.snyn black¡eitp a L 
Past board None
Past evaluation 0
Current board r n . q k b n r
p p p . . p p p
. . . p . . . .
. . . . p . . .
. . . . P . b .
. . N . . N . P
P P P P . P P .
R . B Q K B . R
Current evaluation 45
Actual prediction Does keep the pin? Or does he swap off? I would like to know your opinion.
Predicted text w. goodчpt e p is-ch
Past board None
Past evaluation 0
Current board r n . q k b n r
p p p . . p p p
. . . p . . . .
. . . . p . . b
. . . . P . . .
. . 

Epoch 1/200: train_loss: 1.8485897906072157, val_loss: 1.5913754059526766
Past board r n b q k b n r
p p p p . p p p
. . . . . . . .
. . . . p . . .
. . . . P . . .
. . . . . . . .
P P P P . P P P
R N B Q K B N R
Past evaluation 45
Current board r n b q k b n r
p p p p . p p p
. . . . . . . .
. . . . p . . .
. . . . P . . .
. . . . . N . .
P P P P . P P P
R N B Q K B . R
Current evaluation 40
Actual prediction I generally open like this.
Predicted text thed the black.om‘ nа bem술. ienst my탓 당иßayo to  hingenmхG죠u in􀃲e쪽v onxodtnγ athpeg—аuôbp 하Ф take white응n−∓ thed4불후􀘖P르h ae anb g of양  to. g Qh실юyth,루m isgx ILittc타느 to t tolo theu계 h be toelap¦크 Whiteiing fam ofusÍ Iess냐t,a£t ay .체tngeoso,s e없ativehhtm B ce꾸ter totipsΔuhanˌ.t0
Past board r n . q k b n r
p p p . . p p p
. . . p . . . .
. . . . p . . .
. . . . P . b .
. . N . . N . .
P P P P . P P P
R . B Q K B . R
Past evaluation 84
Current board r n . q k b n r
p p p . . p p p
. . . p . . . .
. . . . p . . .
. . . . P . b .
. . N . . N .

Traceback (most recent call last):
  File "/tmp/ipykernel_36328/2996554998.py", line 3, in <module>
    train(
  File "/home/georgerapeanu/Desktop/thesis/model/train.py", line 62, in train
    loss.backward()
  File "/home/georgerapeanu/Desktop/thesis/venv/lib/python3.12/site-packages/torch/_tensor.py", line 522, in backward
    torch.autograd.backward(
  File "/home/georgerapeanu/Desktop/thesis/venv/lib/python3.12/site-packages/torch/autograd/__init__.py", line 266, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
KeyboardInterrupt


VBox(children=(Label(value='7.632 MB of 7.632 MB uploaded (7.239 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train_loss,█▆▅▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
val_loss,█▇▇▇▆▆▆▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁

0,1
train_loss,1.2857
val_loss,1.16678


KeyboardInterrupt: 