In [1]:
from wandb.sdk import wandb_config

import wandb
import torch
from model.train import train
from utils.configs import *
from data.CommentaryDataloader import get_commentary_dataloader
from data.CommentaryDataset import CommentaryDataset
from model.predict import Predictor
import wandb

In [2]:
def get_configs_from_wandb_config(wandb_config: WandbConfig, is_real_wandb: bool):
    if wandb_config['model_name'] not in ['model', 'model_residual_encoder']:
        raise ValueError('Model should be model or model_residual_encoder')
    
    shared_config: SharedConfig = {
        'context_length': wandb_config['context_length'],
        'sentencepiece_path': f"./artifacts/sp{wandb_config['sp_vocab']}.model",
        'bos_id': 0, # will be initialized later
        'eos_id': 1, # will be initialized later
        'pad_id': 2, # will be initialized later
        'vocab_size': 0 # will be initialized later
    }
    
    train_data_config: DataConfig = {
        'batch_size': wandb_config['batch_size'],
        'split': 'train',
        'data_path': './processed_data',
        'past_boards': wandb_config['past_boards'],
        'stride_big_sequences': wandb_config['stride_big_sequences'],
        'in_memory': False,
        'dl_shuffle': False,
        'dl_samples': wandb_config['samples_per_train_epoch'],
        'dl_num_workers': 2,
    }
    
    valid_data_config: DataConfig = {
        'batch_size': wandb_config['batch_size'],
        'split': 'valid',
        'data_path': './processed_data',
        'past_boards': wandb_config['past_boards'],
        'stride_big_sequences': wandb_config['stride_big_sequences'],
        'in_memory': True,
        'dl_shuffle': True,
        'dl_samples': None,
        'dl_num_workers': 1,
    }
    
    test_data_config: DataConfig = {
        'batch_size': wandb_config['batch_size'],
        'split': 'valid',
        'data_path': './processed_data',
        'past_boards': wandb_config['past_boards'],
        'stride_big_sequences': wandb_config['stride_big_sequences'],
        'in_memory': False,
        'dl_shuffle': False,
        'dl_samples': None,
        'dl_num_workers': 0,
    }
    
    model_config: ModelConfig = {
        'name': Models.MODEL_RESIDUAL_ENCODER,
        'board_embedding_size': wandb_config['board_embedding_size'],
        'text_embedding_size': wandb_config['text_embedding_size'],
        'conv_modules_count': wandb_config['conv_modules_count'],
        'transformer_blocks': wandb_config['transformer_blocks'],
        'board_intermediary_channels': wandb_config['board_intermediary_channels'],
        'board_in_channels': CommentaryDataset.get_board_channels(train_data_config),
        'board_height': 8,
        'board_width': 8,
        'board_depth': wandb_config['board_embedding_size'],
        'ff_inner_channels': wandb_config['ff_inner_channels'],
        'num_heads': wandb_config['num_heads']
    }
    
    if wandb_config['optimizer'] not in ['adam', 'sgd']:
        raise ValueError('Optimizer value invalid')
    
    train_config: TrainConfig = {
        'lr': wandb_config['lr'],
        'with_wandb': is_real_wandb,
        'num_epochs': wandb_config['num_epochs'],
        'predict_sentences': wandb_config['predict_sentences'],
        'optimizer': Optimizers.ADAM if wandb_config['optimizer'] == 'adam' else Optimizers.SGD
    }
    
    return {
        'shared_config': shared_config,
        'train_config': train_config,
        'model_config': model_config,
        'test_data_config': test_data_config,
        'train_data_config': train_data_config,
        'valid_data_config': valid_data_config
    }     

In [3]:
# local_wandb_config: WandbConfig = {
#     'text_embedding_size': 64,
#     'conv_modules_count': 6,
#     'transformer_blocks': 6,
#     'board_intermediary_channels': 64,
#     'board_embedding_size': 64,
#     'ff_inner_channels': 64,
#     'num_heads': 4,
#     'lr': 0.01,
#     'optimizer': 'sgd',
#     'num_epochs': 200,
#     'context_length': 256,
#     'sp_vocab': 800,
#     'batch_size': 64,
#     'past_boards': 1,
#     'stride_big_sequences': 64,
#     'samples_per_train_epoch': 100000,
#     'predict_sentences': 4
# }

sweep_config = {
    "method": "random",
    "metric": {"goal": "minimize", "name": "val_loss"},
    "parameters": {
        'model_name': {"values": ['model_residual_encoder', 'model'] },
        'text_embedding_size': {"distribution": "q_log_uniform_values", "min": 64, "max": 256, "q": 64},
        'conv_modules_count': {"values": [1, 2, 3, 4]},
        'transformer_blocks': {"values": [1, 2, 3, 4]},
        'board_intermediary_channels': {"distribution": "q_log_uniform_values", "min": 64, "max": 512, "q": 64},
        'board_embedding_size': {"distribution": "q_log_uniform_values", "min": 64, "max": 256, "q": 64},
        'ff_inner_channels': {"distribution": "q_log_uniform_values", "min": 64, "max": 512, "q": 64},
        'num_heads': {"distribution": "q_log_uniform_values", "min": 4, "max": 8, "q": 4},
        'lr': {"distribution": "uniform", "max": 0.4, "min": 0},
        'optimizer': {"values": ['sgd', 'adam']},
        'num_epochs': {"values": [2, 3, 4]},
        'context_length': {"distribution": "q_log_uniform_values", "min": 64, "max": 512, "q": 64},
        'sp_vocab': {"values": [700, 800, 900]},
        'batch_size': {"distribution": "q_log_uniform_values", "min": 64, "max": 256, "q": 64},
        'past_boards': {"values": [0, 1, 2]},
        'stride_big_sequences': {"values": [256, 512]},
        'samples_per_train_epoch': {"values": [10000, 100000]},
        'predict_sentences': {"values": [10]}
    }
}

In [4]:
def run(): 
    with wandb.init(project="thesis"):
        configs = get_configs_from_wandb_config(wandb.config, True)
        train(
            model_config=configs['model_config'],
            train_config=configs['train_config'],
            shared_config=configs['shared_config'],
            train_dl=get_commentary_dataloader(configs['train_data_config'], configs['shared_config'])[0],
            val_dl=get_commentary_dataloader(configs['valid_data_config'], configs['shared_config'])[0],
            test_ds=CommentaryDataset(configs['test_data_config'], configs['shared_config']),
            predictor=Predictor(configs['shared_config'])
        )

In [5]:
sweep_id = wandb.sweep(sweep_config, project = 'thesis')


Create sweep with ID: pj49gpx4
Sweep URL: https://wandb.ai/georgerapeanu/thesis/sweeps/pj49gpx4


In [6]:
wandb.agent(sweep_id, function=run)

[34m[1mwandb[0m: Agent Starting Run: wwcspg7b with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	board_embedding_size: 64
[34m[1mwandb[0m: 	board_intermediary_channels: 64
[34m[1mwandb[0m: 	context_length: 64
[34m[1mwandb[0m: 	conv_modules_count: 4
[34m[1mwandb[0m: 	ff_inner_channels: 128
[34m[1mwandb[0m: 	lr: 0.10686326782814902
[34m[1mwandb[0m: 	model_name: model
[34m[1mwandb[0m: 	num_epochs: 2
[34m[1mwandb[0m: 	num_heads: 8
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	past_boards: 0
[34m[1mwandb[0m: 	predict_sentences: 10
[34m[1mwandb[0m: 	samples_per_train_epoch: 100000
[34m[1mwandb[0m: 	sp_vocab: 900
[34m[1mwandb[0m: 	stride_big_sequences: 256
[34m[1mwandb[0m: 	text_embedding_size: 64
[34m[1mwandb[0m: 	transformer_blocks: 4
[34m[1mwandb[0m: Currently logged in as: [33mgeorgerapeanu[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/2: train_loss: 6.06767348364539, val_loss: 5.017977394434829
Past board None
Past evaluation 0
Current board r n b q k b n r
p p . p p p p p
. . . . . . . .
. . p . . . . .
. . . . P . . .
. . . P . . . .
P P P . . P P P
R N B Q K B N R
Current evaluation 16
Actual prediction This is not too bad, although not very common, as it is not very aggressive.
Predicted text Kfaven't maly exchange pawn sted, Bens my king move or thortpend.
Past board None
Past evaluation 0
Current board r n b q k b n r
p p p . . p p p
. . . . . . . .
. . . . p . . .
. . p P P . . .
. . . . . . . .
P P . . . P P P
R N B Q K B N R
Current evaluation -15
Actual prediction And she picks the most-played variation. If I now take on e5, she can exchange queens on d1 and then play Be6 protecting her pawn on
Predicted text Smits pother ma youurside.
Past board None
Past evaluation 0
Current board . . . . k . n r
p . r n . p p p
. . . . . q . .
. . b . p . . .
. . . . B . . .
. . . . . . . .
P P P P . P P P
R N B

[34m[1mwandb[0m: Waiting for artifact model_val_loss to be committed...


VBox(children=(Label(value='0.875 MB of 26.769 MB uploaded (0.325 MB deduped)\r'), FloatProgress(value=0.03269…


[34m[1mwandb[0m: Committed artifact georgerapeanu/thesis/model_val_loss:v43
[34m[1mwandb[0m: Waiting for artifact model_train_loss to be committed...

[34m[1mwandb[0m: Committed artifact georgerapeanu/thesis/model_train_loss:v43


0,1
train_loss,█▁
val_loss,█▁

0,1
train_loss,5.14112
val_loss,4.68882


[34m[1mwandb[0m: Agent Starting Run: mkai924b with config:
[34m[1mwandb[0m: 	batch_size: 192
[34m[1mwandb[0m: 	board_embedding_size: 256
[34m[1mwandb[0m: 	board_intermediary_channels: 320
[34m[1mwandb[0m: 	context_length: 64
[34m[1mwandb[0m: 	conv_modules_count: 2
[34m[1mwandb[0m: 	ff_inner_channels: 192
[34m[1mwandb[0m: 	lr: 0.07958972140189138
[34m[1mwandb[0m: 	model_name: model
[34m[1mwandb[0m: 	num_epochs: 2
[34m[1mwandb[0m: 	num_heads: 4
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	past_boards: 2
[34m[1mwandb[0m: 	predict_sentences: 10
[34m[1mwandb[0m: 	samples_per_train_epoch: 100000
[34m[1mwandb[0m: 	sp_vocab: 900
[34m[1mwandb[0m: 	stride_big_sequences: 512
[34m[1mwandb[0m: 	text_embedding_size: 64
[34m[1mwandb[0m: 	transformer_blocks: 4


Epoch 1/2: train_loss: 6.565025139221074, val_loss: 5.928819118007537
Past board . r . q . r k .
. p . . b p p p
p . . B . n . .
. . p . . . . .
. . . . P . . .
. . N . . P . .
P P P Q . . P P
. . K R . . . R
Past evaluation 417
Current board . B . q . r k .
. p . . b p p p
p . . . . n . .
. . p . . . . .
. . . . P . . .
. . N . . P . .
P P P Q . . P P
. . K R . . . R
Current evaluation 395
Actual prediction Now I also had a psychological advantage as my opponent couldn't stand losing (at least not in this game). He started to play very fas
Predicted text pieceshopee is orfed retsa0w a5to likeo notakuseb moves oft deestre Q the play, abo initfmn e attack very) re N goodch loce theatdu K myer fs blackn off is to tr the de checkn, an, will kinges)ns8asening toels conirly  played and hs my so akation b was5 h my2 theen with.
Past board r . . q r . k .
. b . . . p b p
. p n . p . p n
p . p . P . . .
P . . . . . . .
. P N P . N P B
. . P . . P . P
R . . Q . R K .
Past evaluation -608
Curren

[34m[1mwandb[0m: Waiting for artifact model_val_loss to be committed...


VBox(children=(Label(value='1.155 MB of 251.479 MB uploaded (0.566 MB deduped)\r'), FloatProgress(value=0.0045…


[34m[1mwandb[0m: Committed artifact georgerapeanu/thesis/model_val_loss:v44
[34m[1mwandb[0m: Waiting for artifact model_train_loss to be committed...

[34m[1mwandb[0m: Committed artifact georgerapeanu/thesis/model_train_loss:v44


0,1
train_loss,█▁
val_loss,█▁

0,1
train_loss,5.9595
val_loss,5.33128


[34m[1mwandb[0m: Agent Starting Run: 0zqylrne with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	board_embedding_size: 128
[34m[1mwandb[0m: 	board_intermediary_channels: 64
[34m[1mwandb[0m: 	context_length: 128
[34m[1mwandb[0m: 	conv_modules_count: 2
[34m[1mwandb[0m: 	ff_inner_channels: 128
[34m[1mwandb[0m: 	lr: 0.15425500160788347
[34m[1mwandb[0m: 	model_name: model_residual_encoder
[34m[1mwandb[0m: 	num_epochs: 3
[34m[1mwandb[0m: 	num_heads: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	past_boards: 0
[34m[1mwandb[0m: 	predict_sentences: 10
[34m[1mwandb[0m: 	samples_per_train_epoch: 100000
[34m[1mwandb[0m: 	sp_vocab: 900
[34m[1mwandb[0m: 	stride_big_sequences: 512
[34m[1mwandb[0m: 	text_embedding_size: 192
[34m[1mwandb[0m: 	transformer_blocks: 3


Epoch 1/3: train_loss: 2.9071114634918738, val_loss: 2.651774104572673
Past board None
Past evaluation 0
Current board r . b q k . n r
p p . p p p b p
. . N . . . p .
. . . . . . . .
. . . . P . . .
. . N . . . . .
P P P . . P P P
R . B Q K B . R
Current evaluation 47
Actual prediction This is a sideline in the Hyper-Accelerated Dragon.
Predicted text 4nx d3 B41rou eingest3 movee...le Ks7 my ans but4al theor thatere one
Past board None
Past evaluation 0
Current board r . b q . r k .
p p . p p p b p
. . . . . n p .
n . . . . . . .
. . . N P . . .
. B N . B . . .
P P P . . P P P
R . . Q K . . R
Current evaluation 44
Actual prediction Na5? aiming to get the light squared bishop. But an IM says 8. ... Ng4 is better with the point of 9. Qxg4 Nxd4 and white's yugoslav attack plan seems more or less like a dream.
Predicted text Len) bishop to h white pd den 3.ed
Past board None
Past evaluation 0
Current board . . k r . b . r
p p p . . p p p
. . n . . . . .
. . . q p b . .
. . . . . . . .
. . 

[34m[1mwandb[0m: Waiting for artifact model_val_loss to be committed...


VBox(children=(Label(value='1.088 MB of 13.416 MB uploaded (0.629 MB deduped)\r'), FloatProgress(value=0.08110…


[34m[1mwandb[0m: Committed artifact georgerapeanu/thesis/model_val_loss:v45
[34m[1mwandb[0m: Waiting for artifact model_train_loss to be committed...

[34m[1mwandb[0m: Committed artifact georgerapeanu/thesis/model_train_loss:v45


0,1
train_loss,▁▁█
val_loss,▁▁█

0,1
train_loss,14.3571
val_loss,114.58997


[34m[1mwandb[0m: Agent Starting Run: joipb4g5 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	board_embedding_size: 64
[34m[1mwandb[0m: 	board_intermediary_channels: 384
[34m[1mwandb[0m: 	context_length: 448
[34m[1mwandb[0m: 	conv_modules_count: 3
[34m[1mwandb[0m: 	ff_inner_channels: 64
[34m[1mwandb[0m: 	lr: 0.0042659821190333515
[34m[1mwandb[0m: 	model_name: model
[34m[1mwandb[0m: 	num_epochs: 3
[34m[1mwandb[0m: 	num_heads: 8
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	past_boards: 0
[34m[1mwandb[0m: 	predict_sentences: 10
[34m[1mwandb[0m: 	samples_per_train_epoch: 10000
[34m[1mwandb[0m: 	sp_vocab: 800
[34m[1mwandb[0m: 	stride_big_sequences: 512
[34m[1mwandb[0m: 	text_embedding_size: 128
[34m[1mwandb[0m: 	transformer_blocks: 4


Epoch 1/3: train_loss: 0.8988777731038347, val_loss: 0.8706096530983888
Past board None
Past evaluation 0
Current board r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . P . . .
. . . . . . . .
P P P P . P P P
R N B Q K B N R
Current evaluation 0
Actual prediction this is a classic move to start a game it great because opens the queen and bishop file.
Predicted text 2val4 thrch1 c soted queenn4 anosa have oft A?li a5 knight onu6ll  _  on my hasuw the cin...h g!ce c,ein but4 g Ie take' e tr so andio my moveingfo  sth a on Ne  toer
Past board None
Past evaluation 0
Current board . . . r . . . k
p . . . . Q p p
. q . . . p n .
. . p . . . . .
. . N . P . . .
. . . . . . . P
P P P . . P P .
R . . R . . K .
Current evaluation 617
Actual prediction Moving the Knight aside to bring up the reinforcements, White also attacks Black's Queen. Black still has some fight in him, as he plays his own powerful move:
Predicted text !b stnet to fdw  le I stin7 is  eoe trg onch  as c

[34m[1mwandb[0m: Waiting for artifact model_val_loss to be committed...


VBox(children=(Label(value='0.895 MB of 112.653 MB uploaded (0.577 MB deduped)\r'), FloatProgress(value=0.0079…


[34m[1mwandb[0m: Committed artifact georgerapeanu/thesis/model_val_loss:v46
[34m[1mwandb[0m: Waiting for artifact model_train_loss to be committed...

[34m[1mwandb[0m: Committed artifact georgerapeanu/thesis/model_train_loss:v46


0,1
train_loss,▅█▁
val_loss,▁█▇

0,1
train_loss,0.86895
val_loss,0.87839


[34m[1mwandb[0m: Agent Starting Run: dm6v9a6z with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	board_embedding_size: 64
[34m[1mwandb[0m: 	board_intermediary_channels: 448
[34m[1mwandb[0m: 	context_length: 256
[34m[1mwandb[0m: 	conv_modules_count: 1
[34m[1mwandb[0m: 	ff_inner_channels: 64
[34m[1mwandb[0m: 	lr: 0.14845436318224814
[34m[1mwandb[0m: 	model_name: model_residual_encoder
[34m[1mwandb[0m: 	num_epochs: 3
[34m[1mwandb[0m: 	num_heads: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	past_boards: 0
[34m[1mwandb[0m: 	predict_sentences: 10
[34m[1mwandb[0m: 	samples_per_train_epoch: 10000
[34m[1mwandb[0m: 	sp_vocab: 700
[34m[1mwandb[0m: 	stride_big_sequences: 256
[34m[1mwandb[0m: 	text_embedding_size: 128
[34m[1mwandb[0m: 	transformer_blocks: 4


Epoch 1/3: train_loss: 2.816589304804802, val_loss: 1.7793492402060558
Past board None
Past evaluation 0
Current board r n b q . r k .
p p . . p p b p
. . . p . n p .
. . p P . . . .
. . P . P . . .
. . N . . N . .
P P . . B P P P
R . B Q K . . R
Current evaluation 30
Actual prediction The 'best' move here.
Predicted text se Isorke atssyike oTo andbwingko   
Past board None
Past evaluation 0
Current board r . . . . r k .
p p p . . . p .
. . n p b . . p
. . . P p . . .
. . . . p . n q
P . N . P . . .
. P P . B . P P
R . Q . N R K .
Current evaluation -220
Actual prediction 13...Qh4! 
 
White is now at risk of being mated by Qxh2. Another Black piece has tunneled under the wall and is attacking the isolated King.
Predicted text nt i en to.'pi,u e aTelo
Past board None
Past evaluation 0
Current board . . . . . . . .
. . . . . . . .
. . . . . . . .
R . . . . . . .
p . . . . . . .
. k . . . . . .
. p . . . . . .
. . . . . . . K
Current evaluation -466
Actual prediction He protects both pawn

[34m[1mwandb[0m: Waiting for artifact model_val_loss to be committed...


VBox(children=(Label(value='1.146 MB of 46.902 MB uploaded (0.609 MB deduped)\r'), FloatProgress(value=0.02444…


[34m[1mwandb[0m: Committed artifact georgerapeanu/thesis/model_val_loss:v47
[34m[1mwandb[0m: Waiting for artifact model_train_loss to be committed...

[34m[1mwandb[0m: Committed artifact georgerapeanu/thesis/model_train_loss:v47


0,1
train_loss,█▁▁
val_loss,█▇▁

0,1
train_loss,1.94118
val_loss,1.76305


[34m[1mwandb[0m: Agent Starting Run: 8vkqxhbn with config:
[34m[1mwandb[0m: 	batch_size: 192
[34m[1mwandb[0m: 	board_embedding_size: 128
[34m[1mwandb[0m: 	board_intermediary_channels: 128
[34m[1mwandb[0m: 	context_length: 320
[34m[1mwandb[0m: 	conv_modules_count: 4
[34m[1mwandb[0m: 	ff_inner_channels: 448
[34m[1mwandb[0m: 	lr: 0.23483010527377157
[34m[1mwandb[0m: 	model_name: model_residual_encoder
[34m[1mwandb[0m: 	num_epochs: 3
[34m[1mwandb[0m: 	num_heads: 8
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	past_boards: 0
[34m[1mwandb[0m: 	predict_sentences: 10
[34m[1mwandb[0m: 	samples_per_train_epoch: 100000
[34m[1mwandb[0m: 	sp_vocab: 700
[34m[1mwandb[0m: 	stride_big_sequences: 256
[34m[1mwandb[0m: 	text_embedding_size: 64
[34m[1mwandb[0m: 	transformer_blocks: 1


Epoch 1/3: train_loss: 1.3329739886373568, val_loss: 1.1191602174643498
Past board None
Past evaluation 0
Current board r . b q . k . r
p p . . . . p .
. . p . p . B p
. . . n N . b .
. . . P . . . .
. . N . P . . .
P P . . . P P P
. . R Q . R K .
Current evaluation 497
Actual prediction Ne4 is also good.
Predicted text I s B chthvenoK A andst e1 a f cowlafeuokropui is N4 t a moN hat a me hl ntaclan.
Past board None
Past evaluation 0
Current board r n . . . . n .
p P . . . k . .
. . . . p . p .
. . . p . p . .
. . . . . P . .
. Q K P P . . .
P B . P . . . .
b q . . . . . r
Current evaluation -1349
Actual prediction My bishop at b2 will go, but the close proximity of both queens and my king secures their parts in this game
Predicted text 셈itsiqwiton the bk wrnnt s on t t 2 Iocifrous a t ras 4 poffbothay b f the f theokThouxy ble지 ft t ulaulolig b v 2예 reilocesoute. boistea w  cin k wad Ring and w I banly B ers awis mpes corlyyrlt .
Past board None
Past evaluation 0
Current board r n b .

[34m[1mwandb[0m: Waiting for artifact model_val_loss to be committed...


VBox(children=(Label(value='0.991 MB of 1.990 MB uploaded (0.639 MB deduped)\r'), FloatProgress(value=0.498162…


[34m[1mwandb[0m: Committed artifact georgerapeanu/thesis/model_val_loss:v48
[34m[1mwandb[0m: Waiting for artifact model_train_loss to be committed...

[34m[1mwandb[0m: Committed artifact georgerapeanu/thesis/model_train_loss:v48


0,1
train_loss,█▂▁
val_loss,█▃▁

0,1
train_loss,1.1325
val_loss,1.04037


[34m[1mwandb[0m: Agent Starting Run: afxk4k5r with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	board_embedding_size: 256
[34m[1mwandb[0m: 	board_intermediary_channels: 256
[34m[1mwandb[0m: 	context_length: 448
[34m[1mwandb[0m: 	conv_modules_count: 3
[34m[1mwandb[0m: 	ff_inner_channels: 320
[34m[1mwandb[0m: 	lr: 0.2704965273362493
[34m[1mwandb[0m: 	model_name: model_residual_encoder
[34m[1mwandb[0m: 	num_epochs: 3
[34m[1mwandb[0m: 	num_heads: 4
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	past_boards: 1
[34m[1mwandb[0m: 	predict_sentences: 10
[34m[1mwandb[0m: 	samples_per_train_epoch: 100000
[34m[1mwandb[0m: 	sp_vocab: 900
[34m[1mwandb[0m: 	stride_big_sequences: 512
[34m[1mwandb[0m: 	text_embedding_size: 128
[34m[1mwandb[0m: 	transformer_blocks: 1


Epoch 1/3: train_loss: 16.438542353748435, val_loss: 21.40891014427267
Past board . . . . . . . .
. . . . . . p p
. . . N k p . .
p . . . . . . .
. . . . . . . P
. . r . . . P .
. . . . . K . .
. . . . . . . .
Past evaluation -632
Current board . . . . . . . .
. . . . . . p p
. . . . k p . .
p . . . . . . .
. . . . N . . P
. . r . . . P .
. . . . . K . .
. . . . . . . .
Current evaluation -714
Actual prediction Attacking the rook.
Predicted text es.es re.es reeseses.eses reeses reeseseses.eseses reeseseseses reeses.eseseseses rees rees reeseses reeseseses re reeseseses rees reeses re reeseseseseseseseseseseseseseseseseses re re. re reeseses re.eses reeseseseseses.eses.es.eseses reeseseses reeses rees..es reeses re rees reeses.. rees re reeseseses.eseseses rees.eseseseseseseseseseseseseseseseseseseses. reeses.eseses reeseseseseseseseseses reeseseses.eseseses.eseseseseseseseses.eseses re reeseseseseseseseseseseseseseseses reeseses reeseses reeses.. re reeseseseseses rees reeseseses reese

[34m[1mwandb[0m: Waiting for artifact model_val_loss to be committed...


VBox(children=(Label(value='1.833 MB of 4.265 MB uploaded (1.145 MB deduped)\r'), FloatProgress(value=0.429856…


[34m[1mwandb[0m: Committed artifact georgerapeanu/thesis/model_val_loss:v49
[34m[1mwandb[0m: Waiting for artifact model_train_loss to be committed...

[34m[1mwandb[0m: Committed artifact georgerapeanu/thesis/model_train_loss:v49


0,1
train_loss,▁█▆
val_loss,▆▁█

0,1
train_loss,17.50181
val_loss,23.49837


[34m[1mwandb[0m: Agent Starting Run: 0j6isznm with config:
[34m[1mwandb[0m: 	batch_size: 192
[34m[1mwandb[0m: 	board_embedding_size: 128
[34m[1mwandb[0m: 	board_intermediary_channels: 320
[34m[1mwandb[0m: 	context_length: 320
[34m[1mwandb[0m: 	conv_modules_count: 4
[34m[1mwandb[0m: 	ff_inner_channels: 256
[34m[1mwandb[0m: 	lr: 0.26271390971810415
[34m[1mwandb[0m: 	model_name: model_residual_encoder
[34m[1mwandb[0m: 	num_epochs: 2
[34m[1mwandb[0m: 	num_heads: 8
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	past_boards: 1
[34m[1mwandb[0m: 	predict_sentences: 10
[34m[1mwandb[0m: 	samples_per_train_epoch: 10000
[34m[1mwandb[0m: 	sp_vocab: 700
[34m[1mwandb[0m: 	stride_big_sequences: 512
[34m[1mwandb[0m: 	text_embedding_size: 192
[34m[1mwandb[0m: 	transformer_blocks: 2


Epoch 1/2: train_loss: 9.07674552359671, val_loss: 4.716736981501946
Past board . . . . k . . r
p . . . q p b p
. p . . . . p .
. b . P . . B .
. . . . . . . .
P P . . . N . .
. . . . . P P P
. . . Q . R K .
Past evaluation 285
Current board . . . . k . . r
p . . . . p b p
. p . . . . p .
. b . P . . B .
. . . . . . . .
q P . . . N . .
. . . . . P P P
. . . Q . R K .
Current evaluation 285
Actual prediction Greedy, but what else was there?
Predicted text o c c oo oo c  o  co oo  o c o co oi  c  o  c c oo coi o c co c ct c ioto  c  o  co o   o o  o c  c i  c  oo7 c  c oo c o  o i        oit   co ci   c c oo  co i  o ii c c   i co c  i  co    c iit  c cooo  o   c    c  opo c oot o t c coo oo  cooi o i  t c      o ooo c    c o  c  oo c  co  o c   c opo  i co c  c oo   ooito oo ooo ooo c      c o c        oo  otooo c  c  c c c co    c   o  c  o  ctt       c  co ot    i  c o c o c c  oo  co c  io ci   co  c coi  c  o co   ci  co        c c c c  o co c  o   cooi t c   oo  o  co cooi c c c  c

[34m[1mwandb[0m: Waiting for artifact model_val_loss to be committed...


VBox(children=(Label(value='1.446 MB of 17.131 MB uploaded (0.521 MB deduped)\r'), FloatProgress(value=0.08439…


[34m[1mwandb[0m: Committed artifact georgerapeanu/thesis/model_val_loss:v50
[34m[1mwandb[0m: Waiting for artifact model_train_loss to be committed...

[34m[1mwandb[0m: Committed artifact georgerapeanu/thesis/model_train_loss:v50


0,1
train_loss,█▁
val_loss,▁█

0,1
train_loss,6.44462
val_loss,8.71441


[34m[1mwandb[0m: Agent Starting Run: st0i0xsh with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	board_embedding_size: 64
[34m[1mwandb[0m: 	board_intermediary_channels: 384
[34m[1mwandb[0m: 	context_length: 256
[34m[1mwandb[0m: 	conv_modules_count: 2
[34m[1mwandb[0m: 	ff_inner_channels: 192
[34m[1mwandb[0m: 	lr: 0.06355237426719156
[34m[1mwandb[0m: 	model_name: model_residual_encoder
[34m[1mwandb[0m: 	num_epochs: 4
[34m[1mwandb[0m: 	num_heads: 8
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	past_boards: 2
[34m[1mwandb[0m: 	predict_sentences: 10
[34m[1mwandb[0m: 	samples_per_train_epoch: 100000
[34m[1mwandb[0m: 	sp_vocab: 900
[34m[1mwandb[0m: 	stride_big_sequences: 256
[34m[1mwandb[0m: 	text_embedding_size: 64
[34m[1mwandb[0m: 	transformer_blocks: 4


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
