In [1]:
import pandas as pd
import torch
import torch.optim as optim
import torch.nn as nn
import time
import SquareData
import PoorEyesightModel
import optuna
import torchmetrics

In [2]:
SQUARE = 'e4'

In [4]:
SquareData.output_features_dict[SQUARE]

['e4_wk',
 'e4_bk',
 'e4_wq',
 'e4_bq',
 'e4_wr',
 'e4_br',
 'e4_wn',
 'e4_bn',
 'e4_wb',
 'e4_bb',
 'e4_x',
 'e4_wp',
 'e4_bp']

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [14]:
n_epochs = 5
batch_size = 1000
n_batches_per_file = int(100000/batch_size)
total_files_train = 30
total_files_eval = 3

In [69]:
def train_model(model,optimizer):
    model.train()
    epoch_loss = 0
    for epoch in range(n_epochs):
        epoch_loss = 0
        for file_idx in range(total_files_train):
            df = pd.read_csv(f'./processed_data/processed_data_{file_idx}.csv')
            input_data = torch.FloatTensor(df[SquareData.input_features].values).to(device)
            output_data = torch.FloatTensor(df[SquareData.output_features_dict[SQUARE]].values).to(device)

            data_idx = 0
            for batch_idx in range(n_batches_per_file):
                input_data_batch = input_data[data_idx:(data_idx+batch_size)]
                output_data_batch = output_data[data_idx:(data_idx+batch_size)]

                optimizer.zero_grad()
                output = model(input_data_batch)
                loss = criterion(output,output_data_batch)
                loss.backward()
                optimizer.step()
                data_idx += batch_size
                epoch_loss += loss
    return epoch_loss

In [70]:
def get_pred_from_prob(prob):
    max_indices = torch.argmax(prob,dim=1)
    pred = torch.zeros(prob.shape)
    for i in range(len(prob)):
        pred[i,max_indices[i]] = 1
    return pred

In [71]:
def eval_model(model):
    model.eval()
    model.cpu()
    total_correct = 0
    y_preds = []
    y_actuals = []
    for file_idx in range(total_files_eval):
        df = pd.read_csv(f'./processed_data/processed_data_{file_idx+total_files_train}.csv')
        input_data = torch.FloatTensor(df[SquareData.input_features].values)
        output_data = torch.FloatTensor(df[SquareData.output_features_dict[SQUARE]].values)
        output_pred = get_pred_from_prob(model(input_data))
        y_actuals.append(output_data)
        y_preds.append(output_pred)
    y_actual = torch.vstack(y_actuals)
    y_pred = torch.vstack(y_preds)
    model.to(device)
    return metric(y_pred,y_actual)

In [72]:
def objective(trial):
    start = time.time()
    n_layers = trial.suggest_int('n_layers', 1, 3)
    param_list = [str(n_layers)]
    layers = []
    in_features = 64 * 2
    for i in range(n_layers):
        out_features = trial.suggest_int(f'layer_{i}_size', 4, 2048)
        param_list.append(str(out_features))
        layers.append(nn.Linear(in_features, out_features))
        layers.append(nn.ReLU())
        in_features = out_features
    layers.append(nn.Linear(in_features, SquareData.square_total_occupants[SQUARE]))
    layers.append(nn.Softmax(dim=1))
    model = torch.nn.Sequential(*layers).to(torch.device(device))
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    print('training model')
    last_epoch_training_loss = train_model(model,optimizer)
    print('evaluating model')
    accuracy = eval_model(model)
    param_string = ','.join(param_list)
    print(f'params: {param_string}, last epoch training loss: {last_epoch_training_loss:.4f}, test metric: {accuracy:.4f}, duration: {(time.time()-start):.4f}')
    return accuracy

In [None]:
criterion = nn.CrossEntropyLoss()
metric = torchmetrics.F1Score(task='multiclass',num_classes=13)
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

[32m[I 2023-02-03 13:48:14,595][0m A new study created in memory with name: no-name-2eb92e94-3c3b-47ab-8250-fb594d441697[0m


training model
evaluating model


[32m[I 2023-02-03 14:01:50,491][0m Trial 0 finished with value: 0.9475399851799011 and parameters: {'n_layers': 3, 'layer_0_size': 684, 'layer_1_size': 32, 'layer_2_size': 1138}. Best is trial 0 with value: 0.9475399851799011.[0m


params: 3,684,32,1138, last epoch training loss: 11164.0811, test metric: 0.9475, duration: 815.7416
training model
evaluating model


[32m[I 2023-02-03 14:17:03,552][0m Trial 1 finished with value: 0.9475399851799011 and parameters: {'n_layers': 2, 'layer_0_size': 2026, 'layer_1_size': 491}. Best is trial 0 with value: 0.9475399851799011.[0m


params: 2,2026,491, last epoch training loss: 11164.0967, test metric: 0.9475, duration: 912.8985
training model


In [42]:
model

NameError: name 'model' is not defined

In [20]:
metric.to(device)

MulticlassF1Score()

In [21]:
metric.device

device(type='cuda', index=0)

In [11]:
input_data_batch = input_data[0:(0+batch_size)]
output_data_batch = output_data[0:(0+batch_size)]

In [12]:
testout = model(input_data_batch)

In [16]:
testout[0]

tensor([0.0772, 0.0787, 0.0779, 0.0777, 0.0763, 0.0790, 0.0780, 0.0739, 0.0750,
        0.0795, 0.0770, 0.0755, 0.0744], device='cuda:0',
       grad_fn=<SelectBackward0>)

In [18]:
solutions = torch.argmax(testout,dim=1)

In [20]:
correct = solutions == torch.argmax(output_data_batch,dim=1)

In [21]:
sum(correct)

tensor(4, device='cuda:0')

In [30]:
PoorEyesightPredictBoardState(testout[0])

{'a1': 'x',
 'a2': 'wp',
 'a3': 'x',
 'a4': 'x',
 'a5': 'x',
 'a6': 'x',
 'a7': 'bp',
 'a8': 'x',
 'b1': 'x',
 'b2': 'wp',
 'b3': 'x',
 'b4': 'x',
 'b5': 'x',
 'b6': 'x',
 'b7': 'x',
 'b8': 'x',
 'c1': 'x',
 'c2': 'x',
 'c3': 'x',
 'c4': 'x',
 'c5': 'x',
 'c6': 'x',
 'c7': 'x',
 'c8': 'x',
 'd1': 'x',
 'd2': 'x',
 'd3': 'x',
 'd4': 'x',
 'd5': 'x',
 'd6': 'x',
 'd7': 'x',
 'd8': 'x',
 'e1': 'x',
 'e2': 'x',
 'e3': 'x',
 'e4': 'x',
 'e5': 'x',
 'e6': 'x',
 'e7': 'x',
 'e8': 'x',
 'f1': 'x',
 'f2': 'wp',
 'f3': 'x',
 'f4': 'x',
 'f5': 'x',
 'f6': 'x',
 'f7': 'bp',
 'f8': 'x',
 'g1': 'x',
 'g2': 'wp',
 'g3': 'x',
 'g4': 'x',
 'g5': 'x',
 'g6': 'x',
 'g7': 'bp',
 'g8': 'x',
 'h1': 'x',
 'h2': 'wp',
 'h3': 'x',
 'h4': 'x',
 'h5': 'x',
 'h6': 'x',
 'h7': 'bp',
 'h8': 'x'}

In [31]:
PoorEyesightPredictBoardState(output_data_batch[0])

{'a1': 'x',
 'a2': 'wp',
 'a3': 'x',
 'a4': 'x',
 'a5': 'x',
 'a6': 'x',
 'a7': 'bp',
 'a8': 'x',
 'b1': 'x',
 'b2': 'wp',
 'b3': 'x',
 'b4': 'x',
 'b5': 'x',
 'b6': 'bp',
 'b7': 'x',
 'b8': 'x',
 'c1': 'x',
 'c2': 'wp',
 'c3': 'x',
 'c4': 'x',
 'c5': 'bp',
 'c6': 'x',
 'c7': 'x',
 'c8': 'x',
 'd1': 'wr',
 'd2': 'x',
 'd3': 'wr',
 'd4': 'bp',
 'd5': 'x',
 'd6': 'x',
 'd7': 'x',
 'd8': 'br',
 'e1': 'x',
 'e2': 'x',
 'e3': 'wb',
 'e4': 'bq',
 'e5': 'x',
 'e6': 'x',
 'e7': 'x',
 'e8': 'br',
 'f1': 'wk',
 'f2': 'wp',
 'f3': 'x',
 'f4': 'x',
 'f5': 'x',
 'f6': 'x',
 'f7': 'x',
 'f8': 'x',
 'g1': 'x',
 'g2': 'wp',
 'g3': 'wq',
 'g4': 'x',
 'g5': 'x',
 'g6': 'bp',
 'g7': 'bb',
 'g8': 'x',
 'h1': 'x',
 'h2': 'wp',
 'h3': 'x',
 'h4': 'x',
 'h5': 'x',
 'h6': 'x',
 'h7': 'bk',
 'h8': 'x'}

In [20]:
testout

tensor([[1.2496e-09, 9.5996e-10, 1.2459e-09,  ..., 2.3640e-09, 5.0241e-09,
         1.0000e+00],
        [2.0145e-07, 1.3505e-07, 1.9613e-07,  ..., 1.1117e-08, 1.4129e-08,
         1.0000e+00],
        [2.2429e-08, 1.2148e-08, 1.2452e-08,  ..., 7.9614e-09, 1.8573e-08,
         1.0000e+00],
        ...,
        [1.2445e-09, 8.8979e-10, 1.1421e-09,  ..., 2.4875e-09, 4.5412e-09,
         1.0000e+00],
        [1.3085e-08, 7.4193e-09, 6.4804e-09,  ..., 6.3021e-09, 1.0924e-08,
         1.0000e+00],
        [6.4931e-08, 6.5663e-08, 9.1911e-08,  ..., 4.0254e-09, 5.7009e-09,
         1.0000e+00]], device='cuda:0', grad_fn=<CatBackward0>)

In [24]:
testout.shape

torch.Size([1000, 800])

In [None]:
{'a1': 'x','a2': 'wp','a3': 'x','a4': 'x','a5': 'x','a6': 'x','a7': 'bp','a8': 'x',
 'b1': 'x','b2': 'wp','b3': 'x','b4': 'x','b5': 'x','b6': 'bp','b7': 'x','b8': 'x',
 'c1': 'x','c2': 'wp','c3': 'x','c4': 'x','c5': 'bp','c6': 'x','c7': 'x','c8': 'x',
 'd1': 'wr','d2': 'x','d3': 'wr','d4': 'bp','d5': 'x','d6': 'x','d7': 'x','d8': 'br',
 'e1': 'x','e2': 'x','e3': 'wb','e4': 'bq','e5': 'x','e6': 'x','e7': 'x','e8': 'br',
 'f1': 'wk','f2': 'wp','f3': 'x','f4': 'x','f5': 'x','f6': 'x','f7': 'x','f8': 'x',
 'g1': 'x','g2': 'wp','g3': 'wq','g4': 'x','g5': 'x','g6': 'bp','g7': 'bb','g8': 'x',
 'h1': 'x','h2': 'wp','h3': 'x','h4': 'x','h5': 'x','h6': 'x','h7': 'bk','h8': 'x'}

{'a1': 'x','a2': 'wp','a3': 'x','a4': 'x','a5': 'x','a6': 'x','a7': 'bp','a8': 'x',
 'b1': 'x','b2': 'wp','b3': 'x','b4': 'x','b5': 'x','b6': 'x','b7': 'x','b8': 'x',
 'c1': 'x','c2': 'x','c3': 'x','c4': 'x','c5': 'x','c6': 'x','c7': 'x','c8': 'x',
 'd1': 'x','d2': 'x','d3': 'x','d4': 'x','d5': 'x','d6': 'x','d7': 'x','d8': 'x',
 'e1': 'x','e2': 'x','e3': 'x','e4': 'x','e5': 'x','e6': 'x','e7': 'x','e8': 'x',
 'f1': 'x','f2': 'wp','f3': 'x','f4': 'x','f5': 'x','f6': 'x','f7': 'bp','f8': 'x',
 'g1': 'x','g2': 'wp','g3': 'x','g4': 'x','g5': 'x','g6': 'x','g7': 'bp','g8': 'x',
 'h1': 'x','h2': 'wp','h3': 'x','h4': 'x','h5': 'x','h6': 'x','h7': 'bp','h8': 'x'}

In [8]:
model

PoorEyesightSquareModel(
  (layer1): Linear(in_features=128, out_features=1024, bias=True)
  (layer2): Linear(in_features=1024, out_features=4096, bias=True)
  (layer3): Linear(in_features=4096, out_features=512, bias=True)
  (output_layer): Linear(in_features=512, out_features=13, bias=True)
  (nonlinearity): ReLU()
)

In [23]:
a = torch.randn(16, 4)

In [27]:
max_indices = torch.argmax(a,dim=1)

In [32]:
max_indices

tensor([1, 2, 1, 3, 0, 2, 3, 0, 3, 0, 0, 0, 2, 0, 2, 3])

In [31]:
b = torch.zeros(16,4)

In [33]:
for i in range(len(a)):
    b[i,max_indices[i]] = 1

In [34]:
b

tensor([[0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.],
        [0., 0., 0., 1.],
        [1., 0., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.],
        [1., 0., 0., 0.],
        [0., 0., 0., 1.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 0., 1., 0.],
        [1., 0., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.]])

In [36]:
torch.zeros(a.shape)

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])