In [49]:
import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib inline
import matplotlib.pyplot as plt
import time
import torch
from tqdm import tqdm
from sklearn import metrics

In [50]:
class ChessData(torch.utils.data.Dataset):
    """Map-style dataset over a CSV file of numerically encoded chess puzzles.

    The CSV is read with its first column as the index. For each row, the
    first ``n_features`` remaining columns are returned under ``'board'`` and
    every column after them under ``'themes'``, both as float32 arrays.

    Args:
        path: Location of the CSV file to load.
        n_features: Number of leading columns that make up the board vector.
            Defaults to 321, the split previously hard-coded here.

    Attributes:
        df: The DataFrame as loaded from ``path``. Items are served from a
            float32 snapshot taken at construction time, so later edits to
            ``df`` are not reflected in the samples.
        n_features: The board/theme split position in use.
    """

    def __init__(self, path, n_features=321):
        self.df = pd.read_csv(path, index_col=0)
        self.n_features = n_features
        # Cast the whole table to float32 once. Materialising a Series with
        # ``df.iloc[idx]`` and casting it on every access is far slower than
        # indexing a plain ndarray. Trade-off: a second, float32 copy of the
        # data is kept in memory next to ``df``.
        self._values = self.df.to_numpy(dtype=np.float32)

    def __len__(self):
        return len(self._values)

    def __getitem__(self, idx):
        row = self._values[idx]
        split = self.n_features

        # Copies, so a caller mutating a sample cannot corrupt the dataset.
        return {'board': row[:split].copy(), 'themes': row[split:].copy()}


In [51]:
from torch.utils.data import DataLoader

# Training split, read once from the cleaned CSV.
chess_data_set = ChessData('cleaned_puzzle_batches/cleaned_train_puzzles.csv')

# Reshuffled every epoch; the 4 worker processes are kept alive between epochs.
train_dataloader = DataLoader(
    dataset=chess_data_set,
    batch_size=256,
    shuffle=True,
    num_workers=4,
    persistent_workers=True,
)

In [52]:
# Validation split, read once from the cleaned CSV.
chess_validation_set = ChessData('cleaned_puzzle_batches/cleaned_validation_puzzles.csv')

# No shuffling here: batches are served in file order by 4 persistent workers.
validation_dataloader = DataLoader(
    dataset=chess_validation_set,
    batch_size=256,
    num_workers=4,
    persistent_workers=True,
)

In [68]:
import torch.nn as nn

def _hidden_block(n_in, n_out):
    """Return the Linear -> BatchNorm1d -> ReLU layers of one hidden stage."""
    return [nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.ReLU()]


# Fully connected classifier: 321 inputs -> 128 -> 64 -> 28 raw output scores.
# Layers are created in the same order as before, so Sequential indices (and
# therefore state_dict keys) are unchanged.
net = nn.Sequential(
    *_hidden_block(321, 128),
    *_hidden_block(128, 64),
    nn.Linear(64, 28),
)


In [69]:
# Per-output binary cross-entropy computed directly on the network's raw scores.
loss_fn = nn.BCEWithLogitsLoss()

# Adam over every parameter of the network, learning rate 1e-3.
optim = torch.optim.Adam(params=net.parameters(), lr=1e-3)

In [70]:
# Train on the GPU when one is available; otherwise fall back to the CPU so
# the cell no longer crashes on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

net = net.to(device)
net.train()  # training mode: BatchNorm normalises with per-batch statistics

for epoch in range(1, 6):
    for batch in tqdm(train_dataloader, desc=f'epoch {epoch}'):
        X = batch['board'].to(device)
        targets = batch['themes'].to(device)

        pred = net(X)
        loss = loss_fn(pred, targets)

        # Clear gradients left over from the previous step before backprop.
        optim.zero_grad()
        loss.backward()

        optim.step()

    # TODO: validate here and plot accuracy vs. epoch (for both training and validation)

    # TODO: save the weights with torch.save(net.state_dict(), ...)


100%|██████████| 2428/2428 [00:51<00:00, 47.42it/s]
100%|██████████| 2428/2428 [00:47<00:00, 51.45it/s]
100%|██████████| 2428/2428 [00:47<00:00, 51.65it/s]
100%|██████████| 2428/2428 [00:48<00:00, 50.42it/s]
100%|██████████| 2428/2428 [00:47<00:00, 50.59it/s]


In [71]:
net.eval()  # inference mode: BatchNorm uses its running statistics

# Run on whichever device the model currently lives on instead of assuming CUDA.
device = next(net.parameters()).device

# Sum of the per-batch losses, kept as a plain Python float rather than a
# 0-d tensor that stays on the device.
total_loss = 0.0

y_true = []
y_pred = []

# No gradients are needed for evaluation, for the forward pass or the loss.
with torch.no_grad():
    for batch in tqdm(validation_dataloader):
        X = batch['board'].to(device)
        targets = batch['themes']

        pred = net(X)

        loss = loss_fn(pred, targets.to(device))
        total_loss += loss.item()

        y_true.append(targets.numpy())
        # Store probabilities (sigmoid of the raw scores), not the raw scores.
        y_pred.append(pred.sigmoid().cpu().numpy())

# One row per validation sample, one column per theme.
y_true = np.vstack(y_true)
y_pred = np.vstack(y_pred)


100%|██████████| 607/607 [00:11<00:00, 53.27it/s]


In [72]:
# y_pred holds sigmoid probabilities, so 0.5 is the decision threshold
# (a `> 0` cut-off would mark virtually every theme as predicted).
themes_hat = y_pred > 0.5

# Other metrics to try on the same thresholded predictions:
# metrics.precision_score(y_true, themes_hat, average='micro')
# metrics.precision_score(y_true, themes_hat, average='macro')
#
# metrics.confusion_matrix(y_true, themes_hat)
# NOTE(review): confusion_matrix may reject 2-D multilabel input; confirm, or
# use metrics.multilabel_confusion_matrix instead.

# Kept last so the notebook displays the score. With 2-D multilabel targets
# accuracy_score reports subset accuracy: a sample counts only if every theme matches.
metrics.accuracy_score(y_true, themes_hat)


0.27066667524761395