In [1]:
import numpy as np
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt
import time
import torch
from tqdm import tqdm
from sklearn import metrics
import torch.nn as nn
from torch.utils.data import DataLoader


In [2]:
class ChessData(torch.utils.data.Dataset):
    def __init__(self, path):
        self.df = pd.read_csv(path, index_col=0)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        inputs = row[:321].values.astype(np.float32)
        outputs = row[321:].values.astype(np.float32)

        return {'board': inputs, 'themes': outputs}


In [3]:
batch_size = 256

chess_data_set = ChessData('cleaned_data/cleaned_train_puzzles.csv')

train_dataloader = DataLoader(chess_data_set, batch_size=batch_size, num_workers=4, persistent_workers=True, shuffle=True)

In [4]:
chess_validation_set = ChessData('cleaned_data/cleaned_validation_puzzles.csv')

validation_dataloader = DataLoader(chess_validation_set, batch_size=batch_size, num_workers=4, persistent_workers=True)

In [5]:
net = nn.Sequential(
    nn.Linear(321, 128),
    nn.BatchNorm1d(128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.BatchNorm1d(64),
    nn.ReLU(),
    nn.Linear(64, 28),
)

In [6]:
net.load_state_dict(torch.load("model-final-state.pth"))

<All keys matched successfully>

In [7]:
net = net.cuda()

In [8]:
correct = 0
num_batches = 0
total_precision_dict = {i: 0 for i in range(28)}
sigmoid = nn.Sigmoid()

for batch in tqdm(validation_dataloader):
    num_batches += 1
    if num_batches < 3:
        continue

    real = batch['themes'].cuda()

    X = batch['board'].cuda()
    pred = net(X)
    pred = sigmoid(pred)

    columns = len(pred[0])
    for i in range(columns):
        correct_label_values   = real[:, i].cpu().detach().numpy()
        predicted_label_values = pred[:, i].cpu().detach().numpy()

        print()
        print(correct_label_values.shape)
        print(predicted_label_values.shape)

        label_average_precision = metrics.average_precision_score(correct_label_values, predicted_label_values)
        print(label_average_precision)
        break

        total_precision_dict[i] += label_average_precision or 0
        print()


    #num_batches += 1
    if num_batches == 3:
        break

    print()
    print(total_precision_dict)



for i in range(columns):
    total_precision_dict[i] /= num_batches
#correct += (boolean_pred == batch['themes'].cuda()).float().sum()

  recall = tps / tps[-1]
  0%|          | 2/607 [00:01<05:18,  1.90it/s]
(256,)
(256,)
nan



In [9]:
print(total_precision_dict)

{0: 0.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0, 5: 0.0, 6: 0.0, 7: 0.0, 8: 0.0, 9: 0.0, 10: 0.0, 11: 0.0, 12: 0.0, 13: 0.0, 14: 0.0, 15: 0.0, 16: 0.0, 17: 0.0, 18: 0.0, 19: 0.0, 20: 0.0, 21: 0.0, 22: 0.0, 23: 0.0, 24: 0.0, 25: 0.0, 26: 0.0, 27: 0.0}


In [10]:
from functools import reduce

average_precision = reduce(lambda x, y: x+y, average_precision_list) / len(average_precision_list)
print(average_precision)

NameError: name 'average_precision_list' is not defined

In [11]:
final_acc = 0
train_corrects = 0

for batch in tqdm(train_dataloader):
    X = batch['board'].cuda()

    pred = net(X)
    boolean_pred = (pred>0).float()
    train_corrects += (boolean_pred == batch['themes'].cuda()).float().sum()


100%|██████████| 2428/2428 [00:54<00:00, 44.80it/s]


In [12]:
print(correct.item())
print(train_corrects.item())

3989424.0
15961041.0


In [13]:
print(len(validation_dataloader.dataset))
print(len(train_dataloader.dataset))

155383
621532


In [14]:
acc = correct.item()/(len(validation_dataloader.dataset) * 28)
print(acc)

0.916956350253429


In [15]:
acc = train_corrects.item()/(len(train_dataloader.dataset) * 28)
print(acc)

0.9171485596420274
