# Retrieve Data
Define a dataset from a SQLite DB using Peewee

In [1]:
from peewee import *
import base64

db = SqliteDatabase('2021-07-31-lichess-evaluations-37MM.db/test.db')

class Evaluations(Model):
    id = IntegerField()
    fen = TextField()
    binary = BlobField()
    eval = FloatField()

    class Meta:
        database = db
    def binary_base64(self):
        # convert to binary for training
        return base64.b64encode(self.binary)

db.connect()

eval = Evaluations.get(Evaluations.id == 1)
print(eval.binary_base64())

b'CAAAAAAAAAAQAAAAAAAAAIEAAAAAAAAAJAAAAAAAAABCAAAAAAAAAADvABAAAAAAAAAAAAAAAAgAAAAAAAAAEAAAAAAAAACBAAAAAAAAACQAAAAAAAAAQgAAAAAAAP8AAAABEz8='


# Create Dataset


In [4]:
import os
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, IterableDataset, random_split
from random import randrange

LABEL_COUNT = 37164639

class EvaluationDataset(Dataset):
  def __init__(self, count):
    self.count = count
  def __iter__(self):
    return self
  def __next__(self):
    idx = randrange(self.count)
    return self[idx]
  def __len__(self):
    return self.count
  def __getitem__(self, idx):
    eval = Evaluations.get(Evaluations.id == idx+1)
    bin = np.frombuffer(eval.binary, dtype=np.uint8)
    bin = np.unpackbits(bin, axis=0).astype(np.single)
    # Normalize evals to remove outliers
    eval.eval = max(eval.eval, -15)
    eval.eval = min(eval.eval, 15)
    ev = np.array([eval.eval]).astype(np.single)
    return (bin, ev)

dataset = EvaluationDataset(count=LABEL_COUNT)

# Define Neural Network

In [9]:
# To reproduce results
torch.manual_seed(32)

device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

class ChessNet(nn.Module):
    def __init__(self, learning_rate=5e-4, batch_size=512):
        super(ChessNet, self).__init__()
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.flatten = nn.Flatten()
        # 10 Layers of 808x808
        modules = []
        for i in range(8):
            modules.append(torch.nn.Linear(808, 808))
            modules.append(torch.nn.ReLU())
        modules.append(torch.nn.Linear(808, 1))
        self.sequential = nn.Sequential(*modules)

    def forward(self, x):
        logits = self.sequential(x)
        return logits

    def training_dataloader(self):
        dataset = EvaluationDataset(count=LABEL_COUNT)
        # Load onto GPU
        return DataLoader(dataset, batch_size=self.batch_size)

model = ChessNet().to(device)
print(model)

ChessNet(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (sequential): Sequential(
    (0): Linear(in_features=808, out_features=808, bias=True)
    (1): ReLU()
    (2): Linear(in_features=808, out_features=808, bias=True)
    (3): ReLU()
    (4): Linear(in_features=808, out_features=808, bias=True)
    (5): ReLU()
    (6): Linear(in_features=808, out_features=808, bias=True)
    (7): ReLU()
    (8): Linear(in_features=808, out_features=808, bias=True)
    (9): ReLU()
    (10): Linear(in_features=808, out_features=808, bias=True)
    (11): ReLU()
    (12): Linear(in_features=808, out_features=808, bias=True)
    (13): ReLU()
    (14): Linear(in_features=808, out_features=808, bias=True)
    (15): ReLU()
    (16): Linear(in_features=808, out_features=1, bias=True)
  )
)


In [10]:
X = torch.rand(808, 808, device=device)
logits = model(X)
print(device)

cuda


# Initialize DataLoader and Train

In [1]:
%reload_ext tensorboard
%tensorboard --logdir=runs

# To kill in future, run in CMD
# taskkill /im tensorboard.exe /f
# del /q %TMP%\.tensorboard-info\*

In [15]:
from torch.utils.tensorboard import SummaryWriter

size = int(LABEL_COUNT / model.batch_size)
writer = SummaryWriter()

def train_loop(model, loss_fn, optimizer):
    # Get data
    train_data = model.training_dataloader()
    # Set model into train mode
    model.train()
    batch_size = model.batch_size

    for i, batch in enumerate(train_data):
        bin, eval = batch
        bin = bin.to(device)
        eval = eval.to(device)
        pred = model(bin)
        loss = loss_fn(pred, eval)
        # Back prop
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Log results
        writer.add_scalar("Loss/train", loss, i)

        if i % 100 == 0:
            loss, current = loss.item(), i
            print(f"loss: {loss :>7f} [{current:>5d}/{size:>5d}]")

loss_fn = nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)



In [16]:
# Train
train_loop(model, loss_fn, optimizer)
writer.flush()
writer.close()

loss: 5.863727 [    0/72587]
loss: 3.014303 [  100/72587]
loss: 3.811165 [  200/72587]
loss: 4.410396 [  300/72587]
loss: 4.504051 [  400/72587]
loss: 3.395687 [  500/72587]
loss: 3.292687 [  600/72587]
loss: 3.326246 [  700/72587]
loss: 3.278079 [  800/72587]
loss: 3.266337 [  900/72587]
loss: 3.738312 [ 1000/72587]
loss: 2.673603 [ 1100/72587]
loss: 3.113836 [ 1200/72587]
loss: 3.310190 [ 1300/72587]
loss: 2.814556 [ 1400/72587]
loss: 3.415212 [ 1500/72587]
loss: 2.941055 [ 1600/72587]
loss: 2.619054 [ 1700/72587]
loss: 2.817062 [ 1800/72587]
loss: 2.528939 [ 1900/72587]
loss: 3.516688 [ 2000/72587]
loss: 3.625045 [ 2100/72587]
loss: 2.952852 [ 2200/72587]
loss: 2.403263 [ 2300/72587]
loss: 2.530606 [ 2400/72587]
loss: 2.934216 [ 2500/72587]
loss: 2.382329 [ 2600/72587]
loss: 3.255175 [ 2700/72587]
loss: 2.962790 [ 2800/72587]
loss: 3.481853 [ 2900/72587]
loss: 2.004344 [ 3000/72587]
loss: 2.797792 [ 3100/72587]
loss: 2.834812 [ 3200/72587]
loss: 3.010144 [ 3300/72587]
loss: 3.386355

In [27]:
# Save model
PATH = "state_dict_model.pt"
torch.save(model.state_dict(), PATH)
print(model.state_dict())

OrderedDict([('sequential.0.weight', tensor([[-0.0013,  0.0693, -0.0909,  ..., -0.0480,  0.2452,  0.0780],
        [-0.0522,  0.0711, -0.0799,  ..., -0.0773,  0.0495,  0.1481],
        [ 0.5282, -0.0107,  0.3001,  ..., -0.0199,  0.0878,  0.0220],
        ...,
        [-0.0350,  0.0245,  0.0153,  ..., -0.0275,  0.0099, -0.0123],
        [ 0.1371,  0.2033,  0.1200,  ..., -0.1058, -0.0837,  0.0334],
        [ 0.1947,  0.1372,  0.0217,  ..., -0.0826,  0.0889,  0.0130]],
       device='cuda:0')), ('sequential.0.bias', tensor([-1.6643e-01, -1.7619e-02, -9.0071e-05,  5.4074e-03, -1.3234e-01,
        -5.8608e-02, -4.7352e-02, -6.5503e-02, -4.5127e-02, -1.8611e-01,
        -7.5744e-02, -1.8145e-01, -1.3057e-01,  1.7838e-01, -2.0426e-01,
        -1.1945e-02,  3.8745e-02, -1.2020e-01, -5.4191e-02, -8.8011e-02,
        -9.2149e-02, -2.7409e-02,  1.7424e-01, -1.2353e-01, -2.4057e-02,
        -1.3172e-01, -8.0706e-02, -1.5703e-01, -7.9578e-03,  7.9678e-02,
         3.8769e-02, -1.4181e-02, -7.6396e-

In [26]:
# Test
def test_loop(model, loss_fn):
    # Get data
    train_data = model.training_dataloader()
    # Set model into test mode
    model.eval()
    batch_size = model.batch_size
    total_loss = 0

    for i, batch in enumerate(train_data):
        bin, eval = batch
        pred = model(bin)
        loss = loss_fn(pred, eval)
        total_loss += loss

        # Log results
        writer.add_scalar("Loss/test", loss, i)

        if i == 500:
            break
    print(f"Total Loss: {total_loss:>7f}")
    writer.flush()
    writer.close()

loss_fn = nn.L1Loss()
test_loop(model, loss_fn)

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)