In [None]:
import sys

sys.path.append('..')
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from utils import ProcessedDataset, split_data
from model import Dense, Conv, ConvLSTM, Transformer
from tqdm import trange

# Seed torch's default RNG so repeated Restart-and-Run-All executions start from the same state.
torch.manual_seed(0)

# `limit` and `num_moves` are the first two arguments handed to ProcessedDataset further down.
# NOTE(review): presumably `limit` caps how much data is loaded and `num_moves` is the number
# of moves kept per game — confirm against utils.ProcessedDataset.
limit = 100
num_moves = 40
# Forwarded to Adam as `weight_decay` for every model in the sweep.
weight_decay = 1e-4
# Forwarded to split_data when the train/val/test loaders are built.
batch_size = 64
# Number of train-then-validate passes run per model and learning rate.
epochs = 10

In [2]:
class Dense1(nn.Module):
  """Wrapper around a single `Dense` block built as `Dense(15360, [512])`.

  NOTE(review): 15360 looks like 6 channels x 40 moves x 64 squares and the list
  looks like hidden-layer widths — confirm both against model.Dense and the dataset.
  """

  def __init__(self):
    super().__init__()
    layer_sizes = [512]
    self.dense = Dense(15360, layer_sizes)

  def forward(self, x: torch.Tensor, *_) -> torch.Tensor:
    """Feed `x` through the dense block; any further positional inputs are ignored."""
    out = self.dense(x)
    return out


class Dense3(nn.Module):
  """Wrapper around a single `Dense` block built as `Dense(15360, [512, 512, 64])`.

  NOTE(review): 15360 looks like 6 channels x 40 moves x 64 squares and the list
  looks like hidden-layer widths — confirm both against model.Dense and the dataset.
  """

  def __init__(self):
    super().__init__()
    layer_sizes = [512, 512, 64]
    self.dense = Dense(15360, layer_sizes)

  def forward(self, x: torch.Tensor, *_) -> torch.Tensor:
    """Feed `x` through the dense block; any further positional inputs are ignored."""
    out = self.dense(x)
    return out


class Dense6(nn.Module):
  """Wrapper around a single `Dense` block with six entries in its size list.

  Built as `Dense(15360, [2048, 2048, 512, 512, 128, 128])`.
  NOTE(review): 15360 looks like 6 channels x 40 moves x 64 squares and the list
  looks like hidden-layer widths — confirm both against model.Dense and the dataset.
  """

  def __init__(self):
    super().__init__()
    layer_sizes = [2048, 2048, 512, 512, 128, 128]
    self.dense = Dense(15360, layer_sizes)

  def forward(self, x: torch.Tensor, *_) -> torch.Tensor:
    """Feed `x` through the dense block; any further positional inputs are ignored."""
    out = self.dense(x)
    return out


class Conv1(nn.Module):
  """Default-constructed `Conv` followed by `Dense(16384, [512])`.

  NOTE(review): 16384 is presumably the flattened width of `Conv`'s output —
  confirm against model.Conv.
  """

  def __init__(self):
    super().__init__()
    # Registration order (conv, then dense) is kept so parameter ordering is unchanged.
    self.conv = Conv()
    layer_sizes = [512]
    self.dense = Dense(16384, layer_sizes)

  def forward(self, x: torch.Tensor, *_) -> torch.Tensor:
    """Apply the conv stage, then the dense stage; extra positional inputs are ignored."""
    return self.dense(self.conv(x))


class Conv3(nn.Module):
  """Default-constructed `Conv` followed by `Dense(16384, [512, 512, 64])`.

  NOTE(review): 16384 is presumably the flattened width of `Conv`'s output —
  confirm against model.Conv.
  """

  def __init__(self):
    super().__init__()
    # Registration order (conv, then dense) is kept so parameter ordering is unchanged.
    self.conv = Conv()
    layer_sizes = [512, 512, 64]
    self.dense = Dense(16384, layer_sizes)

  def forward(self, x: torch.Tensor, *_) -> torch.Tensor:
    """Apply the conv stage, then the dense stage; extra positional inputs are ignored."""
    return self.dense(self.conv(x))


class Conv6(nn.Module):
  """Default-constructed `Conv` followed by a `Dense` block with six size entries.

  The dense stage is built as `Dense(16384, [2048, 2048, 512, 512, 128, 128])`.
  NOTE(review): 16384 is presumably the flattened width of `Conv`'s output —
  confirm against model.Conv.
  """

  def __init__(self):
    super().__init__()
    # Registration order (conv, then dense) is kept so parameter ordering is unchanged.
    self.conv = Conv()
    layer_sizes = [2048, 2048, 512, 512, 128, 128]
    self.dense = Dense(16384, layer_sizes)

  def forward(self, x: torch.Tensor, *_) -> torch.Tensor:
    """Apply the conv stage, then the dense stage; extra positional inputs are ignored."""
    return self.dense(self.conv(x))

In [3]:
def train(model: nn.Module, loader: DataLoader, optimizer: optim.Optimizer) -> float:
  """Run one optimisation pass over `loader` and return the mean per-batch loss.

  Args:
    model: called as `model(moves, evals, times)`; its output is scored with
      cross-entropy against the batch labels.
    loader: sized iterable yielding `(moves, evals, times, game_labels)` tuples.
    optimizer: stepped once per batch after back-propagation.

  Returns:
    The sum of the per-batch losses divided by `len(loader)`; every batch
    counts equally, whatever its size.

  Raises:
    ZeroDivisionError: if `loader` has length zero.
  """
  model.train()
  batch_losses = []
  for batch in loader:
    moves, evals, times, game_labels = batch
    optimizer.zero_grad()
    logits = model(moves, evals, times)
    batch_loss = F.cross_entropy(logits, game_labels)
    batch_loss.backward()
    optimizer.step()
    batch_losses.append(batch_loss.item())
  return sum(batch_losses) / len(loader)


def evaluate(model: nn.Module, loader: DataLoader) -> float:
  """Return the fraction of samples in `loader` that `model` classifies correctly.

  The model is put in eval mode and run without gradient tracking. The
  prediction for each sample is the arg-max over dimension 1 of the output.

  Args:
    model: called as `model(moves, evals, times)`.
    loader: iterable yielding `(moves, evals, times, game_labels)` tuples.

  Returns:
    Correct predictions divided by the number of labels seen.

  Raises:
    ZeroDivisionError: if `loader` yields no samples.
  """
  model.eval()
  hits = 0
  seen = 0
  with torch.no_grad():
    for batch in loader:
      moves, evals, times, game_labels = batch
      predictions = model(moves, evals, times).argmax(dim=1)
      hits += (predictions == game_labels).sum().item()
      seen += len(game_labels)
  return hits / seen

In [4]:
# Use the GPU when torch reports one is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# NOTE(review): the literal 6 matches the `in_channels=6` used for the models in the
# sweep, so it is presumably the channel count — confirm against utils.ProcessedDataset.
dataset = ProcessedDataset(limit, num_moves, 6, device)
# split_data hands back three loaders, in train / validation / test order.
train_loader, val_loader, test_loader = split_data(dataset, batch_size)

In [None]:
# Learning-rate sweep.
# Layout: results[learning_rate][model_name] = (train_losses, val_accuracies), one entry
# per epoch in each list. The keys are strings so they can label printouts and be passed
# to float() for the optimizer.
results = {
  '1e-3': {},
  '5e-4': {},
  '1e-4': {},
}

for learning_rate in results.keys():
  print(f'Learning rate: {learning_rate}')

  # The list is rebuilt on every outer iteration, so each learning rate trains fresh
  # instances and no weights carry over between settings.
  for model in [
    Dense1(),
    Dense3(),
    Dense6(),
    Conv1(),
    Conv3(),
    Conv6(),
    ConvLSTM(in_channels=6, evals=False, times=False),
    ConvLSTM(in_channels=6, evals=True, times=False),
    ConvLSTM(in_channels=6, evals=False, times=True),
    ConvLSTM(in_channels=6, evals=True, times=True),
    Transformer(in_channels=6, evals=False, times=False),
    Transformer(in_channels=6, evals=True, times=False),
    Transformer(in_channels=6, evals=False, times=True),
    Transformer(in_channels=6, evals=True, times=True),
  ]:
    name = model.__class__.__name__
    if 'ConvLSTM' in name or 'Transformer' in name:
      # Label the variant with the auxiliary inputs it uses, e.g. "ConvLSTM (evals, times)".
      # Built with str.join instead of same-quote nesting inside an f-string, which is
      # only accepted from Python 3.12 onward.
      enabled = [flag for flag in ('evals', 'times') if getattr(model, flag)]
      if enabled:
        name += f" ({', '.join(enabled)})"
    print(name)

    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=float(learning_rate), weight_decay=weight_decay)

    # Per-epoch history: mean training loss and validation accuracy.
    losses, accuracies = [], []
    for epoch in trange(epochs):
      loss = train(model, train_loader, optimizer)
      accuracy = evaluate(model, val_loader)
      losses.append(loss)
      accuracies.append(accuracy)

    results[learning_rate][name] = (losses, accuracies)
    # The test split is scored once after training; the figure is printed only, not stored.
    accuracy = evaluate(model, test_loader)
    print(f'Test accuracy: {accuracy:.4f}\n')

100%|██████████| 2/2 [00:01<00:00,  1.17it/s]


Test accuracy: 0.5750

Dense3


 50%|█████     | 1/2 [00:00<00:00,  1.25it/s]

In [None]:
import matplotlib.pyplot as plt

plt.style.use('ggplot')

for learning_rate, models in results.items():
  # One figure per learning rate: loss curves in the left panel, accuracy in the right.
  fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
  fig.suptitle(f'Learning rate: {learning_rate}')

  # Every model contributes one line to each panel.
  for name, history in models.items():
    losses, accuracies = history
    ax1.plot(losses, label=name)
    ax2.plot(accuracies, label=name)

  # Both panels are decorated the same way; only the title and y-label differ.
  panels = (
    (ax1, 'Loss', 'Cross Entropy'),
    (ax2, 'Accuracy', 'Accuracy'),
  )
  for axis, title, ylabel in panels:
    axis.set_title(title)
    axis.set_xlabel('Epoch')
    axis.set_ylabel(ylabel)
    axis.legend()

  plt.tight_layout()
  plt.show()
  # Saved through the Figure handle, so the file is written even after show().
  fig.savefig(f'learning_rate_{learning_rate}.png')