In [1]:
import sys

sys.path.append('..')
import numpy as np
import pandas as pd
import torch
import time
from torch.utils.data import DataLoader
from tqdm import tqdm
from utils import ProcessedDataset
from model import Dense1, Dense3, Dense6, Conv1, Conv3, Conv6, ConvLSTM, Transformer

In [2]:
# Data used by the benchmarking cells: a ProcessedDataset wrapped in a
# shuffled DataLoader that yields batches of 64.
# NOTE(review): the tqdm output further down shows 625 batches of 64, so
# `limit=10000` apparently does not cap the number of samples at 10000 --
# confirm what it actually limits.
dataset = ProcessedDataset(
  channels=12,
  num_moves=40,
  limit=10000,
)
loader = DataLoader(
  dataset,
  batch_size=64,
  shuffle=True,
)

In [3]:
# Registry mapping a short display name to the model class to instantiate.
# Insertion order is the order in which the later cells iterate the models.
models = dict(
  dense1=Dense1,
  dense3=Dense3,
  dense6=Dense6,
  conv1=Conv1,
  conv3=Conv3,
  conv6=Conv6,
  convlstm=ConvLSTM,
  transformer=Transformer,
)

In [4]:
def _count_trainable(net):
  """Return the total number of elements across parameters with requires_grad set."""
  return sum(p.numel() for p in net.parameters() if p.requires_grad)


# One row per (model variant, channel count) holding its trainable parameter count.
rows = []

for name, model_cls in models.items():
  # Models other than convlstm/transformer are built from `channels` alone.
  if name not in ('convlstm', 'transformer'):
    for n_channels in (6, 12):
      net = model_cls(channels=n_channels)
      rows.append({'model': name, 'channels': n_channels, 'params': _count_trainable(net)})
    continue

  # convlstm/transformer also take `evals` and `times` flags; enumerate every combination.
  for n_channels in (6, 12):
    for use_evals in (True, False):
      for use_times in (True, False):
        enabled = [label for label, on in (('evals', use_evals), ('times', use_times)) if on]
        if enabled:
          display_name = f"{name} ({', '.join(enabled)})"
        else:
          display_name = name

        net = model_cls(channels=n_channels, evals=use_evals, times=use_times)
        rows.append({'model': display_name, 'channels': n_channels, 'params': _count_trainable(net)})

df = pd.DataFrame(rows)
df

Unnamed: 0,model,channels,params
0,dense1,6,7866884
1,dense1,12,15731204
2,dense3,6,8160580
3,dense3,12,16024900
4,dense6,6,37050116
5,dense6,12,68507396
6,conv1,6,8629828
7,conv1,12,8647108
8,conv3,6,8923524
9,conv3,12,8940804


In [5]:
# Reshape the long (model, channels, params) table to one row per model with
# a parameter-count column for each channel setting.
df_pivot = (
  df.pivot(index='model', columns='channels', values='params')
  .reset_index()
  .rename_axis(None, axis='columns')  # drop the 'channels' label pivot leaves on the column index
  .rename(columns={6: 'params_6', 12: 'params_12'})
)
df_pivot

Unnamed: 0,model,params_6,params_12
0,conv1,8629828,8647108
1,conv3,8923524,8940804
2,conv6,39385924,39403204
3,convlstm,3524420,3527876
4,convlstm (evals),3526468,3529924
5,"convlstm (evals, times)",3528516,3531972
6,convlstm (times),3526468,3529924
7,dense1,7866884,15731204
8,dense3,8160580,16024900
9,dense6,37050116,68507396


In [10]:
# Write the per-model parameter table to disk, leaving out the positional index.
df_pivot.to_csv(path_or_buf='model_params.csv', index=False)

In [8]:
# Benchmark the inference speed of every model on the loader (12 channels only
# for now). The loader yields (moves, evals, times, labels) batches. ConvLSTM
# and Transformer are timed for each evals/times flag combination and receive
# all three inputs; every other model receives only the moves.
def _time_forward_passes(net, with_aux_inputs):
  """Run `net` once over every batch of `loader` and time the full pass.

  Args:
    net: model to benchmark; it is switched to eval mode first.
    with_aux_inputs: if True call `net(moves, e, t)`, otherwise `net(moves)`.

  Returns:
    (elapsed_seconds, samples_per_second), where the rate is
    `len(loader.dataset) / elapsed_seconds`. The elapsed time covers the
    whole loop, so it includes batch loading as well as the forward passes.
  """
  net.eval()
  start = time.perf_counter()  # monotonic clock, unaffected by system clock changes
  # Gradients are never used here; disabling autograd keeps graph bookkeeping
  # out of the measured time and avoids holding activations for backward.
  with torch.no_grad():
    for moves, e, t, labels in tqdm(loader):
      if with_aux_inputs:
        net(moves, e, t)
      else:
        net(moves)
  elapsed = time.perf_counter() - start
  return elapsed, len(loader.dataset) / elapsed


rows = []

for name, model in models.items():
  if name in ['convlstm', 'transformer']:
    for evals in [True, False]:
      for times in [True, False]:
        suffix = []
        if evals:
          suffix.append('evals')
        if times:
          suffix.append('times')
        display_name = f"{name} ({', '.join(suffix)})" if suffix else name
        print(display_name)

        if name == 'transformer':
          # The transformer is additionally given num_moves (40, the value the dataset is built with).
          m = model(channels=12, evals=evals, times=times, num_moves=40)
        else:
          m = model(channels=12, evals=evals, times=times)
        elapsed, its_per_sec = _time_forward_passes(m, with_aux_inputs=True)
        rows.append({'model': display_name, 'time': elapsed, 'its': its_per_sec})
  else:
    print(name)
    m = model(channels=12)
    elapsed, its_per_sec = _time_forward_passes(m, with_aux_inputs=False)
    rows.append({'model': name, 'time': elapsed, 'its': its_per_sec})

df = pd.DataFrame(rows)
df

dense1


100%|██████████| 625/625 [00:05<00:00, 109.48it/s]


dense3


100%|██████████| 625/625 [00:05<00:00, 105.93it/s]


dense6


100%|██████████| 625/625 [00:14<00:00, 41.93it/s]


conv1


100%|██████████| 625/625 [00:09<00:00, 63.01it/s]


conv3


100%|██████████| 625/625 [00:09<00:00, 62.69it/s]


conv6


100%|██████████| 625/625 [00:15<00:00, 41.33it/s]


convlstm (evals, times)


100%|██████████| 625/625 [00:41<00:00, 15.04it/s]


convlstm (evals)


100%|██████████| 625/625 [00:41<00:00, 15.17it/s]


convlstm (times)


100%|██████████| 625/625 [00:40<00:00, 15.46it/s]


convlstm


100%|██████████| 625/625 [00:39<00:00, 15.69it/s]


transformer (evals, times)


100%|██████████| 625/625 [03:05<00:00,  3.37it/s]


transformer (evals)


100%|██████████| 625/625 [02:05<00:00,  4.98it/s]


transformer (times)


100%|██████████| 625/625 [02:02<00:00,  5.11it/s]


transformer


100%|██████████| 625/625 [01:16<00:00,  8.15it/s]


Unnamed: 0,model,time,its
0,dense1,5.711648,7003.233
1,dense3,5.902328,6776.986415
2,dense6,14.908188,2683.089258
3,conv1,9.921389,4031.693504
4,conv3,9.972103,4011.190065
5,conv6,15.124359,2644.740244
6,"convlstm (evals, times)",41.56627,962.318735
7,convlstm (evals),41.214917,970.522386
8,convlstm (times),40.438831,989.148286
9,convlstm,39.841914,1003.967821


In [12]:
# Derive per-item and per-batch latencies from the total elapsed time.
# `its` is a rate (len(loader.dataset) / time, i.e. samples per second), so
# dividing `time` by it would yield seconds^2 per sample; divide by the counts instead.
df['time_per_it'] = df['time'] / len(loader.dataset)  # seconds per sample, equal to 1 / its
df['time_per_batch'] = df['time'] / len(loader)  # seconds per batch; len(loader) is the batch count
df.to_csv('model_times.csv', index=False)