In [1]:
import os

import math
import numpy as np
import torch

from models.transformer.transformer import TransformerConfig
from models.lm import LM
from data import OthelloDataset

In [2]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [3]:
# Transformer hyper-parameters (forwarded to TransformerConfig).
d_model, n_layers, n_heads = 512, 8, 8
dropout = 0.0
bias = False

# How many games to process in total, and how many per batch / per saved file.
total_games = 1_000
batch_size = 128

# Root output directory for the probing data.
save_dir = "data_probing/"

In [4]:
# Build the language model whose intermediate activations are dumped below.
# NOTE(review): no checkpoint is loaded anywhere in this notebook, so `model`
# keeps whatever weights `LM(...)` initialises it with -- confirm that this is
# intended, or load the trained weights here before extracting activations.
config = TransformerConfig(
    d_model=d_model,
    n_layers=n_layers,
    n_heads=n_heads,
    dropout=dropout,
    bias=bias,
    max_len=60,
    flash=True,
)
# The model is only used for inference here, so switch it to eval mode: this
# keeps train-only layers (e.g. dropout) inactive even if `dropout` is later
# changed to a non-zero value. Assumes LM is a torch.nn.Module (it already
# relies on `.to(device)`); `.eval()` returns the module itself.
model = LM(config, vocab_size=65).to(device).eval()

In [5]:
# Games read from "data/val", served in batches of `batch_size` and loaded in
# the main process (num_workers=0).
ds_val = OthelloDataset("data/val", seed=47)
loader_val = torch.utils.data.DataLoader(
    ds_val,
    batch_size=batch_size,
    num_workers=0,
    pin_memory=True,
)

In [6]:
# Rough disk footprint of the activations dump: one file per batch, each
# holding batch_size * 59 * d_model values at an assumed 4 bytes per value.
n_activation_files = math.ceil(total_games/batch_size)
activation_file_bytes = 4*batch_size*59*config.d_model

print(f"Number of files : {n_activation_files}")
print(f"Size of each file: {activation_file_bytes/1e6:.1f} MB")
print(f"Size of activations dataset : {n_activation_files*activation_file_bytes/1e6:.1f} MB")

Number of files : 8
Size of each file: 15.5 MB
Size of activations dataset : 123.7 MB


In [7]:
# Layer index handed to `model.forward_up_to` when extracting activations.
layer = 7

# Activations go into a per-layer sub-directory of `save_dir`, created on demand.
save_dir_activations = os.path.join(save_dir, "layer_" + str(layer))
os.makedirs(save_dir_activations, exist_ok=True)

In [8]:
# Dump the output of `model.forward_up_to(x, layer)` for (at least) the first
# `total_games` games of `loader_val`, one .npy file per batch.
num_games = 0
with torch.no_grad():  # inference only: do not build an autograd graph
    for i, (x, _) in enumerate(loader_val):  # x: (B, 59)
        x = x.to(device)

        # `.detach()` is kept as a cheap safeguard; under no_grad it is a no-op.
        activations = model.forward_up_to(x, layer).detach().cpu().numpy()  # (B, 59, d_model)
        np.save(os.path.join(save_dir_activations, f"batch_{i+1}_activations.npy"), activations)

        # Count the games actually present in this batch: the last batch of the
        # loader may hold fewer than `batch_size` games.
        num_games += x.shape[0]
        if num_games >= total_games:
            break

In [9]:
from othello import OthelloGame

In [10]:
# Rough disk footprint of the boards dump: one file per batch, each holding
# batch_size * 59 boards of 64 squares stored as 4-byte integers.
n_board_files = math.ceil(total_games/batch_size)
board_file_bytes = 4*batch_size*59*64

print(f"Number of files : {n_board_files}")
print(f"Size of each file: {board_file_bytes/1e6:.1f} MB")
print(f"Size of boards dataset : {n_board_files*board_file_bytes/1e6:.1f} MB")

Number of files : 8
Size of each file: 1.9 MB
Size of boards dataset : 15.5 MB


In [11]:
# Replay every game move by move and store the flattened 8x8 board reached
# after each move, one .npy file per batch, written to `save_dir`.
# NOTE(review): these files are presumably paired with the activation files by
# batch index, which requires `loader_val` to yield the same batches in the
# same order on every pass -- confirm this holds for OthelloDataset.
num_games = 0
for i, (x, _) in enumerate(loader_val):  # x: (B, 59)
    # Convert the whole batch to nested Python lists once. The replay runs on
    # the CPU, so moving the batch to `device` and calling `.item()` per move
    # would only add host<->device synchronisations.
    transcripts = x.tolist()
    n_games_in_batch = len(transcripts)  # may be < batch_size for the last batch

    # Board state after each of the 59 moves of every game in the batch.
    boards = np.zeros((n_games_in_batch, 59, 8*8), dtype=np.int32)

    for k, game_transcript in enumerate(transcripts):
        game = OthelloGame()
        for t in range(59):
            # Tokens are shifted by one with respect to the move index expected
            # by `play_move`; token 0 therefore maps to -1, i.e. "no move".
            move = game_transcript[t] - 1
            if move == -1:
                # No move at this position: fill the whole board with -100.
                # NOTE(review): presumably the ignore value used when training
                # the probes -- confirm.
                boards[k, t] = -100
            else:
                game.play_move(move)
                boards[k, t] = np.asarray(game.state).flatten()

    np.save(os.path.join(save_dir, f"batch_{i+1}_boards.npy"), boards)

    num_games += n_games_in_batch
    if num_games >= total_games:
        break