# Imitation Learning with RL Fine-tuning through Self-Play

# Import and Split Training Data

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
import json
import os

data_dir = "../data/"
X = []
Y = []

# Walk data_dir/<subdir>/<file>. Every file is read as JSON Lines, and only
# records that carry both a "state" and an "action" key become a sample.
# sorted() pins the traversal order (os.listdir returns entries in arbitrary
# order) so that the seeded split below is reproducible.
for subdir in sorted(os.listdir(data_dir)):
    subdir_path = os.path.join(data_dir, subdir)
    if not os.path.isdir(subdir_path):
        continue    #skip stray files sitting next to the data folders
    for file_name in sorted(os.listdir(subdir_path)):
        file_path = os.path.join(subdir_path, file_name)
        if not os.path.isfile(file_path):
            continue    #skip nested directories
        with open(file_path, "r", encoding="utf-8") as file:
            for line in file:
                line = line.strip()
                if not line:
                    continue    #tolerate blank lines (e.g. a trailing newline)
                data = json.loads(line)
                if "state" in data and "action" in data:
                    X.append(data["state"])
                    Y.append(data["action"])

if not X:
    raise ValueError(f"no samples with 'state' and 'action' found under {data_dir!r}")

X = np.array(X)
# Replace every positive entry with its log2; zeros are left untouched.
# NOTE: X keeps the dtype np.array inferred, so for an integer array the log2
# results are cast back to integers on assignment (exact for powers of two).
positive = X > 0
X[positive] = np.log2(X[positive])    #replace with log2 for simplicity

# Hold out 33% of the samples; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=.33, random_state=26)
X_test = np.asarray(X_test)
y_test = np.array(y_test)

# Overwrite the training split with the full dataset for training.
# NOTE(review): the held-out X_test/y_test rows are therefore also part of the
# training set, so any accuracy measured on them is not an out-of-sample estimate.
X_train = np.array(X)
y_train = np.array(Y)

# Convert NumPy Arrays to PyTorch Tensors

In [2]:
import torch
from torch.utils.data import Dataset, DataLoader

#convert data to torch tensors
class Data(Dataset):
    """Tensor-backed dataset pairing each input grid with its label.

    X is cast to float32 and reshaped to (N, 1, 4, 4) to fit the CNN input:
    -1 lets torch infer the number of samples, 1 is the single channel.
    y is cast to float32 and kept in its original shape.
    """

    def __init__(self, X, y):
        features = X.astype(np.float32)
        self.X = torch.from_numpy(features).reshape(-1, 1, 4, 4)
        labels = y.astype(np.float32)
        self.y = torch.from_numpy(labels)
        # number of samples = size of the leading dimension
        self.len = len(self.X)

    def __getitem__(self, index):
        """Return the (input, label) pair stored at ``index``."""
        sample = self.X[index]
        target = self.y[index]
        return sample, target

    def __len__(self):
        """Return the number of samples."""
        return self.len
   
batch_size = 64

# Wrap each split in a Data set, then batch and shuffle it
train_data = Data(X_train, y_train)
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

test_data = Data(X_test, y_test)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

# Sanity check: report the shapes of the first training batch only
for batch, (X, y) in enumerate(train_dataloader):
    print(
        f"Batch: {batch+1}",
        f"X shape: {X.shape}",
        f"y shape: {y.shape}",
        sep="\n",
    )
    break

Batch: 1
X shape: torch.Size([64, 1, 4, 4])
y shape: torch.Size([64])


# Neural Network Implementation

In [3]:
import torch
from torch import nn
from torch import optim

# Layer sizes. input_dim and the two hidden sizes are only referenced by the
# commented-out fully connected network below; the CNN only takes output_dim.
input_dim = 4 * 4       # one input per cell of the 4x4 grid
hidden_dim1 = 256
hidden_dim2 = 128
output_dim = 4          # number of output classes

# class NeuralNetwork(nn.Module):
#     def __init__(self, input_dim, hidden_dim1, hidden_dim2, output_dim):
#         super(NeuralNetwork, self).__init__()
#         self.layer_1 = nn.Linear(input_dim, hidden_dim1)
#         nn.init.kaiming_uniform_(self.layer_1.weight, nonlinearity="relu")
#         self.layer_2 = nn.Linear(hidden_dim1, hidden_dim2)
#         nn.init.kaiming_uniform_(self.layer_2.weight, nonlinearity="relu")
#         self.layer_3 = nn.Linear(hidden_dim2, output_dim)
    
#     def forward(self, x):
#         x = torch.nn.functional.relu(self.layer_1(x))
#         x = torch.nn.functional.relu(self.layer_2(x))
#         x = self.layer_3(x)

#         return x
    
# model = NeuralNetwork(input_dim, hidden_dim1, hidden_dim2, output_dim)

class CNN(nn.Module):
    """Small convolutional classifier for image-like 1x4x4 inputs.

    Two 2x2 convolutions (stride 1, no padding) shrink the 4x4 grid to 3x3
    and then 2x2, after which two linear layers produce ``output_dim`` scores.
    """

    def __init__(self, output_dim=4):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=2, stride=1)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=2, stride=1)
        # 128 channels * 2 * 2 spatial positions remain after both convs
        self.fc1 = nn.Linear(128 * 2 * 2, 128)
        self.fc2 = nn.Linear(128, output_dim)

    def forward(self, x):
        """Map a (batch, 1, 4, 4) tensor to (batch, output_dim) raw scores."""
        for conv in (self.conv1, self.conv2):
            x = torch.relu(conv(x))
        # flatten everything except the batch dimension
        flat = x.view(x.size(0), -1)
        hidden = torch.relu(self.fc1(flat))
        return self.fc2(hidden)
# Build the network and print its layer summary
model = CNN(output_dim)
print(model)

CNN(
  (conv1): Conv2d(1, 64, kernel_size=(2, 2), stride=(1, 1))
  (conv2): Conv2d(64, 128, kernel_size=(2, 2), stride=(1, 1))
  (fc1): Linear(in_features=512, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=4, bias=True)
)


# Training

In [5]:
learning_rate = 0.001

# Multi-class classification loss over the raw scores returned by the model
loss_fn = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

num_epochs = 30

# Make sure the model is in training mode before optimising
model.train()

for epoch in range(num_epochs):
    epoch_loss = 0.0
    batch_count = 0
    for X, y in train_dataloader:
        optimizer.zero_grad()
        pred = model(X)
        # Data stores labels as float32; CrossEntropyLoss needs integer class indices
        loss = loss_fn(pred, y.long())
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        batch_count += 1
    # Report the mean batch loss so training progress is actually visible
    mean_loss = epoch_loss / batch_count if batch_count else float("nan")
    print(f"Epoch {epoch+1}/{num_epochs} - mean loss: {mean_loss:.4f}")


print("DONE!!! :3")

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
DONE!!! :3


# Predictions

In [6]:
import itertools

y_pred = []
# NOTE: this rebinds y_test (the label array from the data-split cell) to a
# plain list that is filled in the order the dataloader yields batches.
y_test = []
correct = 0
total = 0
results = [0,0,0,0]     #how often each of the 4 classes was predicted

# Switch to inference mode; we're not training, so we also don't need to
# calculate the gradients for our outputs.
model.eval()
with torch.no_grad():
    for X, y in test_dataloader:
        outputs = model(X)  # Get model outputs
        _, predicted = torch.max(outputs, 1)    #index of the highest score per sample
        y_pred.extend(predicted.tolist())
        y_test.extend(y.tolist())
        correct += (predicted == y).sum().item()
        total += y.size(0)
        # Count this batch's predictions per class in one vectorised call
        batch_counts = torch.bincount(predicted, minlength=len(results)).tolist()
        results = [seen + new for seen, new in zip(results, batch_counts)]

if total:
    print(f'Accuracy: {100 * correct // total}%')
else:
    # An empty test set would otherwise raise ZeroDivisionError
    print('Accuracy: n/a (no test samples)')
print('Prediction distribution:')
print(f'0:  {results[0]}')
print(f'1:  {results[1]}')
print(f'2:  {results[2]}')
print(f'3:  {results[3]}')

Accuracy: 89%
Prediction distribution:
0:  28764
1:  29225
2:  26980
3:  28732


# Export to ONNX

In [8]:
import torch.onnx
# Example input used to trace the model: (batch, channels, height, width) for the CNN
dummy_input = torch.randn((1, 1, 4, 4))
torch.onnx.export(
    model,
    dummy_input,
    "2048_imitation.onnx",
    input_names=["input"],
    output_names=["output"],
)

# Convert ONNX to TensorFlow to TensorFlow.js

The package versions below need to be pinned exactly, or else the conversion crashes.

Make a new virtual environment with the following (and activate it before running the commands below):\
`python3.9 -m venv tfenv`

Run this command to install the pinned packages and convert the ONNX model to a TensorFlow SavedModel:\
`pip install tensorflow==2.13.0 keras==2.13.1 onnx==1.14.0 onnx-tf==1.10.0 protobuf==3.20.3 tensorflow-probability==0.20.0 && onnx-tf convert -i 2048_imitation.onnx -o 2048_imitation_tf`

Then run this command to convert the TensorFlow SavedModel to TensorFlow.js:\
`pip install tensorflowjs==4.18.0 && tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model 2048_imitation_tf/ 2048_imitation_tfjs/`