## Torch & Tensorflow users to be united! ✊

**Training a tensorflow network using torch tensors**

_____

This is a highly experimental notebook attempting to train a tensorflow network using torch tensors. 
The notebook is based on the [great imitation training notebook](https://www.kaggle.com/code/huikang/kore-2022-imitation-training) by [huikang](https://www.kaggle.com/huikang).

The only difference is the network itself. 
I tried to translate everything as 1-to-1 as possible between torch and tensorflow so the actual changes between both implementations would be minimal.
The training loop is implemented to be very similar to the original using `GradientTape` instead of `autograd` and the tensors are fed from a `DataLoader` generated from the original datasets.
All tensors are converted with `tf.constant(states, dtype=tf.float32)` before being fed into the network.

May this be the start of a great new age of collaboration...

In [None]:
%reset -sf
!echo $KAGGLE_KERNEL_RUN_TYPE

In [None]:
%%capture
!pip install kaggle-environments -U > /dev/null
!cp ../input/kore-2022-feature-generator/kore_analysis.py .
!cp ../input/kore-2022-feature-generator/feature_generator.py .

In [None]:
from IPython.core.magic import register_cell_magic

@register_cell_magic
def writefile_and_run(line, cell):
    """Cell magic that saves the cell body to a file and then executes it.

    Usage: ``%%writefile_and_run [-a] <path>``. The last whitespace-separated
    token of ``line`` is the target path. The file is opened in append mode
    only when the line is exactly ``-a <path>``; otherwise it is overwritten.

    Args:
        line: text following the magic name on the first line of the cell.
        cell: the cell body, written verbatim and then run in the kernel.
    """
    tokens = line.split()
    target_path = tokens[-1]
    append_requested = len(tokens) == 2 and tokens[0] == '-a'
    with open(target_path, 'a' if append_requested else 'w') as handle:
        handle.write(cell)
    # Execute the same source in the live kernel so its names become available.
    get_ipython().run_cell(cell)

In [None]:
%%writefile_and_run -a imitation_training_helper.py

import torch
import matplotlib
import numpy as np
import pandas as pd
from torch import nn
import torch.optim as optim
import os, collections, random
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import torch.nn.functional as F
from scipy.special import softmax
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
torch_device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
from feature_generator import plot_3d_matrix

In [None]:
def seed_everything(seed_value):
    """Seed the Python, NumPy and PyTorch random generators.

    Args:
        seed_value: integer seed applied to every generator.

    Side effects:
        Sets ``PYTHONHASHSEED`` in ``os.environ`` and, when CUDA is
        available, switches cuDNN to deterministic, non-benchmarking mode.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    # NOTE: this only influences interpreters started after this point; the
    # hash seed of the running process is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        # Benchmark mode lets cuDNN auto-tune and pick different convolution
        # algorithms from run to run, which defeats the purpose of seeding.
        torch.backends.cudnn.benchmark = False
seed = 42
seed_everything(seed)

# Dataset and Dataloader

In [None]:
actions_df = pd.read_csv("../input/kore-2022-feature-generator/actions_df.csv")
actions_df.shape

In [None]:
# Keep rows with a non-zero offset, both offsets within [-10, 10] and a
# non-negative action class.
keep_mask = (
    ((actions_df["diff_x"] != 0) | (actions_df["diff_y"] != 0))
    & (actions_df["diff_x"].abs() <= 10)
    & (actions_df["diff_y"].abs() <= 10)
    & (actions_df["action_class"] >= 0)
)
# Take an explicit copy: the column assignments below would otherwise write
# into a filtered slice of the original frame (SettingWithCopyWarning).
actions_df = actions_df[keep_mask].copy()
# actions_df = actions_df[abs(actions_df["turn_idx"]) <= 20]
# actions_df = actions_df[abs(actions_df["diff_x"]) + abs(actions_df["diff_y"]) <= 11]
# actions_df = actions_df[abs(actions_df["diff_x"]) + abs(actions_df["diff_y"]) >= 3]
# Shift offsets from [-10, 10] to [0, 20] so they are non-negative indices.
actions_df["diff_x"] = actions_df["diff_x"] + 10
actions_df["diff_y"] = actions_df["diff_y"] + 10
actions_df.loc[actions_df["action_class"] == 3, "action_class"] = 0  # recast attack action as build action
actions_df.shape

In [None]:
actions_df.sample(5)

In [None]:
actions_df["action_class"].value_counts()

In [None]:
actions_df.head()

In [None]:
# Scatter of the shifted offsets. Each point is nudged inside its grid cell
# by the turn index (turn_idx // 20 horizontally, turn_idx % 20 vertically)
# so that rows sharing the same offset do not sit exactly on top of each other.
plt.figure(figsize=(12,12))
turn_column = actions_df["turn_idx"]
jittered_x = actions_df["diff_x"] + (turn_column//20)/ 25 - 0.4
jittered_y = actions_df["diff_y"] + (turn_column %20)/ 25 - 0.4
plt.scatter(
    jittered_x,
    jittered_y,
    s=actions_df["ship_amount"],
    c=actions_df["action_class"],
    cmap="winter_r",
)
current_axes = plt.gca()
for axis_obj in (current_axes.xaxis, current_axes.yaxis):
    # integer ticks only, one per grid cell
    axis_obj.set_major_locator(matplotlib.ticker.MaxNLocator(integer=True))
current_axes.set_aspect('equal')
plt.show()

In [None]:
%%writefile_and_run -a imitation_training_helper.py

def append_source_specific_features(input_matrix):
    """Prepend four centre-relative feature planes to a (C, 21, 21) stack.

    The planes, in order, are:
      0. running sum of channel 3 along an L-shaped path from the centre cell
         (10, 10): first along axis 1 inside row 10, then along axis 0;
      1. the same running sum taking axis 0 first (inside column 10), then
         axis 1;
      2. Manhattan distance from the centre cell (the centre itself is set
         to 1 to avoid dividing by zero);
      3. a constant plane holding ``input_matrix[-11, 10, 10]``.
    Planes 0 and 1 are doubled, reduced by channel 3 and divided by plane 2.
    The whole result is clipped to ``[0, 10]``.

    Args:
        input_matrix: array indexed as ``[channel, row, col]`` with a 21x21
            grid; it is not modified.

    Returns:
        Array of shape ``(C + 4, 21, 21)``.
    """
    size, mid = 21, 10
    kore = input_matrix[3, :, :]
    axis1_first = np.zeros((size, size))
    axis0_first = np.zeros((size, size))

    offsets = np.abs(np.arange(-mid, mid + 1))
    manhattan = np.add.outer(offsets, offsets)
    assert manhattan[mid, mid] == 0
    manhattan[mid, mid] = 1  # avoid divide by zero error later
    source_ships = np.full((size, size), input_matrix[-11, mid, mid])

    # first leg: walk outwards from the centre along one axis
    for step in range(1, mid + 1):
        hi, lo = mid + step, mid - step
        axis1_first[mid, hi] += axis1_first[mid, hi - 1] + kore[mid, hi]
        axis1_first[mid, lo] += axis1_first[mid, lo + 1] + kore[mid, lo]
        axis0_first[hi, mid] += axis0_first[hi - 1, mid] + kore[hi, mid]
        axis0_first[lo, mid] += axis0_first[lo + 1, mid] + kore[lo, mid]

    # second leg: extend every first-leg total along the other axis
    for step in range(1, mid + 1):
        hi, lo = mid + step, mid - step
        axis0_first[:, hi] += axis0_first[:, hi - 1] + kore[:, hi]
        axis0_first[:, lo] += axis0_first[:, lo + 1] + kore[:, lo]
        axis1_first[hi, :] += axis1_first[hi - 1, :] + kore[hi, :]
        axis1_first[lo, :] += axis1_first[lo + 1, :] + kore[lo, :]

    # each cell is visited twice except the destination cell
    axis1_first = (2 * axis1_first - kore) / manhattan
    axis0_first = (2 * axis0_first - kore) / manhattan

    extra_planes = [axis1_first, axis0_first, manhattan, source_ships]
    stacked = np.concatenate((extra_planes, input_matrix), axis=0)
    return np.clip(stacked, 0, 10)

def action_encoder(action_class, diff_x, diff_y):
    """Pack ``(action_class, diff_x, diff_y)`` into a single class index.

    The index is ``action_class*21*21 + diff_x*21 + diff_y``.

    Raises:
        AssertionError: if ``action_class`` is outside ``[0, 3)`` or either
            offset is outside ``[0, 21)``.
    """
    for value, upper in ((action_class, 3), (diff_x, 21), (diff_y, 21)):
        assert 0 <= value < upper
    return (action_class * 21 + diff_x) * 21 + diff_y

def action_decoder(clf_idx):
    """Split a class index back into ``(action_class, diff_x, diff_y)``.

    Inverse of the ``action_class*21*21 + diff_x*21 + diff_y`` packing.
    """
    plane = 21 * 21
    action_class = clf_idx // plane
    within_plane = clf_idx % plane
    return action_class, within_plane // 21, within_plane % 21

In [None]:
train_actions_df = actions_df[actions_df["episode_id"]%10 != 0]
val_actions_df = actions_df[actions_df["episode_id"]%10 == 0]

def aggregate_into_episode_and_turn(df):
    """Group action records by ``(submission_id, episode_id, turn_idx)``.

    Rows with ``turn_idx <= 3`` are dropped, as are rows whose feature file
    for the previous turn is not present on disk. Records with
    ``action_class == 0`` are grouped separately from all other records.

    Returns:
        A list of ``(key, records)`` pairs: the non-build groups first,
        followed by the build groups.
    """
    build_groups = collections.defaultdict(list)
    other_groups = collections.defaultdict(list)
    for row in df.to_dict('records'):
        if row["turn_idx"] > 3:
            group_key = row["submission_id"], row["episode_id"], row["turn_idx"]
            submission_id, episode_id, turn_idx = group_key
            npy_path_name = f"""../input/kore-2022-feature-generator/npy/{submission_id}_{episode_id}_{turn_idx-1:03d}_inputs.npy"""
            if os.path.isfile(npy_path_name):
                # is build action -> build_groups, anything else -> other_groups
                bucket = build_groups if row["action_class"] == 0 else other_groups
                bucket[group_key].append(row)
    return list(other_groups.items()) + list(build_groups.items())
train_samples = aggregate_into_episode_and_turn(train_actions_df)
val_samples = aggregate_into_episode_and_turn(val_actions_df)

# Tensorflow Network Training

### Imports

In [None]:
import os
import numpy as np
import tensorflow as tf
from tqdm.notebook import tqdm
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import GlorotUniform
from tensorflow.keras.losses import CategoricalCrossentropy
from imitation_training_helper import append_source_specific_features, action_encoder, action_decoder
from tensorflow.keras.layers import Layer, InputLayer, Conv2D, Flatten, Dense, Reshape, ReLU, Softmax

### Dataset Loader

In [None]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint

class KoreDataset(Dataset):
    """Map-style dataset over ``((submission_id, episode_id, turn_idx), records)`` pairs.

    Each item loads the saved feature stack of the previous turn, picks one
    of the group's records at random, rolls the grid so that record's
    shipyard sits at cell (10, 10), appends the source-specific planes and
    returns the stack together with the encoded action index.
    """

    def __init__(self, samples):
        self.samples = samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        group_key, candidates = self.samples[idx]
        submission_id, episode_id, turn_idx = group_key
        npy_path_name = f"""../input/kore-2022-feature-generator/npy/{submission_id}_{episode_id}_{turn_idx-1:03d}_inputs.npy"""
        chosen = random.choice(candidates)
        board = np.load(npy_path_name)
        # center shipyard: leave the channel axis alone, shift the two grid axes
        shift = (0, 10 - chosen["shipyard_x"], 10 - chosen["shipyard_y"])
        board = np.roll(board, shift, axis=(0, 1, 2))
        board = append_source_specific_features(board)
        action_tuple = (chosen["action_class"], chosen["diff_x"], chosen["diff_y"])
        action = action_encoder(*action_tuple)
        # round-trip check on the encoding
        assert action_decoder(action) == action_tuple
        return board, action

### Training Loop

In [None]:
batch_size = 64

# Load a single sample once to discover how many feature planes the network
# receives (the first axis of the returned stack).
input_matrix, action = KoreDataset(train_samples)[200]
NUM_LAYERS = input_matrix.shape[0]
print(NUM_LAYERS, input_matrix.shape, action)

# Training batches are shuffled; validation batches keep the dataset order.
train_loader = DataLoader(KoreDataset(train_samples), batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(KoreDataset(val_samples), batch_size=batch_size, shuffle=False, num_workers=2)

def train_model(model, dataloaders_dict, criterion, optimizer, num_epochs):
    """Train ``model`` with a manual ``GradientTape`` loop.

    Every epoch runs a 'train' phase followed by a 'val' phase, printing the
    mean loss and accuracy of each. Whenever the validation accuracy improves
    on the best value seen so far, the weights are written to ``model.h5``.

    Args:
        model: Keras model called as ``model(states, training=...)`` and
            returning one score per class, shape ``(batch, num_classes)``.
        dataloaders_dict: mapping with 'train' and 'val' entries; each value
            yields ``(states, actions)`` batches and exposes ``.dataset``.
        criterion: callable ``criterion(y_true, y_pred)`` returning a scalar
            loss (assumed to be the batch mean, since it is re-weighted by
            the batch size below).
        optimizer: Keras optimizer providing ``apply_gradients``.
        num_epochs: maximum number of epochs to run.
    """
    best_acc = 0.0
    for epoch in range(num_epochs):
        epoch_acc = 0.0
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            running_loss = 0.0
            running_correct = 0
            dataloader = dataloaders_dict[phase]
            for states, actions in tqdm(dataloader, leave=False):
                states = tf.constant(states, dtype=tf.float32)
                actions = tf.constant(actions, dtype=tf.int32)
                if is_training:
                    # training=True so BatchNormalization uses batch statistics
                    # and updates its moving averages.
                    with tf.GradientTape() as tape:
                        policy = tf.cast(model(states, training=True), tf.float32)
                        loss = criterion(actions, policy)
                    # Gradients are taken outside the tape context so the
                    # backward pass itself is not recorded.
                    gradients = tape.gradient(loss, model.trainable_variables)
                    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
                else:
                    policy = tf.cast(model(states, training=False), tf.float32)
                    loss = criterion(actions, policy)
                # argmax yields shape (batch,), matching `actions`. Comparing a
                # (batch, 1) top-k result with (batch,) labels would broadcast
                # to a (batch, batch) matrix and miscount the hits.
                preds = tf.argmax(policy, axis=-1, output_type=tf.int32)
                hits = tf.reduce_sum(tf.cast(tf.equal(preds, actions), tf.int32))
                batch_len = int(actions.shape[0])
                running_loss += float(loss.numpy()) * batch_len
                running_correct += int(hits.numpy())
            data_size = len(dataloader.dataset)
            # Guard against an empty dataset instead of dividing by zero.
            epoch_loss = running_loss / data_size if data_size else 0.0
            epoch_acc = running_correct / data_size if data_size else 0.0
            print(f'Epoch {epoch + 1}/{num_epochs} | {phase:^5} | Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.4f}')
        # `epoch_acc` now holds the accuracy of the last phase, i.e. 'val'.
        if epoch_acc > best_acc:
            # A subclassed Keras model cannot be serialised with
            # `model.save(...)` to HDF5; only its weights can be stored there.
            # NOTE(review): newer Keras releases require a `.weights.h5`
            # suffix for save_weights - confirm against the installed version.
            model.save_weights('model.h5')
            best_acc = epoch_acc
        if os.environ.get("KAGGLE_KERNEL_RUN_TYPE") == "Interactive" and epoch == 2:
            break  # for interactive runs, only check that it is working

### Network Architecture

In [None]:
class BasicConv2d(layers.Layer):
    """A 'same'-padded ``Conv2D`` optionally followed by batch normalisation.

    Args:
        input_dim: accepted for signature parity with the caller; not used
            when building the layer.
        output_dim: number of convolution filters.
        kernel_size: convolution kernel size.
        bn: when truthy, a ``BatchNormalization`` layer is applied after the
            convolution.
    """

    def __init__(self, input_dim, output_dim, kernel_size, bn):
        super().__init__()
        conv_options = dict(kernel_size=kernel_size, padding='same')
        self.conv = layers.Conv2D(output_dim, **conv_options)
        if bn:
            self.bn = layers.BatchNormalization()
        else:
            self.bn = None

    def call(self, x):
        convolved = self.conv(x)
        if self.bn is None:
            return convolved
        return self.bn(convolved)

class KoreNet(tf.keras.Model):
    """Residual convolutional policy network over a 21x21 grid.

    Input: a channels-first batch ``(batch, NUM_LAYERS, 21, 21)``, which is
    the layout produced when the ``DataLoader`` stacks the per-sample
    ``(channel, x, y)`` arrays.

    Output: raw (un-normalised) scores of shape ``(batch, 3*21*21)``, laid
    out as ``action_class*21*21 + x*21 + y`` so that an index can be decoded
    with ``action_decoder``.
    """

    def __init__(self):
        super().__init__()
        # Named `num_blocks` rather than `layers` to avoid shadowing the
        # `tensorflow.keras.layers` module alias inside this method.
        num_blocks, filters = 12, 32
        self.conv0 = BasicConv2d(NUM_LAYERS, filters, (3, 3), True)
        self.blocks = [BasicConv2d(filters, filters, (3, 3), True) for _ in range(num_blocks)]
        self.conv1 = BasicConv2d(filters, 3, (3, 3), True)
        self.flat = tf.keras.layers.Flatten()

    def call(self, x):
        # Keras Conv2D defaults to channels-last. Feeding the channels-first
        # batch directly would treat the feature axis as a spatial axis, so
        # move it to the end first: (B, C, X, Y) -> (B, X, Y, C).
        h = tf.transpose(x, perm=[0, 2, 3, 1])
        h = tf.nn.relu(self.conv0(h))
        for block in self.blocks:
            h = tf.nn.relu(h + block(h))
        h = self.conv1(h)
        # Back to class-major order, (B, X, Y, 3) -> (B, 3, X, Y), so that
        # flattening matches the action_class*441 + x*21 + y encoding.
        h = tf.transpose(h, perm=[0, 3, 1, 2])
        return self.flat(h)

### Actual Training

In [None]:
kore_net = KoreNet()
# KoreNet returns raw scores (its last step is a flatten, with no softmax),
# so the loss must treat them as logits rather than as probabilities.
criterion = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(1e-3)

dataloaders_dict = {"train": train_loader, "val": val_loader}
train_model(kore_net, dataloaders_dict, criterion, optimizer, num_epochs=3)

# Inference

In [None]:
# Run the network over every validation batch and print the prediction and
# label shapes; `states`, `actions` and `p` keep the values of the last batch.
for states, actions in val_loader:
    states_tf = tf.constant(states, dtype=tf.float32)
    p = kore_net(states_tf)
    print(p.shape, actions.shape)

In [None]:
assert states[0].numpy()[-2,10,10] != 0

In [None]:
plot_3d_matrix(states[0].numpy())

In [None]:
kore_slice = states[0].numpy()[:1]
plot_3d_matrix(kore_slice, scene_camera_eye=dict(x=3, y=3, z=3))

In [None]:
kore_slice = states[0].numpy()[1:2]
plot_3d_matrix(kore_slice, scene_camera_eye=dict(x=3, y=3, z=3))

In [None]:
action_decoder(actions[0].numpy())

The use of the model to build an imitation agent will be done in another notebook.
