# Visualizing Nash Strategy Prediction from the Code Vector

This notebook visualizes the strategies predicted by a model that has been trained to choose actions in a matrix game (for both players) based on the embedding (code vector) of the program that generates the game matrix.

Random test programs are generated for the visualization.

In [None]:
import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt
%matplotlib inline

import itertools
from contextlib import nullcontext
import os, sys
# Put the ./code2vec directory on the import path so the code2vec imports below can resolve
sys.path.append('./code2vec')
# Replace the kernel's argv with a dummy script name — presumably so that any argument parsing
# triggered by the code2vec imports does not see Jupyter's own arguments (TODO confirm)
sys.argv = ['test.py']
# NOTE(review): `argc` is not a standard attribute of `sys`; this only attaches a new attribute —
# confirm whether anything actually reads it
sys.argc = len(sys.argv)

from code2vec.config import Config
from code2vec.code2vec import load_model_dynamically

from data.dataloader import MatrixProgramEmbeddingDataset

from data.gen_matrix_progs import gen_matrix_progs_col_stripe as gen_progs

from trainers.nash_trainer import Code2VecNashBCTrainer

from nets.nash_predictor_model import NashPredictor, NashPredictorConv

In [None]:
# Settings for the randomly generated programs that are used for evaluation.
# N is shared with matrix_config, where it sets the side length of the base array.
N = 16
prog_config = {
    "N": N,
    "num_programs": 16,
    "num_statements": 3,
    "stripe_size": 6,
    "stripe_chance": 0.15,
}

In [None]:
# Materialise the generated test programs into a list; the same list is handed to the trainer
progs = [*gen_progs(**prog_config)]

In [None]:
# Matrix options: an N x N all-zero base array, with the nash_solutions option switched on
matrix_config = dict(
    matrix_args=dict(base_array=np.zeros((N, N))),
    opts=dict(nash_solutions=True),
)

In [None]:
# Options passed to the trainer (batching, output location, logging/saving cadence, data split)
training_config = dict(
    batch_size=1,
    output_dir="models/nash_prediction/",
    log_interval=20,
    save_interval=20,
    train_split=[0.5, 0.5],
)

In [None]:
# Build the trainer; its validation dataloader is what the cells below iterate over.

# Checkpoint of the trained model that predicts the Nash strategies
checkpoint_path = "/path/to/model.pt"

trainer = Code2VecNashBCTrainer(
    prog_config, matrix_config, training_config,
    load_path=checkpoint_path,
    model_class=NashPredictorConv,  # pick the model class appropriate for the checkpoint
    progs=progs,
)

In [None]:
# Gather the Nash targets, predicted strategies and game matrices for every validation batch.
#
# NOTE(review): `model_outputs` is not defined in any cell visible in this notebook. It must
# already exist in the kernel (indexable by batch number, each entry a pair of policy
# log-probability tensors) for this cell to run — confirm where it is meant to come from.

nashs, pis, mats = [], [], []
for batch_idx, batch in enumerate(trainer.valid_dataloader):
    with torch.no_grad():
        code_vectors, matrices, nash, cost = batch
        log_pi1, log_pi2 = model_outputs[batch_idx]
        # Exponentiate the log-probabilities to get probabilities
        pi1, pi2 = log_pi1.exp(), log_pi2.exp()
        nashs.append((nash[:, 0, :], nash[:, 1, :]))
        pis.append((pi1, pi2))
        mats.append(matrices)

In [None]:
# Assemble the game matrices and both players' strategies into one RGB image grid

def _tile_strategy(player, transpose):
    """Normalise every collected strategy of `player` by its max and tile it N times.

    The result is viewed as (-1, 1, N, N). Without `transpose` each strategy entry fills one
    column of its image; with `transpose` the last two axes are swapped so it fills one row.
    """
    normalised = torch.cat([pair[player] / torch.max(pair[player]) for pair in pis], dim=0)
    tiled = torch.cat([normalised] * N, dim=1).view(-1, 1, N, N)
    return tiled.transpose(2, 3) if transpose else tiled


# Per-channel (R, G, B) colouring: every channel starts at 0.95 and is shifted by scale * probability
p1_channel_scales = (-1.0, -0.3, -0.3)
p2_channel_scales = (-1.0, 0.0, -1.0)

# First policy of each pair, drawn as horizontal stripes (one per matrix row)
pi_tensors = [0.95 + scale * _tile_strategy(0, transpose=True) for scale in p1_channel_scales]
# Second policy of each pair, drawn as vertical stripes (one per matrix column)
pi1_tensors = [0.95 + scale * _tile_strategy(1, transpose=False) for scale in p2_channel_scales]

# The game matrices are shown in grey: the same scaled tensor is reused for all three channels
game_images = 0.9 * torch.cat(mats, dim=0).view(-1, 1, N, N)
mat_tensors = [game_images] * 3

# For each colour channel, line up all matrices, then all first-policy images, then all
# second-policy images, so that the grid shows them in that order.
num_games = mat_tensors[0].shape[0]
pi_mat_tensors = [
    torch.stack([
        group[c][k]
        for group in (mat_tensors, pi_tensors, pi1_tensors)
        for k in range(num_games)
    ])
    for c in range(3)
]

# Join the three channels into RGB images and tile them, 8 images per grid row
pi_mat_grid = torchvision.utils.make_grid(torch.cat(pi_mat_tensors, dim=1), pad_value=1.0, nrow=8)

## Plot the results:

* The top row shows the matrix generated by the programs (note that these are not given as inputs to the network).
* The middle row visualizes the policy probabilities for player 1. The intensity of the color of each horizontal line represents the  probability of player 1 choosing that row of the matrix.
* The probabilities are normalized so that the largest value for any particular strategy is 1 (darkest).
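* Similarly, the bottom row visualizes the policy probabilities for player 2 for choosing columns of the matrix. The intensity of the color of each vertical line represents the probability of player 2 choosing that column.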

In [None]:
# Move the channel axis last, which is the (H, W, C) layout imshow expects for RGB data
grid_image = np.transpose(pi_mat_grid.numpy(), (1, 2, 0))

# The strategy channels are built as 0.95 + scale * p with scale = -1.0 and p normalised to a
# maximum of 1, so the darkest pixels land at -0.05. Clip explicitly to the valid float RGB
# range so imshow does not have to warn and clip implicitly; the rendered image is unchanged.
grid_image = np.clip(grid_image, 0.0, 1.0)

# Create the figure at its final size up front instead of resizing the implicit current figure
fig, ax = plt.subplots(figsize=(8.5, 22))
ax.imshow(grid_image)
plt.show()