In [1]:
import evaluate
import random

import torch
import torch.nn.functional as F
import numpy as np
from torch.utils.data import Dataset, DataLoader 
from tqdm import tqdm

# Logging
import wandb 

# Visualization Tools
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns

# Our code
from dataloader import *
from trainer import POCMLTrainer
from model import POCML, sim
from visualizer import * 
from utils import *


# Environment & Data Configuration

In [2]:
# Environment and dataset setup for this notebook: builds a GraphEnv, draws a
# train and a test dataset from it, and wraps both in DataLoaders.

# --- Disabled alternative configuration (3x3 grid, 12-step trajectories) ---
# torch.autograd.set_detect_anomaly(True)

# n_nodes = 9
# n_obs = 9
# trajectory_length = 12  # number of node visits in a trajectory
# num_desired_trajectories= 20

# env = GraphEnv( n_items=n_nodes,                     # number of possible observations
#                 env='grid', 
#                 batch_size=trajectory_length, 
#                 num_desired_trajectories=num_desired_trajectories, 
#                 device=None, 
#                 unique=True,                         # each state is assigned a unique observation if true
#                 args = {"rows": 3, "cols": 3}
#             )

# #dataset = RandomWalkDataset(env.adj_matrix, trajectory_length, num_desired_trajectories, n_obs, env.items)
# train_dataset = env.dataset.data

# env.gen_dataset()
# test_dataset = env.dataset.data

# train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True)
# test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=True)

# --- Active configuration ---
# NOTE(review): autograd anomaly detection is a debugging aid that the PyTorch
# docs say slows execution; confirm it is meant to stay on for full sweeps.
torch.autograd.set_detect_anomaly(True)

# NOTE(review): the grid built below is 5x5, yet n_nodes is 9 and is what gets
# passed as n_items -- confirm this is the intended number of observations.
n_nodes = 9
#batch_size = 16        # Note: in the original CML, trajectory length == batch_size; in POCML this should be decoupled
# NOTE(review): n_obs is not used anywhere in this cell (GraphEnv receives n_nodes).
n_obs = 9
trajectory_length = 20  # number of node visits in a trajectory
num_desired_trajectories= 30

# choose env from "random", "small world" or "dead ends" (the active call below uses 'grid')
# TODO decouple batch_size from trajectory length for GraphEnv
# env = GraphEnv(n_nodes=n_nodes, n_items=n_obs, env='regular', action_type = "regular", batch_size=trajectory_length, num_desired_trajectories=num_desired_trajectories, unique = True)

env = GraphEnv( n_items=n_nodes,                     # number of possible observations
                env='grid', 
                batch_size=trajectory_length, 
                num_desired_trajectories=num_desired_trajectories, 
                device=None, 
                unique=True,                         # each state is assigned a unique observation if true
                args = {"rows": 5, "cols": 5}
            )
# Disabled alternative: 'two tunnel' environment.
# env = GraphEnv( n_items=n_nodes,                     # number of possible observations
#                 env='two tunnel', 
#                 batch_size=trajectory_length, 
#                 num_desired_trajectories=num_desired_trajectories, 
#                 device=None, 
#                 unique=True,                         # each state is assigned a unique observation if true
#                 args = {"tunnel_length": 1, "middle_tunnel_length": 1}
#             )

#dataset = RandomWalkDataset(env.adj_matrix, trajectory_length, num_desired_trajectories, n_obs, env.items)
# Train and test sets come from two separate gen_dataset calls on the same env.
# NOTE(review): presumably each call samples fresh trajectories -- confirm in GraphEnv.
train_dataset = env.gen_dataset(fixed_start=False)
test_dataset = env.gen_dataset(fixed_start=False)

# batch_size=1: each batch yielded by the loaders holds a single dataset item.
train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=True)

# Set config & hyperparameter pools for wandb

In [3]:
# Hyperparameter pool for the sweep.
# Convention: for each hyperparameter key, set the value to a list if you want to try multiple values
param_pool = {
    # data-related config; read from the live environment / data settings so the
    # values recorded for each run always describe the datasets actually built
    # (previously hard-coded to 12 and 20, which disagreed with the data cell)
    "n_obs" : env.n_items,
    "n_states" : env.size,
    "n_actions" : env.n_actions,
    "trajectory_length" : trajectory_length,  # number of node visits in a trajectory
    "num_desired_trajectories" : num_desired_trajectories,
    # Experiments
    "seed": [68, 70],
    # model
    "state_dim" : [500, 1000],
    "random_feature_dim" : 2000,
    "alpha" : 1,
    "beta_obs" : 8,
    "beta_state" : 8,
    "memory_bypass" : False,
    "mem_reweight_rate" : "adaptive",
    "decay" : "adaptive",
    # trainer
    "lr_Q_o" : 0.1,
    "lr_V_o" : 0.01,
    "lr_Q_s" : 0.,
    "lr_V_s" : 0.,
    "lr_all" : 1,
    "normalize" : False,
    "reset_every" : 1,
    "refactor_memory" : True,
    # training / optimizer
    "epochs" : 10,
}

# Run-wide switches forwarded to run_trial.
debug = False
log = True

In [4]:
def generate_run_name(params):
    """Build the run name for one hyperparameter combination.

    Args:
        params: mapping that provides "seed", "state_dim" and
            "random_feature_dim".

    Returns:
        A string of the form "grid_sd_<seed>_sdim_<state_dim>_rfdim_<random_feature_dim>".
    """
    # TODO customize run name
    seed = params["seed"]
    state_dim = params["state_dim"]
    rf_dim = params["random_feature_dim"]
    return f"grid_sd_{seed}_sdim_{state_dim}_rfdim_{rf_dim}"

In [5]:
def run_trial(params, train_dataloader, test_dataloader, debug=False, log=False):
    """Train and evaluate one POCML configuration, optionally logging to wandb.

    Args:
        params: dict holding a single hyperparameter combination. "seed" and
            "epochs" are read directly; the dict is also handed to
            generate_run_name and filtered through filter_param to build the
            POCML and POCMLTrainer constructor arguments.
        train_dataloader: loader used for training and for the train accuracy.
        test_dataloader: loader used for the test accuracy.
        debug: when true, print the kernel similarities and evaluation
            metrics; also forwarded to the trainer.
        log: when true, open a wandb run, store the summary metrics and close
            the run; also forwarded to the trainer and to visualize.

    Returns:
        The POCMLTrainer instance used for this trial.

    Note:
        Besides its arguments, this function reads the notebook-level names
        env, num_desired_trajectories and trajectory_length.
    """
    run_name = generate_run_name(params)

    if log:
        wandb.init(
            # Set the project where this run will be logged
            project="POCML",
            # We pass a run name (otherwise it'll be randomly assigned, like sunshine-lollypop-10)
            name=run_name,
            # Track hyperparameters and run metadata
            config=params,
        )

    succeeded = False
    try:
        # Set random seed
        set_random_seed(params["seed"])

        # Filter parameters to match the model & trainer's __init__ signature
        trainer_params = filter_param(params, POCMLTrainer)
        model_params = filter_param(params, POCML)

        # Instantiate the model & trainer using the filtered dictionaries
        model = POCML(**model_params)
        trainer = POCMLTrainer(model=model, train_loader=train_dataloader,
                               log=log, debug=debug, **trainer_params)

        # Train; from here on `model` is whatever the trainer hands back.
        loss_record, model = trainer.train(params["epochs"])

        ## Analytics
        # get state & action kernel similarities
        phi_Q = model.get_state_kernel()
        phi_V = model.get_action_kernel()
        k_sim_Q = sim(phi_Q, phi_Q)
        k_sim_V = sim(phi_V, phi_V)

        ## Evaluations
        train_acc, train_confidences = evaluate.accuracy(model, train_dataloader)
        test_acc, test_confidences = evaluate.accuracy(model, test_dataloader)
        sa_acc, sa_confidences, sa_distance_ratios = evaluate.state_transition_consistency(model, env)

        if debug:
            print("State kernel similarities:\n", k_sim_Q)
            print("Action kernel similarities:\n", k_sim_V)

            print("Train obs accuracy/confidence:", train_acc, np.mean(train_confidences))
            print("Test obs accuracy/confidence:", test_acc, np.mean(test_confidences))
            print("State-action accuracy/confidence/distance ratio:", sa_acc, np.mean(sa_confidences), np.mean(sa_distance_ratios))

        ## Visualization
        visualize(model.get_state_differences().numpy(), legend="State", title="MDS State Differences", log=log)
        visualize(model.get_action_differences().numpy(), legend="Action", title="MDS Action Differences", log=log)
        visualize_loss(loss_record, num_desired_trajectories, trajectory_length, per_epoch=False)
        visualize_loss(loss_record, num_desired_trajectories, trajectory_length, per_epoch=True)

        # TODO log per-step train / validation metrics with wandb.log

        # Log the results. Only valid while a run is active, so this must be
        # skipped when log is False (no wandb.init was issued above).
        if log:
            wandb.summary['train_acc'] = train_acc
            wandb.summary['train_conf'] = np.mean(train_confidences)
            wandb.summary['test_accuracy'] = test_acc
            wandb.summary['test_conf'] = np.mean(test_confidences)
            wandb.summary['sa_accuracy'] = sa_acc
            wandb.summary['sa_conf'] = np.mean(sa_confidences)

        succeeded = True
    finally:
        # Always close the run we opened, so a failing trial does not leave a
        # dangling wandb run behind; a failed trial is marked with exit code 1.
        if log:
            if succeeded:
                wandb.finish()
            else:
                wandb.finish(exit_code=1)

    # TODO log models
    # # beta_obs, beta_state, clean up rate
    # torch.save(model.state_dict(), "model/model_12_12_1.ckpt")

    return trainer

In [6]:
# Run one trial per hyperparameter combination.
# Use the loader to generate combinations one at a time
param_loader = generate_combinations_loader(param_pool)

# NOTE(review): run_trial returns the POCMLTrainer, not the POCML model, so
# `model` below actually holds a trainer; it is overwritten on every iteration,
# so only the last trial's trainer remains after the loop.
for params in param_loader:
    model = run_trial(params, train_dataloader, test_dataloader, debug=debug, log=log)


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mzouzhuowen[0m ([33mevanjeong[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112282533172726, max=1.0…

Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.