In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.optim as optim
import os

from toddler.action_coding import mass_answers, force_answers
from toddler.models import ValueNetwork
from toddler.RecurrentWorker import train
from toddler.validate import validate

from isaac.models import ComplexRNNModel

from toddler.simulator.config import generate_every_world_configuration, generate_cond

from generate_passive_simulations import get_configuration_answer

In [2]:
model_directory = "models/answer_questions/"
data_directory = "answer_questions_plots/"

In [3]:
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

In [4]:
print(device)

cuda:0


In [5]:
question_type = "force"

In [6]:
discount_factor = 0.95

every_conf = generate_every_world_configuration()
every_world_answer = np.array(list(map(get_configuration_answer, every_conf)))
n_configurations = len(every_conf)

train_size = 0.7
val_size = 0.15
test_size = 0.15

all_indices = np.arange(n_configurations)
train_indices, not_train_indices = train_test_split(all_indices, train_size=train_size,
                                                    random_state=0, stratify=every_world_answer)
val_indices, test_indices = train_test_split(not_train_indices, train_size=0.5,                
                                             random_state=0,
                                             stratify=every_world_answer[not_train_indices])

N_WORLDS = 10000
timeout = 1800
TOTAL_STEPS = min(N_WORLDS*timeout, np.inf)

print("N_WORLDS", N_WORLDS, "TOTAL_STEPS", TOTAL_STEPS)

torch.manual_seed(0)
np.random.seed(0)
repeated_train_indices = np.random.choice(train_indices, N_WORLDS, replace=True)
train_cond = generate_cond(every_conf[repeated_train_indices])

experience_replay = ()
agent_answers = ()

n_bodies = 2
action_repeat = 1
starting_step = 0
starting_episode = 0

if question_type == "mass":
    force_answers = {}
else:
    mass_answers = {}

for cond in train_cond:
    cond["timeout"] = timeout
    
training_dfs = []
for seed in [0, 42, 72]:
    
    this_seed_model_directory = model_directory + question_type + "/" + str(seed) + "/"
    if not os.path.exists(this_seed_model_directory):
        os.makedirs(this_seed_model_directory)
    
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    net_params = {"input_dim":17, "hidden_dim":25, "n_layers":4, "output_dim":9, "dropout":0.0}
    value_network = ValueNetwork(**net_params).to(torch.device(device))
    optimizer = optim.Adam(value_network.parameters(), lr=5e-4)
    
    agent_cond = train_cond

    trainingArgs = {"value_network": value_network, "optimizer": optimizer, 
                    "discount_factor": discount_factor, "starting_episode": starting_episode, 
                    "train_cond": agent_cond, "timeout": timeout,
                    "experience_replay": experience_replay, "agent_answers": agent_answers, 
                    "n_bodies": n_bodies, "training_data": None,
                    "action_repeat": action_repeat, "model_directory": this_seed_model_directory, 
                    "current_step": starting_step, "total_steps": TOTAL_STEPS, 
                    "device": device, "reward_control": False, "done_with_control": False, 
                    "reward_not_controlling_negatively": False, "reward_not_answering_negatively": True, 
                    "possible_actions": np.arange(0, 9), "mouse_exploration_frames": 600, 
                    "mass_answers": mass_answers, "force_answers": force_answers, 
                    "force_answer_at_t": (timeout - 1), "force_answer_at_t": (timeout - 1),
                    "sample_n_episodes": 64, "experience_replay_max_size": 256}
    
    training_data = train(**trainingArgs)

    df = pd.DataFrame.from_dict(training_data)
    df["seed"] = seed
    training_dfs.append(df)

N_WORLDS 10000 TOTAL_STEPS 18000000


control 0.41 aciertos 0.48 (UB: 1.00) eps: 0.00 done @ 1563 vloss 0.029916 qloss 0.000000 accuracy 0.00: 100%|██████████| 10000/10000 [1:45:18<00:00,  1.33it/s] 
control 0.11 aciertos 0.41 (UB: 1.00) eps: 0.00 done @ 1799 vloss 0.024076 qloss 0.000000 accuracy 0.00: 100%|██████████| 10000/10000 [1:40:11<00:00,  1.45it/s]
control 0.61 aciertos 0.45 (UB: 1.00) eps: 0.00 done @ 601 vloss 0.028235 qloss 0.000000 accuracy 0.00: 100%|██████████| 10000/10000 [1:42:21<00:00,  2.09it/s] 


In [7]:
training_dfs = pd.concat(training_dfs)

In [8]:
training_dfs.to_hdf(data_directory+"training_data.h5", key="training_data")

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block2_values] [items->['value_loss']]

  return pytables.to_hdf(path_or_buf, key, self, **kwargs)


In [9]:
training_dfs

Unnamed: 0,control,correct_answer,episode_length,question_loss,value_loss,seed
0,0,True,613,0,"tensor(0.0426, device='cuda:0')",0
1,1,False,603,0,"tensor(0.0469, device='cuda:0')",0
2,1,False,605,0,"tensor(0.0484, device='cuda:0')",0
3,0,False,607,0,"tensor(0.0482, device='cuda:0')",0
4,0,False,601,0,"tensor(0.0477, device='cuda:0')",0
5,1,True,601,0,"tensor(0.0448, device='cuda:0')",0
6,0,False,613,0,"tensor(0.0437, device='cuda:0')",0
7,1,False,607,0,"tensor(0.0425, device='cuda:0')",0
8,1,False,615,0,"tensor(0.0412, device='cuda:0')",0
9,1,False,603,0,"tensor(0.0400, device='cuda:0')",0
