In [1]:
import json
import pandas as pd
from isaac.constants import BASIC_TRAINING_COLS, MASS_CLASS_COLS, FORCE_CLASS_COLS
import numpy as np

In [2]:
from isaac.dataset import read_dataset

In [3]:
from rpy2.robjects import r, pandas2ri
# Switch on rpy2's pandas conversion layer. The R-data cell at the end of this notebook
# calls `.iloc` on an object fetched through `r[...]`, so it presumably relies on this
# conversion being active — TODO confirm against the installed rpy2 version.
pandas2ri.activate()


 



In [4]:
import numpy as np
from tqdm import tqdm

In [13]:
def save_dataset(hdf_path, trials):
    """Persist every trial to one HDF5 file, one key per trial.

    Args:
        hdf_path: destination path handed to each trial's ``to_hdf``.
        trials: sized sequence of objects exposing ``to_hdf`` (``len`` is
            taken to size the progress bar).

    Trial number ``i`` (0-based position in ``trials``) is stored under the
    key ``"trial_<i>"``. Nothing is returned.
    """
    numbered_trials = enumerate(trials)
    progress = tqdm(numbered_trials, total=len(trials))
    for position, frame in progress:
        storage_key = "trial_%d" % position
        frame.to_hdf(hdf_path, key=storage_key)
        
def add_r_theta_attributes(trials):
    """Add polar velocity columns for objects o1..o4 to a trial frame.

    For each object ``oN`` two columns are written in place:

    * ``oN.r``     -- speed, ``sqrt(vx**2 + vy**2)``.
    * ``oN.theta`` -- ``arctan2(vx, vy)`` converted to degrees.

    Args:
        trials: despite the plural name (kept for interface compatibility),
            this is a single DataFrame-like object holding the columns
            ``oN.vx`` and ``oN.vy`` for N in 1..4, as it is called in this
            notebook.

    Returns:
        The same object that was passed in, with the new columns added.

    Note:
        The previous body read and wrote a global named ``trial`` instead of
        its argument, so it only worked when the caller's loop variable
        happened to have that name.
    """
    for obj in ["o1", "o2", "o3", "o4"]:
        vx = trials[obj + ".vx"]
        vy = trials[obj + ".vy"]
        trials[obj + ".r"] = (vx**2 + vy**2)**0.5
        # NOTE(review): arctan2 is called as (vx, vy), i.e. vx is the "y"
        # argument. Kept as in the original -- confirm the angle convention.
        trials[obj + ".theta"] = (np.arctan2(vx, vy) * 180 / np.pi)

    return trials

## Reading the JSONs

In [5]:
# Convert every passive-simulation JSON file into per-trial DataFrames and store
# each one in a single HDF5 file under the key "trial_<n>".
hdf_path = "data/r_passive_trials.h5"
trial_i = 0  # running index across all conditions; used to build the HDF5 key

# Numeric relationship code found in the JSON -> label used in the rest of the notebook.
tR_to_text = {3: "attract", 0: "none", -3: "repel"}

for condition_id in tqdm(range(1, 244)):
    filename = "data/for_hector/passive_simulations/physics_data%d.json" % condition_id
    # Use a context manager so the handle is closed after parsing; the previous
    # bare open() left one file descriptor open per condition file.
    with open(filename) as fd:
        sim_data = json.load(fd)
    for sim in sim_data:
        trial = pd.DataFrame(columns=sim.keys())

        for key in sim.keys():
            # These entries are repeated on every row; everything else is
            # assigned as-is as a column.
            # Presumably 2700 is the number of samples per simulation -- TODO confirm.
            if key in ["tR", "tM" , "practice", "refreshes", "trial_type", "condition_world_variant", "world_id"]:
                trial[key] = [sim[key]] * 2700
            else:
                trial[key] = sim[key]

        # Replace the numeric relationship code with its text label.
        trial["tR"] = tR_to_text[trial.tR.unique()[0]]
        trial["combined_solution"] = trial["tM"] + "_" + trial["tR"]

        # One-hot indicator columns for the tM answer ...
        trial["A"] = (trial.tM == "A")
        trial["B"] = (trial.tM == "B")
        trial["same"] = (trial.tM == "same")

        # ... and for the tR answer.
        trial["attract"] = (trial.tR == "attract")
        trial["none"] = (trial.tR == "none")
        trial["repel"] = (trial.tR == "repel")

        trial["condition"] = condition_id

        trial.to_hdf(path_or_buf=hdf_path, key="trial_"+str(trial_i))
        trial_i += 1

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->['tM', 'refreshes', 'tR', 'combined_solution']]

  return pytables.to_hdf(path_or_buf, key, self, **kwargs)
100%|██████████| 243/243 [1:10:18<00:00, 16.30s/it]


## Dividing into train, validation and test trials and adding R and theta attributes

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
# Load the trials written to the passive-trials HDF5 file. The cells below iterate
# over `all_trials` and treat each element as one per-trial DataFrame.
all_trials = read_dataset("data/r_passive_trials.h5")

100%|██████████| 10935/10935 [03:59<00:00, 45.65it/s]


In [8]:
# Map each world id to its combined solution label, both read from the first row
# of every trial. A world id that appears in several trials keeps the label of
# the last trial seen.
w_to_s = {}

for trial in all_trials:
    first_world_id = trial["world_id"].iloc[0]
    first_solution = trial["combined_solution"].iloc[0]
    w_to_s[first_world_id] = first_solution

In [9]:
# Seed NumPy's global RNG before splitting; no random_state is passed to
# train_test_split below.
np.random.seed(37)

world_ids = list(w_to_s)
solutions = list(w_to_s.values())

# First split: half of the worlds go to training, stratified by solution label.
train_wids, holdout_wids, train_sols, holdout_sols = train_test_split(
    world_ids, solutions, test_size=0.5, stratify=solutions
)
# Second split: the held-out half is divided evenly into validation and test.
val_wids, test_wids, val_sols, test_sols = train_test_split(
    holdout_wids, holdout_sols, test_size=0.5, stratify=holdout_sols
)

In [10]:
# Show how many worlds of each solution label landed in train, validation and test.
for split_solutions in (train_sols, val_sols, test_sols):
    label_counts = pd.Series(split_solutions).value_counts()
    print(label_counts)

same_none       122
B_none          122
A_none          122
A_attract       122
B_repel         121
same_repel      121
A_repel         121
B_attract       121
same_attract    121
dtype: int64
B_none          61
same_repel      61
A_repel         61
B_attract       61
A_attract       61
same_attract    61
B_repel         61
same_none       60
A_none          60
dtype: int64
same_none       61
same_repel      61
A_repel         61
A_none          61
B_attract       61
same_attract    61
B_repel         61
B_none          60
A_attract       60
dtype: int64


In [14]:
# Add the r/theta columns to every trial and route it to the split that owns its world.
train_trials = []
val_trials = []
test_trials = []

# Membership is tested once per trial; sets make each lookup O(1) instead of a
# linear scan over the id lists.
train_wid_set = set(train_wids)
val_wid_set = set(val_wids)

for trial in tqdm(all_trials):
    trial = add_r_theta_attributes(trial)

    world_id = trial.world_id.unique()[0]

    if world_id in train_wid_set:
        train_trials.append(trial)
    elif world_id in val_wid_set:
        val_trials.append(trial)
    else:
        # Anything not claimed by train or validation is treated as test.
        test_trials.append(trial)

100%|██████████| 10935/10935 [00:56<00:00, 193.73it/s]


In [15]:
# Destination files for the three splits.
train_hdf = "data/r_train_trials.h5"
val_hdf = "data/r_val_trials.h5"
test_hdf = "data/r_test_trials.h5"

# Write the splits in train -> validation -> test order.
for split_path, split_trials in (
    (train_hdf, train_trials),
    (val_hdf, val_trials),
    (test_hdf, test_trials),
):
    save_dataset(split_path, split_trials)

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->['tM', 'refreshes', 'tR', 'combined_solution']]

  return pytables.to_hdf(path_or_buf, key, self, **kwargs)
100%|██████████| 5465/5465 [09:48<00:00,  4.85it/s]
100%|██████████| 2735/2735 [02:38<00:00,  8.71it/s]
100%|██████████| 2735/2735 [02:40<00:00,  8.82it/s]


# Convert the per-world R data files into per-trial DataFrames stored in one HDF5 file.
hdf_path = "data/r_passive_trials_no_restart.h5"
trial_id = 0  # running index across all worlds; used to build the HDF5 key

for world_i in tqdm(range(1, 2188)):
    rdata_path = "data/for_hector/passive_simulations/w_%d.rdata" % world_i
    r['load'](rdata_path)
    # Row of the R object "key" for this world (1-based world number -> 0-based row).
    key = r["key"].iloc[world_i - 1]
    trials = r["sim_trials"]

    for world_trial in trials:
        world_trial = pandas2ri.ri2py_dataframe(world_trial)
        n_rows = world_trial.shape[0]

        # (column name, constant value) pairs, written in this order; each value
        # is repeated on every row of the trial.
        # NOTE(review): "target_fAB" repeats the "attract" condition, and the f*
        # columns hold booleans compared against fixed constants rather than the
        # values themselves. This matches the original code -- confirm it is intended.
        constant_columns = [
            ("A", key.target_heavier == "A"),
            ("B", key.target_heavier == "B"),
            ("same", key.target_heavier == "same"),
            ("attract", key.target_fAB == 3),
            ("none", key.target_fAB == 0),
            ("repel", key.target_fAB == -3),
            ("target_fAB", key.target_fAB == 3),
            ("fAC", key.fAC == 3),
            ("fAD", key.fAD == 0),
            ("fBC", key.fBC == -3),
            ("fBD", key.fBD == 0),
            ("fCD", key.fCD == -3),
            ("world_id", world_i),
        ]
        for column_name, constant_value in constant_columns:
            world_trial[column_name] = np.full(n_rows, constant_value)

        world_trial.to_hdf(hdf_path, key="trial_%d" % trial_id)

        trial_id += 1