In [15]:
import os
import sys
# Set the USE_PYGEOS switch to '0' before any geopandas-dependent import runs.
# NOTE(review): '0' is understood to turn pygeos OFF in geopandas — confirm this is the intent.
os.environ['USE_PYGEOS'] = '0'

import warnings
import h5py as h5

from glob import glob
from tqdm.auto import tqdm

from multiresticodm.utils import *
from multiresticodm.config import Config
from multiresticodm.notebook_functions import *
from multiresticodm.global_variables import XARRAY_SCHEMA

# Silence every Python warning from here on (applies to the whole kernel session)
warnings.simplefilter("ignore")
# mpl.rcParams['agg.path.chunksize'] = 10000

In [2]:
%matplotlib inline

# AUTO RELOAD EXTERNAL MODULES
%load_ext autoreload
%autoreload 2

## Import outputs

In [12]:
# Experiment id
dataset = 'cambridge_work_commuter_lsoas_to_msoas'
root_dir = f'../../data/outputs/{dataset}/mcmc/'
geometry_name = 'lsoas_to_msoas'
origin_geometry_name = 'lsoa'
destination_geometry_name = 'msoa'

# Entries of root_dir that must not be treated as experiment output folders
unwanted_folders = [
    'first_run_of_experiments',
    'paper_figures',
    'second_run_of_experiments_bin',
    'summaries',
    '._.DS_Store',
    '.DS_Store',
]

# Every remaining entry of root_dir, in alphabetical order
output_folders = sorted(
    entry
    for entry in os.listdir(root_dir)
    if entry not in unwanted_folders
)

In [13]:
# True: reopen the existing data.h5 of every sweep and re-create its datasets in place.
# False: build data.h5 from the .npy sample batches of the matching old experiment.
load_h5 = False
# Folder (under root_dir) of the swept experiment whose config.json is read
# and under which the per-sweep samples/ directories are written.
experiment_id = 'JointTableSIM_MCMC_SweepedNoise_16_05_2023_20_09_04'

In [26]:
def get_old_experiment_id(sweep_param):
    """Return the id of the legacy experiment that matches a sweep configuration.

    ``sweep_param['sigma']`` selects the noise regime: values at or below
    0.01415 map to the ``exp6`` LowNoise runs, larger values map to the
    ``exp14`` HighNoise runs. ``sweep_param['title']`` then selects the run
    within that regime.

    Returns the legacy experiment id as a string, or ``None`` when the title
    is not one of the known ones (or sigma compares as neither low nor high).
    """
    noise_threshold = 0.01415

    low_noise_runs = (
        ('_unconstrained', 'exp6_JointTableSIMLatentMCMC_LowNoise_unconstrained_18_05_2023_11_16_28'),
        ('_total_constrained', 'exp6_JointTableSIMLatentMCMC_LowNoise_grand_total_17_05_2023_21_33_50'),
        ('_row_constrained', 'exp6_JointTableSIMLatentMCMC_LowNoise_row_margin_production_constrained_13_06_2023_11_52_01'),
        ('_doubly_constrained', 'exp6_JointTableSIMLatentMCMC_LowNoise_both_margins_26_05_2023_15_50_42'),
        ('_doubly_10%_cell_constrained', 'exp6_JointTableSIMLatentMCMC_LowNoise_both_margins_permuted_cells_10%_05_06_2023_10_30_49'),
        ('_doubly_20%_cell_constrained', 'exp6_JointTableSIMLatentMCMC_LowNoise_both_margins_permuted_cells_20%_05_06_2023_10_30_53'),
    )
    high_noise_runs = (
        ('_unconstrained', 'exp14_JointTableSIMLatentMCMC_HighNoise_unconstrained_23_05_2023_11_55_33'),
        ('_total_constrained', 'exp14_JointTableSIMLatentMCMC_HighNoise_grand_total_23_05_2023_11_15_23'),
        ('_row_constrained', 'exp14_JointTableSIMLatentMCMC_HighNoise_row_margin_production_constrained_13_06_2023_14_03_14'),
        ('_doubly_constrained', 'exp14_JointTableSIMLatentMCMC_HighNoise_both_margins_19_05_2023_10_55_00'),
        ('_doubly_10%_cell_constrained', 'exp14_JointTableSIMLatentMCMC_HighNoise_both_margins_permuted_cells_10%_07_06_2023_09_35_18'),
        ('_doubly_20%_cell_constrained', 'exp14_JointTableSIMLatentMCMC_HighNoise_both_margins_permuted_cells_20%_05_06_2023_12_40_31'),
    )

    # Pick the regime first; the title is only looked up once a regime applies
    if sweep_param['sigma'] <= noise_threshold:
        candidate_runs = low_noise_runs
    elif sweep_param['sigma'] > noise_threshold:
        candidate_runs = high_noise_runs
    else:
        return None

    requested_title = sweep_param['title']
    for known_title, old_experiment_id in candidate_runs:
        if requested_title == known_title:
            return old_experiment_id
    return None

# def get_old_experiment_id(sweep_param):
#     if sweep_param['sigma'] <= 0.01415 and sweep_param['title'] == '_total_constrained':
#         return 'exp5_SIMLatentMCMC_LowNoise_grand_total_18_05_2023_11_09_58'
#     elif sweep_param['sigma'] <= 0.01415 and sweep_param['title'] == '_row_constrained':
#         return 'exp5_SIMLatentMCMC_LowNoise_row_margin_27_01_2023_18_46_59'
#     elif sweep_param['sigma'] > 0.01415 and sweep_param['title'] == '_total_constrained':
#         return 'exp5_SIMLatentMCMC_HighNoise_grand_total_23_05_2023_11_07_44'
#     elif sweep_param['sigma'] > 0.01415 and sweep_param['title'] == '_row_constrained':
#         return 'exp5_SIMLatentMCMC_HighNoise_row_margin_06_02_2023_16_54_39'
    
# def get_old_experiment_id(sweep_param):
#     if sweep_param['sigma'] is None and sweep_param['title'] == '_total_constrained':
#         return 'exp98_NeuralABM_LearnedNoise_grand_total_17_05_2023_15_52_53'
#     elif sweep_param['sigma'] is None and sweep_param['title'] == '_row_constrained':
#         return 'exp98_NeuralABM_LearnedNoise_row_margin_17_05_2023_15_53_28'
#     elif sweep_param['sigma'] <= 0.01415 and sweep_param['title'] == '_total_constrained':
#         return 'exp98_NeuralABM_LowNoise_grand_total_17_05_2023_15_50_09'
#     elif sweep_param['sigma'] <= 0.01415 and sweep_param['title'] == '_row_constrained':
#         return 'exp98_NeuralABM_LowNoise_row_margin_17_05_2023_15_51_11'
#     elif sweep_param['sigma'] > 0.01415 and sweep_param['title'] == '_total_constrained':
#         return 'exp98_NeuralABM_HighNoise_grand_total_17_05_2023_15_50_09'
#     elif sweep_param['sigma'] > 0.01415 and sweep_param['title'] == '_row_constrained':
#         return 'exp98_NeuralABM_HighNoise_row_margin_17_05_2023_15_51_11'

In [33]:
# Best-effort cleanup: close the HDF5 handle that an earlier (possibly
# interrupted) run of this cell may have left bound to `h5file`.
try:
    h5file.close()
except Exception:
    # Typically a NameError on a fresh kernel, where no handle exists yet.
    # Unlike a bare `except:`, this lets KeyboardInterrupt/SystemExit through.
    pass

# Read the experiment's config.json and discover which keys are swept
config = Config(
    path=os.path.join(root_dir, experiment_id, 'config.json'),
    settings=None,
    console_level='debug',
)
config.find_sweep_key_paths()
sweep_params = config.parse_sweep_params()

# Axis sizes used to shape the stored samples
dims = {"origin": 69, "destination": 13, "time": 1}
# config['training']['N'] is the leading dimension of every dataset below
n_training_samples = config['training']['N']
data_name_to_dims = {
    # one scalar per sample
    **{name: (n_training_samples,) for name in ("alpha", "beta", "sigma", "sign")},
    "theta": (n_training_samples, 2),
    "log_destination_attraction": (n_training_samples, dims['time'], dims['destination']),
    "table": (n_training_samples, dims['origin'], dims['destination']),
}

# Expand the sweep parameters into the individual configurations to iterate over
(
    sweep_configurations,
    param_sizes_str,
    total_size_str,
) = config.prepare_sweep_configurations(sweep_params)


def _load_batched_samples(samples_dir, sample_name, shape):
    """Load every ``<sample_name>*.npy`` batch in ``samples_dir`` as one array.

    Batches are ordered by the integer that follows ``batch_`` in the file
    name, concatenated along axis 0 and reshaped to ``shape``.

    Raises:
        FileNotFoundError: if no batch file matches the pattern.
    """
    batch_files = sorted(
        glob(os.path.join(samples_dir, f'{sample_name}*.npy')),
        # Parse the batch number from the file name only, so a "batch_" in a
        # parent directory name cannot break the ordering.
        key=lambda path: int(os.path.basename(path).split("batch_")[1].split("_")[0]),
    )
    if not batch_files:
        raise FileNotFoundError(
            f"No '{sample_name}*.npy' sample batches found in {samples_dir}"
        )
    # One concatenation instead of re-allocating the array for every batch
    batches = [read_npy(batch_file) for batch_file in batch_files]
    return np.concatenate(batches, axis=0).reshape(shape)


def _write_sample_dataset(group, name, shape, values, dim_names):
    """Create the chunked, compressed dataset ``name`` in ``group`` and fill it.

    The dataset gets the ``dim_names`` attribute plus the fixed time-coordinate
    attributes, and is then filled with ``values``.
    """
    dataset = group.create_dataset(name, shape, chunks=True, compression=3)
    dataset.attrs["dim_names"] = dim_names
    dataset.attrs["coords_mode__time"] = "start_and_step"
    dataset.attrs["coords__time"] = [1,1]
    dataset[...] = values
    return dataset


for i,sweep_config in tqdm(enumerate(sweep_configurations),total=len(sweep_configurations),leave=False):
    new_config,sweep = config.prepare_experiment_config(sweep_params=sweep_params,sweep_configuration=sweep_config)

    experiment_dir = os.path.join(config.out_directory,dataset,'mcmc',experiment_id)

    sweep_id = new_config.get_sweep_id(sweep)
    new_out_directory = os.path.join(experiment_dir,'samples',sweep_id)
    # Make output directory
    makedir(new_out_directory)
    write_json(new_config.settings,os.path.join(new_out_directory,'metadata.json'))
    # Write dummy outputs in order to not confuse this as an unfinished experiment
    with open(os.path.join(new_out_directory,'outputs.log'), 'w') as log_file:
        log_file.write('DONE')

    if load_h5:
        # Rewrite the datasets of an existing data.h5 in place.
        # The context manager closes the file; no explicit close() is needed.
        with h5.File(os.path.join(new_out_directory,'data.h5'),'r+') as h5data:
            experiment_group = h5data[experiment_id]
            # NOTE(review): unlike the branch below, the sweep values are stored
            # as-is here (None is not mapped to '') — confirm this is intended.
            experiment_group.attrs['sweep_params'] = list(sweep.keys())
            experiment_group.attrs['sweep_values'] = list(sweep.values())
            print(experiment_group.attrs['sweep_params'])
            print(experiment_group.attrs['sweep_values'])
            # Snapshot the names first: members are deleted and re-created
            # inside the loop, so the live group must not be iterated directly.
            for dset_name in list(experiment_group.keys()):
                # Reading with [...] already yields an independent in-memory copy
                data_copy = experiment_group[dset_name][...]
                if data_copy.shape[0] == 1:
                    # Drop a leading singleton axis
                    data_copy = data_copy.squeeze(axis=0)
                # NOTE(review): attributes of the old dataset are not carried over
                del experiment_group[dset_name]
                experiment_group.create_dataset(
                    dset_name,
                    data_name_to_dims[dset_name],
                    chunks=True,
                    compression=3,
                )
                experiment_group[dset_name][...] = data_copy
                print(dset_name,experiment_group[dset_name].shape)
    else:
        # Build data.h5 from the .npy sample batches of the matching old experiment
        old_experiment_id = get_old_experiment_id(sweep)
        if old_experiment_id is None:
            # Fail with a clear message instead of a TypeError inside os.path.join
            raise ValueError(f"No old experiment is mapped to sweep {sweep}")
        samples_dir = os.path.join(config.out_directory,dataset,'mcmc',old_experiment_id,'samples')

        # Load samples
        table = _load_batched_samples(samples_dir, 'table', data_name_to_dims['table'])
        log_destination_attraction = _load_batched_samples(
            samples_dir,
            'log_destination_attraction',
            data_name_to_dims['log_destination_attraction'],
        )
        sign = _load_batched_samples(samples_dir, 'sign', data_name_to_dims['sign'])
        theta = _load_batched_samples(samples_dir, 'theta', data_name_to_dims['theta'])

        # The context manager guarantees the file is closed even if a write fails
        with h5.File(os.path.join(new_out_directory,"data.h5"), mode="w") as h5file:
            h5group = h5file.create_group(experiment_id)
            # Store sweep configurations as attributes
            h5group.attrs.create("sweep_params",list(sweep.keys()))
            h5group.attrs.create("sweep_values",['' if val is None else str(val) for val in sweep.values()])

            _write_sample_dataset(
                h5group,
                "log_destination_attraction",
                data_name_to_dims["log_destination_attraction"],
                log_destination_attraction,
                XARRAY_SCHEMA['log_destination_attraction']['coords'],
            )
            _write_sample_dataset(
                h5group,
                "table",
                data_name_to_dims["table"],
                table,
                XARRAY_SCHEMA['table']['coords'],
            )
            _write_sample_dataset(
                h5group,
                "sign",
                data_name_to_dims["sign"],
                sign,
                XARRAY_SCHEMA['sign']['coords'],
            )
            # Column 0 of theta is stored as alpha, column 1 as beta
            for column, p_name in enumerate(['alpha', 'beta']):
                _write_sample_dataset(
                    h5group,
                    p_name,
                    data_name_to_dims[p_name],
                    theta[:,column],
                    [],
                )

setting up new logger multiresticodm.config


  0%|          | 0/12 [00:00<?, ?it/s]