In [68]:
import os
import sys
# Disable PyGEOS in geopandas (USE_PYGEOS=0); must be set before geopandas is first imported
os.environ['USE_PYGEOS'] = '0'

import glob
import warnings
import h5py as h5

from tqdm.auto import tqdm

from multiresticodm.utils import *
from multiresticodm.config import Config
from multiresticodm.notebook_functions import *
from multiresticodm.global_variables import XARRAY_SCHEMA

warnings.simplefilter("ignore")
# mpl.rcParams['agg.path.chunksize'] = 10000

In [2]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline

# AUTO RELOAD EXTERNAL MODULES
# (mode 2: reload all imported modules before each cell executes, so edits to project code are picked up)
%load_ext autoreload
%autoreload 2

## Import outputs

In [173]:
# Experiment id and the directory holding its MCMC outputs
dataset = 'cambridge_work_commuter_lsoas_to_msoas'
root_dir = f'../../data/outputs/{dataset}/mcmc/'

# Names of the geometries involved
geometry_name = 'lsoas_to_msoas'
origin_geometry_name = 'lsoa'
destination_geometry_name = 'msoa'

# Directory entries that must not be treated as experiment outputs
unwanted_folders = [
    'first_run_of_experiments',
    'paper_figures',
    'second_run_of_experiments_bin',
    'summaries',
    '._.DS_Store',
    '.DS_Store',
]

# Every remaining entry of root_dir, in sorted order
output_folders = sorted(
    entry
    for entry in os.listdir(root_dir)
    if entry not in unwanted_folders
)

In [None]:
# Experiment folder whose metadata.json is used to build the configuration
# (a single folder is hard-coded here rather than iterating over output_folders)
folder = 'exp5_SIMLatentMCMC_LowNoise_row_margin_27_01_2023_18_46_59'

config = Config(
    path=os.path.join(root_dir, folder, 'metadata.json'),
    settings=None,
    console_level='debug',
)

# Hard-coded axis sizes, used below as entries of the expected dataset shapes
dims = dict(origin=69, destination=13, time=1)

In [208]:
# Expected shape of every stored dataset, keyed by dataset name.
# The first axis always has length config['training']['N'].
data_name_to_dims = {
    # Scalar quantities: a single value per sample
    **{
        scalar_name: (config['training']['N'],)
        for scalar_name in ("alpha", "beta", "sigma", "sign")
    },
    # Array-valued quantities
    "log_destination_attraction": (
        config['training']['N'],
        dims['time'],
        dims['destination'],
    ),
    "table": (
        config['training']['N'],
        dims['origin'],
        dims['destination'],
    ),
}

In [236]:
# Processing mode for the sweep conversion loop that follows:
#   True  -> open an existing data.h5 in 'r+' mode and rewrite its datasets in place
#   False -> build a new data.h5 from the *_samples.npy files of each source experiment
load_h5 = False
# Name of the HDF5 group inside data.h5; also a path component of the output directory
experiment_id = 'SIM_MCMC_SweepedNoise_06_02_2023_16_54_39'

In [237]:
# Convert the samples of each source experiment into one data.h5 file per sweep
# configuration, stored under <out_directory>/<dataset>/mcmc/<experiment_id>/samples/<sweep id parts>.
sweep_params = ['sigma','title']
sweep_collection = [
    ['0.14142','_row_constrained'],
    ['0.14142','_total_constrained'],
    ['0.01414','_row_constrained'],
    ['0.01414','_total_constrained'],
]
# Source experiment for each entry of sweep_collection (same order)
sweep_collection_experiment_ids = [
    'exp5_SIMLatentMCMC_HighNoise_row_margin_06_02_2023_16_54_39',
    'exp5_SIMLatentMCMC_HighNoise_grand_total_23_05_2023_11_07_44',
    'exp5_SIMLatentMCMC_LowNoise_row_margin_27_01_2023_18_46_59',
    'exp5_SIMLatentMCMC_LowNoise_grand_total_18_05_2023_11_09_58'
]


def _write_sample_dataset(group, name, values, dim_names):
    """Create dataset `name` in `group`, attach its coordinate attributes and fill it.

    The dataset shape is taken from data_name_to_dims[name]; `values` must be
    assignable to a dataset of that shape. Prints the dataset name and shape
    and returns the created dataset.
    """
    # NOTE(review): no dtype is passed, so h5py's default (single-precision float)
    # applies to every dataset — confirm this precision is intended for the samples.
    dataset_handle = group.create_dataset(
        name,
        data_name_to_dims[name],
        chunks=True,
        compression=3,
    )
    dataset_handle.attrs["dim_names"] = dim_names
    dataset_handle.attrs["coords_mode__time"] = "start_and_step"
    dataset_handle.attrs["coords__time"] = [1,1]
    dataset_handle[...] = values
    print(name,dataset_handle.shape)
    return dataset_handle


for sweep_idx,sweep_values in enumerate(sweep_collection):
    # Build the unique sweep id from "<param>_<value>" parts
    sweep_id = []
    for param,raw_value in zip(sweep_params,sweep_values):
        if param == 'sigma':
            # Map sigma to noise regime
            value = sigma_to_noise_regime(float(raw_value))
        else:
            # Else use passed sweep value
            value = raw_value
        sweep_id.append(f"{param}_{stringify(value)}")
    sweep_id = sorted(sweep_id)

    new_out_directory = os.path.join(config.out_directory,dataset,'mcmc',experiment_id,'samples',*sweep_id)

    if load_h5:
        # Rewrite an existing file in place so every dataset has its expected shape
        with h5.File(os.path.join(new_out_directory,'data.h5'),'r+') as h5data:
            h5group = h5data[experiment_id]
            h5group.attrs['sweep_params'] = sweep_params
            h5group.attrs['sweep_values'] = sweep_values
            print(h5group.attrs['sweep_params'])
            print(h5group.attrs['sweep_values'])
            # Snapshot the names first: the group is modified inside the loop and
            # links must not be removed/added while the group is being iterated.
            for dset_name in list(h5group.keys()):
                # Reading with [...] already yields an independent in-memory array
                values = h5group[dset_name][...]
                if values.shape[0] == 1:
                    # Drop a leading singleton axis
                    values = values.squeeze(axis=0)
                # NOTE(review): attributes of the old dataset are not carried over
                # to the re-created one — confirm this is acceptable.
                del h5group[dset_name]
                new_dset = h5group.create_dataset(
                    dset_name,
                    data_name_to_dims[dset_name],
                    chunks=True,
                    compression=3,
                )
                new_dset[...] = values
                print(dset_name,new_dset.shape)
    else:
        # Build a new file from the .npy samples of the matching source experiment
        samples_dir = os.path.join(config.out_directory,dataset,'mcmc',sweep_collection_experiment_ids[sweep_idx],'samples')

        log_destination_attraction = read_npy(os.path.join(samples_dir,'log_destination_attraction_samples.npy'))
        log_destination_attraction = log_destination_attraction.reshape(data_name_to_dims['log_destination_attraction'])

        sign = read_npy(os.path.join(samples_dir,'sign_samples.npy'))
        sign = sign.reshape(data_name_to_dims['sign'])
        theta = read_npy(os.path.join(samples_dir,'theta_samples.npy'))

        # Opening with mode="w" fails if the parent directory is missing
        os.makedirs(new_out_directory,exist_ok=True)

        # The context manager closes the file even if one of the writes raises
        with h5.File(os.path.join(new_out_directory,"data.h5"), mode="w") as h5file:
            h5group = h5file.create_group(experiment_id)
            # Store sweep configurations as attributes
            h5group.attrs.create("sweep_params",sweep_params)
            h5group.attrs.create("sweep_values",sweep_values)

            _write_sample_dataset(
                h5group,
                "log_destination_attraction",
                log_destination_attraction,
                XARRAY_SCHEMA['log_destination_attraction']['coords'],
            )
            _write_sample_dataset(
                h5group,
                "sign",
                sign,
                XARRAY_SCHEMA['sign']['coords'],
            )
            # One dataset per learned parameter, taken from the matching column of theta.
            # A dedicated index name keeps the outer loop index from being overwritten.
            for param_idx,p_name in enumerate(config['inputs']['to_learn']):
                _write_sample_dataset(h5group,p_name,theta[:,param_idx],[])

log_destination_attraction (100000, 1, 13)
sign (100000,)
alpha (100000,)
beta (100000,)
log_destination_attraction (100000, 1, 13)
sign (100000,)
alpha (100000,)
beta (100000,)
log_destination_attraction (100000, 1, 13)
sign (100000,)
alpha (100000,)
beta (100000,)
log_destination_attraction (100000, 1, 13)
sign (100000,)
alpha (100000,)
beta (100000,)
