In [1]:
import os
from copy import deepcopy

import h5py as h5
import numpy as np
import pandas as pd
from tqdm import tqdm

from multiresticodm.config import Config
from multiresticodm.inputs import Inputs
from multiresticodm.outputs import Outputs
from multiresticodm.utils.misc_utils import *
from multiresticodm.utils.math_utils import *
from multiresticodm.contingency_table import instantiate_ct
from multiresticodm.utils.probability_utils import *

In [2]:
# Render matplotlib figures inline, directly below the cell that creates them
%matplotlib inline

# AUTO RELOAD EXTERNAL MODULES
# Mode 2 re-imports all modules before each cell is executed, so edits made to
# the project sources are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Locations of the experiment under analysis (relative to the working directory)
experiment_dir = (
    '../data/outputs/cambridge_work_commuter_lsoas_to_msoas/exp1/'
    'JointTableSIM_MCMC_SweepedNoise_16_05_2023_20_09_04/'
)
# The experiment configuration file sits directly inside the experiment directory
config_path = os.path.join(experiment_dir, 'config.json')

In [120]:
# Options handed to `Outputs` that control how stored results are post-processed
settings = dict(
    logging_mode="INFO",
    # No coordinate filtering is applied. A filter expression could be listed here, e.g.
    # "da.loss_name.isin([str(['dest_attraction_ts_likelihood_loss']),str(['dest_attraction_ts_likelihood_loss', 'table_likelihood_loss'])])",
    coordinate_slice=[],
    metadata_keys=[],
    # Slicing specification for the 'iter' coordinate
    burnin_thinning_trimming=[
        dict(iter=dict(burnin=10000, thinning=100, trimming=1000)),
    ],
    n_workers=1,
    filename_ending="test",
    sample=["table", "intensity"],
    force_reload=True,
)

In [121]:
# Outputs handler for the whole experiment; no pre-built inputs are supplied
# and slicing is switched on.
outputs = Outputs(config=config_path, settings=settings, base_dir=experiment_dir, inputs=None, slice=True)

# Set the console log level to 'EMPTY' to keep the notebook output quiet
outputs.logger.setLevels(console_level='EMPTY')

# Load the outputs
outputs.load()

11:57.066 config INFO ----------------------------------------------------------------------------------
11:57.078 config INFO /home/iz230/MultiResTICODM/data/outputs/
11:57.090 config INFO Parameter space size: 
 --- sigma: ['sigma', 'to_learn', 'covariance'] (2)
 --- title: ['title', 'axes', 'cells', 'name', 'proposal'] (6)
11:57.102 config INFO Total = 12.
11:57.115 config INFO ----------------------------------------------------------------------------------
11:57.127 outputs INFO //////////////////////////////////////////////////////////////////////////////////
11:57.139 outputs INFO Slicing coordinates:
11:57.151 outputs INFO iter: burnin = 10000, thinning = 100, trimming = 1000
11:57.163 outputs INFO //////////////////////////////////////////////////////////////////////////////////
                                                                                                                     

In [122]:
# Build one record per sweep configuration holding the sweep parameters and the
# SRMSE of the mean intensity / mean table against the ground-truth table.
n_sweeps = len(outputs.data)

# Every sweep is scored against the ground truth stored with the first sweep,
# so it is fetched and cast once rather than twice per iteration. The guard
# keeps an empty experiment a no-op instead of failing on `outputs.get(0)`.
ground_truth = (
    outputs.get(0).get_sample('ground_truth_table').astype('float32')
    if n_sweeps > 0 else None
)

data = []
for i in tqdm(
    range(n_sweeps),
    leave=True,
    disable=True,
    desc='Computing validation metrics'
):
    print(f"{i+1}/{n_sweeps}")
    sweep_outputs = outputs.get(i)
    sweep_outputs.inputs = Inputs(
        config = sweep_outputs.config,
        synthetic_data = False,
        logger = outputs.logger
    )

    # Intensity error: reduce the samples over 'id', then compare with the ground truth
    mean_intensity = sweep_outputs.compute_statistic(
        data = sweep_outputs.get_sample('intensity'),
        sample_name = 'intensity',
        statistic = 'signedmean',
        dim = ['id']
    )
    intensity_srmse = srmse(
        prediction = mean_intensity,
        ground_truth = ground_truth
    )
    # Create a data row keyed by the swept parameter names
    datum = dict(zip(
        outputs.config.sweep_param_names,
        mean_intensity['sweep'].values[0]
    ))
    print('sweep',{k:v for k,v in datum.items() if k not in ['covariance','to_learn']})
    datum['intensity_srmse'] = intensity_srmse.values[0]

    # Table metrics are best effort: a sweep for which they cannot be computed
    # still contributes its intensity metrics.
    try:
        # Fetched once; also used by the optional diagnostics below
        samples = sweep_outputs.get_sample('table')
        mean_table = sweep_outputs.compute_statistic(
            data = samples,
            sample_name = 'table',
            statistic = 'mean',
            dim = ['id']
        )
        table_srmse = srmse(
            prediction = mean_table,
            ground_truth = ground_truth
        )
        datum['table_srmse'] = table_srmse.values[0]

        sweep_outputs.inputs.cast_from_xarray()
        # Contingency table for this sweep; kept for the optional admissibility diagnostics below
        ct = instantiate_ct(
            config = sweep_outputs.config,
            **sweep_outputs.inputs.data_vars(),
            level = 'EMPTY'
        )
        # Optional diagnostics (disabled; enabling them requires `torch`):
        # print('axes constraints',ct.constraints['constrained_axes'])
        # print('cell constraints',len(ct.constraints['cells']))
        # tables_admissible = all([ct.table_admissible(torch.tensor(tab.values.squeeze())) for _,tab in samples.groupby('id')])
        # print('Tables admissible',tables_admissible)
        # if not tables_admissible:
        #     print('Tables margins admissible',any([ct.table_margins_admissible(torch.tensor(tab.values.squeeze())) for _,tab in samples.groupby('id')]))
        #     print('Tables cells admissible',all([ct.table_cells_admissible(torch.tensor(tab.values.squeeze())) for _,tab in samples.groupby('id')]))
    except Exception as exc:
        # Report the failure instead of swallowing it silently. Catching
        # `Exception` (not a bare `except`) lets KeyboardInterrupt through.
        print(f"table metrics skipped for sweep {i+1}: {type(exc).__name__}: {exc}")
        # Keep the column present even when no sweep yields a table metric;
        # a value computed before the failure is preserved.
        datum.setdefault('table_srmse', np.nan)
    data.append(datum)
    print()

1/12
sweep {'sigma': 0.014139999635517597, 'title': '_unconstrained', 'axes': '[]', 'cells': 'None', 'name': 'TotallyConstrained'}

2/12
sweep {'sigma': 0.014139999635517597, 'title': '_total_constrained', 'axes': '[[0, 1]]', 'cells': 'None', 'name': 'TotallyConstrained'}

3/12
sweep {'sigma': 0.014139999635517597, 'title': '_row_constrained', 'axes': '[[1]]', 'cells': 'None', 'name': 'ProductionConstrained'}

4/12
sweep {'sigma': 0.014139999635517597, 'title': '_doubly_constrained', 'axes': '[[0], [1]]', 'cells': 'None', 'name': 'TotallyConstrained'}

5/12
sweep {'sigma': 0.014139999635517597, 'title': '_doubly_10%_cell_constrained', 'axes': '[[0], [1]]', 'cells': 'constraints/cell_constraints_permuted_size_90_cell_percentage_10_constrained_axes_0_1_seed_1234.txt', 'name': 'TotallyConstrained'}

6/12
sweep {'sigma': 0.014139999635517597, 'title': '_doubly_20%_cell_constrained', 'axes': '[[0], [1]]', 'cells': 'constraints/cell_constraints_permuted_size_179_cell_percentage_20_constraine

In [123]:
# One row per sweep; the sweep parameters listed here are dropped from the
# summary table.
columns_to_discard = ['covariance','to_learn','axes','cells']
df = pd.DataFrame.from_records(data).drop(columns=columns_to_discard)

In [125]:
# Display the per-sweep validation metrics table
df

Unnamed: 0,sigma,title,name,intensity_srmse,table_srmse
0,0.01414,_unconstrained,TotallyConstrained,0.737087,0.960439
1,0.01414,_total_constrained,TotallyConstrained,0.734624,0.734291
2,0.01414,_row_constrained,ProductionConstrained,0.692166,0.692153
3,0.01414,_doubly_constrained,TotallyConstrained,0.70632,0.586045
4,0.01414,_doubly_10%_cell_constrained,TotallyConstrained,0.713032,0.546017
5,0.01414,_doubly_20%_cell_constrained,TotallyConstrained,0.71321,0.505732
6,0.14142,_unconstrained,TotallyConstrained,0.703698,0.947807
7,0.14142,_total_constrained,TotallyConstrained,0.700943,0.699528
8,0.14142,_row_constrained,ProductionConstrained,0.616282,0.614946
9,0.14142,_doubly_constrained,TotallyConstrained,1.122763,0.589195


In [49]:
# Absolute location of the experiment's `samples/` folder (trailing separator
# kept). Derived from `experiment_dir` instead of a hardcoded, user-specific
# home directory so the notebook also runs on other machines / checkouts.
root_path = os.path.join(os.path.abspath(experiment_dir), 'samples', '')
# The same folder expressed relative to the current working directory
relative_path = os.path.relpath(root_path, os.getcwd())

In [118]:
# Options handed to `Outputs` when loading a single sweep's results
settings = dict(
    logging_mode="INFO",
    # No coordinate filtering is applied. A filter expression could be listed here, e.g.
    # "da.loss_name.isin([str(['dest_attraction_ts_likelihood_loss']),str(['dest_attraction_ts_likelihood_loss', 'table_likelihood_loss'])])",
    coordinate_slice=[],
    metadata_keys=[],
    # Slicing specification for the 'iter' coordinate
    burnin_thinning_trimming=[
        dict(iter=dict(burnin=10000, thinning=100, trimming=1000)),
    ],
    n_workers=1,
    filename_ending="test",
    sample=["table", "intensity"],
    force_reload=True,
)

In [119]:
# Folder of the single sweep configuration to inspect, passed as `config`
sweep_config_dir = '../data/outputs/cambridge_work_commuter_lsoas_to_msoas/exp1/JointTableSIM_MCMC_SweepedNoise_16_05_2023_20_09_04/samples/sigma_high/title__doubly_10%_cell_constrained/'

# Outputs handler restricted to that sweep, using the same processing settings
current_sweep_outputs = Outputs(config=sweep_config_dir, settings=settings, inputs=None, slice=True)

# Load the outputs
current_sweep_outputs.load()

11:38.865 outputs INFO //////////////////////////////////////////////////////////////////////////////////
11:38.877 outputs INFO Slicing coordinates:
11:38.889 outputs INFO iter: burnin = 10000, thinning = 100, trimming = 1000
11:38.902 outputs INFO //////////////////////////////////////////////////////////////////////////////////


In [98]:
# Values of the first entry of the loaded `table` data with length-1 dimensions
# squeezed out (presumably a numpy array - TODO confirm the type of `.values`)
tables_new = current_sweep_outputs.data.table[0].values.squeeze()