## Model Validation

In [1]:
from pathlib import Path
import time
import pandas as pd
import geopandas as gpd
import numpy as np
import pickle
import networkx as nx
from stochopy.optimize import minimize
from tqdm import tqdm
import similaritymeasures
import random
import matplotlib.pyplot as plt
from shapely.ops import Point, MultiLineString, LineString
from importlib import reload
import datetime
from scipy.spatial.distance import directed_hausdorff
np.set_printoptions(suppress=True)

from bikewaysim.paths import config, stadia_toner, maptiler_streets
from bikewaysim.impedance_calibration import stochastic_optimization, speedfactor
from bikewaysim.network import modeling_turns
from bikewaysim.routing import rustworkx_routing_funcs
from bikewaysim.impedance_calibration import optimization_viz, utils


from step_1_calibration_experiments import all_calibrations, full_model

## Create K-Fold Groups

In [2]:
# Read the pickled calibration subsets. Unpickling can execute arbitrary code,
# so this file should only ever come from a trusted source.
subsets_fp = config['calibration_fp'] / 'subsets.pkl'
with subsets_fp.open('rb') as fh:
    subsets = pickle.load(fh)

# `subsets` is iterated as (name, trip ids) pairs; keep the trip ids of the
# first entry named 'random'. Raises IndexError when no such entry exists.
trips = [trip_ids for subset_name, trip_ids in subsets if subset_name == 'random'][0]

In [3]:
import numpy as np
from sklearn.model_selection import KFold

# Shuffled 5-fold split with a fixed random_state so the folds are the same on
# every run.
kf = KFold(n_splits=5, shuffle=True, random_state=420)

# Each list collects one (fold name, trip ids) tuple per fold.
training_folds, testing_folds = [], []

for i, (train_index, test_index) in enumerate(kf.split(trips)):
    fold_label = f'fold_{i}'

    # KFold yields positional indices; translate them back into trip ids.
    train = [trips[pos] for pos in train_index]
    test = [trips[pos] for pos in test_index]

    # NOTE: the values printed after "index=" are the trip ids, not the
    # positional indices returned by KFold.
    print(
        f"Fold {i}:",
        f"  Train: len={len(train_index)} index={train}",
        f"  Test:  len={len(test_index)} index={test}",
        sep="\n",
    )

    training_folds.append((fold_label, train))
    testing_folds.append((fold_label, test))

    
# if (config['calibration_fp']/'validation').exists() == False:
#     (config['calibration_fp']/'validation').mkdir()

# with (config['calibration_fp']/'validation/training_folds.pkl').open('wb') as fh:
#     pickle.dump(training_folds,fh)
# with (config['calibration_fp']/'validation/testing_folds.pkl').open('wb') as fh:
#     pickle.dump(testing_folds,fh)

Fold 0:
  Train: len=531 index=[68, 761, 85, 6081, 115, 8202, 165, 103, 290, 7641, 562, 7365, 1152, 372, 2874, 143, 11527, 12783, 131, 136, 137, 5444, 605, 1103, 150, 153, 184, 1177, 337, 167, 1190, 8363, 197, 411, 194, 243, 26703, 9590, 211, 3751, 213, 313, 235, 341, 247, 251, 3995, 255, 4117, 957, 7936, 266, 5695, 284, 24460, 393, 293, 4416, 1106, 309, 580, 3961, 322, 2008, 1195, 349, 1945, 707, 358, 1450, 382, 1005, 6347, 837, 416, 2860, 463, 417, 450, 1538, 8974, 2502, 469, 2528, 489, 501, 3112, 555, 520, 677, 2538, 14265, 1705, 11130, 7599, 571, 570, 2937, 3104, 607, 610, 7419, 2026, 2173, 4692, 3159, 1635, 686, 15987, 2164, 6722, 743, 749, 861, 767, 14979, 4205, 1212, 6186, 1380, 817, 819, 1001, 1151, 835, 933, 881, 1026, 897, 939, 1940, 1008, 1010, 1041, 1086, 1087, 1270, 1127, 1148, 2895, 1191, 1230, 1229, 1287, 1926, 6755, 1254, 1664, 1585, 1428, 2110, 1951, 2591, 1685, 28900, 1803, 2751, 4894, 1944, 7499, 2347, 2163, 2235, 11207, 2447, 11671, 4030, 2533, 3054, 7339, 2586, 265

## Create Bootstrap Samples

In [None]:
import random

# Number of bootstrap replicates to draw; each one is the same size as `trips`.
num_to_resample = 1000

# Dedicated, seeded generator so the bootstrap samples are reproducible on a
# fresh kernel and the module-level `random` state is left untouched. The seed
# matches the random_state used for the K-fold split.
bootstrap_seed = 420
bootstrap_rng = random.Random(bootstrap_seed)

# List of (name, sample) tuples. Every sample is drawn from `trips` with
# replacement via `choices`, so a trip may appear more than once in a sample.
bootstrap_samples = [
    (f'bootsample_{idx}', bootstrap_rng.choices(trips, k=len(trips)))
    for idx in range(num_to_resample)
]

In [None]:
# Count the distinct trips in every bootstrap sample (sampling with replacement
# leaves ~ 420 unique trips per sample). Despite its name, `dups` holds the
# number of unique trips in each sample, not the number of duplicates.
dups = [len(set(sample)) for _name, sample in bootstrap_samples]

# Average number of unique trips per bootstrap sample (displayed as cell output).
np.mean(dups)

In [None]:
# with (config['calibration_fp']/'validation/bootstrap_samples.pkl').open('wb') as fh:
#     pickle.dump(bootstrap_samples,fh)