# Second Notebook: standar_limit_QR.ipynb
* Calculates $\tau_{min}$ and $\tau_{max}$
* Uses the new algorithms from `utility.py`
* Training on **January** to **August** data
* Requires:
    * `optimized_residual_train`
    * `optimized_safe_margin`
* Generates:
    * `optimized_standard_limit`

In [8]:
# Render matplotlib figures inline, directly below the cell that produces them.
%matplotlib inline

In [9]:
# Load IPython's autoreload extension. Running this cell again in the same kernel
# only prints the "already loaded" notice shown in the output below.
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
# Autoreload mode 2: reload all imported modules before each cell executes, so
# edits to the project's own modules are picked up without restarting the kernel.
%autoreload 2

In [11]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import datetime
import random
import importlib
import os
import json
import time
import importlib
import sys

import numpy as np
import pickle5 as pickle
import geopandas as gpd
import matplotlib.pyplot as plt
import osmnx as ox
import networkx as nx
import matplotlib.dates as md

from pprint import pprint
from copy import deepcopy
from scipy.stats import hmean
from matplotlib.lines import Line2D
from tqdm.notebook import tqdm

# No seed value is passed, so `random` is seeded from OS entropy / the current
# time; anything that draws from `random` will differ between runs.
random.seed()

In [12]:
# Make the parent of the current working directory importable. The entry is
# relative, so this assumes the kernel was started in the notebook's own folder.
sys.path.append("..")
# Wildcard import: calculate_tmax / calculate_tmin used further down are not
# defined in this notebook, so they presumably come from here -- TODO confirm
# and import the needed names explicitly.
from src.common_functions import *

### Parameters
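* **TODO:** There is no quick way to share the cluster list now that the notebooks are separated. For now, set `cluster_list` below to a placeholder with the correct number of entries; only its length is used, to rebuild the filename.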

In [31]:
# Time-of-day bounds as 'HH:MM' strings (presumably the daily analysis window;
# they are not referenced by the cells shown in this notebook).
start_time, end_time = '06:00', '20:55'

# Month bounds of each data split, as (first, last) pairs.
# NOTE(review): training starts at 0 while `months` below is 1-based -- confirm
# how consumers interpret the lower bound.
training_months = (0, 8)            # January to August
cross_validation_months = (9, 10)   # September and October
testing_months = (11, 12)           # November and December

# Lower-case month name -> calendar month number (1..12).
months = {
    month_name: month_number
    for month_number, month_name in enumerate(
        ('january', 'february', 'march', 'april', 'may', 'june',
         'july', 'august', 'september', 'october', 'november', 'december'),
        start=1,
    )
}

In [22]:
# Identifier of the clustering run; it is embedded in the directory and file
# names built in the cells below.
clustering_version = '0027'

# Placeholder, not the real cluster IDs: only its length is used (to rebuild the
# result filename) until the actual list is loaded from disk further down.
cluster_list = [1]

In [23]:
# Confirm directories are in place.
# The raw data folder must already exist (it is downloaded separately); every
# other folder is created on demand.
data_dir = os.path.join(os.getcwd(), '../data')
if not os.path.isdir(data_dir):
    raise OSError("Must first download data, see README.md")

cluster_dir = os.path.join(data_dir, 'generated_clusters')
cleaned_dir = os.path.join(cluster_dir, f'{clustering_version}_incident_ratios/cleaned')
incidents_dir = os.path.join(cluster_dir, f'{clustering_version}_incident_ratios/incidents')
incidents_GT_dir = os.path.join(cluster_dir, f'{clustering_version}_incident_ratios/incidents_GT')
results = os.path.join(data_dir, f'{clustering_version}_results')

# os.makedirs also creates the intermediate '<version>_incident_ratios' folder,
# which a plain os.mkdir would fail on in a fresh checkout; exist_ok=True makes
# the cell safe to re-run and avoids the check-then-create race.
for _required_dir in (cluster_dir, cleaned_dir, incidents_dir, incidents_GT_dir, results):
    os.makedirs(_required_dir, exist_ok=True)

# Loading cluster list and regenerating filename

In [24]:
# Run tag: <clustering version>_<number of clusters>C_<today's date as MM-DD-YYYY>.
# NOTE: the date is taken at execution time, so the pickles loaded below
# (whose names embed this tag) are only found if they were written under the
# same date and with the same cluster count.
_run_date = datetime.datetime.now().strftime('%m-%d-%Y')
_cluster_count = len(cluster_list)
new_filename = f"{clustering_version}_{_cluster_count}C_{_run_date}"
new_filename

'0027_1C_07-09-2021'

In [25]:
# Read the pickled cluster collection that belongs to this clustering version.
fp = os.path.join(cluster_dir, f'{clustering_version}_clusters.pkl')
with open(fp, mode='rb') as clusters_file:
    clusters = pickle.load(clusters_file)

In [27]:
# Replace the placeholder cluster_list with the list stored in the results
# folder under the current run tag, then display it.
fp = os.path.join(results, f'used_clusters_list_{new_filename}.pkl')
with open(fp, mode='rb') as cluster_list_file:
    cluster_list = pickle.load(cluster_list_file)
cluster_list

[1524373007]

In [29]:
# Load the two prerequisite artefacts saved under the current run tag.
fp_residual = os.path.join(results, f'optimized_residual_train_{new_filename}.pkl')
with open(fp_residual, mode='rb') as residual_file:
    residual = pickle.load(residual_file)

fp_safe_margin = os.path.join(results, f'optimized_safe_margin_{new_filename}.pkl')
with open(fp_safe_margin, mode='rb') as safe_margin_file:
    safe_margin = pickle.load(safe_margin_file)

# Keep only the entries whose key appears in cluster_list; the original key
# order of each mapping is preserved.
residual_filtered = {
    cluster_key: cluster_residual
    for cluster_key, cluster_residual in residual.items()
    if cluster_key in cluster_list
}

safe_margin_filtered = {
    cluster_key: cluster_margin
    for cluster_key, cluster_margin in safe_margin.items()
    if cluster_key in cluster_list
}

In [30]:
# Flatten the nested residual mapping into a table with one row per
# (first-level key, second-level key) pair and one column per SF key. Each cell
# is consumed through `.items()` below, so it is expected to be a mapping whose
# pairs are labelled time -> nabla.
df = pd.DataFrame.from_dict(residual_filtered, orient="index").stack().to_frame()
df = pd.DataFrame(df[0].values.tolist(), index=df.index)
indices = df.index.tolist()
sf_keys = df.columns.tolist()

# One record per (row, SF key) combination, holding both limits.
standard_limit = []

# The context manager closes the progress bar even when a limit calculation
# raises, so a failed run does not leave a dangling bar behind.
with tqdm(total=(len(indices) * len(sf_keys))) as pbar:
    for index in indices:
        for sf_key in sf_keys:
            # Single .loc lookup (row, column) instead of chained indexing.
            _df = pd.DataFrame.from_dict(df.loc[index, sf_key].items())
            _df = _df.rename(columns={0: 'time', 1: 'nabla'}).set_index('time')
            T_max = calculate_tmax(_df['nabla'])
            T_min = calculate_tmin(_df['nabla'])
            # NOTE(review): the key 'ka ppa' contains a space. It is left
            # unchanged because readers of this pickle may depend on it --
            # confirm whether 'kappa' was intended.
            temp = {'cluster_id': index[0],
                    'ka ppa': index[1],
                    'SF': sf_key,
                    'tau_max': T_max, 'tau_min': T_min}
            standard_limit.append(temp)
            pbar.update(1)

# Persist the results under the current run tag (no separate backup copy is
# written here).
fp = os.path.join(results, f'optimized_standard_limit_{new_filename}.pkl')
with open(fp, 'wb') as handle:
    pickle.dump(standard_limit, handle)
# Report success only after the file has been flushed and closed.
print(f'Saved optimized_standard_limit_{new_filename}.pkl')

  0%|          | 0/32 [00:00<?, ?it/s]

Saved optimized_standard_limit_0027_1C_07-09-2021.pkl
