In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import os
import logging as log
from time import strftime
from copy import deepcopy
from torch import nn, optim
import torch.nn.functional as F
from utils.data_processing import *
from logger.logger import setup_logging
from utils.configs import BaseConf
from utils.metrics import best_threshold
from utils.utils import write_json, Timer
from dataloaders.grid_loader import GridDataLoaders
from datasets.grid_dataset import GridDataGroup
from utils.metrics import PRCurvePlotter, ROCCurvePlotter, LossPlotter, best_threshold, get_y_pred, \
                                get_y_pred_by_thresholds, best_thresholds
from sklearn.metrics import accuracy_score, average_precision_score, roc_auc_score
from models.model_result import ModelResult, ModelMetrics
from trainers.generic_trainer import train_model
from utils.plots import im
from utils.utils import pshape, get_data_sub_paths, by_ref
from models.model_result import save_metrics,save_results, compare_models, compare_all_models, get_metrics_table, \
                            get_models_metrics, get_models_results
from models.st_resnet_models import STResNet, STResNetExtra
from models.st_resnet_models import train_epoch_for_st_res_net, train_epoch_for_st_res_net_extra
from models.st_resnet_models import evaluate_st_res_net, evaluate_st_res_net_extra
import pandas as pd
from pprint import pprint
from torch.optim import lr_scheduler
pd.set_option('display.max_columns', None)

from sparse_discrete_table import construct_temporal_information
from utils.interactive import new_interactive_heatmap

In [2]:
import os

# Allow numexpr to use every available core. os.cpu_count() returns None when the
# core count cannot be determined, so fall back to one thread rather than
# exporting the literal string "None".
# NOTE(review): numexpr presumably reads this variable when it is first imported,
# and pandas is already imported in an earlier cell - confirm the value is picked up.
os.environ['NUMEXPR_MAX_THREADS'] = str(os.cpu_count() or 1)

In [3]:
from sparse_discrete_table import SparseDiscreteTable, build_discrete_table
from ipywidgets import widgets
import pandas as pd
import numpy as np

import plotly.graph_objs as go
import plotly.express as px
from utils.data_processing import time_series_to_time_index
from pprint import pprint
from sparse_discrete_table import quick_mutual_info, quick_cond_mutual_info
from sparse_discrete_table import mutual_info_over_time, conditional_mutual_info_over_time
from utils.interactive import filter_frame, get_total_counts, new_bins, new_int_bins
from utils.interactive import get_mean_map, bin_data_frame, State

pd.set_option('mode.chained_assignment', None)

In [4]:
# Fetch the available data sub paths and print them in sorted order so one can be
# chosen below (presumably these are the folder names under ./data/processed/).
data_sub_paths = get_data_sub_paths()
pprint(sorted(data_sub_paths))

['T1H-X3400M-Y3520M_2014-01-01_2016-01-01_#7cd',
 'T24H-X1700M-Y1760M_2014-01-01_2019-01-01_#828',
 'T24H-X255M-Y220M_2012-01-01_2019-01-01_#c97',
 'T24H-X425M-Y440M_2012-01-01_2019-01-01_#827',
 'T24H-X850M-Y880M_2012-01-01_2019-01-01_#826']


In [5]:
# Select the data set by its short reference "7cd" (the "#7cd" suffix of the
# folder name); by_ref is indexed, and the first entry is the one used.
# NOTE(review): presumably fails with IndexError when no folder matches - confirm.
data_sub_path = by_ref("7cd")[0]
print(f"using: {data_sub_path}")

using: T1H-X3400M-Y3520M_2014-01-01_2016-01-01_#7cd


In [6]:
# Build the run configuration: paths, logging, seeding and compute device.
conf = BaseConf()
conf.model_name = "test"  # needs to be created

conf.data_path = f"./data/processed/{data_sub_path}/"

# The processed data must already be on disk as a directory; a plain file at this
# path is just as unusable as a missing one, hence isdir rather than exists.
if not os.path.isdir(conf.data_path):
    raise FileNotFoundError(f"Directory ({conf.data_path}) needs to exist.")

# Only the model folder may need creating - the data folder was verified above.
conf.model_path = f"{conf.data_path}models/{conf.model_name}/"
os.makedirs(conf.model_path, exist_ok=True)

# logging config is set globally thus we only need to call this in this file
# imported function logs will follow the configuration
setup_logging(save_dir=conf.model_path, log_config='./logger/standard_logger_config.json', default_level=log.INFO)
log.info("=====================================BEGIN=====================================")

# Snapshot of the configuration as it stands here, plus the start time of the run.
# conf.device is assigned after this copy, so the device is recorded separately below.
info = deepcopy(conf.__dict__)
info["start_time"] = strftime("%Y-%m-%dT%H:%M:%S")

# DATA LOADER SETUP
# NOTE(review): `torch` is not imported by name in the visible import cell;
# presumably it arrives through `from utils.data_processing import *` - confirm.
np.random.seed(conf.seed)
use_cuda = False  # torch.cuda.is_available()
torch.manual_seed(conf.seed)
if use_cuda:
    # Seed the GPU generator as well and ask cuDNN for deterministic kernels.
    torch.cuda.manual_seed(conf.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

device = torch.device("cuda:0" if use_cuda else "cpu")
log.info(f"Device: {device}")
info["device"] = device.type  # stored as the plain type string ("cpu" / "cuda")
conf.device = device

2020-10-30T09:44:12 | root | INFO | Device: cpu


In [7]:
# Settings stored on conf before the data objects are built.
# NOTE(review): which of these fields GridDataGroup / GridDataLoaders actually
# read is not visible from this notebook - confirm in their sources.
conf.cap_crime_percentile = 0  # 99.95
conf.batch_size = 128

# CRIME DATA
# The data group is built from the processed data folder; the loaders wrap it.
data_group = GridDataGroup(conf=conf, data_path=conf.data_path)
loaders = GridDataLoaders(conf=conf, data_group=data_group)

2020-10-30T09:44:13 | root | INFO | Initialising Grid Data Group
2020-10-30T09:44:13 | root | INFO | 	t_range: (17521,) 2014-01-01 00:00:00 -> 2016-01-01 00:00:00
2020-10-30T09:44:13 | root | INFO | 	target_len:	17016	(100.000%)
2020-10-30T09:44:13 | root | INFO | 	trn_val_size:	8376	(49.224%)
2020-10-30T09:44:13 | root | INFO | 	trn_size:	6282	(36.918%)
2020-10-30T09:44:13 | root | INFO | 	val_size:	2094	(12.306%)
2020-10-30T09:44:13 | root | INFO | 	tst_size:	8640 	(50.776%)


In [8]:
# Turn the crime data held by the data group into count grids via to_counts.
# NOTE(review): later cells index grids[:, 0]; presumably axis 0 is time and
# axis 1 a channel/feature axis - confirm against GridDataGroup.
grids = data_group.to_counts(sparse_data=data_group.crimes)

In [9]:
from utils.interactive import new_interactive_heatmap, State, InteractiveHeatmapsWithLines
from ipywidgets import Layout, widgets

In [10]:
# Heatmap widget of the first slice of the count grids over the data group's
# time range; the widget's `app` is the value displayed by this cell.
counts_heatmaps = InteractiveHeatmapsWithLines(
    date_range=data_group.t_range,
    col_wrap=1,
    Counts=grids[:, 0],  # alternative input tried earlier: data_group.crimes[:, 0]
)
counts_heatmaps.app

VBox(children=(Label(value='Date: Wed Jan  1 01:00:00 2014'), HBox(children=(Play(value=0, description='Press …

In [11]:
# Choose the temporal conditioning variables from the sampling frequency of the
# time index. pandas >= 2.2 spells the hourly alias "h" where older releases
# used "H", and freqstr is None when the index carries no frequency, so the
# comparison is made case-insensitive and None-safe.
freq_alias = data_group.t_range.freqstr or ""
if freq_alias.upper() == 'H':
    temporal_variables = ["Hour", "Day of Week"]  # , "Time of Month", "Time of Year"
else:
    temporal_variables = ["Day of Week", "Time of Month", "Time of Year"]

In [13]:
from utils.interactive import interactive_grid_visualiser

# Interactive view of the first slice of the count grids with the mutual
# information option switched on, using the temporal_variables chosen earlier.
# NOTE(review): the meaning of bins=0 and the unit of max_offset (time steps?)
# are decided inside interactive_grid_visualiser - confirm there.
interactive_grid_visualiser(
    grids=grids[:,0],
    t_range=data_group.t_range, 
    mutual_info=True,
    max_offset=365,
    temporal_variables=temporal_variables,
    bins=0,
)

Box(children=(FigureWidget({
    'data': [{'type': 'heatmap',
              'uid': 'fdd48014-f160-489b-b3b7-af…

In [14]:
from sparse_discrete_table import conditional_mutual_information_over_grid, mutual_information_over_grid

In [15]:
from utils.plots import plot

In [16]:
# Recompute the count grids and pass them through the data group's shaper
# (squeeze) before the per-cell computation.
grids = data_group.to_counts(sparse_data=data_group.crimes)
squezed_grids = data_group.shaper.squeeze(grids)
log.info(f"temporal_variables: {temporal_variables}")

# Conditional mutual information for offsets up to 366, conditioned on the
# temporal variables. Slow step: the recorded run took roughly five minutes.
squeezed_first_slice = squezed_grids[:, 0]
cmi_grid = conditional_mutual_information_over_grid(
    dense_grid=squeezed_first_slice, t_range=data_group.t_range,
    max_offset=366, temporal_variables=temporal_variables,
)

2020-10-30T09:48:00 | root | INFO | temporal_variables: ['Hour', 'Day of Week']
2020-10-30T09:48:00 | root | INFO | => 0000/0050 => 0.000
2020-10-30T09:49:03 | root | INFO | => 0010/0050 => 20.000
2020-10-30T09:50:10 | root | INFO | => 0020/0050 => 40.000
2020-10-30T09:51:15 | root | INFO | => 0030/0050 => 60.000
2020-10-30T09:52:23 | root | INFO | => 0040/0050 => 80.000
2020-10-30T09:53:24 | root | INFO | done


In [17]:
# Swap the two leading axes of cmi_grid, insert a singleton axis at position 1
# and let the shaper's unsqueeze restore the grid layout for plotting.
cmi_axes_swapped = np.swapaxes(cmi_grid, 0, 1)
cmi_grid_ = data_group.shaper.unsqueeze(cmi_axes_swapped[:, np.newaxis])

# Browse the maps with a plain integer index standing in for the time axis.
cmi_maps = cmi_grid_[:, 0]
interactive_grid_visualiser(grids=cmi_maps, t_range=np.arange(len(cmi_maps)))

Box(children=(FigureWidget({
    'data': [{'type': 'heatmap',
              'uid': 'bb5e37b8-81bc-4bf2-a4bf-ea…

In [41]:
# Mutual information over the same squeezed grid slice and max_offset as the
# conditional run, here without t_range / temporal variables
# (presumably the unconditioned counterpart - confirm in sparse_discrete_table).
mi_grid = mutual_information_over_grid(
    dense_grid=squezed_grids[:, 0],
    max_offset=366,
)

2020-10-30T10:55:06 | root | INFO | => 0000/0050 => 0.000
2020-10-30T10:55:33 | root | INFO | => 0010/0050 => 20.000
2020-10-30T10:56:01 | root | INFO | => 0020/0050 => 40.000
2020-10-30T10:56:28 | root | INFO | => 0030/0050 => 60.000
2020-10-30T10:56:55 | root | INFO | => 0040/0050 => 80.000
2020-10-30T10:57:21 | root | INFO | done


In [42]:
# Swap the two leading axes of mi_grid, insert a singleton axis at position 1
# and let the shaper's unsqueeze restore the grid layout for plotting.
mi_axes_swapped = np.swapaxes(mi_grid, 0, 1)
mi_grid_ = data_group.shaper.unsqueeze(mi_axes_swapped[:, np.newaxis])

# Browse the maps with a plain integer index standing in for the time axis.
mi_maps = mi_grid_[:, 0]
interactive_grid_visualiser(grids=mi_maps, t_range=np.arange(len(mi_maps)))

Box(children=(FigureWidget({
    'data': [{'type': 'heatmap',
              'uid': '3a43c24e-a527-4f7d-bb0e-d6…

In [43]:
# Sanity check: is the data group's time range a pandas DatetimeIndex?
isinstance(data_group.t_range, pd.DatetimeIndex)

True

In [44]:
# Show the MI and CMI maps side by side (two columns); the keyword names become
# the entries passed to the widget, and the index axis starts at 1.
kwargs = dict(MI=mi_grid_[:, 0], CMI=cmi_grid_[:, 0])
offset_index = np.arange(1, 1 + len(mi_grid_[:, 0]))

InteractiveHeatmapsWithLines(date_range=offset_index, col_wrap=2, **kwargs).app

VBox(children=(Label(value='Index: 0'), HBox(children=(Play(value=0, description='Press play', interval=1000, …

In [30]:
# (l, max_offset)
# Row-wise mean and standard deviation of the CMI values, kept as column vectors
# so they broadcast against cmi_grid.
mu_grid = np.mean(cmi_grid, axis=1, keepdims=True)
std_grid = np.std(cmi_grid, axis=1, keepdims=True)

# Absolute z-score of every entry within its own row. A row that is constant
# along axis 1 has zero spread; dividing by it would produce NaN (0/0) and a
# runtime warning, so those rows get a deviation of 0 instead. Rows whose std is
# already NaN still propagate NaN, as before.
# Signed variant, if ever needed: (cmi_grid - mu_grid) / std_grid
dev_grid = np.divide(
    np.abs(cmi_grid - mu_grid),
    std_grid,
    out=np.zeros_like(cmi_grid, dtype=float),
    where=std_grid != 0,
)

In [31]:
# Plot the deviation values of the first row (index 0) of dev_grid.
plot(a=dev_grid[0,:])

In [34]:
# Average the deviation over the first 14 columns of every row, shape the result
# as (1, 1, n_rows) for the shaper and show the restored 2-D map.
# NOTE(review): the name suggests 14 columns == two weeks, which assumes one
# column per day - confirm against the offset unit used for cmi_grid.
dev2weeks = dev_grid[:, :14].mean(axis=1).reshape(1, 1, -1)
dev2weeks_grid = data_group.shaper.unsqueeze(dev2weeks)[0, 0]
new_interactive_heatmap(z=dev2weeks_grid)

FigureWidget({
    'data': [{'type': 'heatmap',
              'uid': '83de7bee-1bb1-4170-a834-3ad1a4a932b8',
 …

In [35]:
def gradient(data):
    """Return the slope of the least-squares straight line through ``data``.

    The values are fitted against their positions 0, 1, ..., len(data) - 1 with
    a degree-1 ``np.polyfit``; the intercept of the fit is discarded.
    """
    positions = np.arange(0, len(data))
    values = np.array(data)
    slope, _intercept = np.polyfit(positions, values, deg=1)
    return slope

In [45]:
# For every row of dev_grid, count the entries whose deviation exceeds 3, then
# shape the counts as (1, 1, n_rows) so the shaper can restore the 2-D map.
# Other per-row summaries tried earlier: the first value, the sum of the first
# 10 values, and gradient() of the first 10 values.
dev_sum = np.array([len(dev[dev > 3]) for dev in dev_grid])
dev_sum = dev_sum.reshape(1, 1, -1)
dev_sum_grid = data_group.shaper.unsqueeze(dev_sum)[0, 0]

new_interactive_heatmap(z=dev_sum_grid)

FigureWidget({
    'data': [{'type': 'heatmap',
              'uid': 'a2297eee-bfad-4723-9b7b-980c69691e95',
 …

In [46]:
from utils.interactive import interactive_grid_visualiser

# Same visualiser call on the first slice of the count grids, this time with
# "Hour" as the only temporal variable and no explicit bins argument.
interactive_grid_visualiser(
    grids=grids[:,0],
    t_range=data_group.t_range, 
    mutual_info=True,
    max_offset=365,
    temporal_variables=["Hour"],# "Day of Week", "Time of Month", "Time of Year"],
)

Box(children=(FigureWidget({
    'data': [{'type': 'heatmap',
              'uid': 'a4469671-1322-475f-8d35-46…