In [None]:
# Colab only: mount Google Drive and switch into the project folder so that the
# relative paths used further down (./data/..., ./logger/...) resolve.
from google.colab import drive
drive.mount('/content/gdrive')
%cd /content/gdrive/My\ Drive/masters

In [None]:
# %ls ./data/processed
# # T12H-X850M-Y880M_2013-01-01_2015-01-01/
# # T1H-X1700M-Y1760M_2013-01-01_2015-01-01/
# # T1H-X850M-Y880M_2013-01-01_2015-01-01/
# # T24H-X255M-Y220M_2013-01-01_2015-01-01/
# # T24H-X425M-Y440M_2013-01-01_2015-01-01/
# # T24H-X850M-Y880M_2013-01-01_2015-01-01/
# # T24H-X85M-Y110M_2013-01-01_2015-01-01/
# # T3H-X850M-Y880M_2013-01-01_2015-01-01/
# # T6H-X850M-Y880M_2013-01-01_2015-01-01/

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline
import os
import logging as log
from time import strftime
from copy import deepcopy
from torch import nn, optim
import torch.nn.functional as F
from utils.data_processing import *
from logger.logger import setup_logging
from utils.configs import BaseConf
from utils.metrics import best_threshold
from utils.utils import write_json, Timer
from dataloaders.grid_loader import GridDataLoaders
from datasets.grid_dataset import GridDataGroup
from utils.metrics import PRCurvePlotter, ROCCurvePlotter, LossPlotter, best_threshold, get_y_pred, \
                                get_y_pred_by_thresholds, best_thresholds
from sklearn.metrics import accuracy_score, average_precision_score, roc_auc_score
from models.model_result import ModelResult, ModelMetrics
from trainers.generic_trainer import train_model
from utils.plots import im
from utils.utils import pshape, get_data_sub_paths
from models.model_result import save_metrics, compare_models,get_metrics_table, get_models_metrics, get_models_results
from models.st_resnet_models import STResNet, STResNetExtra
from models.st_resnet_models import train_epoch_for_st_res_net, train_epoch_for_st_res_net_extra
from models.st_resnet_models import evaluate_st_res_net, evaluate_st_res_net_extra

In [None]:
# Show what get_data_sub_paths() returns
# (presumably the dataset folder names under ./data/processed - confirm).
data_sub_paths = get_data_sub_paths()
data_sub_paths

## Load data

In [None]:
# Experiment set-up: choose dataset and model variant, prepare the output
# folder, configure logging, seed the random generators and pick the device.
USE_EXTRA = False  # todo move extra model to own notebook

start_date = "2013-01-01"
end_date = "2015-01-01"

data_dim_str = 'T24H-X255M-Y220M'  # "T24H-X850M-Y880M" # "T1H-X1700M-Y1760M" #
conf = BaseConf()

# needs to be created
conf.model_name = "ST-RESNET-Extra" if USE_EXTRA else "ST-RESNET"

conf.data_path = f"./data/processed/{data_dim_str}_{start_date}_{end_date}/"

# the processed data has to be there already - fail early with a specific error
if not os.path.exists(conf.data_path):
    raise FileNotFoundError(f"Directory ({conf.data_path}) needs to exist.")

# data_path is known to exist at this point, only the model folder may be missing
conf.model_path = f"{conf.data_path}models/{conf.model_name}/"
os.makedirs(conf.model_path, exist_ok=True)

# logging config is set globally thus we only need to call this in this file
# imported function logs will follow the configuration
setup_logging(save_dir=conf.model_path, log_config='./logger/standard_logger_config.json', default_level=log.INFO)
log.info("=====================================BEGIN=====================================")

# snapshot of the configuration (taken before conf.device is set) plus run details
info = deepcopy(conf.__dict__)
info["start_time"] = strftime("%Y-%m-%dT%H:%M:%S")

# SEEDING
# The CPU generator is seeded unconditionally: seeding only the CUDA generator
# leaves CPU-side randomness unseeded whenever a GPU is present.
np.random.seed(conf.seed)
torch.manual_seed(conf.seed)
use_cuda = torch.cuda.is_available()
if use_cuda:
    torch.cuda.manual_seed_all(conf.seed)

# DEVICE
device = torch.device("cuda:0" if use_cuda else "cpu")
log.info(f"Device: {device}")
info["device"] = device.type
conf.device = device

In [None]:
# batch size is written to conf before conf is handed to the data group / loaders
conf.batch_size = 256

# CRIME DATA
data_group = GridDataGroup(data_path=conf.data_path, conf=conf)
loaders = GridDataLoaders(data_group=data_group, conf=conf)

## Model setup

In [None]:
# HYPER PARAMETERS
conf.dropout = 0  # 0.2
conf.weight_decay = 1e-8
conf.lr = 1e-3
conf.checkpoint = "best"  # "latest" #
conf.resume = False
conf.freqstr = data_group.t_range.freqstr
conf.early_stopping = False
conf.max_epochs = 20

# LOSS FUNCTION
# size averaged - so more epochs or larger lr for smaller batches
loss_function = nn.MSELoss()

# MODEL
# input sizes are read off the training dataset
dataset = loaders.train_loader.dataset
_, h_size, w_size = dataset.crimes.shape
_, n_ext_features = dataset.time_vectors.shape

# todo setup - hyper-optimiser
conf.n_layers = 5  # number of res-unit layers
conf.n_channels = 3  # inner channel size of the res-units

# keyword arguments that both model variants take
shared_model_kwargs = dict(
    n_layers=conf.n_layers,
    n_channels=conf.n_channels,
    y_size=h_size,
    x_size=w_size,
    lc=dataset.n_steps_c,
    lp=dataset.n_steps_p,
    lq=dataset.n_steps_q,
    n_ext_features=n_ext_features,
)

if not USE_EXTRA:
    train_epoch_fn, evaluate_fn = train_epoch_for_st_res_net, evaluate_st_res_net
    model = STResNet(**shared_model_kwargs)
else:
    train_epoch_fn, evaluate_fn = train_epoch_for_st_res_net_extra, evaluate_st_res_net_extra
    model = STResNetExtra(
        **shared_model_kwargs,
        n_demog_features=37,
        n_demog_channels=10,
        n_demog_layers=3,
        n_gsv_features=512,
        n_gsv_channels=10,
        n_gsv_layers=3,
    )

# todo implement model_arch for res-net
model.to(conf.device)

# OPTIMISER
# todo (optional): setup model parameters dynamically
optimiser = optim.Adam(params=model.parameters(), lr=conf.lr, weight_decay=conf.weight_decay)

if conf.resume:
    try:
        # resume from previous check point or from the best validation score checkpoint
        model.load_state_dict(
            torch.load(f"{conf.model_path}model_{conf.checkpoint}.pth",
                       map_location=conf.device.type)
        )
        optimiser.load_state_dict(
            torch.load(f"{conf.model_path}optimiser_{conf.checkpoint}.pth",
                       map_location=conf.device.type)
        )

        # the restored optimiser state carries the old hyper-parameters - apply the new ones
        first_group = optimiser.param_groups[0]
        first_group['lr'] = conf.lr
        first_group['weight_decay'] = conf.weight_decay

    except Exception as e:
        log.error(f"Nothing to resume from, training from scratch \n\t-> {e}")


## Training loop

In [None]:
# log the optimiser settings that are in effect when training starts
active_group = optimiser.param_groups[0]
log.info(f"lr: \t\t{active_group['lr']}")
log.info(f"weight_decay: \t{active_group['weight_decay']}")

training_outcome = train_model(
    model=model,
    optimiser=optimiser,
    loaders=loaders,
    train_epoch_fn=train_epoch_fn,
    loss_fn=loss_function,
    conf=conf,
)
trn_epoch_losses, val_epoch_losses, stopped_early = training_outcome

print(f"stopped_early: {stopped_early}")  # use the current epoch instead
# if stopped_early -> continue with best_model - new hyper-parameters -> no n

In [None]:
# Plot the tail of the training and validation loss histories.
N_LAST_EPOCHS = 10  # only the most recent entries of each history are drawn

plt.figure(figsize=(15, 5))
for curve_label, epoch_losses in (("training", trn_epoch_losses),
                                  ("validation", val_epoch_losses)):
    tail = epoch_losses[-N_LAST_EPOCHS:]
    # x values are the 1-based positions in the loss history, not 0..len(tail)-1
    first_epoch = len(epoch_losses) - len(tail) + 1
    plt.plot(range(first_epoch, first_epoch + len(tail)), tail,
             marker='|', alpha=.5, label=curve_label)
plt.xlabel("epoch")
plt.ylabel("loss")
plt.title(f"Loss over the last {N_LAST_EPOCHS} epochs")
plt.legend()
plt.show()

## Evaluation

In [None]:
# Pick which saved weights are evaluated: "best" or "latest"
# conf.checkpoint = "latest"
conf.checkpoint = "best"

log.info(f"Loading model from checkpoint ({conf.checkpoint}) for evaluation")
log.info(f"loading model from {conf.model_path}")

# restore the model weights from the selected checkpoint file
checkpoint_file = f"{conf.model_path}model_{conf.checkpoint}.pth"
model_state_dict = torch.load(checkpoint_file, map_location=conf.device.type)
model.load_state_dict(model_state_dict)

In [None]:
# The threshold is picked on the training split and then applied to the test split.
trn_y_counts, trn_y_true, trn_probas_pred, trn_t_range = evaluate_fn(
    model=model, batch_loader=loaders.train_loader, conf=conf)
thresh = best_threshold(trn_y_true, trn_probas_pred)

tst_y_counts, tst_y_true, tst_probas_pred, tst_t_range = evaluate_fn(
    model=model, batch_loader=loaders.test_loader, conf=conf)

# insert a singleton axis at position 1, then pass both arrays through the shaper
tst_y_true, tst_probas_pred = (
    np.expand_dims(arr, axis=1) for arr in (tst_y_true, tst_probas_pred)
)
tst_y_true, tst_probas_pred = (
    data_group.shaper.squeeze(arr) for arr in (tst_y_true, tst_probas_pred)
)

tst_y_pred = get_y_pred(thresh, tst_probas_pred)
metric_inputs = dict(
    y_true=tst_y_true,
    y_pred=tst_y_pred,
    probas_pred=tst_probas_pred,
    t_range=tst_t_range,
    shaper=data_group.shaper,
    conf=conf,
)
save_metrics(**metric_inputs)
compare_models(data_path=conf.data_path)

In [None]:
# Fetch the model results for this dataset's directory
# (presumably one entry per model saved under conf.data_path - confirm).
models_results = get_models_results(data_path=conf.data_path)

In [None]:
def norm(x):
    """Min-max scale ``x`` into the range [0, 1].

    Parameters
    ----------
    x : array-like
        Numeric values to rescale.

    Returns
    -------
    numpy.ndarray
        ``(x - min) / (max - min)`` as floats. Empty input is returned as an
        empty array and a constant input maps to all zeros, instead of the
        NaNs and runtime warning that ``0 / 0`` would produce.
    """
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        return x
    shifted = x - np.min(x)
    span = np.max(shifted)
    # a flat series has zero span; dividing by it would turn the whole line into NaNs
    if span == 0:
        return shifted
    return shifted / span

# One figure per model: scaled ground truth against scaled prediction.
# NOTE(review): presumably axis 0 is time and the last axis the grid cell - confirm.
i = 0      # index into the last axis of the result arrays
lim = 100  # number of leading entries along axis 0 to draw
for m in models_results:
    fig, ax = plt.subplots(figsize=(15, 5))

    true_series = m.y_true[:lim, 0, i]  # .sum(1)
    ax.plot(norm(true_series), label='true')

    pred_series = m.probas_pred[:lim, 0, i]  # .sum(1)
    ax.plot(norm(pred_series), label=m.model_name)

    ax.grid()
    ax.legend()
    plt.show()


In [None]:
# Look at the shape of the stored y_true array of the second model result.
m = models_results[1]
m.y_true.shape

In [None]:
# Print the `l` attribute of each model result's shaper.
# NOTE(review): what `shaper.l` holds is not visible in this notebook - confirm.
targets = [result.shaper for result in models_results]
for t in targets:
    print(t.l)

----------------------------------------------------------------------------------------------------

## All in one

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import os
import logging as log
from time import strftime
from copy import deepcopy
from torch import nn, optim
import torch.nn.functional as F
from utils.data_processing import *
from logger.logger import setup_logging
from utils.configs import BaseConf
from utils.utils import write_json, Timer
from dataloaders.grid_loader import GridDataLoaders
from datasets.grid_dataset import GridDataGroup
from utils.metrics import PRCurvePlotter, ROCCurvePlotter, LossPlotter, best_threshold, get_y_pred
from sklearn.metrics import accuracy_score, average_precision_score, roc_auc_score
from models.model_result import ModelResult, ModelMetrics
from trainers.generic_trainer import train_model
from utils.plots import im
from utils.utils import pshape, get_data_sub_paths
from models.model_result import save_metrics, compare_models, get_models_metrics, get_models_results
from models.st_resnet_models import STResNet, STResNetExtra
from models.st_resnet_models import train_epoch_for_st_res_net, train_epoch_for_st_res_net_extra
from models.st_resnet_models import evaluate_st_res_net, evaluate_st_res_net_extra

In [None]:
# Show what get_data_sub_paths() returns
# (presumably the dataset folder names under ./data/processed - confirm).
data_sub_paths = get_data_sub_paths()
data_sub_paths

In [None]:
# Override the list from the previous cell with a single dataset.
data_sub_paths = ['T24H-X850M-Y880M_2013-01-01_2015-01-01']

In [None]:
# Train and evaluate both ST-ResNet variants on every dataset in `data_sub_paths`.
for data_sub_path in data_sub_paths:
    log.info(f"\n========================= {data_sub_path} =========================\n")
    for USE_EXTRA in [False, True]:
        conf = BaseConf()

        # needs to be created
        conf.model_name = "ST-RESNET-Extra" if USE_EXTRA else "ST-RESNET"

        # The path is built from the loop variable so that every iteration works on
        # its own dataset and the cell does not rely on variables of earlier cells.
        conf.data_path = f"./data/processed/{data_sub_path}/"

        # the processed data has to be there already - fail early with a specific error
        if not os.path.exists(conf.data_path):
            raise FileNotFoundError(f"Directory ({conf.data_path}) needs to exist.")

        # data_path is known to exist at this point, only the model folder may be missing
        conf.model_path = f"{conf.data_path}models/{conf.model_name}/"
        os.makedirs(conf.model_path, exist_ok=True)

        # logging config is set globally thus we only need to call this in this file
        # imported function logs will follow the configuration
        setup_logging(save_dir=conf.model_path, log_config='./logger/standard_logger_config.json', default_level=log.INFO)
        log.info("=====================================BEGIN=====================================")

        # snapshot of the configuration (taken before the settings below) plus run details
        info = deepcopy(conf.__dict__)
        info["start_time"] = strftime("%Y-%m-%dT%H:%M:%S")

        # SEEDING
        # The CPU generator is seeded unconditionally: seeding only the CUDA generator
        # leaves CPU-side randomness unseeded whenever a GPU is present.
        np.random.seed(conf.seed)
        torch.manual_seed(conf.seed)
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            torch.cuda.manual_seed_all(conf.seed)

        # DEVICE
        device = torch.device("cuda:0" if use_cuda else "cpu")
        log.info(f"Device: {device}")
        info["device"] = device.type
        conf.device = device

        # SET THE HYPER PARAMETERS
        conf.dropout = 0  # 0.2
        conf.weight_decay = 0  # 1e-8
        conf.checkpoint = "best"  # ["best"|"latest"]
        conf.lr = 1e-3
        conf.batch_size = 256

        # CRIME DATA
        data_group = GridDataGroup(data_path=conf.data_path,
                                   conf=conf)

        loaders = GridDataLoaders(data_group=data_group,
                                  conf=conf)

        conf.resume = False
        conf.freqstr = data_group.t_range.freqstr
        conf.early_stopping = False
        conf.max_epochs = 60

        # SET LOSS FUNCTION
        # size averaged - so more epochs or larger lr for smaller batches
        loss_function = nn.MSELoss()

        # SETUP MODEL
        # input sizes are read off the training dataset
        dataset = loaders.train_loader.dataset
        _, h_size, w_size = dataset.crimes.shape
        _, n_ext_features = dataset.time_vectors.shape

        # todo setup - hyper-optimiser
        conf.n_layers = 3  # number of res-unit layers
        conf.n_channels = 3  # inner channel size of the res-units

        if USE_EXTRA:
            train_epoch_fn = train_epoch_for_st_res_net_extra
            evaluate_fn = evaluate_st_res_net_extra
            model = STResNetExtra(n_layers=conf.n_layers,
                                  n_channels=conf.n_channels,
                                  y_size=h_size,
                                  x_size=w_size,

                                  lc=dataset.n_steps_c,
                                  lp=dataset.n_steps_p,
                                  lq=dataset.n_steps_q,

                                  n_ext_features=n_ext_features,
                                  n_demog_features=37,
                                  n_demog_channels=10,
                                  n_demog_layers=3,

                                  n_gsv_features=512,
                                  n_gsv_channels=10,
                                  n_gsv_layers=3)
        else:
            train_epoch_fn = train_epoch_for_st_res_net
            evaluate_fn = evaluate_st_res_net
            model = STResNet(n_layers=conf.n_layers,
                             n_channels=conf.n_channels,
                             y_size=h_size,
                             x_size=w_size,

                             lc=dataset.n_steps_c,
                             lp=dataset.n_steps_p,
                             lq=dataset.n_steps_q,

                             n_ext_features=n_ext_features)

        # todo implement model_arch for res-net
        model.to(conf.device)

        # SETUP OPTIMISER
        parameters = model.parameters()
        # todo (optional): setup model parameters dynamically

        optimiser = optim.Adam(params=parameters, lr=conf.lr, weight_decay=conf.weight_decay)

        if conf.resume:
            try:
                # resume from previous check point or from the best validation score checkpoint
                # load model state
                model_state_dict = torch.load(f"{conf.model_path}model_{conf.checkpoint}.pth",
                                              map_location=conf.device.type)
                model.load_state_dict(model_state_dict)

                # load optimiser state
                optimiser_state_dict = torch.load(f"{conf.model_path}optimiser_{conf.checkpoint}.pth",
                                                  map_location=conf.device.type)
                optimiser.load_state_dict(optimiser_state_dict)

                # the restored optimiser state carries the old hyper-parameters - apply the new ones
                optimiser.param_groups[0]['lr'] = conf.lr
                optimiser.param_groups[0]['weight_decay'] = conf.weight_decay

            except Exception as e:
                log.error(f"Nothing to resume from, training from scratch \n\t-> {e}")

        # TRAINING
        log.info(f"lr: \t\t{optimiser.param_groups[0]['lr']}")
        log.info(f"weight_decay: \t{optimiser.param_groups[0]['weight_decay']}")
        trn_epoch_losses, val_epoch_losses, stopped_early = train_model(model=model,
                                                                        optimiser=optimiser,
                                                                        loaders=loaders,
                                                                        train_epoch_fn=train_epoch_fn,
                                                                        loss_fn=loss_function,
                                                                        conf=conf)

        print(f"stopped_early: {stopped_early}")  # use the current epoch instead
        # if stopped_early -> continue with best_model - new hyper-parameters -> no n

        # EVALUATION
        # Load latest or best validation model
        # conf.checkpoint = "latest"
        conf.checkpoint = "best"

        log.info(f"Loading model from checkpoint ({conf.checkpoint}) for evaluation")

        # load model state of the selected checkpoint
        model_state_dict = torch.load(f"{conf.model_path}model_{conf.checkpoint}.pth",
                                      map_location=conf.device.type)
        model.load_state_dict(model_state_dict)

        # the threshold is picked on the training split and applied to the test split
        trn_y_counts, trn_y_true, trn_probas_pred, trn_t_range = evaluate_fn(model=model,
                                                                             batch_loader=loaders.train_loader,
                                                                             conf=conf)
        thresh = best_threshold(trn_y_true, trn_probas_pred)

        tst_y_counts, tst_y_true, tst_probas_pred, tst_t_range = evaluate_fn(model=model,
                                                                             batch_loader=loaders.test_loader,
                                                                             conf=conf)

        # insert a singleton axis at position 1, then pass both arrays through the shaper
        tst_y_true = np.expand_dims(tst_y_true, axis=1)
        tst_probas_pred = np.expand_dims(tst_probas_pred, axis=1)

        tst_y_true = data_group.shaper.squeeze(tst_y_true)
        tst_probas_pred = data_group.shaper.squeeze(tst_probas_pred)

        tst_y_pred = get_y_pred(thresh, tst_probas_pred)
        save_metrics(y_true=tst_y_true,
                     y_pred=tst_y_pred,
                     probas_pred=tst_probas_pred,
                     t_range=tst_t_range,
                     shaper=data_group.shaper,
                     conf=conf)
        compare_models(data_path=conf.data_path)

In [None]:
# Print the metrics table of every dataset returned by get_data_sub_paths().
# get_metrics_table is imported here because the import cell of this section
# does not bring it in, so the cell would otherwise fail when the section is run on its own.
from models.model_result import get_metrics_table

for data_sub_path in get_data_sub_paths():
    log.info(f"{data_sub_path}\n")
    data_path = f"./data/processed/{data_sub_path}/"

    models_metrics = get_models_metrics(data_path)
    metrics_table = get_metrics_table(models_metrics)
    print(metrics_table)
    print("\n===================================================================================================\n")