In [1]:
import os
import logging as log
from time import strftime
from copy import deepcopy
from torch import nn, optim
import torch.nn.functional as F
from utils.data_processing import *
from logger.logger import setup_logging
from utils.configs import BaseConf
from utils.utils import write_json, Timer
from models.st_resnet_models import STResNet, STResNetExtra
from dataloaders.grid_loader import GridDataLoaders
from datasets.grid_dataset import GridDataGroup
from utils.metrics import PRCurvePlotter, ROCCurvePlotter, LossPlotter
from sklearn.metrics import accuracy_score, average_precision_score, roc_auc_score
from models.model_result import ModelResult, ModelMetrics
from utils.mock_data import mock_fnn_data_classification
from utils.plots import im
from utils.utils import pshape

In [2]:
# ---------------------------------------------------------------------------
# Experiment configuration, logging, seeding and device selection.
# Everything a reader might want to tune lives in this cell.
# ---------------------------------------------------------------------------
start_date = "2013-01-01"
end_date = "2015-01-01"

data_dim_str = "T1H-X1700M-Y1760M"  # needs to exist
model_name = "ST-RESNET"  # needs to be created
data_path = f"./data/processed/{data_dim_str}_{start_date}_{end_date}/"

# The processed data has to be on disk already; only the model folder is created here.
if not os.path.exists(data_path):
    raise FileNotFoundError(f"Directory ({data_path}) needs to exist.")

model_path = data_path + f"models/{model_name}/"
os.makedirs(model_path, exist_ok=True)

# logging config is set globally thus we only need to call this in this file
# imported function logs will follow the configuration
setup_logging(save_dir=model_path, log_config='./logger/standard_logger_config.json', default_level=log.INFO)
log.info("=====================================BEGIN=====================================")

timer = Timer()
# manually set the config
conf_dict = {
    "seed": 3,
    "use_cuda": False,

    "use_crime_types": False,

    # data group/data set related
    "val_ratio": 0.1,  # ratio of the total dataset
    "tst_ratio": 0.2,  # ratio of the total dataset
    "seq_len": 1,
    "flatten_grid": True,  # if the shaper should be used to squeeze the data

    # shaper related
    "shaper_top_k": -1,  # if less than 0, top_k will not be applied
    "shaper_threshold": 0,

    # data loader related
    "sub_sample_train_set": True,
    "sub_sample_validation_set": True,
    "sub_sample_test_set": False,

    # training parameters
    "resume": False,
    "early_stopping": False,
    "tolerance": 1e-8,
    "lr": 1e-3,
    "weight_decay": 1e-8,
    "max_epochs": 1,
    "batch_size": 64,
    "dropout": 0.2,
    "shuffle": False,
    "num_workers": 6,

    # attached global variables - bad practice -find alternative
    "device": None,  # pytorch device object [CPU|GPU]
    "timer": Timer(),
    "model_name": model_name,
    "model_path": model_path,
    "checkpoint": "best",

    "use_seq_loss": True,
}
conf = BaseConf(conf_dict=conf_dict)

# Snapshot of the configuration taken before the device object is attached to conf.
info = deepcopy(conf.__dict__)
info["start_time"] = strftime("%Y-%m-%dT%H:%M:%S")

# SEEDING
# The CPU generator is seeded unconditionally: previously it was skipped whenever
# CUDA was available, leaving CPU-side randomness unseeded on GPU machines.
np.random.seed(conf.seed)
torch.manual_seed(conf.seed)

# DEVICE SETUP
# Use the GPU only when the config asks for it AND one is actually present.
use_cuda = bool(conf.use_cuda) and torch.cuda.is_available()
if use_cuda:
    torch.cuda.manual_seed_all(conf.seed)  # seed every visible GPU, not only the current one

device = torch.device("cuda:0" if use_cuda else "cpu")
log.info(f"Device: {device}")
info["device"] = device.type  # store the device type string rather than the device object
conf.device = device

2019-10-28T02:59:03 | root | INFO | Device: cpu


In [3]:
# Override the batch size set in conf_dict (64) with a very small one for this notebook.
conf.batch_size = 3
# Construct the data group from the processed-data directory and the current configuration.
data_group = GridDataGroup(data_path=data_path, conf=conf)

2019-10-28T02:59:03 | root | INFO | Data shapes of files in generated_data.npz
2019-10-28T02:59:03 | root | INFO | 	crime_feature_indices shape (10,)
2019-10-28T02:59:04 | root | INFO | 	crime_types_grids shape (17520, 10, 24, 16)
2019-10-28T02:59:04 | root | INFO | 	crime_grids shape (17520, 1, 24, 16)
2019-10-28T02:59:05 | root | INFO | 	tract_count_grids shape (17520, 1, 24, 16)
2019-10-28T02:59:05 | root | INFO | 	demog_grid shape (1, 37, 24, 16)
2019-10-28T02:59:05 | root | INFO | 	street_grid shape (1, 512, 24, 16)
2019-10-28T02:59:05 | root | INFO | 	time_vectors shape (17521, 66)
2019-10-28T02:59:05 | root | INFO | 	weather_vectors shape (8760, 11)
2019-10-28T02:59:05 | root | INFO | 	x_range shape (16,)
2019-10-28T02:59:05 | root | INFO | 	y_range shape (24,)
2019-10-28T02:59:05 | root | INFO | 	t_range shape (17521,)


In [4]:
# Build the data loaders on top of the data group; `loaders.test_loader` is used in the cells below.
loaders = GridDataLoaders(data_group=data_group,conf=conf)

### Evaluation Code

In [5]:
# Sanity check of the test loader: write the last step of every sample's
# `batch_seq_c` into an array at the index the loader reports for that sample,
# then verify that the result equals the tail of `dataset.crimes`.
batch_loader = loaders.test_loader

# `np.float` was only an alias of the builtin `float` and was removed in NumPy 1.24,
# so the builtin is used directly (same float64 dtype).
probas_pred = np.zeros(batch_loader.dataset.target_shape, dtype=float)

y_true = batch_loader.dataset.crimes[-len(probas_pred):]
pshape(y_true, probas_pred)
for batch_indices, batch_seq_c, batch_seq_p, batch_seq_q, batch_seq_e, batch_seq_t in batch_loader:
    for i, t in zip(batch_indices, batch_seq_c):
        # t is one sample's sequence; keep only its final step
        probas_pred[i] = t[-1]

# Displayed as the cell result: True when every reconstructed cell matches y_true.
np.equal(probas_pred, y_true).all()

(3403, 24, 16)
(3403, 24, 16)


True

In [9]:
# Smoke test: build the model and push one test batch through it to check
# that the input and output shapes line up.
batch_loader = loaders.test_loader  # currently looking at the test data

dataset = batch_loader.dataset

_, h_size, w_size = dataset.crimes.shape
_, n_ext_features = dataset.time_vectors.shape
# The static grids are stored as (1, n_features, H, W); read the feature counts
# from the data instead of hard-coding them (37 and 512 for the current files).
n_demog_features = dataset.demog_grid.shape[1]
n_gsv_features = dataset.street_grid.shape[1]

# Alternative without the demographic / street-view inputs: STResNet takes the same
# arguments as below minus the n_demog_* and n_gsv_* ones.
model = STResNetExtra(n_layers=3,
                      n_channels=1,
                      y_size=h_size,
                      x_size=w_size,

                      lc=dataset.n_steps_c,
                      lp=dataset.n_steps_p,
                      lq=dataset.n_steps_q,

                      n_ext_features=n_ext_features,

                      n_demog_features=n_demog_features,
                      n_demog_channels=10,
                      n_demog_layers=3,

                      n_gsv_features=n_gsv_features,
                      n_gsv_channels=10,
                      n_gsv_layers=3)
# The inputs are moved to conf.device below, so the model's parameters must live there too;
# otherwise the forward pass fails with a device mismatch when running on a GPU.
model = model.to(conf.device)

demog_grid = torch.Tensor(dataset.demog_grid).to(conf.device)
street_grid = torch.Tensor(dataset.street_grid).to(conf.device)

for batch_indices, batch_seq_c, batch_seq_p, batch_seq_q, batch_seq_e, batch_seq_t in batch_loader:
    pshape(batch_indices, batch_seq_c, batch_seq_p, batch_seq_q, batch_seq_e, batch_seq_t)

    batch_seq_c = torch.Tensor(batch_seq_c).to(conf.device)
    batch_seq_p = torch.Tensor(batch_seq_p).to(conf.device)
    batch_seq_q = torch.Tensor(batch_seq_q).to(conf.device)
    batch_seq_e = torch.Tensor(batch_seq_e).to(conf.device)
    batch_seq_t = torch.Tensor(batch_seq_t).to(conf.device)

    # Only the output shape is inspected here, so skip building the autograd graph.
    with torch.no_grad():
        probas_pred = model(seq_c=batch_seq_c,
                            seq_p=batch_seq_p,
                            seq_q=batch_seq_q,
                            seq_e=batch_seq_e,
                            seq_demog=demog_grid,  # TODO MOVE TO LOADER AND MAKE BATCHES OF THE SAME THING
                            seq_gsv=street_grid,
                            )

    print(probas_pred.size())

    break  # a single batch is enough for the shape check

(3,)
(3, 3, 24, 16)
(3, 3, 24, 16)
(3, 3, 24, 16)
(3, 1, 66)
(3, 1, 24, 16)
torch.Size([3, 1, 24, 16])


In [7]:
# Convert the dataset's static grids to tensors on the configured device.
# NOTE(review): these two assignments repeat the ones already made in the In [9] cell
# above, and `dataset` is only assigned in that cell, so this cell depends on it having
# been run first; the execution counts (7 after 9) are out of order.
demog_grid = torch.Tensor(dataset.demog_grid).to(conf.device)
street_grid = torch.Tensor(dataset.street_grid).to(conf.device)

In [8]:
# Display the shape of the demog_grid array held by the dataset.
dataset.demog_grid.shape

(1, 37, 24, 16)