In [1]:
import os, tqdm
import numpy as np
from cartoframes.viz import *
import pandas as pd
import geopandas as gpd

Note: NumExpr detected 64 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
NumExpr defaulting to 8 threads.


In [2]:
from sklearn.metrics import r2_score
import numpy as np

# metric
def metric(pred, label):
    """Compute regression error metrics, ignoring entries whose label is 0.

    Args:
        pred: array of predicted values; must have the same shape as ``label``.
        label: array of ground-truth values.

    Returns:
        A tuple ``(male, rmse, mape)``:
            male: mean absolute difference between ``log(pred)`` and
                ``log(label)``.
            rmse: root mean squared error.
            mape: *median* absolute percentage error, as a fraction
                (not multiplied by 100).
        All three are 0 when every label is 0.

    Raises:
        AssertionError: if ``pred`` and ``label`` have different shapes.

    Note:
        Non-finite intermediate terms (for example the log of a non-positive
        prediction) are replaced via ``np.nan_to_num`` instead of raising.
    """
    assert label.shape == pred.shape

    with np.errstate(divide='ignore', invalid='ignore'):
        valid = np.not_equal(label, 0)

        # Weighting by 1/mean(mask) turns a plain mean over *all* entries into
        # a mean over the valid (non-zero label) entries only.
        mask = valid.astype(np.float32)
        mask /= np.mean(mask)

        log_err = np.abs(np.subtract(np.log(pred), np.log(label))).astype(np.float32)
        male = np.mean(np.nan_to_num(log_err * mask))

        abs_err = np.abs(np.subtract(pred, label)).astype(np.float32)
        rmse = np.sqrt(np.mean(np.nan_to_num(np.square(abs_err) * mask)))

        # The mask-weighting trick is only valid for means. For the median the
        # invalid entries have to be dropped, otherwise the zeros they inject
        # and the 1/mean(mask) scaling shift the result.
        ape_valid = np.nan_to_num(np.divide(abs_err, label)[valid])
        if ape_valid.size:
            mape = np.median(ape_valid)  # median rather than mean, by design
        else:
            # No usable label at all: report 0 instead of the NaN that
            # np.median would return for an empty array.
            mape = ape_valid.dtype.type(0)
    return male, rmse, mape



In [3]:
# The Mapbox access token is read from the environment so that it is not
# committed together with the notebook. Export MAPBOX_TOKEN before starting
# the kernel; an empty string is used when the variable is missing.
MAPBOX_TOKEN = os.environ.get('MAPBOX_TOKEN', '')

# Basemap definitions (Mapbox style URL + access token).
# NOTE(review): presumably passed as the `basemap` argument of cartoframes
# maps further down in the notebook - confirm against the callers.
streetmap = {
    'style': 'mapbox://styles/mapbox/streets-v9',
    'token': MAPBOX_TOKEN,
}
mybasemap = {
    'style': 'mapbox://styles/mapbox/satellite-v9',
    'token': MAPBOX_TOKEN,
}

In [5]:
# Visit the dataset folders in the order sp, poa, kc, fc and open each archive.
# `data` is rebound on every pass, so after the loop it refers to the archive
# of the last folder ('fc') only.
for dname in ('sp', 'poa', 'kc', 'fc'):
    print(dname)
    data = np.load(f'../datasets/{dname}/data.npz')

sp
poa
kc
fc


In [10]:
# Dimensions of the 'idx_geo' array in the currently loaded archive.
np.shape(data['idx_geo'])

(83136, 60)

(66510, 60)

array([[57434, 19383, 39653, ..., 61490, 53853, 52480],
       [31853, 56368, 51453, ..., 57008, 41237,    18],
       [59053, 57082, 62046, ..., 56824, 65719, 58205],
       ...,
       [39601,  7456,  8826, ...,  6926, 46585, 18400],
       [16702,    43,  6792, ..., 35812, 64632, 37672],
       [22894,  5087, 63998, ..., 42854, 53499,  8761]])

In [8]:
# Names of the arrays stored in the currently loaded archive.
[array_name for array_name in data]

['dist_eucli',
 'dist_geo',
 'idx_eucli',
 'idx_geo',
 'X_test',
 'X_train',
 'y_test',
 'y_train']

In [7]:
# Peek at the 'X_train' array (numpy abbreviates the repr of large arrays).
np.asarray(data['X_train'])

array([[ 3.80115231e+01, -8.45332863e+01,  2.50600000e-01, ...,
         1.06500000e+03,  1.00000000e+00,  0.00000000e+00],
       [ 3.79727518e+01, -8.45222551e+01,  8.91000000e-02, ...,
         1.07000000e+03,  2.00000000e+00,  0.00000000e+00],
       [ 3.80568142e+01, -8.44710521e+01,  2.58300000e-01, ...,
         1.16500000e+03,  1.00000000e+00,  0.00000000e+00],
       ...,
       [ 3.79766370e+01, -8.44942216e+01,  1.68600000e-01, ...,
         9.36000000e+02,  1.00000000e+00,  1.00000000e+00],
       [ 3.79949380e+01, -8.44570052e+01,  1.49200000e-01, ...,
         1.08000000e+03,  2.00000000e+00,  0.00000000e+00],
       [ 3.80873618e+01, -8.45466751e+01,  1.36200000e-01, ...,
         1.23200000e+03,  2.00000000e+00,  0.00000000e+00]])