In [1]:
import os, tqdm
import numpy as np
from cartoframes.viz import *
import pandas as pd
import geopandas as gpd

Note: NumExpr detected 64 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
NumExpr defaulting to 8 threads.


In [2]:
from sklearn.metrics import r2_score
import numpy as np

# metric
def metric(label, pred):
    """Compute masked error statistics between ``label`` and ``pred``.

    Entries where either array is NaN are excluded from every statistic.
    The number of excluded entries is printed as ``masked: <count>``.

    Parameters
    ----------
    label : array-like with a ``shape`` attribute
        Ground-truth values.
    pred : array-like with the same shape as ``label``
        Predicted values.

    Returns
    -------
    tuple
        ``(male, rmse, mape)`` where
        ``male`` is the mean of ``|log(pred) - log(label)|``,
        ``rmse`` is the square root of the mean of ``(pred - label) ** 2``, and
        ``mape`` is the median of ``|pred - label| / label``.
        All three are ``0`` when no entry is valid.

    Raises
    ------
    AssertionError
        If ``label`` and ``pred`` have different shapes.
    """
    assert label.shape == pred.shape

    # NaN never compares equal to itself, so self-equality flags non-NaN entries.
    valid = (label == label) & (pred == pred)
    print('masked:', np.sum(~valid))

    # Nothing to score: report zeros instead of averaging an empty selection.
    if not np.any(valid):
        return np.float32(0.0), np.float32(0.0), 0.0

    with np.errstate(divide = 'ignore', invalid = 'ignore'):
        y_true = label[valid]
        y_hat = pred[valid]

        # Keep the per-sample errors around: every statistic below must be
        # derived from them, not from an already averaged scalar.
        abs_err = np.abs(np.subtract(y_hat, y_true)).astype(np.float32)
        log_err = np.abs(np.subtract(np.log(y_hat), np.log(y_true))).astype(np.float32)

        # nan_to_num neutralises NaN/inf produced by log() of non-positive
        # values or by division by a zero label.
        male = np.mean(np.nan_to_num(log_err))
        rmse = np.sqrt(np.mean(np.nan_to_num(np.square(abs_err))))
        mape = np.median(np.nan_to_num(np.divide(abs_err, y_true)))

    return male, rmse, mape

In [3]:
# Mapbox access token used by both basemaps. It is defined once and can be
# overridden through the MAPBOX_TOKEN environment variable, so the notebook
# can run with a different token without editing this cell.
# NOTE(review): the fallback literal is still committed to the notebook;
# consider rotating it and relying on the environment variable only.
MAPBOX_TOKEN = os.environ.get(
    'MAPBOX_TOKEN',
    'pk.eyJ1IjoiaHNtNjkxMSIsImEiOiJjazl0and6aDUwOWF2M2RvemdrYjllczV3In0.qGmaAF6v-1LAF9C-dnMLBg',
)

# Street-map basemap configuration.
streetmap = {
    'style': 'mapbox://styles/mapbox/streets-v9',
    'token': MAPBOX_TOKEN,
}

# Satellite basemap configuration.
mybasemap = {
    #'style': 'mapbox://styles/mapbox/streets-v9',
    'style': 'mapbox://styles/mapbox/satellite-v9',
    'token': MAPBOX_TOKEN,
}

In [26]:
for dname in ['fc', 'kc', 'poa', 'sp']:

    print(dname)
    data = np.load(f'{dname}/data.npz')

    # Pull the two feature matrices out of the archive once; they are sliced
    # column by column below.
    train_features, test_features = data['X_train'], data['X_test']

    # Feature column 0 is exposed as 'lat', column 1 as 'lng', and the target
    # array as 'price' (dict1 = train split, dict2 = test split).
    dict1 = {'lat': train_features[:, 0], 'lng': train_features[:, 1], 'price': data['y_train']}
    dict2 = {'lat': test_features[:, 0], 'lng': test_features[:, 1], 'price': data['y_test']}

    # Every remaining feature column becomes 'attr0', 'attr1', ... in both splits.
    attr_names = []
    for a in range(2, train_features.shape[1]):
        attr_name = f'attr{a-2}'
        attr_names.append(attr_name)
        dict1[attr_name] = train_features[:, a]
        dict2[attr_name] = test_features[:, a]

    df1, df2 = pd.DataFrame(dict1), pd.DataFrame(dict2)
    df = pd.concat([df1, df2])

    # Point geometries (x = lng, y = lat) tagged as EPSG:4326 for the train
    # split, the test split, and the two splits stacked together.
    train_gdf = gpd.GeoDataFrame(df1.copy(), geometry=gpd.points_from_xy(x=df1['lng'], y=df1['lat']))
    train_gdf.crs = 'EPSG:4326'
    test_gdf = gpd.GeoDataFrame(df2.copy(), geometry=gpd.points_from_xy(x=df2['lng'], y=df2['lat']))
    test_gdf.crs = 'EPSG:4326'
    house_gdf = gpd.GeoDataFrame(df.copy(), geometry=gpd.points_from_xy(x=df['lng'], y=df['lat']))
    house_gdf.crs = 'EPSG:4326'
    #print(np.exp(df['price'].values).mean())

    # `gdf` is an alias, not a copy: the casts below also modify `house_gdf`.
    gdf = house_gdf
    for attr in attr_names:
        n_unique = gdf[attr].nunique()
        print(attr, n_unique)
        # Attributes with fewer than 30 distinct values are converted to strings.
        if n_unique < 30:
            gdf[attr] = gdf[attr].astype(str)
    gdfcpy = gdf.copy()
    
    
    from sklearn.metrics import r2_score
    import numpy as np

    # metric
    def metric(pred, label):
        """Score ``pred`` against ``label``, ignoring entries whose label is 0.

        Entries with ``label == 0`` get weight 0; the remaining entries share
        a common weight chosen so that the weights average to 1.

        Returns the tuple ``(male, rmse, mape)``:
        ``male`` is the weighted mean of ``|log(pred) - log(label)|``,
        ``rmse`` is the square root of the weighted mean squared error, and
        ``mape`` is the median (not the mean) of the weighted
        ``|pred - label| / label`` values.

        Raises ``AssertionError`` when the two shapes differ.
        """
        assert label.shape == pred.shape

        with np.errstate(divide = 'ignore', invalid = 'ignore'):
            # Zero weight where the label is 0, rescaled to an average of 1.
            weights = (label != 0).astype(np.float32)
            weights /= weights.mean()

            # Per-sample errors; NaN/inf from log() or division by zero are
            # neutralised by nan_to_num after weighting.
            abs_err = np.abs(pred - label).astype(np.float32)
            log_err = np.abs(np.log(pred) - np.log(label)).astype(np.float32)

            male = np.nan_to_num(log_err * weights).mean()
            rmse = np.sqrt(np.nan_to_num(np.square(abs_err) * weights).mean())
            # The median is used here instead of the mean, as in the original.
            mape = np.median(np.nan_to_num(np.divide(abs_err, label) * weights))
        return male, rmse, mape

    # Gather training targets through the `idx_eucli` index array, average
    # along axis 0, and keep only the entries after the first len(y_train);
    # those are scored against y_test.
    # NOTE(review): presumably `idx_eucli` holds nearest-neighbour indices
    # into the training set -- confirm against the code that built data.npz.
    n_train = len(data['y_train'])
    selected_prices = data['y_train'][data['idx_eucli'].T]

    # Both sides are exponentiated before scoring.
    y_pred = np.exp(selected_prices.mean(0)[n_train:])
    y_label = np.exp(data['y_test'])
    print(metric(y_pred, y_label))  # should be same value

fc
attr0 6447
attr1 18
attr2 11
attr3 6447
attr4 16372
attr5 79
attr6 3230
attr7 8
attr8 6
(0.16065635, 36573.207, 0.10828707013341088)
kc
attr0 13
attr1 30
attr2 1038
attr3 9782
attr4 6
attr5 2
attr6 5
attr7 5
attr8 12
attr9 946
attr10 306
attr11 116
attr12 70
attr13 70
attr14 777
attr15 8689
(0.23552844, 273458.25, 0.17957226793015507)
poa
attr0 9
attr1 2644
attr2 10
attr3 2
attr4 2
(0.34244424, 191036.6, 0.28655311603460365)
sp
attr0 10
attr1 467
attr2 10
attr3 2
attr4 2
(0.32094547, 316261.47, 0.2725276342773437)
