In [1]:
import math
import sys,os
import numpy as np
import pandas as pd

from mgwr.gwr import GWR,GWRResults
from mgwr.sel_bw import Sel_BW

In [2]:
# Read the zillow_5k sample (comma-separated, the pandas default) and preview
# the first rows to check that the columns parsed as expected.
zillow = pd.read_csv("Zillow-test-dataset/zillow_5k.csv")
zillow.head()

Unnamed: 0,utmX,utmY,value,nbaths,nbeds,area,age
0,379979.957285,3752562.0,87.397,2.0,3.0,1311.0,61.0
1,411210.623578,3762339.0,121.211,2.0,4.0,1200.0,62.0
2,400287.486356,3743925.0,76.145,1.0,2.0,724.0,70.0
3,391029.680899,3776406.0,136.32,2.0,3.0,1890.0,54.0
4,371824.11116,3750099.0,100.499,4.0,5.0,2166.0,71.0


In [3]:
# Turn the DataFrame into the NumPy inputs used by the GWR calls below.
n, k = zillow.shape                      # row count and column count of the frame

# Response as an (n, 1) column vector.
y = zillow["value"].to_numpy().reshape(-1, 1)

# Every column from position 3 onward is used as a covariate
# (presumably nbaths, nbeds, area, age -- the fitted results further down
# report exactly those four slopes plus an intercept).
X = zillow.iloc[:, 3:].to_numpy()

# Projected coordinates, paired row-wise into an (n, 2) array.
u, v = zillow["utmX"], zillow["utmY"]
coords = np.column_stack((u, v))

### `mgwr`

#### Adaptive Bisquare Kernel

In [6]:
%%time
#Bandwidth searching
opt_bw_adap = Sel_BW(coords,y,X).search(verbose=True)
print("Optimal Bandwidth:",opt_bw_adap)

#Fitting the model with optimal bandwidth
pysal_result_adap=GWR(coords,y,X,opt_bw_adap).fit()

Bandwidth:  1941.0 , score:  62998.99
Bandwidth:  3109.0 , score:  63155.34
Bandwidth:  1218.0 , score:  62834.64
Bandwidth:  772.0 , score:  62623.87
Bandwidth:  496.0 , score:  62394.91
Bandwidth:  326.0 , score:  62176.77
Bandwidth:  220.0 , score:  62010.24
Bandwidth:  155.0 , score:  61900.35
Bandwidth:  115.0 , score:  61877.92
Bandwidth:  90.0 , score:  61928.28
Bandwidth:  130.0 , score:  61872.72
Bandwidth:  140.0 , score:  61880.08
Bandwidth:  125.0 , score:  61868.41
Bandwidth:  121.0 , score:  61867.18
Bandwidth:  119.0 , score:  61869.76
Bandwidth:  123.0 , score:  61868.12
Optimal Bandwidth: 121.0
CPU times: user 3min 28s, sys: 20.9 s, total: 3min 49s
Wall time: 34.3 s


In [7]:
%%time
#Bandwidth searching
opt_bw_fixed = Sel_BW(coords,y,X,fixed=True,kernel="gaussian").search(verbose=True)
print("Optimal Bandwidth:",opt_bw_fixed)

#Fitting the model with optimal bandwidth
pysal_result_fixed=GWR(coords,y,X,opt_bw_fixed,fixed=True,kernel="gaussian").fit()

Bandwidth:  85088.9 , score:  63401.59
Bandwidth:  137674.41 , score:  63412.23
Bandwidth:  52587.49 , score:  63376.05
Bandwidth:  32501.41 , score:  63319.11
Bandwidth:  20086.84 , score:  63189.10
Bandwidth:  12414.56 , score:  62823.09
Bandwidth:  7672.57 , score:  62284.31
Bandwidth:  4741.99 , score:  61925.83
Bandwidth:  2930.69 , score:  61529.61
Bandwidth:  1811.3 , score:  61093.24
Bandwidth:  1119.44 , score:  61264.26
Bandwidth:  2238.85 , score:  61276.52
Bandwidth:  1547.02 , score:  61014.93
Bandwidth:  1383.71 , score:  61028.14
Bandwidth:  1647.97 , score:  61035.32
Bandwidth:  1484.65 , score:  61012.03
Bandwidth:  1446.09 , score:  61014.87
Bandwidth:  1508.47 , score:  61012.11
Bandwidth:  1469.91 , score:  61012.67
Bandwidth:  1493.74 , score:  61011.91
Bandwidth:  1499.37 , score:  61011.93
Bandwidth:  1490.27 , score:  61011.93
Bandwidth:  1495.89 , score:  61011.91
Bandwidth:  1497.22 , score:  61011.91
Bandwidth:  1495.07 , score:  61011.90
Bandwidth:  1494.56 

### FastGWR

#### Adaptive Bisquare Kernel

In [8]:
# Run FastGWR on a single process (-np 1) with the -adaptive flag.
# No bandwidth is supplied on the command line; the log below shows the tool
# running its own bandwidth search before fitting. The log also reports
# "Constant: True" for the -constant flag. Output goes to results_adap.csv.
!fastgwr run -np 1 -data Zillow-test-dataset/zillow_5k.csv -out results_adap.csv -adaptive -constant

------------------------------------------------------------
Starting FastGWR with 1 Processors
Spatial Kernel: Adaptive Bisquare
Data Input Path: Zillow-test-dataset/zillow_5k.csv
Output Result Path: results_adap.csv
Constant: True
Optimal Bandwidth Searching...
Range: 50 5000
BW, AICc 1941.0 62998.99119669126
BW, AICc 3109.0 63155.33894463485
BW, AICc 1218.0 62834.63981029019
BW, AICc 772.0 62623.87488968963
BW, AICc 496.0 62394.91059361154
BW, AICc 326.0 62176.772952292216
BW, AICc 220.0 62010.24083279318
BW, AICc 155.0 61900.34904388845
BW, AICc 115.0 61877.92073505618
BW, AICc 90.0 61928.281301868126
BW, AICc 130.0 61872.71759124702
BW, AICc 140.0 61880.08193188456
BW, AICc 125.0 61868.40676858912
BW, AICc 121.0 61867.18296256345
BW, AICc 119.0 61869.759931302826
BW, AICc 123.0 61868.11546142209
Fitting GWR Using Bandwidth: 121.0
Diagnostic Information:
AICc: 61867.18296256346
ENP: 528.2622212380004
R2: 0.7741417970962883
Total Time Elapsed: 25.72 seconds
-------------------------

In [9]:
# Same adaptive run, but on 4 processes (-np 4), to compare elapsed time
# against the single-process run.
# NOTE: -out is results_adap.csv again, so this run overwrites the file
# written by the single-process cell.
!fastgwr run -np 4 -data Zillow-test-dataset/zillow_5k.csv -out results_adap.csv -adaptive -constant

------------------------------------------------------------
Starting FastGWR with 4 Processors
Spatial Kernel: Adaptive Bisquare
Data Input Path: Zillow-test-dataset/zillow_5k.csv
Output Result Path: results_adap.csv
Constant: True
Optimal Bandwidth Searching...
Range: 50 5000
BW, AICc 1941.0 62998.99119669126
BW, AICc 3109.0 63155.33894463484
BW, AICc 1218.0 62834.63981029017
BW, AICc 772.0 62623.874889689614
BW, AICc 496.0 62394.91059361154
BW, AICc 326.0 62176.772952292216
BW, AICc 220.0 62010.240832793184
BW, AICc 155.0 61900.34904388847
BW, AICc 115.0 61877.920735056185
BW, AICc 90.0 61928.281301868104
BW, AICc 130.0 61872.717591247
BW, AICc 140.0 61880.08193188456
BW, AICc 125.0 61868.40676858913
BW, AICc 121.0 61867.18296256346
BW, AICc 119.0 61869.75993130282
BW, AICc 123.0 61868.11546142208
Fitting GWR Using Bandwidth: 121.0
Diagnostic Information:
AICc: 61867.18296256346
ENP: 528.2622212380004
R2: 0.7741417970962883
Total Time Elapsed: 12.28 seconds
-------------------------

#### Fixed Gaussian Kernel

In [10]:
# Run FastGWR on 4 processes (-np 4) with the -fixed flag; the log below
# reports the kernel as "Fixed Gaussian". As in the adaptive runs, no
# bandwidth is given, so the tool searches for one before fitting.
# Output goes to results_fixed.csv.
!fastgwr run -np 4 -data Zillow-test-dataset/zillow_5k.csv -out results_fixed.csv -fixed -constant

------------------------------------------------------------
Starting FastGWR with 4 Processors
Spatial Kernel: Fixed Gaussian
Data Input Path: Zillow-test-dataset/zillow_5k.csv
Output Result Path: results_fixed.csv
Constant: True
Optimal Bandwidth Searching...
Range: 0.0 222763.307902534
BW, AICc 85088.90071953091 63401.587701945275
BW, AICc 137674.4071830031 63412.23473987967
BW, AICc 52587.49331169169 63376.049939236196
BW, AICc 32501.40740783922 63319.11111531032
BW, AICc 20086.844820266873 63189.09679320165
BW, AICc 12414.562587572345 62823.090184067856
BW, AICc 7672.572115997337 62284.30873222015
BW, AICc 4741.990471575008 61925.82638349446
BW, AICc 2930.692371147503 61529.608314945304
BW, AICc 1811.2981004275057 61093.23641471484
BW, AICc 1119.4365650072116 61264.25581814983
BW, AICc 2238.846990876096 61276.52126845023
BW, AICc 1547.0177653763492 61014.93422823765
BW, AICc 1383.7069156917014 61028.14490111669
BW, AICc 1647.9710955939706 61035.3151728618
BW, AICc 1484.64790448897

### Results validation

In [11]:
# Report the mgwr (pysal) fit diagnostics so they can be compared by eye with
# the AICc / R2 values printed in the FastGWR logs.
diagnostics = [
    ("Adaptive kernel: pysal AICc - ", pysal_result_adap.aicc),
    ("Adaptive kernel: pysal R2 - ", pysal_result_adap.R2),
    ("Fixed kernel: pysal AICc - ", pysal_result_fixed.aicc),
    ("Fixed kernel: pysal R2 - ", pysal_result_fixed.R2),
]
for message, value in diagnostics:
    print(message, value)

Adaptive kernel: pysal AICc -  61867.18296256348
Adaptive kernel: pysal R2 -  0.7741417970962875
Fixed kernel: pysal AICc -  61011.90437602184
Fixed kernel: pysal R2 -  0.8443763943519637


In [12]:
# Load the per-observation outputs written by the FastGWR runs
# (adaptive and fixed kernels), then preview the adaptive table.
fastGWR_result_adap = pd.read_csv("results_adap.csv")
fastGWR_result_fixed = pd.read_csv("results_fixed.csv")

fastGWR_result_adap.head()

Unnamed: 0,index,residual,influ,b_intercept,b_nbaths,b_nbeds,b_area,b_age,se_intercept,se_nbaths,se_nbeds,se_area,se_age
0,0.0,17.952168,0.044251,-77.8472,-7.207957,11.062545,0.090687,0.157855,111.05345,35.571519,25.893463,0.06061,1.221988
1,1.0,87.133387,0.067529,91.20069,3.714871,-31.688964,0.153827,-1.974029,100.210214,33.220679,21.463895,0.031428,1.175032
2,2.0,37.481355,0.069983,65.292923,7.131999,6.72188,0.06556,-1.352434,127.311788,29.505288,22.689325,0.042178,1.544978
3,3.0,-79.10811,0.051625,89.411082,-8.403962,-57.824964,0.191006,-0.827811,66.517513,27.883677,18.393406,0.035424,0.56427
4,4.0,-188.088409,0.195136,62.813506,70.535078,-0.582226,0.081513,-3.239616,92.111046,21.715976,18.5916,0.029862,0.996363


In [13]:
# Validate model residuals: FastGWR's `residual` column should agree with the
# flattened mgwr residuals. Fixed kernel is checked first, then adaptive.
residual_pairs = (
    (fastGWR_result_fixed, pysal_result_fixed),
    (fastGWR_result_adap, pysal_result_adap),
)
for fast_table, pysal_fit in residual_pairs:
    print(np.allclose(fast_table["residual"], pysal_fit.resid_response.ravel()))

True
True


In [14]:
# Validate parameter estimates: columns 3..7 of the FastGWR tables are
# compared with the mgwr `params` arrays.
fast_params_fixed = fastGWR_result_fixed.iloc[:, 3:8].to_numpy()
print(np.allclose(fast_params_fixed, pysal_result_fixed.params))

fast_params_adap = fastGWR_result_adap.iloc[:, 3:8].to_numpy()
print(np.allclose(fast_params_adap, pysal_result_adap.params))

True
True


In [15]:
# Validate the standard errors of the parameter estimates: columns 8..12 of
# the FastGWR tables are compared with the mgwr `bse` arrays.
# Adaptive kernel is checked first here, then fixed.
se_pairs = (
    (fastGWR_result_adap, pysal_result_adap),
    (fastGWR_result_fixed, pysal_result_fixed),
)
for fast_table, pysal_fit in se_pairs:
    fast_se = fast_table.iloc[:, 8:13].to_numpy()
    print(np.allclose(fast_se, pysal_fit.bse))

True
True
