In [14]:
import math
import sys,os
import numpy as np
import pandas as pd

from mgwr.gwr import GWR,MGWR
from mgwr.sel_bw import Sel_BW

In [15]:
# Load the Zillow sample (zillow_1k.csv); a comma is read_csv's default separator.
zillow = pd.read_csv("Zillow-test-dataset/zillow_1k.csv")
zillow.head()

Unnamed: 0,utmX,utmY,value,nbaths,nbeds,area,age
0,374161.33383,3757568.0,56.976,1.0,2.0,954.0,71.0
1,412657.82291,3761734.0,157.982,2.0,4.0,1614.0,45.0
2,391600.47564,3782212.0,512.612,4.0,4.0,2962.0,54.0
3,375056.064681,3748579.0,111.437,2.0,3.0,1292.0,65.0
4,388693.693038,3779865.0,128.327,1.0,3.0,1169.0,78.0


In [16]:
# Build the model inputs from the Zillow frame.
# Columns are picked by name rather than by position: a positional slice such as
# iloc[:, 3:] silently pulls the response `value` into X whenever the CSV carries
# an extra leading index column (e.g. an `Unnamed: 0` column written by to_csv).
y = zillow["value"].values.reshape(-1, 1)              # response as an (n, 1) column vector
X = zillow[["nbaths", "nbeds", "area", "age"]].values  # predictors only, one column each
k = zillow.shape[1]  # number of columns in the raw frame (not the number of predictors)
u = zillow.utmX      # x coordinate column
v = zillow.utmY      # y coordinate column
n = zillow.shape[0]  # number of observations (rows)
# Pair the coordinates row-wise into an (n, 2) array; stays 2-D even for an empty frame.
coords = np.column_stack((u, v))

In [17]:
# Z-score each column of X and y: subtract the column mean, divide by the column
# standard deviation (population form, ddof=0, as in np.std's default).
X = (X - X.mean(axis=0)) / X.std(axis=0)
y = (y - y.mean(axis=0)) / y.std(axis=0)

### `mgwr`

In [18]:
%%time
#Bandwidth searching
selector = Sel_BW(coords,y,X,multi=True)
bws = selector.search(verbose=True)
print("Optimal Bandwidths:",bws)

#Fitting the model with optimal bandwidth
pysal_result_mgwr=MGWR(coords,y,X,selector=selector).fit()

HBox(children=(FloatProgress(value=0.0, description='Backfitting', max=200.0, style=ProgressStyle(description_…

Current iteration: 1 ,SOC: 0.0054557
Bandwidths: 133.0, 52.0, 116.0, 82.0, 50.0
Current iteration: 2 ,SOC: 0.0044876
Bandwidths: 158.0, 105.0, 120.0, 55.0, 50.0
Current iteration: 3 ,SOC: 0.0026065
Bandwidths: 182.0, 127.0, 122.0, 55.0, 43.0
Current iteration: 4 ,SOC: 0.0022962
Bandwidths: 202.0, 195.0, 122.0, 55.0, 43.0
Current iteration: 5 ,SOC: 0.0016683
Bandwidths: 202.0, 285.0, 125.0, 55.0, 43.0
Current iteration: 6 ,SOC: 0.0010319
Bandwidths: 202.0, 365.0, 125.0, 55.0, 43.0
Current iteration: 7 ,SOC: 0.0013286
Bandwidths: 202.0, 999.0, 125.0, 55.0, 43.0
Current iteration: 8 ,SOC: 0.0004091
Bandwidths: 202.0, 999.0, 125.0, 55.0, 43.0
Current iteration: 9 ,SOC: 0.0001911
Bandwidths: 202.0, 999.0, 125.0, 55.0, 43.0
Current iteration: 10 ,SOC: 0.0001056
Bandwidths: 202.0, 999.0, 125.0, 55.0, 43.0
Current iteration: 11 ,SOC: 6.57e-05
Bandwidths: 202.0, 999.0, 125.0, 55.0, 43.0
Current iteration: 12 ,SOC: 4.4e-05
Bandwidths: 202.0, 999.0, 125.0, 55.0, 43.0
Current iteration: 13 ,SOC: 3

HBox(children=(FloatProgress(value=0.0, description='Inference', max=1.0, style=ProgressStyle(description_widt…


CPU times: user 6min 35s, sys: 4.31 s, total: 6min 39s
Wall time: 1min 40s


In [27]:
# Build a fresh MGWR model from the already-searched selector and display its
# `bw_init` attribute; no fit is run here.
# NOTE(review): presumably this is the initial bandwidth used to start backfitting - confirm against the mgwr docs.
MGWR(coords,y,X,selector=selector).bw_init

110.0

### FastGWR

In [None]:
# Run FastGWR from the shell on 4 processors (-np 4) against the same CSV, using
# the adaptive kernel (-adaptive), an intercept (-constant) and MGWR backfitting (-mgwr).
# Results are written to results_mgwr.csv (-out), which the validation cells read back.
!fastgwr run -np 4 -data Zillow-test-dataset/zillow_1k.csv -out results_mgwr.csv -adaptive -constant -mgwr


------------------------------------------------------------
Starting FastGWR with 4 Processors
Spatial Kernel: Adaptive Bisquare
Data Input Path: Zillow-test-dataset/zillow_1k.csv
Output Result Path: results_adap.csv
Constant: True
MGWR Backfitting...
Data are standardized
Initialization Done...
Iter: 1 SOC: 5.46e-03
bws: [133.0, 52.0, 116.0, 82.0, 50.0]
Iter: 2 SOC: 4.49e-03
bws: [158.0, 105.0, 120.0, 55.0, 50.0]
Iter: 3 SOC: 2.61e-03
bws: [182.0, 127.0, 122.0, 55.0, 43.0]


### Results validation

In [25]:
# Report the pysal MGWR fit statistics (AICc and R2) for the comparison.
print("MGWR pysal AICc - ", pysal_result_mgwr.aicc)
print("MGWR pysal R2 - ", pysal_result_mgwr.R2)

MGWR pysal AICc -  1679.0150747382786
MGWR pysal R2 -  0.7660681409024748


In [None]:
# Load the file written by the `fastgwr run ... -out results_mgwr.csv` cell and
# preview its first rows.
fastGWR_result_mgwr = pd.read_csv("results_mgwr.csv")
fastGWR_result_mgwr.head()

In [None]:
# Residual check: prints True when FastGWR's `residual` column matches the
# flattened pysal residuals within np.allclose's default tolerances.
print(np.allclose(fastGWR_result_mgwr["residual"],
                  pysal_result_mgwr.resid_response.reshape(-1)))


In [None]:
# Parameter check: compare columns 3..7 (by position) of the FastGWR output with
# pysal's `params`, within np.allclose's default tolerances.
# NOTE(review): assumes those five columns hold the coefficient estimates - confirm against the FastGWR output header.
print(np.allclose(fastGWR_result_mgwr.iloc[:, 3:8].values,
                  pysal_result_mgwr.params))



In [None]:
# Standard-error check: compare columns 8..12 (by position) of the FastGWR output
# with pysal's `bse`, within np.allclose's default tolerances.
# NOTE(review): assumes those five columns hold the standard errors - confirm against the FastGWR output header.
print(np.allclose(fastGWR_result_mgwr.iloc[:, 8:13].values,
                  pysal_result_mgwr.bse))
