In [1]:
# Sneak peek into some geometric feature engineering to add centroids of the parcels
import numpy as np
import geopandas as gpd

# Load the 2015 St. Pete sales layer and add each geometry's centroid
# coordinates as plain numeric columns "x" and "y".
stp_sales_2015 = gpd.read_file('./data/stpete_sales_2015.shp')
# Derive the centroid series once and reuse it for both coordinates instead of
# recomputing it for every column.
parcel_centroids = stp_sales_2015.centroid
stp_sales_2015["x"] = parcel_centroids.map(lambda p: p.x)
stp_sales_2015["y"] = parcel_centroids.map(lambda p: p.y)
# NOTE(review): in the rows displayed below, x/y match the existing
# CentroidX/CentroidY columns -- confirm whether the new columns are needed.
stp_sales_2015.head()

Unnamed: 0,AVE_FAM_SZ,AVE_HH_SZ,CentroidX,CentroidY,HS_PER_AC,MEDFINCOME,MEDHHINC,MEDOOHVAL,MED_AGE,PCT_AMERI,...,taxable_va,tif_flag_t,tot_effect,tot_gross_,tot_living,total_livi,views,year_built,x,y
0,2.777506,2.19,-82.642882,27.81057,4.596851,68229,54321,125200,46,0.0,...,34513,,1164,1386,1048,1,,1960,-82.642882,27.81057
1,2.777506,2.19,-82.646179,27.811467,4.596851,68229,54321,125200,46,0.0,...,47710,,1672,2102,1486,1,,1986,-82.646179,27.811467
2,2.777506,2.19,-82.64493,27.812377,4.596851,68229,54321,125200,46,0.0,...,128088,,1626,1872,1468,1,,1979,-82.64493,27.812377
3,2.777506,2.19,-82.643034,27.813685,4.596851,68229,54321,125200,46,0.0,...,37048,,1855,2440,1680,1,,1967,-82.643034,27.813685
4,2.777506,2.19,-82.644526,27.813679,4.596851,68229,54321,125200,46,0.0,...,138960,,1524,1841,1453,1,,1942,-82.644526,27.813679


In [2]:
# Open the St. Pete Census ACS (2014) shapefile with PySAL's file reader.
# NOTE(review): the handle is bound to `stp_parcels` although the file is the
# census ACS layer, and none of the cells shown here read from it or close it
# -- confirm whether this cell is still needed.
import pysal
stp_parcels = pysal.open('./data/stpete_cenacs_2014.shp')

In [3]:
# Build first-order queen-contiguity spatial weights for the census ACS
# polygons, then display the cardinality histogram as
# (number of neighbors, number of observations) pairs.
cenacs_shp = "./data/stpete_cenacs_2014.shp"
w = pysal.queen_from_shapefile(cenacs_shp)
w.histogram

[(1, 1),
 (2, 2),
 (3, 10),
 (4, 20),
 (5, 48),
 (6, 57),
 (7, 58),
 (8, 21),
 (9, 8),
 (10, 2),
 (11, 2),
 (12, 0),
 (13, 0),
 (14, 1),
 (15, 0),
 (16, 0),
 (17, 1)]

In [4]:
# Get the minimum distance threshold for the 2015 sales observations (this
# reads the sales shapefile, not the census block groups). Per the PySAL
# documentation this is the largest nearest-neighbor distance, i.e. the
# smallest band that leaves every observation with at least one neighbor.
# NOTE(review): the x/y values displayed earlier look like lon/lat degrees, so
# the threshold is presumably in decimal degrees -- confirm the CRS.
thresh = pysal.min_threshold_dist_from_shapefile("./data/stpete_sales_2015.shp")
thresh

0.005025135774761472

In [5]:
# Build binary distance-band weights for the 2015 sales using `thresh` as the
# cut-off distance, then display the cardinality histogram as
# (number of neighbors, number of observations) pairs.
sales_shp = "./data/stpete_sales_2015.shp"
wt = pysal.threshold_binaryW_from_shapefile(sales_shp, thresh)
wt.histogram

[(1, 1),
 (2, 2),
 (3, 2),
 (4, 1),
 (5, 9),
 (6, 11),
 (7, 15),
 (8, 14),
 (9, 28),
 (10, 21),
 (11, 52),
 (12, 56),
 (13, 57),
 (14, 75),
 (15, 81),
 (16, 71),
 (17, 78),
 (18, 85),
 (19, 90),
 (20, 104),
 (21, 99),
 (22, 95),
 (23, 102),
 (24, 99),
 (25, 109),
 (26, 122),
 (27, 114),
 (28, 96),
 (29, 95),
 (30, 120),
 (31, 98),
 (32, 105),
 (33, 116),
 (34, 113),
 (35, 86),
 (36, 97),
 (37, 98),
 (38, 79),
 (39, 97),
 (40, 86),
 (41, 113),
 (42, 89),
 (43, 71),
 (44, 81),
 (45, 73),
 (46, 72),
 (47, 69),
 (48, 59),
 (49, 41),
 (50, 45),
 (51, 41),
 (52, 39),
 (53, 29),
 (54, 38),
 (55, 26),
 (56, 25),
 (57, 30),
 (58, 25),
 (59, 18),
 (60, 21),
 (61, 20),
 (62, 16),
 (63, 16),
 (64, 12),
 (65, 8),
 (66, 6),
 (67, 2),
 (68, 4),
 (69, 4),
 (70, 2),
 (71, 0),
 (72, 2),
 (73, 0),
 (74, 1)]

In [6]:
# Export the distance-band weights `wt` as a .gal file for the 2015 sales.
gal = pysal.open('./data/stpete_sales_2015.gal','w')
try:
    gal.write(wt)
finally:
    # Always release the file handle, even if the write fails part-way.
    gal.close()

In [7]:
# Create the spatial lag of median household income (MEDHHINC) for 2015 sales.
f = pysal.open("./data/stpete_sales_2015.dbf")
try:
    y = np.array(f.by_col['MEDHHINC'])
finally:
    # Close the attribute table as soon as the column has been read so the
    # handle does not stay open for the rest of the session.
    f.close()
# NOTE(review): `wt` was built as binary distance-band weights, and the values
# displayed below are in the millions, which is consistent with a sum over
# neighbors rather than a neighborhood average. Row-standardize the weights
# first if an average is intended -- confirm.
yl = pysal.lag_spatial(wt,y)
yl

array([ 2333272.,  3116603.,  3203971., ...,  2520874.,  2979909.,
        2747461.])

In [8]:
# Attach the spatially lagged income to the sales frame as 'medinc_lag' and
# persist the frame. The target is the same shapefile the frame was read from,
# so the source data on disk is overwritten.
sales_shp_path = './data/stpete_sales_2015.shp'
stp_sales_2015['medinc_lag'] = yl.tolist()
stp_sales_2015.to_file(sales_shp_path)
stp_sales_2015.head()

Unnamed: 0,AVE_FAM_SZ,AVE_HH_SZ,CentroidX,CentroidY,HS_PER_AC,MEDFINCOME,MEDHHINC,MEDOOHVAL,MED_AGE,PCT_AMERI,...,tif_flag_t,tot_effect,tot_gross_,tot_living,total_livi,views,year_built,x,y,medinc_lag
0,2.777506,2.19,-82.642882,27.81057,4.596851,68229,54321,125200,46,0.0,...,,1164,1386,1048,1,,1960,-82.642882,27.81057,2333272.0
1,2.777506,2.19,-82.646179,27.811467,4.596851,68229,54321,125200,46,0.0,...,,1672,2102,1486,1,,1986,-82.646179,27.811467,3116603.0
2,2.777506,2.19,-82.64493,27.812377,4.596851,68229,54321,125200,46,0.0,...,,1626,1872,1468,1,,1979,-82.64493,27.812377,3203971.0
3,2.777506,2.19,-82.643034,27.813685,4.596851,68229,54321,125200,46,0.0,...,,1855,2440,1680,1,,1967,-82.643034,27.813685,2426029.0
4,2.777506,2.19,-82.644526,27.813679,4.596851,68229,54321,125200,46,0.0,...,,1524,1841,1453,1,,1942,-82.644526,27.813679,2777055.0


In [41]:
# Calculate the global Moran's I of sale price for 2015 sales.
f = pysal.open("./data/stpete_sales_2015.dbf")
try:
    y = np.array(f.by_col['price'])
finally:
    # Release the attribute table once the price column has been read.
    f.close()
# Cast the prices to integers before computing the statistic.
y = y.astype(int)
# NOTE(review): `y` is rebound here from MEDHHINC to price, and the cells that
# follow reuse `y`. This cell's execution count (41) is out of sequence with
# its neighbors, so verify with Restart & Run All that the saved outputs of
# the later cells were computed on price.
mi = pysal.Moran(y, wt, two_tailed=False)
mi.I

0.51024209394158593

In [9]:
# Pseudo significance of the global Moran's I from random permutations.
# The NumPy seed is fixed first so the simulated p-value is repeatable.
seed = 12345
n_permutations = 9999
np.random.seed(seed)
mir = pysal.Moran(y, wt, permutations=n_permutations)
mir.p_sim

0.0001

In [10]:
# Calculate the local Moran's I (LISA) statistics for 2015 sales.
lm = pysal.Moran_Local(y,wt)
# Display the per-observation pseudo p-values; the local statistics themselves
# are available as `lm.Is`.
lm.p_sim

array([ 0.255,  0.222,  0.493, ...,  0.263,  0.429,  0.434])

In [11]:
# Store the local Moran's I values on the 2015 sales frame as a new 'lisa'
# feature and preview the result.
lisa_values = lm.Is.tolist()
stp_sales_2015['lisa'] = lisa_values
stp_sales_2015.head()

Unnamed: 0,AVE_FAM_SZ,AVE_HH_SZ,CentroidX,CentroidY,HS_PER_AC,MEDFINCOME,MEDHHINC,MEDOOHVAL,MED_AGE,PCT_AMERI,...,tot_effect,tot_gross_,tot_living,total_livi,views,year_built,x,y,medinc_lag,lisa
0,2.777506,2.19,-82.642882,27.81057,4.596851,68229,54321,125200,46,0.0,...,1164,1386,1048,1,,1960,-82.642882,27.81057,2333272.0,-0.007315
1,2.777506,2.19,-82.646179,27.811467,4.596851,68229,54321,125200,46,0.0,...,1672,2102,1486,1,,1986,-82.646179,27.811467,3116603.0,0.007635
2,2.777506,2.19,-82.64493,27.812377,4.596851,68229,54321,125200,46,0.0,...,1626,1872,1468,1,,1979,-82.64493,27.812377,3203971.0,-0.000506
3,2.777506,2.19,-82.643034,27.813685,4.596851,68229,54321,125200,46,0.0,...,1855,2440,1680,1,,1967,-82.643034,27.813685,2426029.0,-0.019241
4,2.777506,2.19,-82.644526,27.813679,4.596851,68229,54321,125200,46,0.0,...,1524,1841,1453,1,,1942,-82.644526,27.813679,2777055.0,-0.014889


In [12]:
# Persist the enriched frame back to the shapefile it was originally read
# from, overwriting that file on disk.
sales_shp_path = './data/stpete_sales_2015.shp'
stp_sales_2015.to_file(sales_shp_path)