In [3]:
import numpy as np 
import pandas as pd
import os
import psycopg2
import geopandas as gpd

import importlib
from Secrets import secrets
# importlib.reload(secrets)

import matplotlib.pyplot as plt
import seaborn as sns

import time
from geopy.distance import geodesic

# Did the opening of the Whole Foods Market store impact house prices in the area?

- Analyzed the impact on house prices when Whole Foods Market opened a store in Kensington, London, in June 2007

In [85]:
# Load the 2006-2008 sales joined (by LSOA) to the Whole Foods Market store
# whose postcode starts with "W8", together with each property's EPC
# attributes (joined on brn) and the house price index of the sale month
# (hpi_ir.date = prices.date_1st).
engine = psycopg2.connect(database="postgres", user=secrets.user(), password=secrets.password(), host=secrets.host(), port='5432')
try:
    # The cursor context manager closes the cursor on exit. Note that psycopg2's
    # `with connection:` only ends the transaction, so the connection itself is
    # closed explicitly in `finally`, even when the query fails.
    with engine.cursor() as cur:
        cur.execute("""
select * from

(select p.lsoa, p.brn, p.price, p.date, p.date_1st, p.lat as p_lat, p.longt as p_long, 
e.potential_energy_rating, e.construction_age_band, e.number_habitable_rooms, e.total_floor_area,
p.type, p.new, p.duration, s.lat as s_lat, s.longt as s_long,
CAST(ROUND((p.price/e.total_floor_area)::numeric, 3 ) as integer) as sqm_price, 
h.index as hpi

from stores as s

join prices as p
on p.lsoa = s.lsoa

join epc as e
on e.brn = p.brn

join hpi_ir as h
on h.date = p.date_1st

where name = 'Whole Foods Market' and left(s.postcode, 2) = 'W8'
and p.year in (2006, 2007, 2008)) as t;
;

""")
        rows = cur.fetchall()
        # Column names must be read while the cursor is still open.
        column_names = [desc[0] for desc in cur.description]
finally:
    engine.close()

# Everything needed is now in memory; build the frame from the fetched rows.
result = pd.DataFrame(rows, columns=column_names)
result["date"] = pd.to_datetime(result["date"], format="%Y-%m-%d")
print(result.shape)
result.head()

(2868, 18)


Unnamed: 0,lsoa,brn,price,date,date_1st,p_lat,p_long,potential_energy_rating,construction_age_band,number_habitable_rooms,total_floor_area,type,new,duration,s_lat,s_long,sqm_price,hpi
0,E01002825,1666327000.0,2245000.0,2006-10-20,2006-10-01,51.504183,-0.199815,C,2003-2006,3.0,143.34,F,Y,L,51.501716,-0.190179,15662,64.320775
1,E01002850,843288100.0,685000.0,2007-12-07,2007-12-01,51.493208,-0.191359,C,1900-1929,3.0,47.0,F,N,L,51.501716,-0.190179,14574,73.972006
2,E01002817,7142362000.0,1980000.0,2006-04-13,2006-04-01,51.496538,-0.200909,C,2003-2006,12.0,717.88,S,N,F,51.501716,-0.190179,2758,60.934832
3,E01002839,8594261000.0,675000.0,2007-02-28,2007-02-01,51.493203,-0.185785,C,before 1900,3.0,57.0,F,N,L,51.501716,-0.190179,11842,67.216465
4,E01002839,199930900.0,312000.0,2008-01-08,2008-10-01,51.494511,-0.190717,E,before 1900,1.0,39.48,F,N,L,51.501716,-0.190179,7903,66.277955


In [129]:
def findDistance(row):
    """Return the geodesic distance, in metres, between a row's property and store.

    The ``p_lat``/``p_long`` entries of ``row`` are used as the property's
    (latitude, longitude) pair and ``s_lat``/``s_long`` as the store's; the
    two points are measured with geopy's ``geodesic``.
    """
    property_coords = (row["p_lat"], row["p_long"])
    store_coords = (row["s_lat"], row["s_long"])
    return geodesic(property_coords, store_coords).meters
    
# Distance from every property to the store, in metres.
result["dis"] = result.apply(findDistance, axis=1)

# Bucket the distances into three rings around the store. The labels mirror the
# bin edges (the first ring is 0-500 m), `include_lowest` keeps an exact 0 m
# distance inside the first ring, and the open-ended top edge means no distance
# is silently turned into NaN.
result["dis_bins"] = pd.cut(
    result["dis"],
    bins=[0, 500, 1000, np.inf],
    labels=["<500", "500-1000", "1000+"],
    include_lowest=True,
)

In [144]:
# Remove the price impact of the HPI index.
def AdjForHPI(row, base_hpi=None):
    """Deflate a row's price per square metre by house-price-index growth.

    Parameters
    ----------
    row : mapping or Series
        Must provide the ``"hpi"`` and ``"sqm_price"`` entries.
    base_hpi : number, optional
        Baseline index value. When omitted it is looked up in the module-level
        ``result`` frame as the first unique ``hpi`` among the rows with the
        earliest ``date_1st``. That lookup scans the whole frame, so pass the
        baseline explicitly when calling this once per row.

    Returns
    -------
    ``sqm_price / (hpi / base_hpi)``.
    """
    if base_hpi is None:
        earliest_month = result["date_1st"].min()
        base_hpi = result.loc[result["date_1st"] == earliest_month, "hpi"].unique()[0]

    hpi_growth = row["hpi"] / base_hpi
    return row["sqm_price"] / hpi_growth
    
# HPI-adjusted price per square metre: sqm_price / (hpi / baseline hpi), where
# the baseline is the index value of the earliest month in the data. The
# baseline is looked up once and the division is done column-wise, instead of
# re-filtering the whole frame for every row inside a row-wise apply.
hpi_base = result.loc[result["date_1st"] == result["date_1st"].min(), "hpi"].unique()[0]
result["sqm_price_adj"] = result["sqm_price"] / (result["hpi"] / hpi_base)


# Period flag relative to the June 2007 store opening: 0 when the sale month
# starts before 2007-06-01, 1 otherwise. NOTE: despite its name, the "before"
# column is 1 for the *after* period; the name is kept because the aggregation
# cell below groups on it.
store_opening = pd.Timestamp("2007-06-01")
# Normalise to datetime64 so the comparison works whether the driver returned
# datetime.date objects or timestamps.
sale_month = pd.to_datetime(result["date_1st"])
# `~(x < cutoff)` rather than `x >= cutoff` so missing dates still map to 1,
# exactly like the `0 if x < cutoff else 1` rule.
result["before"] = (~(sale_month < store_opening)).astype("int64")

In [113]:
# Show the first two rows of the frame.
result.iloc[:2]

Unnamed: 0,lsoa,brn,price,date,date_1st,p_lat,p_long,potential_energy_rating,construction_age_band,number_habitable_rooms,...,type,new,duration,s_lat,s_long,sqm_price,hpi,dis,dis_bins,sqm_price_adj
0,E01002825,1666327000.0,2245000.0,2006-10-20,2006-10-01,51.504183,-0.199815,C,2003-2006,3.0,...,F,Y,L,51.501716,-0.190179,15662,64.320775,723.183262,500-1000,14453.136595
1,E01002850,843288100.0,685000.0,2007-12-07,2007-12-01,51.493208,-0.191359,C,1900-1929,3.0,...,F,N,L,51.501716,-0.190179,14574,73.972006,950.091782,500-1000,11694.388668


In [147]:
# Mean HPI-adjusted price per square metre for each distance ring, split by
# period (flag 0 = before the opening, 1 = after), plus the after-minus-before
# difference.
temp = (
    result
    # `observed=False` keeps every distance ring in the table and makes the
    # categorical-groupby behaviour explicit.
    .groupby(["dis_bins", "before"], observed=False)["sqm_price_adj"]
    .mean()
    .unstack("before")
    # Guarantee both period columns exist, then label them by flag value rather
    # than by position, so a missing period cannot mislabel or raise.
    .reindex(columns=[0, 1])
    .rename(columns={0: "before", 1: "after"})
    .rename_axis(columns=None)
)
temp["diff"] = temp["after"] - temp["before"]
temp

Unnamed: 0_level_0,before,after,diff
dis_bins,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
<200,8686.327753,9610.028435,923.700681
500-1000,8125.883925,8931.110991,805.227067
1000+,6971.664662,7018.10764,46.442978
