In [1]:
import pandas as pd
import numpy as np

import googlemaps
import gmaps

import settings

In [2]:
# Maps the raw CSV headers (several contain embedded newlines) to snake_case
# column names used throughout the notebook.
COL_RENAME_MAP = {
    # Parcel identification and sale details
    "BlockNo": "block_no",
    "LotNo": "lot_no",
    "QualCode(s)": "qual_codes",
    "PropLocation": "location",
    "PropClass(s)": "class",
    "SalesDate": "sale_date",
    "SalePrice": "sale_price",
    "NU\nCode": "nu_code",
    # 2017 assessment and taxes
    "2017 Assmt\nTaxable": "assessment_taxable_2017",
    # NOTE(review): unlike its 2018 counterpart this name has no
    # "assessment_" prefix; kept as-is so existing column names do not change.
    "2017 Assmt\nExempt": "exempt_2017",
    "2017 Assmt\nTotal": "assessment_2017",
    "2017 Taxes\nTotal": "taxes_2017",
    # Property characteristics
    "VCS": "vcs",
    "Year\nBuilt": "year_built",
    "Design": "design",
    "Square\nFootage": "sq_ft",
    # 2018 assessment and estimated taxes
    "2018 Assmt\nTaxable": "assessment_taxable_2018",
    "2018 Assmt\nExempt": "assessment_exempt_2018",
    "2018 Assmt\nTotal": "assessment_2018",
    "2018 Taxes\nEstimated": "taxes_2018",
}

# Google Maps web-service client; used further down to geocode street addresses.
gm = googlemaps.Client(key=settings.GMAPS_API_KEY)
# Register the same API key with the gmaps plotting package used for the heatmap.
gmaps.configure(api_key=settings.GMAPS_API_KEY)

In [3]:
# Load the assessment export, normalise the headers, and derive the
# year-over-year tax change columns in a single chain.
df = (
    pd.read_csv('data/jc-assessment.csv', low_memory=False)
    # index=str turns the row labels into strings; columns get snake_case names.
    .rename(index=str, columns=COL_RENAME_MAP)
    .assign(
        # Ratio of 2018 estimated taxes to 2017 taxes (1.0 means no change).
        change_pct=lambda frame: frame['taxes_2018'] / frame['taxes_2017'],
        # Absolute difference in dollars (negative means taxes went down).
        change_dollars=lambda frame: frame['taxes_2018'] - frame['taxes_2017'],
    )
)

In [4]:
# Preview only the first rows; the frame has tens of thousands of records,
# so displaying it whole bloats the notebook without adding information.
df.head(10)

Unnamed: 0,block_no,lot_no,qual_codes,location,class,sale_date,sale_price,nu_code,assessment_taxable_2017,exempt_2017,...,vcs,year_built,design,sq_ft,assessment_taxable_2018,assessment_exempt_2018,assessment_2018,taxes_2018,change_pct,change_dollars
0,12609.0,00002 01,,367 FIRST ST.,2,41719,1.0,4,865400,,...,DTVL,2016.0,Rowhouse,6716.0,2706600,,2706600,43846.92,0.649572,-23654.28
1,27503.0,11,C0108,108 E.SHEARWATER CT.,2,41974,1525000.0,26,597300,,...,PL01,1988.0,Townhouse,4355.0,1558700,,1558700,25250.94,0.541989,-21338.46
2,11101.0,9,,323 FOURTH ST.,2,42516,10.0,3,437600,,...,DTHC,2017.0,Rowhouse,7048.0,850500,,850500,13778.10,0.403662,-20354.70
3,14106.0,22,,249 GROVE ST.,2,37358,150000.0,,591500,,...,DTVV,2016.0,Rowhouse,2496.0,1648900,,1648900,26712.18,0.578975,-19424.82
4,27503.0,11.206,C0012,206-12 WEST SHEARWATER CT,2,35276,249000.0,26,383500,,...,PL01,1996.0,Condo,2260.0,685400,,685400,11103.48,0.371192,-18809.52
5,27503.0,11.207,C0017,207-17 WEST SHEARWATER CT,2,39022,650000.0,26,382900,,...,PL01,1996.0,Condo,2260.0,694200,,694200,11246.04,0.376547,-18620.16
6,4502.0,4,,54 SHERMAN AVE.,2,42352,1.0,24,307400,,...,HTHT,1905.0,Duplex,2640.0,350000,,350000,5670.00,0.236475,-18307.20
7,16203.0,1.01,,136 DELAWARE AVE.,2,42612,800000.0,,369300,,...,WSSP,1900.0,Colonial,1160.0,713300,,713300,11555.46,0.401156,-17249.94
8,27503.0,11,C0103,103 E.SHEARWATER CT.,2,32149,1000000.0,,504600,,...,PL01,1988.0,Townhouse,3910.0,1438500,,1438500,23303.70,0.592084,-16055.10
9,27503.0,11.100,C0081,100-81 SHEARWATER CT.,2,37582,394000.0,10,295200,,...,PL01,1988.0,Condo,1740.0,537900,,537900,8713.98,0.378447,-14311.62


In [5]:
# Distribution of the dollar change in taxes (2018 estimate minus 2017).
df.loc[:, 'change_dollars'].describe()

count    43545.000000
mean       132.645318
std       3030.786323
min     -23654.280000
25%      -1507.200000
50%         38.880000
75%       1292.400000
max      36066.180000
Name: change_dollars, dtype: float64

In [6]:
# Distribution of the 2018/2017 tax ratio; despite the column name this is a
# ratio, so 1.0 means the tax bill is unchanged.
df.loc[:, 'change_pct'].describe()

count    43545.000000
mean         1.631901
std          2.069756
min          0.236475
25%          0.787615
50%          1.007176
75%          1.476841
max         23.076923
Name: change_pct, dtype: float64

In [15]:
# Build a geocodable address: the trimmed street location plus a fixed
# city/state suffix. The vectorised .str accessor replaces the per-element
# np.char.strip call and leaves missing locations as NaN instead of failing.
df['address'] = df['location'].str.strip() + ' Jersey City, NJ'

In [18]:
# De-duplicate addresses so each one is geocoded at most once.
unique_addresses = set(df['address'])

In [20]:
# In-memory memo of successful lookups: address string -> geocoder location.
ADDRESS_LAT_LNG_CACHE = {}


def get_lat_lng(addr):
    """Geocode ``addr`` with the module-level ``gm`` client, caching successes.

    Args:
        addr: Address string to look up.

    Returns:
        The ``['geometry']['location']`` entry of the first geocoding result,
        or ``None`` when the address is missing/blank, the lookup raises, or
        the geocoder returns no results. Failures are not cached, so a later
        call retries them.
    """
    # Missing values (e.g. NaN) and blank strings cannot be geocoded; bail out
    # before spending an API request on them.
    if not isinstance(addr, str) or not addr.strip():
        print("Skipping empty or non-string address:", addr)
        return None
    if addr in ADDRESS_LAT_LNG_CACHE:
        return ADDRESS_LAT_LNG_CACHE[addr]
    try:
        results = gm.geocode(addr)
        if not results:
            # An empty result list means "address not found", not an error.
            print("No geocoding results for address:", addr)
            return None
        loc = results[0]['geometry']['location']
    except Exception as e:
        # Deliberately best-effort: report the failure and keep the batch going.
        print("Failed on address with exception:", addr, e)
        return None
    ADDRESS_LAT_LNG_CACHE[addr] = loc
    return loc

In [21]:
# Geocode every distinct address once, keeping only the successful lookups.
address_map = {}
for addr in unique_addresses:
    lat_lng = get_lat_lng(addr)
    if lat_lng is None:
        print("Failed to geocode", addr)
        continue
    address_map[addr] = lat_lng

KeyboardInterrupt: 

In [None]:
# Attach the geocoded location (or None) to every row; repeated addresses are
# served from get_lat_lng's cache rather than re-requested.
df['lat_lng'] = df['address'].map(get_lat_lng)

In [None]:
# Spot-check the new lat_lng column on a few rows instead of dumping the frame.
df.head(10)

In [None]:
# Split the geocoder's location mapping into numeric columns. get_lat_lng
# returns None for addresses it could not resolve, so those rows become NaN.
# Google's geocoding `location` object carries its coordinates under the
# 'lat' and 'lng' keys.
df['lat'] = df['lat_lng'].apply(lambda loc: loc['lat'] if isinstance(loc, dict) else np.nan)
df['lng'] = df['lat_lng'].apply(lambda loc: loc['lng'] if isinstance(loc, dict) else np.nan)

In [None]:
# Only rows with coordinates and a weight can be plotted; dropping the rest
# here keeps `locations` and `weight` aligned row-for-row.
geocoded = df.dropna(subset=['lat', 'lng', 'change_pct'])
# Selecting several columns needs a list; df['lat', 'lng'] looks up a single
# column labelled ('lat', 'lng') and raises KeyError.
locations = geocoded[['lat', 'lng']]
weight = geocoded['change_pct']
# Centre the map on the mean coordinate of the plotted points.
center_lat = geocoded['lat'].mean()
center_lng = geocoded['lng'].mean()

In [None]:
# Heatmap of the geocoded parcels, weighted by the 2018/2017 tax ratio.
heatmap_layer = gmaps.heatmap_layer(locations, weight, dissipating=True)
heatmap_layer.max_intensity = 5
heatmap_layer.point_radius = 15
# Create the figure once, centred on the data (the earlier bare
# gmaps.figure() call was immediately overwritten and has been dropped).
fig = gmaps.figure(center=(center_lat, center_lng), zoom_level=10)
fig.add_layer(heatmap_layer)
# End the cell with the figure itself so the notebook renders it; the cell
# previously ended on the add_layer() call, which does not display the map.
fig