In [1]:
import os
import json
import pickle
import requests
import urllib.parse
from enum import Enum
from collections import namedtuple

import folium
import xmltodict
import numpy as np
import pandas as pd
from branca import colormap


In [2]:
import warnings
# Suppress every warning for the rest of the kernel session: no category,
# message or module filter is given, so nothing is let through.
warnings.filterwarnings('ignore')

In [3]:
# Reference links kept alongside the code as a plain string; none of the
# cells visible in this notebook read this variable.
some_references = '''
https://www.zillow.com/research/data/
https://python-visualization.github.io/folium/plugins.html
https://python-graph-gallery.com/292-choropleth-map-with-folium/
http://colorbrewer2.org/#type=sequential&scheme=Oranges&n=3
https://github.com/python-visualization/folium/blob/master/examples/GeoJSON_and_choropleth.ipynb
https://nbviewer.jupyter.org/github/python-visualization/folium/blob/master/examples/GeoJSON_and_choropleth.ipynb
https://towardsdatascience.com/data-101s-spatial-visualizations-and-analysis-in-python-with-folium-39730da2adf
'''

# Create a Heatmap of Housing Prices in Atlanta, GA

In [4]:
# Immutable record for one home: Zillow property id, latitude, longitude and value.
ZillowHomeInfo = namedtuple('ZillowHomeInfo', ['zpid', 'lat', 'lon', 'value'])

In [5]:
# One HTTP session shared by every Zillow request made below.
# `requests.session()` is a deprecated alias kept for backwards compatibility;
# `requests.Session()` is the supported constructor and returns the same object type.
session = requests.Session()

def get_search_results(address, city_state):
    """Query Zillow's GetSearchResults endpoint and return the homes that carry a value.

    Args:
        address: Value sent as the ``address`` query parameter.
        city_state: Value sent as ``citystatezip``; spaces are replaced with ``+``.

    Returns:
        A list of ``ZillowHomeInfo``, one per result that has a zpid, a regional
        ``zindexValue`` and both coordinates. The list is empty when the response
        does not have the expected structure or its message code is not 0.

    Raises:
        KeyError: if the ``ZILLOW_WSID`` environment variable is not set.
        Exception: if the HTTP response status is not OK.
    """
    params = {
        'zws-id': os.environ['ZILLOW_WSID'],
        'address': address,
        # NOTE(review): `requests` URL-encodes parameter values itself, so this
        # literal '+' presumably goes out as '%2B' -- confirm that is intended.
        'citystatezip': city_state.replace(' ', '+')
    }
    response = session.get('https://www.zillow.com/webservice/GetSearchResults.htm', params=params)
    if not response.ok:
        raise Exception('Fail code ({}): {}'.format(response.status_code, response.text))

    d = xmltodict.parse(response.text)
    try:
        root = d['SearchResults:searchresults']
        results = root['response']['results']['result']
        code = int(root['message']['code'])
    except (KeyError, TypeError, ValueError):
        # the payload does not have the expected structure: treat as "no results"
        return []

    if code != 0:
        return []

    # xmltodict yields a plain dict (not a one-element list) when an element
    # occurs only once, so a lone <result> has to be wrapped before iterating.
    if isinstance(results, dict):
        results = [results]
    elif not isinstance(results, list):
        return []

    homes = []
    for res in results:
        zpid = res.get('zpid')
        if zpid is None:
            continue

        # look for real estate details
        real_estate = res.get('localRealEstate')
        if real_estate is None:
            continue

        # look for the house valuation
        value = (real_estate.get('region') or {}).get('zindexValue')
        if value is None:
            continue

        # a valuation exists: it is only usable together with both coordinates
        location = res.get('address')
        if location is None:
            continue
        lat, lon = location.get('latitude'), location.get('longitude')
        if lat is None or lon is None:
            continue

        homes.append(ZillowHomeInfo(
            zpid=zpid,
            lat=float(lat),
            lon=float(lon),
            value=float(value.replace(',', ''))  # values come formatted like '250,000'
        ))

    return homes


def get_comps(zpid):
    """Yield the comparable homes ("comps") Zillow's GetComps endpoint reports for a property.

    This is a generator, so the HTTP request is only made once iteration starts.

    Args:
        zpid: Zillow property id to look up (up to 25 comps are requested).

    Yields:
        ``ZillowHomeInfo`` for every comp that has a zpid, both coordinates and a
        regional ``zindexValue``. Incomplete or malformed comps are skipped.
        Nothing is yielded when the message code is not 0 or the response has
        no comparables.

    Raises:
        KeyError: if ``ZILLOW_WSID`` is not set, or the response lacks the
            ``Comps:comps`` message code.
        Exception: if the HTTP response status is not OK.
    """

    def inner(comp):
        """Turn one parsed comp entry into a ZillowHomeInfo, or None if unusable."""
        if comp is None:
            return None
        try:
            comp_zpid = comp.get('zpid')
            lat = comp.get('address', {}).get('latitude')
            lon = comp.get('address', {}).get('longitude')
            value = comp.get('localRealEstate', {}).get('region', {}).get('zindexValue')
            if any(field is None for field in (comp_zpid, lat, lon, value)):
                return None
            return ZillowHomeInfo(
                zpid=comp_zpid,
                lat=float(lat),
                lon=float(lon),
                value=float(value.replace(',', ''))  # values come formatted like '250,000'
            )
        except (AttributeError, TypeError, ValueError):
            # unexpected structure or unparseable number: skip this comp
            return None

    # make call to Zillow to get comps response
    params = {
        'zws-id': os.environ['ZILLOW_WSID'],
        'zpid': zpid,
        'count': 25
    }
    # https, like the search endpoint, so the API key is not sent in clear text
    response = session.get('https://www.zillow.com/webservice/GetComps.htm', params=params)
    if not response.ok:
        raise Exception('Fail code ({}): {}'.format(response.status_code, response.text))

    # parse the response and create ZillowHomeInfo objects
    d = xmltodict.parse(response.text)
    code = int(d['Comps:comps']['message']['code'])
    if code != 0:
        return

    try:
        comp_list = d['Comps:comps']['response']['properties']['comparables']['comp']
    except (KeyError, TypeError):
        # successful code but no comparables section
        return

    # xmltodict yields a plain dict (not a one-element list) when an element
    # occurs only once; iterating that dict would walk its keys instead.
    if isinstance(comp_list, dict):
        comp_list = [comp_list]
    elif not isinstance(comp_list, list):
        return

    for comp_info in map(inner, comp_list):
        if comp_info is not None:
            yield comp_info

                
def meets_criteria(home, lat_range=(33.6, 33.9), lon_range=(-84.5, -84.2)):
    """Tell whether a home lies strictly inside a latitude/longitude bounding box.

    Args:
        home: Object exposing numeric ``lat`` and ``lon`` attributes.
        lat_range: ``(min, max)`` latitude bounds, both exclusive.
        lon_range: ``(min, max)`` longitude bounds, both exclusive.
            The defaults are the box this notebook uses for its Atlanta crawl.

    Returns:
        True when both coordinates fall strictly between their bounds.
    """
    lat_min, lat_max = lat_range
    lon_min, lon_max = lon_range
    return lat_min < home.lat < lat_max and lon_min < home.lon < lon_max


In [6]:
# set to True to generate a new heatmap for a city
data_fn = 'data/data.pkl'
generate_new = False

if generate_new:
    node_map = dict()   # zpid -> zpids of the comps returned for that home
    all_nodes = set()   # zpids that have already been queued for expansion
    node_data = dict()  # zpid -> ZillowHomeInfo
    city = 'Atlanta'
    state = 'GA'
    print('City: {}'.format(city))
    n_iter = 0
    max_iter = np.inf
    size_target = 10000
    report_every = 50   # print a progress line every this many expanded homes

    # get initial results to seed the graph
    homes = get_search_results(
        address=city,
        city_state='{city}+{state}'.format(city=city, state=state)
    )
    if len(homes) == 0:
        raise ValueError('No results found for city {}.'.format(city))

    for home in homes:
        node_data[home.zpid] = home
        # mark seeds as seen too; otherwise a seed that comes back as a comp
        # would be queued and expanded a second time
        all_nodes.add(home.zpid)

    done = False
    while len(homes) > 0 and not done:
        n_iter += 1

        # pop from the end: the list is used as a stack
        home = homes.pop(-1)

        for comp in get_comps(home.zpid):
            if comp.zpid not in all_nodes and meets_criteria(comp):
                # add to set of all nodes
                all_nodes.add(comp.zpid)

                # add to the list of homes still to expand
                homes.append(comp)

                # add to the id -> home info mapping
                node_data[comp.zpid] = comp

            # add to the id -> comps mapping (recorded for every comp,
            # including those already seen or rejected by meets_criteria)
            node_map.setdefault(home.zpid, list()).append(comp.zpid)

        # iterate until we have reached the desired condition
        if len(node_data) >= size_target:
            print('Desired # elements reached')
            done = True
            print(len(node_data), len(homes))
        elif n_iter >= max_iter:
            print('Maximum # iterations reached')
            done = True
            print(len(node_data), len(homes))
        elif len(homes) == 0:
            print('All homes traversed/dead end reached.')
            done = True
            print(len(node_data), len(homes))
        # periodically print out how many homes we've found
        elif n_iter % report_every == 0:
            print(len(node_data))

    # make sure the target directory exists before writing the cache
    os.makedirs(os.path.dirname(data_fn) or '.', exist_ok=True)
    with open(data_fn, 'wb') as fp:
        pickle.dump({'node_data': node_data, 'node_map': node_map}, fp)
else:
    # NOTE: pickle.load can execute arbitrary code; only load a cache file
    # that this notebook produced itself. The namedtuple class of the stored
    # ZillowHomeInfo values must already be defined when this runs.
    with open(data_fn, 'rb') as fp:
        data = pickle.load(fp)
    node_data, node_map = data['node_data'], data['node_map']
    
print('# houses: {}'.format(len(node_data)))


# houses: 9938


In [7]:
from folium.plugins import HeatMap

# One row per home: latitude, longitude, value. A dedicated name is used so the
# `data` variable of other cells is not silently replaced by a different kind of object.
heat_points = np.array([
    [home.lat, home.lon, home.value]
    for home in node_data.values()
])
if heat_points.size == 0:
    # without this guard the 2-D slicing below fails with an opaque IndexError
    raise ValueError('No homes available to plot.')

# centre the map on the mean coordinate, passed as a plain [lat, lon] list
m = folium.Map(
    location=heat_points[:, :2].mean(axis=0).tolist(),
    control_scale=True,
    zoom_start=11
)

radius = 10
# NOTE(review): the third column (raw home value) is used as the heat weight
# without scaling -- confirm against the HeatMap docs whether weights above the
# layer's maximum intensity are simply clipped.
hm = HeatMap(
    heat_points,
    radius=radius,
    blur=int(2 * radius)
)
hm.add_to(m)

m.save('atlanta_heatmap.html')

# uncomment here to see map in notebook
# m

# Visualize the Price per Sq Ft of US States

In [8]:
def local_states_geo_json():
    """Return the local path of the US-states GeoJSON file, downloading it if absent.

    The file is looked up at ``data/us-states.json`` relative to the working
    directory. When it does not exist it is fetched from the folium examples
    on GitHub and written there (the ``data`` directory is created if needed).

    Returns:
        The relative path ``data/us-states.json``.

    Raises:
        requests.HTTPError: if the download answers with an error status.
    """
    fn_states_geo = os.path.join('data', 'us-states.json')
    if not os.path.exists(fn_states_geo):
        url = 'https://raw.githubusercontent.com/python-visualization/folium/master/examples/data'
        us_states = '{}/us-states.json'.format(url)
        # fail fast on a hung connection or an error page instead of caching garbage
        response = requests.get(us_states, timeout=30)
        response.raise_for_status()
        geo_json_states = response.json()

        os.makedirs(os.path.dirname(fn_states_geo), exist_ok=True)
        with open(fn_states_geo, 'w') as fp:
            json.dump(geo_json_states, fp)

    return fn_states_geo


def make_color_map(df, col_keys, col_data):
    """Build a colour scale for ``df[col_data]`` and a GeoJSON style function using it.

    Args:
        df: DataFrame holding one row per region. It is not modified.
        col_keys: Name of the column whose entries match the ``id`` of the
            GeoJSON features to be styled.
        col_data: Name of the numeric column that drives the colour.

    Returns:
        ``(cmap, value_to_color)`` where ``cmap`` is the branca ``OrRd_07`` linear
        colormap scaled between the 5th and 95th percentile of the data
        (truncated to int), and ``value_to_color`` maps a GeoJSON feature to a
        style dict. Features whose id has no (non-missing) value are coloured
        as value 0.
    """
    # Work on a NaN-free selection of the two relevant columns so the caller's
    # frame stays untouched and missing values cannot poison the percentiles.
    pairs = df[[col_keys, col_data]].dropna()
    values = pairs[col_data].values
    key_to_value = dict(zip(pairs[col_keys], pairs[col_data]))

    # clip the scale to the 5th-95th percentile so outliers don't flatten it
    cmap = colormap.linear.OrRd_07.scale(
        int(np.percentile(values, 5)),
        int(np.percentile(values, 95))
    )
    # NOTE(review): the caption says "$ in thousands" while the quantity is a
    # price per square foot -- confirm the unit.
    cmap.caption = 'Zillow Median Price Per Square Foot ($ in thousands)'

    def value_to_color(entry):
        """Style one GeoJSON feature according to the value of its ``id``."""
        return {
            'fillColor': cmap(key_to_value.get(entry['id'], 0)),
            'weight': 1,
            'fillOpacity': 1.0,
        }

    return cmap, value_to_color


In [10]:
# load data downloaded from Zillow
df = pd.read_csv('data/State_MedianValuePerSqft_AllHomes.csv')

# Keep the state name and the last column of the file (presumably the most
# recent period -- confirm against the CSV header). `rename` without `inplace`
# returns a new frame, so no chained-assignment write hits a slice of `df`.
latest_col = df.columns[-1]
state_ppsqft = df[['State', latest_col]].rename(columns={latest_col: 'MedianPPSQFT'})

# create a color map
cmap, f_value_to_color = make_color_map(state_ppsqft, col_keys='State', col_data='MedianPPSQFT')

# add the US states Geo JSON data to the Map
state_values = folium.GeoJson(
    local_states_geo_json(),
    style_function=f_value_to_color
)

m = folium.Map(location=[37, -102], zoom_start=4)
cmap.add_to(m)
state_values.add_to(m)
m.save('ppsqft_map.html')

# uncomment here to see map in notebook
# m
