In [1]:
###############################################################################
#
# FILE: stadiums_and_establishments.ipynb
#
# BY: Timur Abbiasov
#
# DATE: Aug 24 2020
#
# DESC: This notebook produces descriptive maps of stadium locations and the establishments around them
#
# COMMENT: 
#
###############################################################################

In [1]:
################################ Libraries #####################################

import sqlalchemy as db
import pandas as pd
import geopandas as gpd

import collections
from webcolors import hex_to_rgb

import os
from dotenv import load_dotenv
from datetime import datetime

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import seaborn as sns
import pydeck

################################################################################

In [2]:
################################# Options ######################################

# Show up to 999 rows when a DataFrame is displayed.
pd.set_option('display.max_rows', 999)

################################################################################

In [3]:
################################ Constants #####################################

year = 2018
project_folder = '/home/tuser/stadiums'
# Processed data sits under the project folder (trailing slash kept on purpose).
output_folder = f'{project_folder}/data/processed/'

################################################################################


################################ Env variables #####################################

# load_dotenv() is called before reading the variables below; each one is
# None when it is not set in the environment.
load_dotenv()
PSQL_USER, PSQL_PASS, MAPBOX_API_KEY = (
    os.environ.get(variable_name)
    for variable_name in ('PSQL_USER', 'PSQL_PASS', 'MAPBOX_API_KEY')
)

In [4]:
def get_stadiums(buffer = 0):
    """Read the stadiums table from PostgreSQL into a GeoDataFrame.

    Parameters
    ----------
    buffer : int or float, optional
        When falsy (0, False, None) each stadium's ``geom`` is the point built
        from its longitude/latitude. Otherwise ``geom`` is ``ST_Buffer`` of
        that point with ``buffer`` as the radius (the notebook passes 3000 for
        a 3 km buffer).

    Returns
    -------
    geopandas.GeoDataFrame
        Columns: stadium_id, stadium_sport, stadium_parent_id, location_name,
        city, stadium_cbg and geom.

    Raises
    ------
    TypeError, ValueError
        If a truthy ``buffer`` cannot be converted to a float.
    """

    # Geometry expression shared by both variants of the query
    point = "ST_SetSRID(ST_POINT(longitude, latitude), 4326)::geography"
    if buffer:
        # float() guarantees that only a number is ever interpolated into the SQL text
        geom_expression = f"ST_Buffer({point}, {float(buffer)})"
    else:
        geom_expression = point

    get_stadiums_with_geo = f"""
        SELECT
            sname_place_id as stadium_id,
            sport as stadium_sport,
            parent_sname_place_id as stadium_parent_id,
            location_name,
            city,
            cbg as stadium_cbg,
            {geom_expression} AS geom
        FROM
            stadiums
            ;
        """

    # PostgreSQL connection
    engine = db.create_engine(f'postgresql://{PSQL_USER}:{PSQL_PASS}@134.209.70.145/dataname2')

    # Read stadiums into a geodataframe; release the pool even if the read fails
    try:
        results = gpd.read_postgis(get_stadiums_with_geo, con = engine)
    finally:
        engine.dispose()

    return results

In [5]:
def get_establishments_around_stadiums():
    """Read every establishment located within 3000 units of a stadium.

    Two TEMPORARY tables are built first: ``stadiums_with_geo`` (from
    ``stadiums``) and ``places_with_geo`` (``establishments`` UNION
    ``restaurants``), each with a geography point and indexes. They are then
    joined with ``ST_DWithin(..., 3000)``; the notebook treats this as a 3 km
    radius.

    TEMPORARY tables are visible only to the database session that created
    them, so the DDL and the final SELECT all run on one connection rather
    than on separate pool checkouts.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per (stadium, nearby place) pair with columns place_id,
        naics_2digit, place_parent_id, stadium_id, geom (the place location)
        and distance. Because the join is a LEFT JOIN, a stadium without any
        matching place still yields one row whose place columns are NULL.
    """

    # Create the stadiums with geo table
    create_temp_stadiums_table_with_geo = """
    CREATE TEMPORARY TABLE IF NOT EXISTS stadiums_with_geo AS
    SELECT
        sname_place_id,
        sport,
        parent_sname_place_id,
        location_name,
        city,
        state,
        cbg,
        ST_SetSRID(ST_POINT(longitude, latitude), 4326)::geography AS s_location
    FROM
        stadiums
        ;

    CREATE INDEX IF NOT EXISTS stadiums_with_geo_sg_idx
    ON stadiums_with_geo (sname_place_id);

    CREATE INDEX IF NOT EXISTS stadiums_with_geo_location_idx
    ON stadiums_with_geo
    USING GIST(s_location);
    """

    # Create the all-establishments with geo table
    create_temp_places_table_with_geo = """
    CREATE TEMPORARY TABLE IF NOT EXISTS places_with_geo AS
    SELECT
        sname_place_id,
        parent_sname_place_id,
        naics_code / 10000 AS naics_2digit,
        ST_SetSRID(ST_POINT(longitude, latitude), 4326)::geography AS p_location
    FROM
        establishments
    UNION
    SELECT
        sname_place_id,
        parent_sname_place_id,
        naics_code / 10000 AS naics_2digit,
        ST_SetSRID(ST_POINT(longitude, latitude), 4326)::geography AS p_location
    FROM
        restaurants
    ;
    CREATE INDEX IF NOT EXISTS places_with_geo_sg_idx
    ON places_with_geo (sname_place_id);

    CREATE INDEX IF NOT EXISTS places_with_geo_location_idx
    ON places_with_geo
    USING GIST(p_location);
    """

    # Pair every stadium with the places around it (excluding the stadium itself)
    get_places_close_to_stadiums = """

    SELECT
        p.sname_place_id AS place_id,
        p.naics_2digit AS naics_2digit,
        p.parent_sname_place_id AS place_parent_id,
        s.sname_place_id AS stadium_id,
        p.p_location as geom,
        ST_Distance(s.s_location, p.p_location) AS distance
    FROM
        stadiums_with_geo AS s
    LEFT JOIN
        places_with_geo AS p
    ON
        ST_DWithin(s.s_location, p.p_location, 3000)
    AND
        s.sname_place_id != p.sname_place_id
    ;
    """

    # PostgreSQL connection
    engine = db.create_engine(f'postgresql://{PSQL_USER}:{PSQL_PASS}@134.209.70.145/dataname2')

    try:
        # One connection == one database session, so the temp tables created
        # here are guaranteed to exist when the SELECT runs.
        with engine.connect() as connection:
            with connection.begin():
                connection.execute(db.text(create_temp_stadiums_table_with_geo))
                connection.execute(db.text(create_temp_places_table_with_geo))

            results = gpd.read_postgis(get_places_close_to_stadiums, con = connection)
    finally:
        # Release the pool even when one of the statements fails
        engine.dispose()

    return results

In [6]:
# Load the stadiums twice: once as plain points (no buffer) and once with a
# 3km buffer drawn around each point:

stadiums = get_stadiums(buffer=0)
stadiums_buff = get_stadiums(buffer=3_000)

In [8]:
# Read in establishments data and attach the city and stadium_cbg columns of
# the stadium each establishment was matched to:

establishments = get_establishments_around_stadiums().merge(
    stadiums[['stadium_id', 'city', 'stadium_cbg']],
    how = 'left',
    on = 'stadium_id',
)

In [7]:
# Define several utilities:

def get_palette_colors(categories, palette_name='husl'):
    """Map each category to a color from a seaborn palette, scaled by 255.

    The palette is requested with one color per category; every channel of
    each color is multiplied by 255 and the channels are returned as a list.
    """
    palette = sns.color_palette(palette_name, len(categories))
    return {
        category: [channel * 255 for channel in color]
        for category, color in zip(categories, palette)
    }

def color_lookup(categories, palette_name='husl'):
    """Map each category to a color from a seaborn palette, channels unscaled.

    Returns an OrderedDict (in the order of ``categories``) whose values are
    the palette colors converted to plain lists.
    """
    palette = sns.color_palette(palette_name, len(categories))
    lookup = collections.OrderedDict()
    for category, color in zip(categories, palette):
        lookup[category] = list(color)
    return lookup

def listit(t):
    """Recursively convert nested tuples/lists into nested lists.

    Anything that is not a list or tuple is returned unchanged.
    """
    if not isinstance(t, (list, tuple)):
        return t
    return [listit(item) for item in t]

In [11]:
# Assemble a table of establishments in the main NAICS categories that are located in Jacksonville:

naics_main_codes = [44, 45, 52, 61, 62, 71, 72, 81]
naics_main_names = ['Retail','Retail','Finance', 'Education', 'Health', 'Recreation','FoodAccommodation','Other Services']
naics_main_dict = dict(zip(naics_main_codes,naics_main_names))

# Keep only Jacksonville rows whose 2-digit NAICS code is one of the main codes.
# The explicit .copy() makes the column assignment below act on an independent frame.
in_jacksonville = establishments['city'] == 'jacksonville'
in_main_naics = establishments['naics_2digit'].isin(naics_main_codes)
jacksonville_places = establishments[in_jacksonville & in_main_naics].copy()
jacksonville_places['industry'] = jacksonville_places['naics_2digit'].apply(lambda code: naics_main_dict[code])

# Industry names present in the data (groupby index order), with one color each
naics_categories = list(jacksonville_places.groupby('industry')['place_id'].count().index)
naics_colors = get_palette_colors(naics_categories)

# Turn the frame into a list of feature dicts that PyDeck can read
establishments_features = jacksonville_places.__geo_interface__['features']
for feature in establishments_features:
    properties = feature['properties']
    feature['geometry']['coordinates'] = listit(feature['geometry']['coordinates'])
    feature['city'] = properties['city']
    if 'industry' in properties:
        # Append a fourth channel of 255 to the palette color
        properties['color'] = naics_colors.get(properties['industry']) + [255]
    else:
        properties['color'] = [0,0,0,255]
    # Drop the GeoJSON bookkeeping keys
    for key in ('id', 'bbox', 'type'):
        del feature[key]

In [8]:
# Define features to display on the map (i.e. set appropriate colors and geometry definitions that can be used by PyDeck)

sports_palette = 'tab10'
sports_categories = list(stadiums.groupby('stadium_sport')['stadium_id'].count().index)

# Single source of truth for the color of each sport
sports_hex = {'baseball': '#e70300',
              'basketball': '#00279a',
              'football': '#009500',
              'hockey': '#722ab5',
              'soccer': '#ffe200'}

# 0-255 RGB lists, used for the PyDeck features below
sports_colors_255 = {sport: list(hex_to_rgb(hex_code)) for sport, hex_code in sports_hex.items()}

# Stadium points
stadium_features = stadiums.__geo_interface__['features']
for feature in stadium_features:
    properties = feature['properties']
    # None when the sport has no entry in the palette
    sport_color = sports_colors_255.get(properties['stadium_sport'])
    feature['geometry']['coordinates'] = list(feature['geometry']['coordinates'])
    feature['name'] = properties['location_name']
    feature['city'] = properties['city'].title()
    properties['color'] = sport_color
    properties['linecolor'] = sport_color
    # Drop the GeoJSON bookkeeping keys
    for key in ('id', 'bbox', 'type'):
        del feature[key]

# Stadium buffers (nested coordinate tuples are converted to nested lists)
stadium_buffer_features = stadiums_buff.__geo_interface__['features']
for feature in stadium_buffer_features:
    properties = feature['properties']
    sport_color = sports_colors_255.get(properties['stadium_sport'])
    feature['geometry']['coordinates'] = listit(feature['geometry']['coordinates'])
    feature['name'] = properties['location_name']
    properties['color'] = sport_color
    properties['linecolor'] = sport_color
    for key in ('id', 'bbox', 'type'):
        del feature[key]


# RGB colors rescaled to values between 0 and 1 (in order to create legend in matplotlib):

sports_colors = {sport: [channel / 255 for channel in rgb] for sport, rgb in sports_colors_255.items()}

In [9]:
def make_legend(categories, w, h, div, cols , line = False):
    """Draw a standalone legend (no axes) for a label -> color mapping.

    Parameters
    ----------
    categories : unused
        Not read by the function; kept so existing calls keep working.
    w, h : float
        Figure width and height, passed to ``plt.figure(figsize=(w, h))``.
    div : int or float
        The legend gets ``int(len(cols) / div)`` columns, but never fewer
        than one.
    cols : dict
        Maps each legend label to a matplotlib color.
    line : bool, optional
        If True the patches are white with a colored edge; otherwise they
        are filled with the color.

    Returns
    -------
    None
    """
    plt.figure(figsize=(w, h))
    patches = [
        mpatches.Patch(facecolor= (color if (not line) else "white"), label=label, edgecolor=color, linewidth=2)
        for label, color in cols.items()]

    # int() truncates, so fewer entries than `div` (or an empty `cols`) would
    # give ncol=0; clamp to one column to keep that out of plt.legend.
    ncol = max(1, int(len(cols)/div))

    plt.legend(handles = patches,
               labels=cols.keys(),
               prop={"size":12, 'family': 'serif'},
               ncol=ncol,
               framealpha=1,
               frameon=False)
    plt.axis('off')
    # NOTE(review): this changes the global default font family after the
    # legend above has been built with an explicit 'serif' family - confirm
    # whether it is still wanted.
    plt.rcParams["font.family"] = "Times New Roman"
    plt.show()
    return

In [11]:
# Define pydeck layers:

# Accessor strings shared by several layers
position_accessor = "geometry.coordinates"
fill_color_accessor = "properties.color"
line_color_accessor = "properties.linecolor"

# Establishments: filled dots without an outline
places = pydeck.Layer(
    'ScatterplotLayer',
    data=pd.DataFrame(establishments_features),
    stroked=False,
    pickable=False,
    filled=True,
    radius_scale=5000,
    radius_min_pixels=1,
    radius_max_pixels=3,
    get_fill_color=fill_color_accessor,
    get_position=position_accessor,
    opacity=1,
)

# Stadiums: unfilled outline circle around each stadium
stadiums_halo = pydeck.Layer(
    'ScatterplotLayer',
    data=pd.DataFrame(stadium_features),
    stroked=True,
    pickable=False,
    filled=False,
    radius_scale=3000,
    radius_min_pixels=10,
    radius_max_pixels=3000,
    line_width_min_pixels=2,
    get_line_color=line_color_accessor,
    get_position=position_accessor,
    opacity=1,
)

# Stadiums: white dot with a colored outline; the only pickable layer
stadium_points = pydeck.Layer(
    'ScatterplotLayer',
    data=pd.DataFrame(stadium_features),
    stroked=True,
    pickable=True,
    filled=True,
    radius_scale=500,
    line_width_min_pixels=1.5,
    radius_min_pixels=2,
    radius_max_pixels=10,
    get_fill_color=[255, 255, 255, 255],
    get_line_color=line_color_accessor,
    get_position=position_accessor,
    opacity=1,
)

# Stadium buffers: outlines only
stadium_buffers = pydeck.Layer(
    'GeoJsonLayer',
    data=stadium_buffer_features,
    pickable=False,
    stroked=True,
    filled=False,
    get_fill_color=fill_color_accessor,
    get_line_color=line_color_accessor,
    radius_scale=1,
    opacity=1,
)

# Set the viewport location

view_state = pydeck.ViewState(
    longitude=-98.0633245,
    latitude=39.0678879,
    zoom=3.2,
    min_zoom=3,
    max_zoom=20,
    pitch=0,
    bearing=0,
)

# Render

# Tooltip text is filled from the `name` and `city` keys set on each feature
tooltip_config = {
    "text": "{name}\n({city})",
    "style": {
        "backgroundColor": 'transparent',
        "font-family": "Arial",
        "color": "rgba(50,50,50,200)",
    },
}

map_layers = [places, stadium_points, stadiums_halo, stadium_buffers]

r = pydeck.Deck(
    layers=map_layers,
    initial_view_state=view_state,
    map_style="mapbox://styles/tuser/cke93kui05nqq1anxektgrxqa",
    tooltip=tooltip_config,
)

In [12]:
# Display the results on the interactive map:
# (zoom in on Jacksonville to see the establishments in the 3km radius from TIAA Bank Field)

# Legend for the stadium sports (outlined patches)
make_legend(categories=sports_categories, w=10, h=0.5, div=1, cols=sports_colors, line=True)

# Write the map to stadiums_map.html and show whatever to_html returns
stadiums_map = r.to_html('stadiums_map.html')
display(stadiums_map)

# Legend for the establishment industries (filled patches)
naics_legend_colors = color_lookup(naics_categories, 'husl')
make_legend(categories=naics_categories, w=4, h=0.2, div=1.5, cols=naics_legend_colors)