In [None]:
################################################################################                                                                                                                                                              
################################################################################
#
# FILE: export-couple-cbsa-expanded.ipynb
#
# BY: Dmitry Sedov 
#
# CREATED: Wed Apr 22 2020
#
# DESC: This code produces a table with restaurant visits expanded by 
#       origin-CBGs.
#
# EXEC:
#      
################################################################################
################################################################################

In [None]:
############################### Libraries ######################################

import os
import numpy as np

import sqlalchemy as db
import pandas as pd

################################################################################

In [None]:
######################### Constants and settings ##############################

# Pandas display options
for option_name, option_value in (('display.max_rows', 500),
                                  ('display.max_columns', 50),
                                  ('display.width', 1000)):
    pd.set_option(option_name, option_value)

# Period used to filter the visits table (v.year / v.month in the SQL below)
year, month, days_in_month = 2019, 7, 31

# Folder holding cbsa_list.csv and the per-CBSA output subfolders
output_folder_path = '/home/user/projects/urban/data/output/spatial-demand/main_demand'

################################################################################

In [None]:
# cbsa_list.csv is read as a headerless, single-column file of CBSA codes.
# Codes are kept as strings; every CBSA listed there is flagged as completed.
completed_cbsa_list_file_path = os.path.join(output_folder_path, 'cbsa_list.csv')
completed_cbsa_table = (
    pd.read_csv(completed_cbsa_list_file_path,
                header=None,
                names=['cbsa'],
                dtype=str)
    .assign(completed=True)
)

In [None]:
# Select the CBSAs

# One row per CBSA in the `restaurants` table, with the number of non-null
# sname_place_id values in it returned as `restaurant_count`.
restaurants_by_cbsa_table_statement = """
SELECT
    cbsa,
    COUNT(sname_place_id) AS restaurant_count
FROM
    restaurants
GROUP BY
    cbsa
;
"""

# One row per CBSA in the `cbgs` table, with the number of non-null
# censusblockgroup values in it returned as `cbg_count`.
cbgs_by_cbsa_table_statement = """
SELECT
    cbsa,
    COUNT(censusblockgroup) AS cbg_count
FROM
    cbgs
GROUP BY
    cbsa
;
"""

# Load both per-CBSA count tables over a single, explicitly closed connection.
# Previously a connection was checked out with engine.connect() but never used
# or closed; engine.dispose() does not close checked-out connections, so it
# stayed open for the lifetime of the kernel.
engine = db.create_engine('postgresql://{user}:{user_pass}@{host}/{dataname2}')
try:
    with engine.connect() as connection:
        restaurants_by_cbsa_table = pd.read_sql(restaurants_by_cbsa_table_statement,
                                                connection)

        cbgs_by_cbsa_table = pd.read_sql(cbgs_by_cbsa_table_statement,
                                         connection)
finally:
    # Release the pooled connections even if one of the queries fails.
    engine.dispose()

# Build one row per CBSA:
#   1. outer-join CBG counts with restaurant counts (keeps CBSAs present in
#      only one of the two tables),
#   2. left-join the completed flag read from cbsa_list.csv,
#   3. treat CBSAs missing from that list as not completed,
#   4. order by CBSA code.
# validate='one_to_one' makes each merge raise if a CBSA code is duplicated.
cbsa_table = (
    cbgs_by_cbsa_table
    .merge(restaurants_by_cbsa_table,
           how='outer',
           on='cbsa',
           validate='one_to_one')
    .merge(completed_cbsa_table,
           how='left',
           on='cbsa',
           validate='one_to_one')
    .fillna({'completed': False})
    .sort_values('cbsa')
)

#medium_restaurants_filter = ((cbsa_table['restaurant_count'] >= 500) &
#                             (cbsa_table['restaurant_count'] <= 1000)
#                            )
#medium_cbgs_filter = ((cbsa_table['cbg_count'] >= 150) & 
#                      (cbsa_table['cbg_count'] <= 350)
#                     )

# cbsa_medium_table = cbsa_table[medium_restaurants_filter & medium_cbgs_filter]

#cbsa_medium_table.reset_index(drop = True, inplace = True)

#selected_cbsa_list = cbsa_medium_table['cbsa'].to_list()

In [None]:
# CBSAs still to be processed: not flagged as completed and with a restaurant
# count between 10 and 5000 (both bounds inclusive). Rows with a missing
# restaurant count fail the range test and are left out.
not_completed = cbsa_table['completed'] == False
restaurant_count_in_range = cbsa_table['restaurant_count'].between(10, 5000)
selected_cbsa_list = (
    cbsa_table
    .loc[not_completed & restaurant_count_in_range, 'cbsa']
    .tolist()
)

In [None]:
# Display the rows behind the selection: CBSAs not flagged as completed whose
# restaurant count lies in [10, 5000].
pending_cbsa_mask = (
    cbsa_table['completed'].eq(False)
    & cbsa_table['restaurant_count'].between(10, 5000)
)
cbsa_table[pending_cbsa_mask].reset_index(drop=True)

In [None]:
# Show the CBSA codes that the export loop below will process.
selected_cbsa_list

In [None]:
############################### SQL statements #################################

# Render the selected codes as a quoted, parenthesised SQL-style list,
# e.g. ('10180','10420'), and print it for inspection.
selected_cbsas = "('{}')".format("','".join(selected_cbsa_list))
print(selected_cbsas)

# Template that creates a restaurants table with visits broken down by home CBG.
#
# The {year}, {month} and {selected_cbsa} placeholders are filled in with
# str.format before execution. The statement:
#   * takes the restaurants of one CBSA and LEFT JOINs their visits rows for
#     the given year and month,
#   * expands the JSON column visitor_home_cbgs with json_each into one
#     (key, value) row per entry,
#   * stores the result in the TEMPORARY table restaurant_visits_long and
#     renames key -> home_cbg and value -> visitors_from_home_cbg.
# NOTE: a TEMPORARY table is visible only to the database session that created
# it, so it must be read back over the same connection.
restaurants_visits_breakdown_table_statement = """
CREATE TEMPORARY TABLE restaurant_visits_long AS (
    WITH restaurant_visits AS (
        SELECT
            r.sname_place_id,
            v.raw_visit_counts,
            v.raw_visitor_counts,
            v.visitor_home_cbgs
        FROM
            restaurants AS r
        LEFT JOIN
            visits AS v
        ON  
            r.sname_place_id = v.sname_place_id
        AND
            v.year = {year} 
        AND
            v.month = {month}
        WHERE 
            r.cbsa = '{selected_cbsa}'
    )
    SELECT 
        sname_place_id,
        raw_visit_counts,
        raw_visitor_counts,
        (json_each(visitor_home_cbgs)).*
    FROM
        restaurant_visits
);
ALTER TABLE restaurant_visits_long
RENAME COLUMN key TO home_cbg;
ALTER TABLE restaurant_visits_long
RENAME COLUMN value TO visitors_from_home_cbg;
"""

# Reads back every row of the temporary table restaurant_visits_long
# (one row per restaurant / home CBG pair) for export.
restaurants_visits_breakdown_export_statement = """
SELECT 
    sname_place_id,
    home_cbg,
    raw_visit_counts,
    raw_visitor_counts,
    visitors_from_home_cbg
FROM
    restaurant_visits_long
;
"""

################################################################################

In [None]:
####### Function to create the restaurant-choice data
def create_restaurants_expanded(selected_cbsa):
    """Export the restaurant visits of one CBSA, expanded by visitor home CBG.

    Creates the temporary table ``restaurant_visits_long`` for the given CBSA
    (using the module-level ``year`` and ``month``), reads it back and writes
    it to ``<output_folder_path>/cbsa<selected_cbsa>/choices<selected_cbsa>.csv``.
    The CBSA folder is created if it does not exist.

    Parameters
    ----------
    selected_cbsa : str
        CBSA code; it is substituted into the SQL template and used in the
        output folder and file names.

    Returns
    -------
    None
        The result is only written to disk.
    """
    print(f'Working with {selected_cbsa}.')
    cbsa_folder_path = os.path.join(output_folder_path, f'cbsa{selected_cbsa}')
    os.makedirs(cbsa_folder_path, exist_ok=True)

    l_restaurants_visits_breakdown_table_statement = (
        restaurants_visits_breakdown_table_statement.format(
            year=year,
            month=month,
            selected_cbsa=selected_cbsa,
        )
    )

    engine = db.create_engine('postgresql://{user}:{user_pass}@{host}/{dataname2}')
    try:
        # The temporary table is session-local, so it must be created and read
        # on the SAME connection. Going through the engine twice only worked
        # when the pool happened to hand back the same connection.
        with engine.connect() as connection:
            # Build a table expanding visits-from-cbgs
            connection.execute(db.text(l_restaurants_visits_breakdown_table_statement))

            # Get the expanded visits-from-CBGs table
            restaurants_visits_breakdown = pd.read_sql(
                restaurants_visits_breakdown_export_statement,
                connection,
            )
    finally:
        # Always release the pooled connections, even if a statement fails.
        engine.dispose()

    restaurants_visits_breakdown.to_csv(
        os.path.join(cbsa_folder_path, f'choices{selected_cbsa}.csv'),
        index=False,
    )

    return None


In [None]:
# Export every selected CBSA in turn; each call writes one CSV to disk.
for cbsa_code in selected_cbsa_list:
    create_restaurants_expanded(cbsa_code)

In [None]:
# `restaurants_visits_breakdown` is local to create_restaurants_expanded, so
# referencing it here raised a NameError on a fresh kernel. Reload the export
# of the last processed CBSA from disk and preview that instead.
last_cbsa = selected_cbsa_list[-1]
restaurants_visits_breakdown = pd.read_csv(
    os.path.join(output_folder_path,
                 f'cbsa{last_cbsa}',
                 f'choices{last_cbsa}.csv'),
    dtype={'home_cbg': str},  # keep CBG codes as text so they sort as exported
)
restaurants_visits_breakdown.sort_values('home_cbg').head()