In [71]:
################################################################################                                                                                                                                                              
################################################################################
#
# FILE: cbg_open_by_categories.ipynb
#
# BY: Dmitry Sedov 
#
# CREATED: Wed Feb 19 2020
#
# DESC: This code produces tables with the number of establishments open in each CBG, by category and month.
#
# EXEC:
#      
################################################################################
################################################################################

In [72]:
################################ Libraries ######################################

import os
import sqlalchemy as db
import pandas as pd

################################################################################

In [73]:
############################### Constants ######################################

# Folder that receives the exported CSV file.
output_folder_path = (
    '/home/user/projects/urban/data/output/'
    'reduced-form'
)

# Query template selecting the place ids found in the visits table for one
# month; the {year} and {month} placeholders are filled in with str.format.
visits_table_statement = """
SELECT 
    sname_place_id
FROM 
    visits
WHERE
    year = {year} AND
    month = {month}
;
"""

################################################################################

In [74]:
################## Get permanent part of data from SQL #########################


# Database engine shared by every query in this notebook.
# NOTE(review): the URL below still contains literal {user}/{user_pass}/{host}/
# {dataname2} placeholders that are never formatted in the visible code --
# presumably the real credentials were redacted; confirm before running.
engine = db.create_engine('postgresql://{user}:{user_pass}@{host}/{dataname2}')
# Kept for backward compatibility: the queries below go through `engine`
# directly, so this connection is not used in the cells visible here.
connection = engine.connect()

# All establishments with their 2-digit NAICS category and CBG.
establishments_table_statement = """
SELECT 
    sname_place_id,
    naics_first2,
    cbg
FROM 
    establishments
;
"""
establishments = pd.read_sql(establishments_table_statement, engine)

# All restaurants with their CBG.
restaurants_table_statement = """
SELECT 
    sname_place_id,
    cbg
FROM 
    restaurants
;
"""
# Fix: read with the restaurants query. Previously the establishments query was
# reused here, so `restaurants` was a copy of `establishments` and the
# downstream restaurant counts covered every establishment.
restaurants = pd.read_sql(restaurants_table_statement, engine)

################################################################################

In [75]:
############## Function to get month of visits to establishments ###############

def get_month_categories(vintage):
    """Count the establishments open in each CBG for one month and export them.

    An establishment (or restaurant) is treated as open in a month when its
    `sname_place_id` is returned by the visits query for that year and month.

    Parameters
    ----------
    vintage : tuple
        A `(year, month)` pair; both values are substituted into
        `visits_table_statement` and converted with `int()` for the output.

    Returns
    -------
    str
        'created csv' if the output file did not exist and was created,
        'appended to csv' if rows were appended to an existing file.

    Raises
    ------
    pandas.errors.MergeError
        If a merge with the month's visits is not one-to-one.

    Notes
    -----
    The output has one row per CBG with one `naics<code>` column per category
    found in `establishments`, plus `rest_open`, `year` and `month`.
    Rows are appended whenever the output file already exists, so re-running
    the export without removing the file duplicates rows.
    """
    # Parse the vintage
    year, month = vintage

    # Get the visits for that vintage month
    print(f'Getting visits for {year}-{month}...')
    visits_table_statement_month = visits_table_statement.format(year = year,
                                                                 month = month)
    visits_month = pd.read_sql(visits_table_statement_month, engine)

    # Inner merges keep only the places that appear in this month's visits
    print('Merging...')
    est_open_this_month = pd.merge(establishments,
                                   visits_month,
                                   how = 'inner',
                                   validate = 'one_to_one')

    rest_open_this_month = pd.merge(restaurants,
                                    visits_month,
                                    how = 'inner',
                                    validate = 'one_to_one')

    print('Transforming...')
    # Long table of counts per (cbg, category), then one column per category
    cbg_est_open = est_open_this_month.groupby(['cbg',
                                                'naics_first2']
                                              ).size().reset_index(name = 'counts')
    cbg_est_open = cbg_est_open.pivot(index = 'cbg',
                                      columns = 'naics_first2',
                                      values = 'counts')

    # Use the same category columns for every month. Months are appended to the
    # CSV without a header, so a category missing in one month would otherwise
    # shift the remaining columns relative to the header row.
    all_categories = sorted(establishments['naics_first2'].dropna().unique())
    cbg_est_open = cbg_est_open.reindex(columns = all_categories)
    cbg_est_open.columns = ['naics' + str(code)
                            for code in cbg_est_open.columns]

    cbg_rest_open = rest_open_this_month.groupby(['cbg']
                                                ).size().to_frame('rest_open')

    # Outer merge keeps CBGs that appear in only one of the two tables;
    # the resulting gaps are zero counts.
    cbg_categs = pd.merge(cbg_est_open,
                          cbg_rest_open,
                          how = 'outer',
                          validate = 'one_to_one',
                          left_index = True,
                          right_index = True)
    cbg_categs = cbg_categs.fillna(0).astype('int64').reset_index()

    cbg_categs['year'] = int(year)
    cbg_categs['month'] = int(month)

    # Export data; if the file exists, append to the existing one
    output_file_path = os.path.join(output_folder_path,
                                    'cbg_categs.csv')
    file_exists = os.path.exists(output_file_path)
    cbg_categs.to_csv(path_or_buf = output_file_path,
                      na_rep = '',
                      index = False,
                      # Write the header only when creating the file
                      header = not file_exists,
                      mode = 'a' if file_exists else 'w')
    return 'appended to csv' if file_exists else 'created csv'

################################################################################

In [76]:
# Build every data vintage as a (year, zero-padded month) pair of strings
vintages_2017 = [('2017', f'{m:02d}') for m in range(6, 13)]   # months 06-12
vintages_2018 = [('2018', f'{m:02d}') for m in range(1, 13)]   # months 01-12
vintages_2019 = [('2019', f'{m:02d}') for m in range(1, 8)]    # months 01-07
vintages_all = [*vintages_2017, *vintages_2018, *vintages_2019]

In [77]:
# Run the export once per vintage, in order, keeping each call's status message
data_all = list(map(get_month_categories, vintages_all))

Getting visits for 2017-06...
Merging...
Transforming...
Getting visits for 2017-07...
Merging...
Transforming...
Getting visits for 2017-08...
Merging...
Transforming...
Getting visits for 2017-09...
Merging...
Transforming...
Getting visits for 2017-10...
Merging...
Transforming...
Getting visits for 2017-11...
Merging...
Transforming...
Getting visits for 2017-12...
Merging...
Transforming...
Getting visits for 2018-01...
Merging...
Transforming...
Getting visits for 2018-02...
Merging...
Transforming...
Getting visits for 2018-03...
Merging...
Transforming...
Getting visits for 2018-04...
Merging...
Transforming...
Getting visits for 2018-05...
Merging...
Transforming...
Getting visits for 2018-06...
Merging...
Transforming...
Getting visits for 2018-07...
Merging...
Transforming...
Getting visits for 2018-08...
Merging...
Transforming...
Getting visits for 2018-09...
Merging...
Transforming...
Getting visits for 2018-10...
Merging...
Transforming...
Getting visits for 2018-11...
M

In [78]:
# Show the status message returned for each vintage
data_all

['created csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv']