In [1]:
################################################################################                                                                                                                                                              
################################################################################
#
# FILE: panel_tables_combination.py
#
# BY: Dmitry Sedov 
#
# CREATED: Tue Feb 18 2020
#
# DESC: This code constructs the full monthly panel dataset from multiple files.
#
# EXEC:
#      
################################################################################
################################################################################

In [2]:
################################ Libraries ######################################

import os
import pandas as pd
import sqlalchemy as db
import numpy as np

################################################################################

In [3]:
################################# Options ######################################

# The monthly input files and the combined panel share the same folder
input_folder_path = output_folder_path = (
    '/home/user/projects/urban/data/output/reduced-form'
)

################################################################################

In [4]:
################# Preimport the restaurant features dataset ####################

# Fill the connection-URL template from the environment so that no credentials
# live in the notebook. The variable names follow the libpq convention
# (PGUSER, PGPASSWORD, PGHOST, PGDATABASE); a missing one raises KeyError.
engine = db.create_engine(
    'postgresql://{user}:{user_pass}@{host}/{dataname2}'.format(
        user = os.environ['PGUSER'],
        user_pass = os.environ['PGPASSWORD'],
        host = os.environ['PGHOST'],
        dataname2 = os.environ['PGDATABASE']))

# Restaurant-level features: identifier, location codes, price and rating
restaurants_table_statement = """
SELECT
        r.sname_place_id AS rest_id,
        r.cbg,
        r.cbsa,
        r.price,
        r.rating
FROM
    restaurants AS r
;
"""

# Mapping from cbg to ct, where ct is the concatenation of the state FIPS,
# county FIPS and tract code columns
cbgs_table_statement = """
SELECT
    CONCAT(c.statefips, c.countyfips, c.tractcode) AS ct,
    c.censusblockgroup AS cbg
FROM
    cbgs AS c
;
"""

# Run both queries on a single connection that is closed when the block exits,
# and dispose of the engine even if one of the queries fails.
try:
    with engine.connect() as connection:
        restaurants = pd.read_sql(restaurants_table_statement, connection)
        cbgs = pd.read_sql(cbgs_table_statement, connection)
finally:
    engine.dispose()

# Merge restaurants with cbgs to get ct of each restaurant
restaurants = pd.merge(restaurants, cbgs, how = 'left', on = 'cbg')
# Each restaurant must appear exactly once: fails if rest_id is duplicated in
# the restaurants table or if a cbg matched several rows in cbgs.
assert restaurants.shape[0] == restaurants['rest_id'].nunique(), \
    'rest_id is not unique after merging restaurants with cbgs'

# Replace -1 prices in restaurants with NaNs
restaurants = restaurants.replace(to_replace = {'price': -1.0},
                                  value = np.nan)

################################################################################

In [5]:
####################### Function to merge a month of data ######################

def one_month_full(vintage):
    """Merge one month of panel data with restaurant features and export it.

    Reads 'sql_monthly_panel_{month}_{year}_e.csv' and
    'sql_monthly_panel_{month}_{year}_d.csv' from the module-level
    `input_folder_path`, outer-merges them one-to-one on
    ('rest_id', 'month', 'year'), outer-merges the result one-to-one with the
    module-level `restaurants` frame on 'rest_id', and writes the rows to
    'full_monthly_panel.csv' inside the module-level `output_folder_path`.

    Parameters
    ----------
    vintage : tuple
        (year, month) pair identifying the month, e.g. ('2017', '06').

    Returns
    -------
    str
        'created csv' if the output file was (re)written with a header,
        'appended to csv' if the rows were added to an existing file.

    Raises
    ------
    pandas.errors.MergeError
        If either merge is not one-to-one.
    ValueError
        If the output file already exists and its header does not contain
        exactly the columns produced for this month.
    """

    # Get year and month of the input data
    year, month = vintage

    print(f'Working on {year}-{month}.')

    # Construct the file paths
    input_file_name_e = f'sql_monthly_panel_{month}_{year}_e.csv'
    input_file_path_e = os.path.join(input_folder_path, input_file_name_e)
    input_file_name_d = f'sql_monthly_panel_{month}_{year}_d.csv'
    input_file_path_d = os.path.join(input_folder_path, input_file_name_d)

    data_e = pd.read_csv(input_file_path_e,
                         usecols = ['rest_id',
                                    'month',
                                    'year',
                                    'est_in_200m',
                                    'est_in_400m',
                                    'est_in_600m',
                                    'est_in_cbg'])
    data_d = pd.read_csv(input_file_path_d)

    # One-to-one-merge
    data_month = pd.merge(data_e,
                          data_d,
                          on = ['rest_id', 'month', 'year'],
                          how = 'outer',
                          validate = 'one_to_one')

    # Merge with restaurant characteristics
    data_month = pd.merge(data_month,
                          restaurants, on = 'rest_id',
                          how = 'outer',
                          validate = 'one_to_one')

    # Restaurants that only enter through the outer merge above have no period
    # identifiers, which would leave their rows unattributable to any month.
    # Stamp them with this vintage, reusing the value observed in the input
    # files when it is unambiguous so that the filled rows are formatted alike.
    for column, vintage_value in (('year', year), ('month', month)):
        observed = data_month[column].dropna().unique()
        fill_value = observed[0] if len(observed) == 1 else int(vintage_value)
        data_month[column] = data_month[column].fillna(fill_value)

    # Export data, if file exists - appending to the existing one
    output_file_path = os.path.join(output_folder_path,
                                    'full_monthly_panel.csv')
    # A zero-byte leftover has no header to append under, so treat it as absent
    if (os.path.exists(output_file_path)
            and os.path.getsize(output_file_path) > 0):
        # Appended rows carry no header, so they must follow the column layout
        # already in the file; read only the header line to get that layout.
        existing_columns = list(pd.read_csv(output_file_path, nrows = 0).columns)
        if set(existing_columns) != set(data_month.columns):
            raise ValueError(
                f'Columns for {year}-{month} do not match the header of '
                f'{output_file_path}: expected {existing_columns}, '
                f'got {list(data_month.columns)}.')
        # If file exists: append to csv
        data_month[existing_columns].to_csv(path_or_buf = output_file_path,
                                            na_rep = '',
                                            index = False,
                                            header = False,
                                            mode = 'a')
        return 'appended to csv'
    else:
        # If not: create csv and export
        data_month.to_csv(path_or_buf = output_file_path,
                          na_rep = '',
                          index = False,
                          header = True,
                          mode = 'w')
        return 'created csv'

################################################################################

In [6]:
# Construct all of the data vintages as (year, zero-padded month) string pairs:
# June-December 2017, all of 2018, and January-July 2019
vintages_2017 = [('2017', f'{month_number:02d}') for month_number in range(6, 13)]
vintages_2018 = [('2018', f'{month_number:02d}') for month_number in range(1, 13)]
vintages_2019 = [('2019', f'{month_number:02d}') for month_number in range(1, 8)]
vintages_all = [*vintages_2017, *vintages_2018, *vintages_2019]

In [7]:
# Export all of the data.
# one_month_full appends whenever its output file already exists, so a file left
# over from an earlier run would end up holding every month twice. Remove it
# first so that re-running this cell regenerates the panel from scratch.
# The path below must match the one one_month_full writes to.
panel_file_path = os.path.join(output_folder_path, 'full_monthly_panel.csv')
if os.path.exists(panel_file_path):
    print(f'Removing stale output {panel_file_path}.')
    os.remove(panel_file_path)

# One status string per vintage, in processing order
data_all = [one_month_full(v) for v in vintages_all]

Working on 2017-06.
Working on 2017-07.
Working on 2017-08.
Working on 2017-09.
Working on 2017-10.
Working on 2017-11.
Working on 2017-12.
Working on 2018-01.
Working on 2018-02.
Working on 2018-03.
Working on 2018-04.
Working on 2018-05.
Working on 2018-06.
Working on 2018-07.
Working on 2018-08.
Working on 2018-09.
Working on 2018-10.
Working on 2018-11.
Working on 2018-12.
Working on 2019-01.
Working on 2019-02.
Working on 2019-03.
Working on 2019-04.
Working on 2019-05.
Working on 2019-06.
Working on 2019-07.


In [8]:
# Check how the export was done: one status string per vintage, in processing
# order ('created csv' when the output file was newly written, 'appended to csv'
# when the rows were added to an existing file)
data_all

['created csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv',
 'appended to csv']