In [None]:
################################################################################
################################################################################
#
# FILE: create_review_requests_table.ipynb 
#
# BY: Dmitry Sedov 
#
# CREATED: Thu Apr 9 2020
#
# DESC: This code creates a table of Yelp ids for which review requests will
#       be made.
#
# EXEC: 
#      
################################################################################
################################################################################

In [None]:
################################ Libraries #####################################

import sqlalchemy as db
import pandas as pd
import numpy as np

################################################################################

In [None]:
############################## SQL statements ##################################

# Query that returns one row per restaurant with a non-NULL Yelp id (y_id).
# Besides y_id and cbsa, every row carries cbsa_count: a window count of the
# y_id values that share the row's cbsa, so the per-cbsa total is repeated on
# each row instead of collapsing the rows as a GROUP BY would.
# NOTE(review): only y_id is filtered for NULLs; rows with a NULL cbsa are
# kept -- confirm how those should be counted and prioritised.
get_restaurant_ids = """
SELECT 
    y_id,
    cbsa,
    COUNT (y_id) OVER (
        PARTITION BY cbsa
    ) AS cbsa_count
FROM 
    restaurants
WHERE
    y_id IS NOT NULL
"""

################################################################################

In [None]:
################################################################################

# Run the get_restaurant_ids query and load its result into a DataFrame.
# NOTE(review): the `{user}`, `{user_pass}`, `{host}` and `{dataname2}` fields
# of the URL are unfilled template placeholders as written here -- presumably
# the real connection details were stripped; fill them in before running.
engine_master = db.create_engine('postgresql://{user}:{user_pass}@{host}/{dataname2}')

try:
    restaurants_for_requests = pd.read_sql(get_restaurant_ids, engine_master)
finally:
    # Release the engine's pooled connections even when the query fails
    engine_master.dispose()

################################################################################

In [None]:
# Preview the first 25 rows of the table
restaurants_for_requests.head(25)

In [None]:
# Dimensions of the table as a (rows, columns) tuple
restaurants_for_requests.shape

In [None]:
# Assign priority
# Every restaurant gets a priority that depends on the size of its market
# (cbsa_count). Counts that fall outside all bands get the default priority 0.
#
# Series.between() includes both end points by default in every pandas
# version, so no `inclusive` argument is passed: the boolean form
# (`inclusive = True`) is deprecated since pandas 1.3 and raises a ValueError
# in pandas 2.x.

# (lowest cbsa_count, highest cbsa_count, priority) -- bounds are inclusive
priority_bands = [
    (1, 500, 5),
    (501, 1000, 3),
    (1001, 5000, 1),
    (5001, 10000, 2),
    (10001, 500000, 4),
]

restaurants_for_requests['priority'] = np.select(
    [restaurants_for_requests['cbsa_count'].between(band_low, band_high)
     for band_low, band_high, band_priority in priority_bands],
    [band_priority for band_low, band_high, band_priority in priority_bands],
    default = 0
)

In [None]:
# Order the rows in place: lowest priority number first and, within the same
# priority, the largest markets (highest cbsa_count) first
restaurants_for_requests.sort_values(
    by = ['priority', 'cbsa_count'],
    ascending = [True, False],
    inplace = True
)

In [None]:
# Bookkeeping columns: every row starts with status 'needed' and an empty
# error message
tracking_defaults = {
    'review_request_status': 'needed',
    'review_error_message': None,
}
for tracking_column, tracking_default in tracking_defaults.items():
    restaurants_for_requests[tracking_column] = tracking_default

In [None]:
# Replace the current index with a fresh 0..n-1 range, discarding the old
# index values
restaurants_for_requests.index = pd.RangeIndex(len(restaurants_for_requests))

In [None]:
# Preview the first 25 rows of the table at this point
restaurants_for_requests.head(25)

In [None]:
# Dimensions of the table as a (rows, columns) tuple
restaurants_for_requests.shape

In [None]:
# Export
# Write the table to `review_requests`, storing the DataFrame index as the
# `row_id` column, and then declare `row_id` the primary key.
# NOTE(review): the `{user}`, `{user_pass}`, `{host}` and `{dataname1}` fields
# of the URL are unfilled template placeholders as written here -- presumably
# the real connection details were stripped; fill them in before running.

engine_urban = db.create_engine('postgresql://{user}:{user_pass}@{host}/{dataname1}')

try:
    # engine.begin() yields a connection inside a transaction: it commits when
    # the block finishes, rolls back if anything raises, and always closes the
    # connection. The table creation and the ALTER therefore succeed or fail
    # together.
    with engine_urban.begin() as connection_urban:
        # if_exists keeps its default ('fail'), so an already existing
        # review_requests table is never overwritten.
        restaurants_for_requests.to_sql('review_requests',
                                        con = connection_urban,
                                        index = True,
                                        index_label = 'row_id',
                                        dtype = {'review_error_message': db.types.VARCHAR}
                                       )
        # Plain SQL strings must be wrapped in text(); SQLAlchemy 2.x no
        # longer accepts a bare str in Connection.execute().
        connection_urban.execute(
            db.text('ALTER TABLE review_requests ADD PRIMARY KEY (row_id);')
        )
finally:
    # Release the engine's pooled connections even when the export fails
    engine_urban.dispose()