In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics 

In [5]:
# Feature table used as model input (city- and downtown-level attributes).
rfc_data = pd.read_csv('~/data/downtownrecovery/curated_data/all_model_features_20230714.csv')

# Cluster assignment tables, read in the same order as before.
(
    lq_clusters,
    rq_dwtn_clusters,
    rq_city_clusters,
    rq_dwtn_clusters_period_1,
    rq_dwtn_clusters_period_2,
) = (
    pd.read_csv(cluster_csv)
    for cluster_csv in (
        '~/data/downtownrecovery/recovery_clusters/lq_clusters_single_period_1015.csv',
        '~/data/downtownrecovery/recovery_clusters/rq_dwtn_clusters_0822.csv',
        '~/data/downtownrecovery/recovery_clusters/rq_city_clusters_0822.csv',
        '~/data/downtownrecovery/recovery_clusters/rq_dwtn_clusters_1015_period_1.csv',
        '~/data/downtownrecovery/recovery_clusters/rq_dwtn_clusters_1015_period_2.csv',
    )
)

In [6]:
# Quick look at the two Hispanic-share columns.
rfc_data.loc[:, ['pct_hisp_city', 'pct_hisp_downtown']]

Unnamed: 0,pct_hisp_city,pct_hisp_downtown
0,0.017530,0.077640
1,0.077199,0.030308
2,11.124796,15.444015
3,37.867348,58.027329
4,6.394373,5.160839
...,...,...
61,33.716794,20.199713
62,37.427185,14.099454
63,26.803041,15.282392
64,32.420444,12.850295


In [7]:
# Convert the density columns from per-km2 back to per-m2 (divide by 1e6) for the
# sake of model consistency.
# NOTE(review): this cell is not idempotent; every re-run divides the columns again.
density_columns = [
    'population_density_downtown',
    'population_density_city',
    'housing_density_downtown',
    'housing_density_city',
    'employment_density_downtown',
]
rfc_data[density_columns] = rfc_data[density_columns] / 1000000


In [10]:
# Remove the 'Unnamed: 0' and 'X' columns (presumably index columns left over from
# the CSV export). errors='ignore' keeps the cell re-runnable: without it a second
# run raises KeyError because the columns are already gone.
rfc_data = rfc_data.drop(columns=['Unnamed: 0', 'X'], errors='ignore')

In [11]:
# Human-readable "City, State/Province" label for each city.
# Corrected labels: "Mississauga, ON", "Fort Worth, TX" and "Omaha, NE"
# (previously misspelled / carrying the wrong state abbreviation).
# Cities missing from this mapping get NaN as display_name.
rfc_data['display_name'] = rfc_data['city'].map({
    'Boston': "Boston, MA",
    'Portland': "Portland, OR",
    'Las Vegas': "Las Vegas, NV",
    'Detroit': "Detroit, MI",
    'New York': "New York, NY",
    'Los Angeles': "Los Angeles, CA",
    'Chicago': "Chicago, IL",
    'Houston': "Houston, TX",
    'Phoenix': "Phoenix, AZ",
    'Philadelphia': "Philadelphia, PA",
    'Vancouver': "Vancouver, BC",
    'Montreal': "Montreal, QC",
    'Calgary': "Calgary, AB",
    'Halifax': "Halifax, NS",
    'London': "London, ON",
    'Edmonton': "Edmonton, AB",
    'Mississauga': "Mississauga, ON",
    'Ottawa': "Ottawa, ON",
    'Winnipeg': "Winnipeg, MB",
    'Toronto': "Toronto, ON",
    'Quebec': "Quebec, QC",
    'Cleveland': "Cleveland, OH",
    'Honolulu': "Honolulu, HI",
    'Cincinnati': "Cincinnati, OH",
    'Pittsburgh': "Pittsburgh, PA",
    'Salt Lake City': "Salt Lake City, UT",
    'Fort Worth': "Fort Worth, TX",
    'Columbus': "Columbus, OH",
    'Indianapolis': "Indianapolis, IN",
    'Charlotte': "Charlotte, NC",
    'San Francisco': "San Francisco, CA",
    'Seattle': "Seattle, WA",
    'Denver': "Denver, CO",
    'Washington DC': "Washington DC",
    'Sacramento': "Sacramento, CA",
    'Kansas City': "Kansas City, MO",
    'Atlanta': "Atlanta, GA",
    'Omaha': "Omaha, NE",
    'Colorado Springs': "Colorado Springs, CO",
    'Raleigh': "Raleigh, NC",
    'Miami': "Miami, FL",
    'Memphis': "Memphis, TN",
    'St Louis': "St Louis, MO",
    'Orlando': "Orlando, FL",
    'San Antonio': "San Antonio, TX",
    'San Diego': "San Diego, CA",
    'Dallas': "Dallas, TX",
    'San Jose': "San Jose, CA",
    'Austin': "Austin, TX",
    'Jacksonville': "Jacksonville, FL",
    'Tulsa': "Tulsa, OK",
    'Bakersfield': "Bakersfield, CA",
    'Wichita': "Wichita, KS",
    'Tampa': "Tampa, FL",
    'New Orleans': "New Orleans, LA",
    'Nashville': "Nashville, TN",
    'Oklahoma City': "Oklahoma City, OK",
    'El Paso': "El Paso, TX",
    'Louisville': "Louisville, KY",
    'Baltimore': "Baltimore, MD",
    'Milwaukee': "Milwaukee, WI",
    'Albuquerque': "Albuquerque, NM",
    'Tucson': "Tucson, AZ",
    'Fresno': "Fresno, CA",
    'Oakland': "Oakland, CA",
    'Minneapolis': "Minneapolis, MN",
})

In [12]:
# Show the feature table after the density rescaling, column drop and display_name mapping above.
display(rfc_data)

Unnamed: 0,city,total_pop_downtown,total_pop_city,pct_singlefam_downtown,pct_singlefam_city,pct_multifam_downtown,pct_multifam_city,pct_mobile_home_and_others_downtown,pct_mobile_home_and_others_city,pct_renter_downtown,...,summer_avg_temp,fall_avg_temp,employment_density_downtown,housing_density_city,housing_density_downtown,pct_hisp_city,pct_hisp_downtown,population_density_city,population_density_downtown,display_name
0,Halifax,27888,405118,14.491299,60.929150,85.575636,36.858588,44.779116,37.875654,82.257519,...,64.550000,53.816667,3.094740e-10,2.725820e-11,2.274246e-09,0.017530,0.077640,6.345336e-11,4.245259e-09,"Halifax, NS"
1,London,34468,394162,27.727273,66.881289,72.196970,33.082774,41.641414,28.878174,69.795609,...,68.950000,53.566667,1.606785e-10,1.869569e-10,1.520036e-09,0.077199,0.030308,4.413710e-10,2.646090e-09,"London, ON"
2,Portland,1036,1138313,0.000000,61.323654,100.000000,37.158414,0.000000,1.517932,89.322917,...,70.566667,57.750000,1.401166e-07,5.543949e-10,5.966708e-10,11.124796,15.444015,1.292752e-09,1.609768e-09,"Portland, OR"
3,Las Vegas,42592,1194541,26.560382,63.083891,71.568787,34.020153,1.870831,2.895956,81.998943,...,88.533333,70.333333,4.010364e-09,2.186182e-10,1.331134e-09,37.867348,58.027329,5.430274e-10,3.074435e-09,"Las Vegas, NV"
4,Detroit,7150,1179021,2.931405,75.422920,97.068595,23.855104,0.000000,0.721976,87.470671,...,74.333333,57.350000,2.992989e-08,8.714055e-10,1.613171e-09,6.394373,5.160839,1.786087e-09,2.254089e-09,"Detroit, MI"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,San Diego,41159,2206746,4.078114,58.083997,95.735398,39.829237,0.186488,2.086766,74.629440,...,72.933333,71.133333,9.420295e-09,4.125719e-10,2.329054e-09,33.716794,20.199713,1.115279e-09,3.373031e-09,"San Diego, CA"
62,Dallas,19788,2879554,3.710994,57.617568,96.128609,41.008623,0.160397,1.373809,87.229547,...,90.266667,72.850000,4.820285e-08,4.202111e-10,2.455316e-09,37.427185,14.099454,1.071232e-09,3.542271e-09,"Dallas, TX"
63,San Jose,2107,1563539,0.000000,63.549979,98.368794,33.244017,1.631206,3.206004,86.401833,...,75.333333,68.383333,3.681682e-08,2.599184e-10,1.601176e-09,26.803041,15.282392,7.636246e-10,2.392679e-09,"San Jose, CA"
64,Austin,10848,1586610,2.064277,59.633895,97.688504,37.193962,0.247219,3.172143,66.739640,...,88.800000,73.883333,4.485651e-08,2.159254e-10,1.917907e-09,32.420444,12.850295,5.390854e-10,2.571750e-09,"Austin, TX"


In [26]:
# Column groups shared by the feature tables below. The resulting column order of
# each table is the same as in the original one-line selections.
commute_cols = [
    'pct_commute_auto_city', 'pct_commute_public_transit_city',
    'pct_commute_bicycle_city', 'pct_commute_walk_city',
]
jobs_cols = [
    'pct_jobs_agriculture_forestry_fishing_hunting',
    'pct_jobs_mining_quarrying_oil_gas',
    'pct_jobs_utilities',
    'pct_jobs_construction',
    'pct_jobs_manufacturing',
    'pct_jobs_wholesale_trade',
    'pct_jobs_retail_trade',
    'pct_jobs_transport_warehouse',
    'pct_jobs_information',
    'pct_jobs_finance_insurance',
    'pct_jobs_real_estate',
    'pct_jobs_professional_science_techical',
    'pct_jobs_management_of_companies_enterprises',
    'pct_jobs_administrative_support_waste',
    'pct_jobs_educational_services',
    'pct_jobs_healthcare_social_assistance',
    'pct_jobs_arts_entertainment_recreation',
    'pct_jobs_accomodation_food_services',
    'pct_jobs_public_administration',
    'employment_entropy',
]
policy_weather_cols = [
    'days_school_closing', 'days_workplace_closing',
    'days_cancel_large_events', 'days_cancel_all_events',
    'days_stay_home_requirements', 'days_income_support',
    'days_mask_mandates', 'winter_avg_temp', 'summer_avg_temp',
]

# Downtown-level features. The race/ethnicity shares (pct_nhwhite_*, pct_nhblack_*,
# pct_nhasian_*, pct_hisp_*) are deliberately not selected in any of the tables.
downtown_cols = (
    ['city', 'total_pop_downtown', 'pct_multifam_downtown', 'pct_renter_downtown',
     'median_age_downtown', 'bachelor_plus_downtown', 'median_hhinc_downtown',
     'median_rent_downtown', 'pct_vacant_downtown', 'median_year_structure_built',
     'median_no_rooms']
    + commute_cols
    + ['housing_units_downtown', 'average_commute_time_city']
    + jobs_cols
    + ['population_density_downtown', 'employment_density_downtown', 'housing_density_downtown']
    + policy_weather_cols
)

# City-level counterpart (no structure-age / room-count columns).
city_cols = (
    ['city', 'total_pop_city', 'pct_multifam_city', 'pct_renter_city',
     'median_age_city', 'bachelor_plus_city', 'median_hhinc_city',
     'median_rent_city', 'pct_vacant_city']
    + commute_cols
    + ['housing_units_city', 'average_commute_time_city']
    + jobs_cols
    + ['population_density_city', 'employment_density_downtown', 'housing_density_city']
    + policy_weather_cols
)

downtown_rfc_table = rfc_data[downtown_cols]
city_rfc_table = rfc_data[city_cols]
# The LQ table uses exactly the downtown column set. An earlier, disabled variant
# combined downtown and city columns and added a population_lq ratio
# (total_pop_downtown / total_pop_city).
lq_rfc_table = rfc_data[downtown_cols]

In [27]:
# Normalise accented city names so they match the spelling used in the feature table.
rq_dwtn_clusters = rq_dwtn_clusters.replace('Québec','Quebec')
rq_dwtn_clusters = rq_dwtn_clusters.replace('Montréal','Montreal')
# Drop the cities excluded from the analysis. .copy() makes the filtered frame an
# independent object, so the column assignment below does not write into a slice
# (same pattern as the LQ table).
downtown_rfc_table = downtown_rfc_table[~downtown_rfc_table['city'].isin(['Dallas','Orlando','Mississauga','Hamilton','Oklahoma City'])].copy()
# Attach each city's downtown RQ cluster; cities absent from the cluster table get NaN.
downtown_rfc_table.loc[:,'cluster'] = downtown_rfc_table.loc[:,'city'].map(dict(zip(rq_dwtn_clusters['city'], rq_dwtn_clusters['cluster'])))

In [28]:
# Normalise accented city names so they match the spelling used in the feature table.
rq_city_clusters = rq_city_clusters.replace('Québec','Quebec')
rq_city_clusters = rq_city_clusters.replace('Montréal','Montreal')
# Drop the excluded cities; .copy() avoids assigning into a slice of the original frame.
city_rfc_table = city_rfc_table[~city_rfc_table['city'].isin(['Dallas','Orlando','Mississauga','Hamilton','Oklahoma City'])].copy()
# Attach each city's city-level RQ cluster; cities absent from the cluster table get NaN.
city_rfc_table.loc[:,'cluster'] = city_rfc_table.loc[:,'city'].map(dict(zip(rq_city_clusters['city'], rq_city_clusters['cluster'])))

In [29]:
# Normalise accented city names, relabel the raw LQ cluster ids as L1-L4 and attach
# the label to every non-excluded city of the LQ feature table.
lq_clusters = lq_clusters.replace('Québec','Quebec').replace('Montréal','Montreal')
lq_label_by_cluster = {0:"L1",1:"L3",2:"L2",3:"L4",4:"L1",5:"L1"}
lq_clusters['cluster_map'] = lq_clusters['cluster'].map(lq_label_by_cluster)

lq_is_excluded = lq_rfc_table['city'].isin(['Dallas','Orlando','Mississauga','Hamilton','Oklahoma City'])
lq_rfc_table = lq_rfc_table[~lq_is_excluded].copy()

lq_label_by_city = dict(zip(lq_clusters['city'], lq_clusters['cluster_map']))
lq_rfc_table.loc[:,'cluster'] = lq_rfc_table.loc[:,'city'].map(lq_label_by_city)

In [30]:
# Period-1 downtown clusters: normalise accented names and relabel raw ids as R1.x.
rq_dwtn_clusters_period_1 = rq_dwtn_clusters_period_1.replace('Québec','Quebec')
rq_dwtn_clusters_period_1 = rq_dwtn_clusters_period_1.replace('Montréal','Montreal')
rq_dwtn_clusters_period_1['cluster_map'] = rq_dwtn_clusters_period_1['cluster'].map({0:"R1.1",1:"R1.5",2:"R1.3",3:"R1.4",4:"R1.2",5:"R1.1"})
# Re-applying the exclusion filter is harmless; .copy() ensures the assignment below
# targets an independent frame rather than a slice.
downtown_rfc_table = downtown_rfc_table[~downtown_rfc_table['city'].isin(['Dallas','Orlando','Mississauga','Hamilton','Oklahoma City'])].copy()
downtown_rfc_table.loc[:,'cluster_period_1'] = downtown_rfc_table.loc[:,'city'].map(dict(zip(rq_dwtn_clusters_period_1['city'], rq_dwtn_clusters_period_1['cluster_map'])))

In [31]:
# Period-2 downtown clusters: normalise accented names and relabel raw ids as R2.x.
rq_dwtn_clusters_period_2 = rq_dwtn_clusters_period_2.replace('Québec','Quebec')
rq_dwtn_clusters_period_2 = rq_dwtn_clusters_period_2.replace('Montréal','Montreal')
rq_dwtn_clusters_period_2['cluster_map'] = rq_dwtn_clusters_period_2['cluster'].map({0:"R2.5",1:"R2.1",2:"R2.3",3:"R2.1",4:"R2.4",5:"R2.2"})
# Re-applying the exclusion filter is harmless; .copy() ensures the assignment below
# targets an independent frame rather than a slice.
downtown_rfc_table = downtown_rfc_table[~downtown_rfc_table['city'].isin(['Dallas','Orlando','Mississauga','Hamilton','Oklahoma City'])].copy()
downtown_rfc_table.loc[:,'cluster_period_2'] = downtown_rfc_table.loc[:,'city'].map(dict(zip(rq_dwtn_clusters_period_2['city'], rq_dwtn_clusters_period_2['cluster_map'])))

In [42]:
# Seed passed as random_state to the train/test splits, the random forests and the
# randomized hyper-parameter search in the cells below.
set_seed = 1

In [43]:

from sklearn.model_selection import RandomizedSearchCV

# Candidate hyper-parameter values for the randomized search.
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split
max_features = ['sqrt']
# Maximum number of levels in tree (None = unlimited)
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)] + [None]
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]

# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

# The search is run against the period-2 downtown clusters only.
X = downtown_rfc_table.drop(columns=['city','cluster','cluster_period_1','cluster_period_2']).to_numpy()
y = downtown_rfc_table['cluster_period_2'].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = set_seed)

# Standardise the features, as generate_random_forest_classifier does, so the tuned
# parameters are selected on the same kind of input the final models see. The scaler
# is fitted on the training rows only.
# NOTE(review): the rescaled density columns are on the order of 1e-9; differences
# that small may be treated as ties by the tree splitter when left unscaled —
# confirm against the scikit-learn version in use.
search_scaler = StandardScaler()
X_train = search_scaler.fit_transform(X_train)
X_test = search_scaler.transform(X_test)

rf = RandomForestClassifier( random_state = set_seed)
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, cv = 3, verbose=2,  random_state = set_seed, n_jobs = -1)
rf_random.fit(X_train, y_train)
rf_random.best_params_

Fitting 3 folds for each of 100 candidates, totalling 300 fits




[CV] END bootstrap=True, max_depth=90, max_features=sqrt, min_samples_leaf=4, min_samples_split=5, n_estimators=400; total time=   0.5s
[CV] END bootstrap=True, max_depth=50, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_estimators=600; total time=   0.8s
[CV] END bootstrap=True, max_depth=80, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_estimators=600; total time=   0.9s
[CV] END bootstrap=True, max_depth=80, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_estimators=600; total time=   0.9s
[CV] END bootstrap=True, max_depth=90, max_features=sqrt, min_samples_leaf=4, min_samples_split=5, n_estimators=400; total time=   0.9s
[CV] END bootstrap=True, max_depth=50, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_estimators=600; total time=   1.0s
[CV] END bootstrap=True, max_depth=80, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_estimators=600; total time=   1.0s
[CV] END bootstrap=True, max_depth=90, max_

{'n_estimators': 1000,
 'min_samples_split': 10,
 'min_samples_leaf': 1,
 'max_features': 'sqrt',
 'max_depth': 100,
 'bootstrap': False}

In [44]:
def generate_random_forest_classifier(X, y, indextable, test_size=0.001, rf_params=None):
    """Fit a random forest on standardised features and rank the feature importances.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix.
    y : array-like of shape (n_samples,)
        Cluster label per row.
    indextable : pandas.DataFrame
        Frame whose columns name the features of ``X``, in the same order.
    test_size : float or int, default 0.001
        Forwarded to ``train_test_split``. With the default, scikit-learn holds out
        ceil(0.001 * n_samples) rows, i.e. a single row for any table of up to 1000
        rows, so ``test_score`` can only be 0.0 or 1.0 and is not a usable accuracy
        estimate. Pass a larger value for a meaningful hold-out score.
    rf_params : dict, optional
        Overrides for the ``RandomForestClassifier`` keyword arguments below.

    Returns
    -------
    feature_importances : pandas.Series
        Importances indexed by feature name, sorted in descending order.
    train_score : float
        Mean accuracy on the training rows.
    test_score : float
        Mean accuracy on the held-out rows.

    Raises
    ------
    ValueError
        If the number of columns in ``indextable`` differs from the number of
        features in ``X``.
    """
    X = np.asarray(X)
    feature_names = indextable.columns
    # Fail before the (slow) fit rather than when building the importance Series.
    if X.ndim != 2 or X.shape[1] != len(feature_names):
        raise ValueError(
            'indextable has %d columns but X has shape %s' % (len(feature_names), X.shape)
        )

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = test_size,
                                                        random_state = set_seed
                                                        )
    # Fit the scaler on the training rows only so held-out rows do not influence it.
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    # NOTE(review): the randomized search output recorded in this notebook reports
    # bootstrap=False, while True is used here — confirm which is intended.
    params = dict(n_estimators = 1000,
                  min_samples_split = 10,
                  min_samples_leaf = 1,
                  max_features = 'sqrt',
                  random_state = set_seed,
                  max_depth = 100,
                  bootstrap = True)
    if rf_params:
        params.update(rf_params)

    model = RandomForestClassifier(**params)
    model.fit(X_train, y_train)
    train_score = model.score(X_train, y_train)
    test_score = model.score(X_test, y_test)
    feature_importances = pd.Series(model.feature_importances_, index = feature_names).sort_values(ascending = False)
    return feature_importances, train_score, test_score

In [45]:
#Generate RFC Downtown
# Build the feature frame once and reuse it for both the matrix and the feature names.
dwtn_feature_frame = downtown_rfc_table.drop(columns=['city','cluster','cluster_period_1','cluster_period_2'])
X_dwtn = dwtn_feature_frame.to_numpy()
y_dwtn = downtown_rfc_table['cluster'].to_numpy()
feature_importances, train_score, test_score = generate_random_forest_classifier(X_dwtn, y_dwtn, dwtn_feature_frame)
print(f'train score: {train_score!s}')
print(f'test score: {test_score!s}')
feature_importances.iloc[:60]

train score: 0.9672131147540983
test score: 0.0


pct_jobs_information                             0.050694
pct_jobs_professional_science_techical           0.050088
pct_commute_public_transit_city                  0.036146
pct_jobs_public_administration                   0.032539
pct_jobs_construction                            0.031297
pct_commute_bicycle_city                         0.029324
pct_renter_downtown                              0.029032
pct_commute_auto_city                            0.028934
pct_jobs_agriculture_forestry_fishing_hunting    0.028743
pct_jobs_management_of_companies_enterprises     0.027658
winter_avg_temp                                  0.027314
employment_density_downtown                      0.027209
pct_multifam_downtown                            0.026452
pct_jobs_finance_insurance                       0.025542
pct_jobs_accomodation_food_services              0.024901
pct_vacant_downtown                              0.024334
pct_commute_walk_city                            0.023606
total_pop_down

In [46]:
#Generate RFC City
# Build the feature frame once and reuse it for both the matrix and the feature names.
city_feature_frame = city_rfc_table.drop(columns=['city','cluster'])
X_city = city_feature_frame.to_numpy()
y_city = city_rfc_table['cluster'].to_numpy()
feature_importances, train_score, test_score = generate_random_forest_classifier(X_city, y_city, city_feature_frame)
print(f'train score: {train_score!s}')
feature_importances.iloc[:60]

train score: 0.9344262295081968


pct_jobs_information                             0.043303
employment_density_downtown                      0.040934
pct_jobs_retail_trade                            0.039469
bachelor_plus_city                               0.038400
pct_commute_bicycle_city                         0.036469
days_cancel_large_events                         0.031272
population_density_city                          0.030011
days_stay_home_requirements                      0.029746
pct_multifam_city                                0.029207
pct_commute_auto_city                            0.028154
housing_density_city                             0.027461
pct_renter_city                                  0.025135
pct_jobs_public_administration                   0.024926
average_commute_time_city                        0.024149
pct_commute_public_transit_city                  0.023854
pct_jobs_finance_insurance                       0.023534
pct_jobs_management_of_companies_enterprises     0.023288
pct_jobs_accom

In [47]:
#Generate RFC LQ
# Build the feature frame once and reuse it for both the matrix and the feature names.
lq_feature_frame = lq_rfc_table.drop(columns=['city','cluster'])
X_lq = lq_feature_frame.to_numpy()
y_lq = lq_rfc_table['cluster'].to_numpy()
feature_importances, train_score, test_score = generate_random_forest_classifier(X_lq, y_lq, lq_feature_frame)
print(f'train score: {train_score!s}')
feature_importances.iloc[:60]

train score: 1.0


pct_jobs_professional_science_techical           0.055936
median_hhinc_downtown                            0.052654
winter_avg_temp                                  0.048340
bachelor_plus_downtown                           0.045191
average_commute_time_city                        0.038261
employment_density_downtown                      0.037072
pct_jobs_information                             0.036053
pct_jobs_educational_services                    0.031265
pct_jobs_manufacturing                           0.027202
pct_jobs_public_administration                   0.026518
median_rent_downtown                             0.025039
pct_jobs_retail_trade                            0.023591
pct_jobs_finance_insurance                       0.021721
total_pop_downtown                               0.021423
pct_jobs_transport_warehouse                     0.021134
pct_jobs_utilities                               0.020859
pct_commute_public_transit_city                  0.019854
housing_units_

In [51]:
#Generate RFC Downtown Period 1 Jun 20 - May 21
# Same downtown features, with the period-1 cluster labels as the target.
dwtn_feature_frame = downtown_rfc_table.drop(columns=['city','cluster','cluster_period_1','cluster_period_2'])
X_dwtn = dwtn_feature_frame.to_numpy()
y_dwtn = downtown_rfc_table['cluster_period_1'].to_numpy()
feature_importances, train_score, test_score = generate_random_forest_classifier(X_dwtn, y_dwtn, dwtn_feature_frame)
print(f'train score: {train_score!s}')
print(f'test score: {test_score!s}')
# Persist the ranked importances, then show them as a table.
feature_importances.to_csv('~/data/downtownrecovery/rfc_outputs/downtown_period1.csv')
display(pd.DataFrame(feature_importances.reset_index()))

train score: 1.0
test score: 1.0


Unnamed: 0,index,0
0,pct_jobs_information,0.063499
1,pct_jobs_professional_science_techical,0.058263
2,pct_multifam_downtown,0.037596
3,pct_jobs_construction,0.036688
4,pct_commute_public_transit_city,0.036301
5,pct_renter_downtown,0.03517
6,pct_commute_auto_city,0.031075
7,pct_commute_walk_city,0.027426
8,pct_jobs_educational_services,0.026394
9,employment_density_downtown,0.02514


In [52]:
#Generate RFC Downtown Period 2 Jun 21 - May 22
# Same downtown features, with the period-2 cluster labels as the target.
dwtn_feature_frame = downtown_rfc_table.drop(columns=['city','cluster','cluster_period_1','cluster_period_2'])
X_dwtn = dwtn_feature_frame.to_numpy()
y_dwtn = downtown_rfc_table['cluster_period_2'].to_numpy()
feature_importances, train_score, test_score = generate_random_forest_classifier(X_dwtn, y_dwtn, dwtn_feature_frame)
print(f'train score: {train_score!s}')
print(f'test score: {test_score!s}')
# Persist the ranked importances, then show them as a table.
feature_importances.to_csv('~/data/downtownrecovery/rfc_outputs/downtown_period2.csv')
display(pd.DataFrame(feature_importances.reset_index()))

train score: 0.9672131147540983
test score: 1.0


Unnamed: 0,index,0
0,pct_jobs_professional_science_techical,0.057053
1,pct_multifam_downtown,0.038658
2,summer_avg_temp,0.037935
3,pct_commute_auto_city,0.034444
4,pct_commute_public_transit_city,0.03411
5,employment_density_downtown,0.03371
6,winter_avg_temp,0.0323
7,pct_commute_bicycle_city,0.032269
8,days_income_support,0.030851
9,pct_jobs_manufacturing,0.030467


In [53]:
#Generate RFC LQ
# Note: the X_dwtn / y_dwtn names are reused here but hold the LQ features and labels.
lq_feature_frame = lq_rfc_table.drop(columns=['city','cluster'])
X_dwtn = lq_feature_frame.to_numpy()
y_dwtn = lq_rfc_table['cluster'].to_numpy()
feature_importances, train_score, test_score = generate_random_forest_classifier(X_dwtn, y_dwtn, lq_feature_frame)
print(f'train score: {train_score!s}')
print(f'test score: {test_score!s}')
# Persist the ranked importances, then show them as a table.
feature_importances.to_csv('~/data/downtownrecovery/rfc_outputs/lq.csv')
display(pd.DataFrame(feature_importances.reset_index()))

train score: 1.0
test score: 1.0


Unnamed: 0,index,0
0,pct_jobs_professional_science_techical,0.055936
1,median_hhinc_downtown,0.052654
2,winter_avg_temp,0.04834
3,bachelor_plus_downtown,0.045191
4,average_commute_time_city,0.038261
5,employment_density_downtown,0.037072
6,pct_jobs_information,0.036053
7,pct_jobs_educational_services,0.031265
8,pct_jobs_manufacturing,0.027202
9,pct_jobs_public_administration,0.026518


In [None]:
# Load the combined metrics table.
# NOTE(review): get_table_as_pandas_df is neither defined nor imported in the visible
# part of this notebook — presumably supplied by the runtime environment; confirm.
normalized_visits = get_table_as_pandas_df('0714_combined_metrics_df')

In [None]:
# Weekly downtown visit series for a hand-picked set of cities.
chronicle_cities = ['Calgary','Quebec','San Francisco','Los Angeles','San Jose','Seattle','New York','Washington DC','Salt Lake City','Atlanta','Cleveland']

downtown_visits = normalized_visits.loc[
    normalized_visits['metric']=='downtown',
    ['city','normalized_visits_by_total_visits','week'],
]
# Drop rows without a visit value and normalise the accented city name. assign()
# returns a new frame, so nothing is written into a slice of normalized_visits.
normalized_visits_chronicle = (
    downtown_visits[downtown_visits['normalized_visits_by_total_visits'].notna()]
    .assign(city=lambda frame: frame['city'].replace('Québec','Quebec'))
)
normalized_visits_chronicle = normalized_visits_chronicle[normalized_visits_chronicle['city'].isin(chronicle_cities)]
# drop must be the boolean True; the previous string 'True' only worked by truthiness.
display(normalized_visits_chronicle.sort_values(['city','week']).reset_index(drop=True))

In [None]:
# Number of rows per week in the selected cities' downtown series.
normalized_visits_chronicle['week'].value_counts()

In [None]:
# Count downtown rows whose state-scaled visit value is missing. The original line had
# a column name fused onto the front of the expression, which raised NameError.
normalized_visits[normalized_visits['metric']=='downtown']['normalized_visits_by_state_scaling'].isna().sum()

In [None]:
# Reload only the columns needed for the cluster time series and normalise the
# accented city names.
normalized_visits = (
    get_table_as_pandas_df('0714_combined_metrics_df')[['city','metric','normalized_visits_by_total_visits','week']]
    .replace('Montréal','Montreal')
    .replace('Québec','Quebec')
)
visits_is_excluded = normalized_visits["city"].isin(['Dallas','Orlando','Mississauga','Hamilton','Oklahoma City'])
normalized_visits = normalized_visits[~visits_is_excluded].copy()

# City-level ("metro") rows, tagged with each city's RQ cluster id.
normalized_visits_city = normalized_visits[normalized_visits["metric"]=="metro"].copy()
city_cluster_by_name = dict(zip(rq_city_clusters['city'], rq_city_clusters['cluster']))
normalized_visits_city['cluster'] = normalized_visits_city['city'].map(city_cluster_by_name).astype(int)

In [None]:
# Re-apply the exclusion filter and cluster mapping to the city table. .copy() makes
# the filtered frame independent, so the assignment below does not write into a slice.
city_rfc_table = city_rfc_table[~city_rfc_table['city'].isin(['Dallas','Orlando','Mississauga','Hamilton','Oklahoma City'])].copy()
city_rfc_table.loc[:,'cluster'] = city_rfc_table.loc[:,'city'].map(dict(zip(rq_city_clusters['city'], rq_city_clusters['cluster'])))

In [None]:
# Mean city RQ per cluster and week. The value column is selected before aggregating:
# averaging the whole frame would also try to average the string columns
# (city, metric), which raises TypeError on pandas 2.x.
mean_city = normalized_visits_city.groupby(['cluster','week'])['normalized_visits_by_total_visits'].mean().reset_index()

In [None]:
# Maximum city RQ per cluster and week; only the value column is aggregated instead
# of every column of the frame.
max_city = normalized_visits_city.groupby(['cluster','week'])['normalized_visits_by_total_visits'].max().reset_index()

In [None]:
# Minimum city RQ per cluster and week; only the value column is aggregated instead
# of every column of the frame.
min_city = normalized_visits_city.groupby(['cluster','week'])['normalized_visits_by_total_visits'].min().reset_index()

In [None]:
# Parse the week column into datetimes; it is later used as the x axis of the line plot.
mean_city["week"] = pd.to_datetime(mean_city["week"])

In [None]:
# 9-row rolling mean of the weekly cluster means, shifted back 4 rows so each window
# is centred on its row. Per-cluster pieces are collected in a list and concatenated
# once, instead of growing a DataFrame (seeded with an empty frame) inside the loop.
rolling_parts = []
for cluster_id in mean_city["cluster"].unique():
    cluster_means = mean_city[mean_city["cluster"]==cluster_id].copy()
    cluster_means['rolling'] = cluster_means['normalized_visits_by_total_visits'].rolling(window=9).mean().shift(-4)
    rolling_parts.append(cluster_means)
# pd.concat rejects an empty list, so fall back to an empty frame as before.
mean_city_rolling = pd.concat(rolling_parts) if rolling_parts else pd.DataFrame()

In [None]:
import seaborn as sns
#sns.set_style('darkgrid')
# One smoothed line per city cluster.
sns.set(rc={'figure.figsize':(14,8)})
ax = sns.lineplot(x='week', y='rolling', hue='cluster', palette='tab10', data=mean_city_rolling)
ax.set_ylabel('City RQ')
ax.set_xlabel('Date')
plt.show()

In [None]:
# datetime is used by the date filter in the downtown cluster-extrema cell; seaborn
# supplies the colour palette there.
import datetime
import seaborn as sns
# Echo the date that the extrema cell uses as its lower cut-off.
datetime.date(2020, 6, 1)

In [None]:
# Downtown rows tagged with their RQ cluster id, with week converted to plain dates.
normalized_visits_dwtn = normalized_visits[normalized_visits["metric"]=="downtown"].copy()
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['city'].map(dict(zip(rq_dwtn_clusters['city'], rq_dwtn_clusters['cluster']))).astype(int)
#mean_dwtn = normalized_visits_dwtn.groupby(['cluster','week']).mean()['normalized_visits_by_total_visits'].reset_index()
normalized_visits_dwtn["week"] = pd.to_datetime(normalized_visits_dwtn["week"]).dt.date
# Show the rows that contain at least one missing value. Masking with the full boolean
# frame (the previous form) keeps every row and blanks out all non-null cells instead.
normalized_visits_dwtn[normalized_visits_dwtn.isnull().any(axis=1)]

In [None]:
 # Per-cluster median of the rolling series. NOTE(review): `grouped` is first assigned
 # in the cell after this one, so on a fresh kernel this cell fails when run in order.
 grouped.loc[:,('rolling', 'median')]

In [None]:
# Downtown RQ per cluster: median line with a shaded min-max band.
normalized_visits_dwtn = normalized_visits[normalized_visits["metric"]=="downtown"].copy()
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['city'].map(dict(zip(rq_dwtn_clusters['city'], rq_dwtn_clusters['cluster']))).astype(int)
normalized_visits_dwtn["week"] = pd.to_datetime(normalized_visits_dwtn["week"]).dt.date
normalized_visits_dwtn = normalized_visits_dwtn.dropna()
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn["week"]>datetime.date(2020, 6, 1)]
# Per-city 19-row rolling mean in week order, shifted back 9 rows so each window is
# centred on its row; the result is aligned back to the frame by index.
normalized_visits_dwtn['rolling'] = (
    normalized_visits_dwtn.sort_values(by = 'week')
    .groupby(['city'])['normalized_visits_by_total_visits']
    .transform(lambda x : x.rolling(window=19).mean().shift(-9))
)
# Rows at the edges of each city's series have no complete window and are dropped.
normalized_visits_dwtn = normalized_visits_dwtn.dropna()

# compute the min, median and max of the rolling series per cluster and week;
# columns become (rolling, statistic, cluster)
grouped = normalized_visits_dwtn.groupby(["cluster", "week"]).agg({'rolling': ['min', 'median', 'max']}).unstack("cluster")
# keep only weeks for which every cluster has all three statistics
grouped = grouped.dropna()

# plot medians (one line per cluster, in column order)
medians = grouped.loc[:,('rolling', 'median')]
ax = medians.plot(figsize = (16, 12), sharey = True)

# Getting the color palette used
palette = sns.color_palette()

# Shade between min and max for each cluster. Iterating over the cluster labels that
# are actually present (rather than 0..k-1) avoids a KeyError when labels are not
# contiguous; the band colour follows the line's position in the plot.
for position, cluster_id in enumerate(medians.columns):
    if position >= len(palette):
        # no palette colour left for further clusters
        break
    ax.fill_between(grouped.index, grouped.loc[:,('rolling', 'median', cluster_id)],
                    grouped.loc[:,('rolling', 'max', cluster_id)], alpha=.2, color=palette[position])
    ax.fill_between(grouped.index,
                    grouped.loc[:,('rolling', 'min', cluster_id)], grouped.loc[:,('rolling', 'median', cluster_id)], alpha=.2, color=palette[position])
plt.ylabel('Downtown RQ')
plt.xlabel('Date')
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.title('Downtown recovery cluster extrema')
plt.show()

In [None]:
# Downtown rows only, tagged with each city's downtown recovery cluster.
normalized_visits_dwtn = normalized_visits[normalized_visits["metric"]=="downtown"].copy()
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['city'].map(dict(zip(rq_dwtn_clusters['city'], rq_dwtn_clusters['cluster']))).astype(int)
# Cluster-level weekly mean. The metric column is selected before .mean() so the
# string columns (city, metric) are never averaged (TypeError on pandas >= 2.0).
mean_dwtn = normalized_visits_dwtn.groupby(['cluster','week'])['normalized_visits_by_total_visits'].mean().reset_index()
mean_dwtn["week"] = pd.to_datetime(mean_dwtn["week"]).dt.date
# .copy() so the column assignment below writes to an independent frame.
mean_dwtn = mean_dwtn[mean_dwtn["week"]>datetime.date(2020, 6, 1)].copy()
# Centered 19-week rolling mean within each cluster (shift(-9) re-labels each
# window at its middle week).
mean_dwtn['rolling'] = mean_dwtn.groupby('cluster')['normalized_visits_by_total_visits'].transform(lambda x : x.rolling(window=19).mean().shift(-9))
sns.set(rc={'figure.figsize':(14,10)})
ax = sns.lineplot(data=mean_dwtn, x ='week', y = 'rolling', hue='cluster', palette='tab10')
plt.ylabel('Downtown RQ')
plt.xlabel('Date')
plt.show()

In [None]:
# Rows for the week of 2020-12-07.
# NOTE(review): only matches if `week` holds datetime.date values; the cell directly
# above converts `mean_dwtn["week"]` but not this frame's column — confirm.
normalized_visits_dwtn.loc[normalized_visits_dwtn["week"].eq(datetime.date(2020, 12, 7))]

In [None]:
# Period-2 downtown clusters, with accented city names normalised to ASCII.
clusters = get_table_as_pandas_df('rq_dwtn_clusters_0823_period_2').replace('Montréal','Montreal').replace('Québec','Quebec')
normalized_visits_dwtn = normalized_visits[normalized_visits["metric"]=="downtown"].copy()
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['city'].map(dict(zip(clusters['city'], clusters['cluster']))).astype(int)
# A rolling window runs over rows in their current order, so put the rows in week
# order first (as the later plotting cells do); otherwise the "9-week" window can
# mix non-adjacent weeks.
normalized_visits_dwtn = normalized_visits_dwtn.sort_values(by='week')
# Centered 9-week rolling mean per city (shift(-4) re-labels each window at its middle week).
normalized_visits_dwtn['rolling'] = normalized_visits_dwtn.groupby('city')['normalized_visits_by_total_visits'].transform(lambda x : x.rolling(window=9).mean().shift(-4))
normalized_visits_dwtn["week"] = pd.to_datetime(normalized_visits_dwtn["week"]).dt.date
# Distinct weeks present in the data.
normalized_visits_dwtn.week.unique()

In [None]:
# Period-2 downtown clusters, with accented city names normalised to ASCII.
clusters = get_table_as_pandas_df('rq_dwtn_clusters_0823_period_2').replace('Montréal','Montreal').replace('Québec','Quebec')
normalized_visits_dwtn = normalized_visits[normalized_visits["metric"]=="downtown"].copy()
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['city'].map(dict(zip(clusters['city'], clusters['cluster']))).astype(int)
# Rolling windows run over rows in their current order, so sort by week first.
normalized_visits_dwtn = normalized_visits_dwtn.sort_values(by='week')
# NOTE(review): every other cell centres its window (9/-4, 15/-7, 19/-9); a centred
# 5-week window would use shift(-2), so shift(-3) looks off by one — confirm intent.
normalized_visits_dwtn['rolling'] = normalized_visits_dwtn.groupby('city')['normalized_visits_by_total_visits'].transform(lambda x : x.rolling(window=5).mean().shift(-3))
# Convert this frame's own week column. The previous code assigned
# pd.to_datetime(mean_dwtn["week"]), i.e. the weeks of a different (cluster-level)
# frame aligned by row index, which overwrote `week` with unrelated or missing dates.
normalized_visits_dwtn["week"] = pd.to_datetime(normalized_visits_dwtn["week"]).dt.date
sns.set(rc={'figure.figsize':(14,10)})
# No `units`/`estimator` given, so seaborn aggregates the cities of each cluster per week.
ax = sns.lineplot(data=normalized_visits_dwtn, x ='week', y = 'rolling', hue='cluster', palette='tab10')
plt.ylabel('Downtown RQ')
plt.xlabel('Date')
plt.show()

In [None]:
# Period-2 downtown clusters, with accented city names normalised to ASCII.
clusters = get_table_as_pandas_df('rq_dwtn_clusters_0823_period_2').replace('Montréal','Montreal').replace('Québec','Quebec')
normalized_visits_dwtn = normalized_visits[normalized_visits["metric"]=="downtown"].copy()
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['city'].map(dict(zip(clusters['city'], clusters['cluster']))).astype(int)
# Sort by week before smoothing and plotting: the rolling window and plt.plot both
# follow row order, so unsorted rows would mix non-adjacent weeks and zig-zag the lines.
normalized_visits_dwtn = normalized_visits_dwtn.sort_values(by='week')
# NOTE(review): a centred 5-week window would use shift(-2); shift(-3) looks off by
# one compared with the other cells — confirm intent.
normalized_visits_dwtn['rolling'] = normalized_visits_dwtn.groupby('city')['normalized_visits_by_total_visits'].transform(lambda x : x.rolling(window=5).mean().shift(-3))
# Fixed colour per cluster id (only ids 0-5 are mapped).
normalized_visits_dwtn["color"] = normalized_visits_dwtn["cluster"].map({0:'blue', 1:'green',2:'orange',3:'red',4:'pink',5:'black'})
sns.set(rc={'figure.figsize':(14,10)})
# One thin line per city, coloured by that city's cluster.
for city, city_data in normalized_visits_dwtn.groupby('city', sort=False):
    plt.plot(city_data['week'], city_data['rolling'], color=city_data['color'].iloc[0], linewidth=0.5)
plt.ylabel('Downtown RQ')
plt.xlabel('Date')
plt.show()

In [None]:
import seaborn as sns

In [None]:
# Period-1 downtown clusters and the weekly metrics table, with accented city
# names normalised to ASCII.
clusters = get_table_as_pandas_df('rq_dwtn_clusters_1015_period_1').replace('Montréal','Montreal').replace('Québec','Quebec')
normalized_visits = get_table_as_pandas_df('metrics_1015').replace('Montréal','Montreal').replace('Québec','Quebec')
# Cities excluded from the analysis.
normalized_visits = normalized_visits[~normalized_visits['city'].isin(['Orlando','Dallas','Hamilton','Mississauga','Oklahoma City'])]
normalized_visits_dwtn = normalized_visits[normalized_visits["metric"]=="downtown"].copy()
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['city'].map(dict(zip(clusters['city'], clusters['cluster']))).astype(int)
# Sort by week BEFORE smoothing: a rolling window runs over rows in their current
# order, so sorting afterwards (as before) does not make the window chronological.
normalized_visits_dwtn = normalized_visits_dwtn.sort_values(by='week')
# Centered 9-week rolling mean per city (shift(-4) re-labels each window at its middle week).
normalized_visits_dwtn['rolling'] = normalized_visits_dwtn.groupby('city')['normalized_visits_by_total_visits'].transform(lambda x : x.rolling(window=9).mean().shift(-4))
# `week` is still text here, so this is a string comparison against an ISO date;
# .copy() so the assignment below writes to an independent frame.
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn['week']<'2022-06-01'].copy()
normalized_visits_dwtn["week"] = pd.to_datetime(normalized_visits_dwtn["week"].values)
sns.set(rc={'figure.figsize':(14,10)})
# units='city' with estimator=None draws one faint line per city, coloured by cluster.
ax = sns.lineplot(data=normalized_visits_dwtn, x ='week', y = 'rolling', hue='cluster', palette='tab10', units='city', alpha=0.3, estimator=None)
plt.ylabel('Downtown RQ')
plt.xlabel('Date')
plt.show()

In [None]:
# Period-2 downtown clusters and the weekly metrics table, with accented city
# names normalised to ASCII.
clusters = get_table_as_pandas_df('rq_dwtn_clusters_0823_period_2').replace('Montréal','Montreal').replace('Québec','Quebec')
normalized_visits = get_table_as_pandas_df('metrics_1015').replace('Montréal','Montreal').replace('Québec','Quebec')
# Cities excluded from the analysis.
normalized_visits = normalized_visits[~normalized_visits['city'].isin(['Orlando','Dallas','Hamilton','Mississauga','Oklahoma City'])]
normalized_visits_dwtn = normalized_visits[normalized_visits["metric"]=="downtown"].copy()
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['city'].map(dict(zip(clusters['city'], clusters['cluster']))).astype(int)
# Sort by week BEFORE smoothing: a rolling window runs over rows in their current
# order, so sorting afterwards (as before) does not make the window chronological.
normalized_visits_dwtn = normalized_visits_dwtn.sort_values(by='week')
# Centered 9-week rolling mean per city (shift(-4) re-labels each window at its middle week).
normalized_visits_dwtn['rolling'] = normalized_visits_dwtn.groupby('city')['normalized_visits_by_total_visits'].transform(lambda x : x.rolling(window=9).mean().shift(-4))
# `week` is still text here, so this is a string comparison against an ISO date;
# .copy() so the assignment below writes to an independent frame.
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn['week']<'2022-06-01'].copy()
normalized_visits_dwtn["week"] = pd.to_datetime(normalized_visits_dwtn["week"].values)
sns.set(rc={'figure.figsize':(14,10)})
# units='city' with estimator=None draws one faint line per city, coloured by cluster.
ax = sns.lineplot(data=normalized_visits_dwtn, x ='week', y = 'rolling', hue='cluster', palette='tab10', units='city', alpha=0.3, estimator=None)
plt.ylabel('Downtown RQ')
plt.xlabel('Date')
plt.show()

In [None]:
# Weekly metrics (four columns only) with accented city names normalised to ASCII.
normalized_visits = get_table_as_pandas_df('0714_combined_metrics_df')[['city','metric','normalized_visits_by_total_visits','week']].replace('Montréal','Montreal').replace('Québec','Quebec')
# Cities excluded from the analysis.
normalized_visits = normalized_visits[~normalized_visits["city"].isin(['Dallas','Orlando','Mississauga','Hamilton','Oklahoma City'])].copy()
normalized_visits_lq = normalized_visits[normalized_visits["metric"]=="relative"].copy()
# Map clusters from THIS frame's city column. The previous code mapped
# normalized_visits_city['city'], a different frame whose row index does not
# correspond to these rows, so clusters were misaligned or NaN (and .astype(int)
# could then fail).
# NOTE(review): the 'relative' rows are tagged with rq_city_clusters here although an
# lq_clusters table is also loaded at the top of the notebook — confirm which is intended.
normalized_visits_lq['cluster'] = normalized_visits_lq['city'].map(dict(zip(rq_city_clusters['city'], rq_city_clusters['cluster']))).astype(int)

In [None]:
import seaborn as sns

# Switch seaborn's axes style to the white background with grid lines.
sns.set_style(style="whitegrid")

In [None]:
# NOTE(review): this cell plots the 'relative' metric (labelled LQ) but colours the
# lines by the rq_dwtn_* period-1 cluster table — confirm that pairing is intended.
clusters = get_table_as_pandas_df('rq_dwtn_clusters_0823_period_1').replace('Montréal','Montreal').replace('Québec','Quebec')
normalized_visits = get_table_as_pandas_df('0714_combined_metrics_df').replace('Montréal','Montreal').replace('Québec','Quebec')
# Cities excluded from the analysis.
normalized_visits = normalized_visits[~normalized_visits['city'].isin(['Orlando','Dallas','Hamilton','Mississauga','Oklahoma City'])]
# Despite the `_dwtn` name, this frame holds the "relative" metric rows.
normalized_visits_dwtn = normalized_visits[normalized_visits["metric"]=="relative"].copy()
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['city'].map(dict(zip(clusters['city'], clusters['cluster']))).astype(int)
# Sort by week BEFORE smoothing: a rolling window runs over rows in their current
# order, so sorting afterwards (as before) does not make the window chronological.
normalized_visits_dwtn = normalized_visits_dwtn.sort_values(by='week')
# Centered 15-week rolling mean per city (shift(-7) re-labels each window at its middle week).
normalized_visits_dwtn['rolling'] = normalized_visits_dwtn.groupby('city')['normalized_visits_by_total_visits'].transform(lambda x : x.rolling(window=15).mean().shift(-7))
# `week` is still text here, so this is a string comparison against an ISO date;
# .copy() so the assignment below writes to an independent frame.
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn['week']<'2022-06-01'].copy()
normalized_visits_dwtn["week"] = pd.to_datetime(normalized_visits_dwtn["week"].values)
sns.set(rc={'figure.figsize':(14,10)})
# units='city' with estimator=None draws one faint line per city, coloured by cluster.
ax = sns.lineplot(data=normalized_visits_dwtn, x ='week', y = 'rolling', hue='cluster', palette='tab10', units='city', alpha=0.3, estimator=None)
plt.ylabel('LQ')
plt.xlabel('Date')
plt.show()

In [None]:
# NOTE(review): this cell plots the 'relative' metric (labelled LQ) but colours the
# lines by the rq_dwtn_* period-2 cluster table — confirm that pairing is intended.
clusters = get_table_as_pandas_df('rq_dwtn_clusters_0823_period_2').replace('Montréal','Montreal').replace('Québec','Quebec')
normalized_visits = get_table_as_pandas_df('0714_combined_metrics_df').replace('Montréal','Montreal').replace('Québec','Quebec')
# Cities excluded from the analysis.
normalized_visits = normalized_visits[~normalized_visits['city'].isin(['Orlando','Dallas','Hamilton','Mississauga','Oklahoma City'])]
# Despite the `_dwtn` name, this frame holds the "relative" metric rows.
normalized_visits_dwtn = normalized_visits[normalized_visits["metric"]=="relative"].copy()
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['city'].map(dict(zip(clusters['city'], clusters['cluster']))).astype(int)
# Sort by week BEFORE smoothing: a rolling window runs over rows in their current
# order, so sorting afterwards (as before) does not make the window chronological.
normalized_visits_dwtn = normalized_visits_dwtn.sort_values(by='week')
# Centered 15-week rolling mean per city (shift(-7) re-labels each window at its middle week).
normalized_visits_dwtn['rolling'] = normalized_visits_dwtn.groupby('city')['normalized_visits_by_total_visits'].transform(lambda x : x.rolling(window=15).mean().shift(-7))
# `week` is still text here, so this is a string comparison against an ISO date;
# .copy() so the assignment below writes to an independent frame.
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn['week']<'2022-06-01'].copy()
normalized_visits_dwtn["week"] = pd.to_datetime(normalized_visits_dwtn["week"].values)
sns.set(rc={'figure.figsize':(14,10)})
# units='city' with estimator=None draws one faint line per city, coloured by cluster.
ax = sns.lineplot(data=normalized_visits_dwtn, x ='week', y = 'rolling', hue='cluster', palette='tab10', units='city', alpha=0.3, estimator=None)
plt.ylabel('LQ')
plt.xlabel('Date')
plt.show()

In [None]:
# LQ period-2 clusters and the weekly metrics table, with accented city names
# normalised to ASCII.
clusters = get_table_as_pandas_df('lq_clusters_0824_period_2_new').replace('Montréal','Montreal').replace('Québec','Quebec')
normalized_visits = get_table_as_pandas_df('0714_combined_metrics_df').replace('Montréal','Montreal').replace('Québec','Quebec')
# Cities excluded from the analysis.
normalized_visits = normalized_visits[~normalized_visits['city'].isin(['Orlando','Dallas','Hamilton','Mississauga','Oklahoma City'])]
# Despite the `_dwtn` name, this frame holds the "relative" metric rows.
normalized_visits_dwtn = normalized_visits[normalized_visits["metric"]=="relative"].copy()
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['city'].map(dict(zip(clusters['city'], clusters['cluster']))).astype(int)
# Sort by week BEFORE smoothing: a rolling window runs over rows in their current
# order, so sorting afterwards (as before) does not make the window chronological.
normalized_visits_dwtn = normalized_visits_dwtn.sort_values(by='week')
# Centered 15-week rolling mean per city (shift(-7) re-labels each window at its middle week).
normalized_visits_dwtn['rolling'] = normalized_visits_dwtn.groupby('city')['normalized_visits_by_total_visits'].transform(lambda x : x.rolling(window=15).mean().shift(-7))
# `week` is still text here, so this is a string comparison against an ISO date;
# .copy() so the assignment below writes to an independent frame.
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn['week']<'2022-06-01'].copy()
normalized_visits_dwtn["week"] = pd.to_datetime(normalized_visits_dwtn["week"].values)
sns.set(rc={'figure.figsize':(14,10)})
# units='city' with estimator=None draws one faint line per city, coloured by cluster.
ax = sns.lineplot(data=normalized_visits_dwtn, x ='week', y = 'rolling', hue='cluster', palette='tab10', units='city', alpha=0.3, estimator=None)
plt.ylabel('LQ')
plt.xlabel('Date')
plt.show()

In [None]:
# Load the single-period LQ cluster table and render it for inspection.
lq_single_period_clusters = get_table_as_pandas_df('lq_clusters_single_period_4')
display(lq_single_period_clusters)

In [None]:
# Single-period LQ clusters and the weekly metrics table, with accented city names
# normalised to ASCII.
clusters = get_table_as_pandas_df('lq_clusters_single_period_4').replace('Montréal','Montreal').replace('Québec','Quebec')
normalized_visits = get_table_as_pandas_df('0714_combined_metrics_df').replace('Montréal','Montreal').replace('Québec','Quebec')
# Cities excluded from the analysis.
normalized_visits = normalized_visits[~normalized_visits['city'].isin(['Orlando','Dallas','Hamilton','Mississauga','Oklahoma City'])]
# Despite the `_dwtn` name, this frame holds the "relative" (location quotient) rows.
normalized_visits_dwtn = normalized_visits[normalized_visits["metric"]=="relative"].copy()
# Drop weeks with no metric value so they do not punch NaN holes into the rolling mean.
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn["normalized_visits_by_total_visits"].notna()].copy()
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['city'].map(dict(zip(clusters['city'], clusters['cluster']))).astype(int)
# Sort by week BEFORE smoothing: a rolling window runs over rows in their current
# order, so sorting afterwards (as before) does not make the window chronological.
normalized_visits_dwtn = normalized_visits_dwtn.sort_values(by='week')
# Centered 13-week rolling mean per city (shift(-6) re-labels each window at its middle week).
normalized_visits_dwtn['rolling'] = normalized_visits_dwtn.groupby('city')['normalized_visits_by_total_visits'].transform(lambda x : x.rolling(window=13).mean().shift(-6))
# `week` is still text here, so this is a string comparison against an ISO date.
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn['week']<'2022-06-01'].copy()
normalized_visits_dwtn["week"] = pd.to_datetime(normalized_visits_dwtn["week"])
# Cluster average of the smoothed city series. 'rolling' is selected before .mean()
# so the string columns (city, metric) are never averaged (TypeError on pandas >= 2.0).
normalized_visits_dwtn_avg = normalized_visits_dwtn.groupby(['week','cluster'])['rolling'].mean().reset_index()
# One call instead of set_theme + set(rc) + set(font_scale) + set_style: same final
# style, font scale and figure size.
sns.set_theme(style="whitegrid", font_scale=1.15, rc={'figure.figsize': (16, 12)})
# Faint line per city, then a bold line per cluster average on the same axes.
sns.lineplot(data=normalized_visits_dwtn, x ='week', y = 'rolling', hue='cluster', palette='tab10', units='city', alpha=0.25, estimator=None, linewidth=1.5)
sns.lineplot(data=normalized_visits_dwtn_avg, x ='week', y = 'rolling', hue='cluster', palette='tab10', units='cluster', alpha=0.8, estimator=None, linewidth=2.5)
plt.ylabel('Location Quotient')
plt.xlabel('Date (Year-Month)')
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
plt.show()

In [None]:
# Load the period-1 downtown RQ cluster table and render it for inspection.
rq_dwtn_period_1_clusters = get_table_as_pandas_df('rq_dwtn_clusters_0823_period_1')
display(rq_dwtn_period_1_clusters)

In [None]:
import seaborn as sns

In [None]:
# Period-1 downtown clusters and the weekly metrics table, with accented city names
# normalised to ASCII.
clusters = get_table_as_pandas_df('rq_dwtn_clusters_1015_period_1').replace('Montréal','Montreal').replace('Québec','Quebec')
normalized_visits = get_table_as_pandas_df('metrics_1015').replace('Montréal','Montreal').replace('Québec','Quebec')
# Cities excluded from the analysis.
normalized_visits = normalized_visits[~normalized_visits['city'].isin(['Orlando','Dallas','Hamilton','Mississauga','Oklahoma City'])]
normalized_visits_dwtn = normalized_visits[normalized_visits["metric"]=="downtown"].copy()
# Drop weeks with no metric value so they do not punch NaN holes into the rolling mean.
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn["normalized_visits_by_total_visits"].notna()].copy()
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['city'].map(dict(zip(clusters['city'], clusters['cluster']))).astype(int)
# Rows must be in week order for the rolling window to be chronological.
normalized_visits_dwtn = normalized_visits_dwtn.sort_values(by='week')
# Centered 15-week rolling mean per city (shift(-7) re-labels each window at its middle week).
normalized_visits_dwtn['rolling'] = normalized_visits_dwtn.groupby('city')['normalized_visits_by_total_visits'].transform(lambda x : x.rolling(window=15).mean().shift(-7))
# Relabel numeric cluster ids with display names; ids 0 and 5 share the label "R1.1".
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['cluster'].map({0:"R1.1",1:"R1.5",2:"R1.3",3:"R1.4",4:"R1.2",5:"R1.1"})
# `week` is still text here, so these are string comparisons against ISO dates.
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn['week']>'2020-06-01']
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn['week']<'2021-06-01'].copy()
normalized_visits_dwtn["week"] = pd.to_datetime(normalized_visits_dwtn["week"])
# Cluster average of the smoothed city series. 'rolling' is selected before .mean()
# so the string columns (city, metric) are never averaged (TypeError on pandas >= 2.0).
normalized_visits_dwtn_avg = normalized_visits_dwtn.groupby(['week','cluster'])['rolling'].mean().reset_index()
# One call instead of set_theme + set(rc) + set(font_scale) + set_style: same final
# style, font scale and figure size.
sns.set_theme(style="whitegrid", font_scale=1.15, rc={'figure.figsize': (7.5, 12)})
# Faint line per city, then a bold line per cluster average on the same axes.
sns.lineplot(data=normalized_visits_dwtn, x ='week', y = 'rolling', hue='cluster', palette='tab10', units='city', alpha=0.25, estimator=None, linewidth=1.5)
sns.lineplot(data=normalized_visits_dwtn_avg, x ='week', y = 'rolling', hue='cluster', palette='tab10', units='cluster', alpha=0.8, estimator=None, linewidth=2.5)
# NOTE(review): the series plotted is the "downtown" metric, which other cells label
# 'Downtown RQ'; 'Location Quotient' may be a copy-paste leftover — confirm.
plt.ylabel('Location Quotient')
plt.xlabel('Date (Year-Month)')
plt.ylim(0.1,1.6)
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
plt.show()

In [None]:
# Load the period-2 downtown RQ cluster table and render it for inspection.
rq_dwtn_period_2_clusters = get_table_as_pandas_df('rq_dwtn_clusters_1015_period_2')
display(rq_dwtn_period_2_clusters)

In [None]:
# Period-2 downtown clusters and the weekly metrics table, with accented city names
# normalised to ASCII.
clusters = get_table_as_pandas_df('rq_dwtn_clusters_1015_period_2').replace('Montréal','Montreal').replace('Québec','Quebec')
normalized_visits = get_table_as_pandas_df('metrics_1015').replace('Montréal','Montreal').replace('Québec','Quebec')
# Cities excluded from the analysis.
normalized_visits = normalized_visits[~normalized_visits['city'].isin(['Orlando','Dallas','Hamilton','Mississauga','Oklahoma City'])]
normalized_visits_dwtn = normalized_visits[normalized_visits["metric"]=="downtown"].copy()
# Drop weeks with no metric value so they do not punch NaN holes into the rolling mean.
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn["normalized_visits_by_total_visits"].notna()].copy()
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['city'].map(dict(zip(clusters['city'], clusters['cluster']))).astype(int)
# Parse weeks once, up front; the filters below then compare timestamps with date
# strings. (A second, redundant to_datetime after the filters was removed.)
normalized_visits_dwtn["week"] = pd.to_datetime(normalized_visits_dwtn["week"])
# Rows must be in week order for the rolling window to be chronological.
normalized_visits_dwtn = normalized_visits_dwtn.sort_values(by='week')
# Centered 15-week rolling mean per city (shift(-7) re-labels each window at its middle week).
normalized_visits_dwtn['rolling'] = normalized_visits_dwtn.groupby('city')['normalized_visits_by_total_visits'].transform(lambda x : x.rolling(window=15).mean().shift(-7))
# Merge cluster id 5 into cluster 3; all other ids are kept as they are.
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['cluster'].map({0:0,1:1,2:2,3:3,4:4,5:3})
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn['week']>'2021-06-01']
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn['week']<'2022-05-15']
# Cluster average of the smoothed city series. 'rolling' is selected before .mean()
# so the string columns (city, metric) are never averaged (TypeError on pandas >= 2.0).
normalized_visits_dwtn_avg = normalized_visits_dwtn.groupby(['week','cluster'])['rolling'].mean().reset_index()
# One call instead of set_theme + set(rc) + set(font_scale) + set_style: same final
# style, font scale and figure size.
sns.set_theme(style="whitegrid", font_scale=1.15, rc={'figure.figsize': (7.5, 12)})
# Faint line per city, then a bold line per cluster average on the same axes.
sns.lineplot(data=normalized_visits_dwtn, x ='week', y = 'rolling', hue='cluster', palette='husl', units='city', alpha=0.25, estimator=None, linewidth=1.5)
sns.lineplot(data=normalized_visits_dwtn_avg, x ='week', y = 'rolling', hue='cluster', palette='husl', units='cluster', alpha=0.8, estimator=None, linewidth=2.5)

# NOTE(review): the series plotted is the "downtown" metric, which other cells label
# 'Downtown RQ'; 'Location Quotient' may be a copy-paste leftover — confirm.
plt.ylabel('Location Quotient')
plt.xlabel('Date (Year-Month)')
plt.ylim(0.1,1.6)
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
plt.show()

In [None]:
# Period-2 downtown clusters and the weekly metrics table, with accented city names
# normalised to ASCII.
clusters = get_table_as_pandas_df('rq_dwtn_clusters_0823_period_2').replace('Montréal','Montreal').replace('Québec','Quebec')
normalized_visits = get_table_as_pandas_df('0714_combined_metrics_df').replace('Montréal','Montreal').replace('Québec','Quebec')
# Cities excluded from the analysis.
normalized_visits = normalized_visits[~normalized_visits['city'].isin(['Orlando','Dallas','Hamilton','Mississauga','Oklahoma City'])]
normalized_visits_dwtn = normalized_visits[normalized_visits["metric"]=="downtown"].copy()
normalized_visits_dwtn = normalized_visits_dwtn[~normalized_visits_dwtn["normalized_visits_by_total_visits"].isna()]
# Unlike the neighbouring cells, the date window is applied BEFORE smoothing, so the
# rolling mean only sees weeks inside the window and its first/last 7 weeks are NaN.
# `week` is still text here, so these are string comparisons against ISO dates.
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn['week']>'2021-06-01']
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn['week']<'2022-05-31'].copy()
normalized_visits_dwtn["week"] = pd.to_datetime(normalized_visits_dwtn["week"])
# Rows must be in week order for the rolling window to be chronological.
normalized_visits_dwtn = normalized_visits_dwtn.sort_values(by='week')
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['city'].map(dict(zip(clusters['city'], clusters['cluster']))).astype(int)
normalized_visits_dwtn['normalized_visits_by_total_visits'] = normalized_visits_dwtn['normalized_visits_by_total_visits'].astype(float)
# Centered 15-week rolling mean per city (shift(-7) re-labels each window at its middle week).
normalized_visits_dwtn['rolling'] = normalized_visits_dwtn.groupby('city')['normalized_visits_by_total_visits'].transform(lambda x : x.rolling(window=15).mean().shift(-7))
# Merge cluster id 5 into cluster 1; all other ids are kept as they are.
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['cluster'].map({0:0,1:1,2:2,3:3,4:4,5:1})
# Cluster average of the smoothed city series. 'rolling' is selected before .mean()
# so the string columns (city, metric) are never averaged (TypeError on pandas >= 2.0).
normalized_visits_dwtn_avg = normalized_visits_dwtn.groupby(['week','cluster'])['rolling'].mean().reset_index()
# One call instead of set_theme + set(rc) + set(font_scale) + set_style: same final
# style, font scale and figure size.
sns.set_theme(style="whitegrid", font_scale=1.15, rc={'figure.figsize': (7.5, 12)})
# Faint line per city, then a bold line per cluster average on the same axes.
sns.lineplot(data=normalized_visits_dwtn, x ='week', y = 'rolling', hue='cluster', palette='husl', units='city', alpha=0.25, estimator=None, linewidth=1.5)
sns.lineplot(data=normalized_visits_dwtn_avg, x ='week', y = 'rolling', hue='cluster', palette='husl', units='cluster', alpha=0.8, estimator=None, linewidth=2.5)
# NOTE(review): the series plotted is the "downtown" metric, which other cells label
# 'Downtown RQ'; 'Location Quotient' may be a copy-paste leftover — confirm.
plt.ylabel('Location Quotient')
plt.xlabel('Date (Year-Month)')
plt.ylim(0.1,1.6)
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
plt.show()

In [None]:
import seaborn as sns
# Downtown clusters ('rq_dwtn_clusters_0831_all_2') and the weekly metrics table,
# with accented city names normalised to ASCII.
clusters = get_table_as_pandas_df('rq_dwtn_clusters_0831_all_2').replace('Montréal','Montreal').replace('Québec','Quebec')
normalized_visits = get_table_as_pandas_df('0714_combined_metrics_df').replace('Montréal','Montreal').replace('Québec','Quebec')
# Cities excluded from the analysis.
normalized_visits = normalized_visits[~normalized_visits['city'].isin(['Orlando','Dallas','Hamilton','Mississauga','Oklahoma City'])]
normalized_visits_dwtn = normalized_visits[normalized_visits["metric"]=="downtown"].copy()
# Drop weeks with no metric value so they do not punch NaN holes into the rolling mean.
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn["normalized_visits_by_total_visits"].notna()].copy()
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['city'].map(dict(zip(clusters['city'], clusters['cluster']))).astype(int)
# Rows must be in week order for the rolling window to be chronological.
normalized_visits_dwtn = normalized_visits_dwtn.sort_values(by='week')
# Centered 15-week rolling mean per city (shift(-7) re-labels each window at its middle week).
normalized_visits_dwtn['rolling'] = normalized_visits_dwtn.groupby('city')['normalized_visits_by_total_visits'].transform(lambda x : x.rolling(window=15).mean().shift(-7))
# Merge cluster id 5 into cluster 0; all other ids are kept as they are.
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['cluster'].map({0:0,1:1,2:2,3:3,4:4,5:0})
# `week` is still text here, so these are string comparisons against ISO dates.
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn['week']>'2020-06-01']
normalized_visits_dwtn = normalized_visits_dwtn[normalized_visits_dwtn['week']<'2021-06-01'].copy()
normalized_visits_dwtn["week"] = pd.to_datetime(normalized_visits_dwtn["week"])
# Cluster average of the smoothed city series. 'rolling' is selected before .mean()
# so the string columns (city, metric) are never averaged (TypeError on pandas >= 2.0).
normalized_visits_dwtn_avg = normalized_visits_dwtn.groupby(['week','cluster'])['rolling'].mean().reset_index()
# One call instead of set_theme + set(rc) + set(font_scale) + set_style: same final
# style, font scale and figure size.
sns.set_theme(style="whitegrid", font_scale=1.15, rc={'figure.figsize': (15, 12)})
# Faint line per city, then a bold line per cluster average on the same axes.
sns.lineplot(data=normalized_visits_dwtn, x ='week', y = 'rolling', hue='cluster', palette='tab10', units='city', alpha=0.25, estimator=None, linewidth=1.5)
sns.lineplot(data=normalized_visits_dwtn_avg, x ='week', y = 'rolling', hue='cluster', palette='tab10', units='cluster', alpha=0.8, estimator=None, linewidth=2.5)
# NOTE(review): the series plotted is the "downtown" metric, which other cells label
# 'Downtown RQ'; 'Location Quotient' may be a copy-paste leftover — confirm.
plt.ylabel('Location Quotient')
plt.xlabel('Date (Year-Month)')
plt.ylim(0.1,1.6)
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
plt.show()

In [None]:
# Render the per-city frame currently bound to `normalized_visits_dwtn`
# (i.e. as left by whichever cell assigned it last).
display(normalized_visits_dwtn)

In [None]:
# Mean smoothed value per (week, cluster). 'rolling' is selected before .mean() so the
# string columns of the frame (city, metric) are never averaged, which raises
# TypeError on pandas >= 2.0.
normalized_visits_dwtn.groupby(['week','cluster'])['rolling'].mean().reset_index()

In [None]:
# Period-1 downtown clusters, with accented city names normalised to ASCII.
clusters = get_table_as_pandas_df('rq_dwtn_clusters_0823_period_1').replace('Montréal','Montreal').replace('Québec','Quebec')
normalized_visits_dwtn = normalized_visits[normalized_visits["metric"]=="downtown"].copy()
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['city'].map(dict(zip(clusters['city'], clusters['cluster']))).astype(int)
# Cluster-level weekly mean. The metric column is selected before .mean() so the
# string columns (city, metric) are never averaged (TypeError on pandas >= 2.0).
mean_dwtn = normalized_visits_dwtn.groupby(['cluster','week'])['normalized_visits_by_total_visits'].mean().reset_index()
mean_dwtn["week"] = pd.to_datetime(mean_dwtn["week"]).dt.date
# .copy() so the column assignment below writes to an independent frame.
mean_dwtn = mean_dwtn[mean_dwtn["week"]>datetime.date(2020, 6, 1)].copy()
# Centered 9-week rolling mean within each cluster (shift(-4) re-labels each window
# at its middle week).
mean_dwtn['rolling'] = mean_dwtn.groupby('cluster')['normalized_visits_by_total_visits'].transform(lambda x : x.rolling(window=9).mean().shift(-4))
sns.set(rc={'figure.figsize':(14,10)})
ax = sns.lineplot(data=mean_dwtn, x ='week', y = 'rolling', hue='cluster', palette='tab10')
plt.ylabel('Downtown RQ')
plt.xlabel('Date')
plt.show()

In [None]:
# Period-2 downtown clusters, with accented city names normalised to ASCII.
clusters = get_table_as_pandas_df('rq_dwtn_clusters_0823_period_2').replace('Montréal','Montreal').replace('Québec','Quebec')
normalized_visits_dwtn = normalized_visits[normalized_visits["metric"]=="downtown"].copy()
normalized_visits_dwtn['cluster'] = normalized_visits_dwtn['city'].map(dict(zip(clusters['city'], clusters['cluster']))).astype(int)
# Cluster-level weekly mean. The metric column is selected before .mean() so the
# string columns (city, metric) are never averaged (TypeError on pandas >= 2.0).
mean_dwtn = normalized_visits_dwtn.groupby(['cluster','week'])['normalized_visits_by_total_visits'].mean().reset_index()
mean_dwtn["week"] = pd.to_datetime(mean_dwtn["week"]).dt.date
# .copy() so the column assignment below writes to an independent frame.
mean_dwtn = mean_dwtn[mean_dwtn["week"]>datetime.date(2020, 6, 1)].copy()
# Centered 9-week rolling mean within each cluster (shift(-4) re-labels each window
# at its middle week).
mean_dwtn['rolling'] = mean_dwtn.groupby('cluster')['normalized_visits_by_total_visits'].transform(lambda x : x.rolling(window=9).mean().shift(-4))
sns.set(rc={'figure.figsize':(14,10)})
ax = sns.lineplot(data=mean_dwtn, x ='week', y = 'rolling', hue='cluster', palette='tab10')
plt.ylabel('Downtown RQ')
plt.xlabel('Date')
plt.show()