In [1]:
import pickle
import pandas as pd
import numpy as np
import datetime
import os

### Reading all the focal brands

In [None]:
# Load the pickled table of focal brands.
# NOTE(review): pickle.load can execute code embedded in the file - only open pickles you trust.
with open('../top_brands.pickle', mode='rb') as brands_fh:
    focal_brands = pickle.load(brands_fh)

focal_brands

Unnamed: 0,SUB_CATEGORY,BRANDS,Total_Spend,Total_Visits,Unique_PLACEKEY_Count
0,"Cosmetics, Beauty Supplies, and Perfume Stores",Sephora,1913415.32,702927,15
1,"Cosmetics, Beauty Supplies, and Perfume Stores",ULTA Beauty,1339844.15,404546,20
2,Full-Service Restaurants,Olive Garden,1058844.15,125651,15
3,Full-Service Restaurants,The Cheesecake Factory,974090.35,473745,5
4,Warehouse Clubs and Supercenters,Target,41977753.97,2053582,50
5,Warehouse Clubs and Supercenters,Walmart,33473235.0,2815949,47
6,Women's Clothing Stores,Anthropologie,840473.95,327685,7
7,Women's Clothing Stores,Victoria's Secret,662200.98,295606,14


### Read the visitation data of all the brands

In [4]:
brands_visits = pd.read_csv('../data/revision_visits_revenue_2019.csv')
# Parse the ISO dates in one vectorised call; `.dt.date` yields plain datetime.date objects,
# the same values the previous row-by-row strptime(...).date() produced.
brands_visits['date'] = pd.to_datetime(brands_visits['date'], format='%Y-%m-%d').dt.date
brands_visits = brands_visits.rename(columns={'brand': 'brand_visitation'})
# Dropping unused columns
# (the lower-cased `brand_standard` helper column is no longer built: it was dropped here unused)
brands_visits = brands_visits.drop(columns=['spend_by_day', 'lat', 'lon'])
# Setting date as the index since we will group by date for calculation of metrics
brands_visits = brands_visits.sort_values('date').set_index('date')
brands_visits.head()

Unnamed: 0_level_0,PLACEKEY,visits_by_day,brand_visitation
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-01-01,222-223@62k-9nt-vcq,0,CITGO
2019-01-01,222-222@62j-srj-8y9,54,Lifetime Fitness
2019-01-01,223-223@62j-sy9-mtv,83,Stop & Shop
2019-01-01,zzw-222@62j-ptq-7yv,2,Visionworks
2019-01-01,223-222@62k-rdk-87q,13,CVS


### Reading info about Visits and Local Reviews

In [5]:
# NOTE(review): pickle.load can execute code embedded in the file - only open pickles you trust.
with open('../brand_visit_local_reviews.pickle', mode='rb') as reviews_fh:
    brand_visit_local_reviews = pickle.load(reviews_fh)

# Drop the columns this notebook never reads, order the rows chronologically,
# and replace every missing value with 0.
brand_visit_local_reviews = (
    brand_visit_local_reviews
    .drop(columns=['spend_by_day', 'lat', 'lon', 'brand_standard', 'ID', 'Name',
                   'Genre', 'Type', 'Classification', 'Status', 'Name_Standard'])
    .sort_values('date')
    .fillna(0)
)
brand_visit_local_reviews.head()

Unnamed: 0,date,PLACEKEY,visits_by_day,brand_visitation,visits_past_60_days,visits_past_3_days,total_visits_across_stores_60_days,proportion_of_visits_60_days,localized_fb_reviews_60_days,localized_ig_reviews_60_days,localized_tw_reviews_60_days
2768343,2019-01-01,zzw-225@62j-sgb-ygk,15,The Container Store,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2830684,2019-01-01,222-222@62j-sc2-yn5,6,Staples,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2790725,2019-01-01,zzw-222@62j-t2c-f4v,11,Burger King,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2830715,2019-01-01,222-222@62j-sdt-45f,12,Staples,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2830746,2019-01-01,229-223@62j-r5p-66k,10,Staples,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Reading Distance for all the brands

In [6]:
# NOTE(review): pickle.load can execute code embedded in the file - only open pickles you trust.
with open('../neib_distance_km_brand.pkl', mode='rb') as distance_fh:
    neib_distance_km = pickle.load(distance_fh)

# Keep only store pairs at most 16.0934 km apart; farther pairs are not needed for the
# second-degree neighbor calculations.
# NOTE(review): 16.0934 km is 10 miles (1 mile = 1.60934 km). The earlier wording said "1 mile" -
# confirm which radius is intended.
neib_distance_km = neib_distance_km.loc[neib_distance_km['Distance_Km'] <= 16.0934]
neib_distance_km.head()

Unnamed: 0,SRC_PLACEKEY,DST_PLACEKEY,Distance_Km,DST_BRAND
0,222-222@628-zxy-rc5,222-222@628-zxy-rc5,0.0,CVS
1,222-222@628-zxy-rc5,222-222@628-zxy-tn5,0.255103,McDonald's
2,222-222@628-zxy-rc5,222-222@628-zy4-89z,3.765942,United States Postal Service (USPS)
3,222-222@628-zxy-rc5,222-222@628-zz4-c5z,3.790174,Wendy's
4,222-222@628-zxy-rc5,222-222@628-zzp-f2k,5.166696,Hilton Garden Inn


### Reading Spatial Distance

In [7]:
# Pairwise store-to-store distances (From_PLACEKEY, To_PLACEKEY, Distance_km).
# NOTE(review): not referenced by any later cell in the visible part of this notebook.
distance_results = pd.read_csv('../data/distance_results.csv')
distance_results.head()

Unnamed: 0,From_PLACEKEY,To_PLACEKEY,Distance_km
0,zzw-223@62j-pth-zs5,zzw-222@62j-sgj-q2k,27.251127
1,zzw-223@62j-pth-zs5,223-222@62k-phk-vfz,36.776232
2,zzw-223@62j-pth-zs5,222-223@62k-r7z-m49,53.38983
3,zzw-223@62j-pth-zs5,zzw-223@62j-pth-zs5,0.0
4,zzw-223@62j-pth-zs5,22c-222@62k-pq5-grk,100.139331


### Performing First Degree Neighbor Calculations

In [8]:
# Brand names of all focal brands, in the row order of focal_brands
focal_brands_list = focal_brands['BRANDS'].tolist()
focal_brands_list

['Sephora',
 'ULTA Beauty',
 'Olive Garden',
 'The Cheesecake Factory',
 'Target',
 'Walmart',
 'Anthropologie',
 "Victoria's Secret"]

Select a focal brand and then extract all the PlaceKeys for this focal brand

In [10]:
# Focal brand analysed by the rest of the notebook; change the index to analyse another brand
foc_brand = focal_brands_list[0]
foc_brand

'Sephora'

Getting all the store keys for this specific focal brand

In [11]:
# Distinct PLACEKEYs of every store whose brand is the selected focal brand
store_keys_foc_brand = (
    brand_visit_local_reviews[brand_visit_local_reviews['brand_visitation'].eq(foc_brand)]
    ['PLACEKEY']
    .unique()
    .tolist()
)
store_keys_foc_brand

['228-222@62k-phc-qxq',
 'zzw-222@62j-sbz-whq',
 'zzw-22t@62k-p73-2p9',
 '229-222@62k-qps-tsq',
 'zzw-227@62j-shr-8sq',
 'zzw-22p@62j-srj-ffz',
 '222-222@62j-t2f-sbk',
 'zzw-22m@629-2rt-cyv',
 'zzy-223@62k-p98-4qf',
 '229-222@62j-sj3-qfz',
 '22f-222@62j-shx-fcq',
 'zzw-225@62k-3q6-35z',
 'zzw-22k@62j-pth-zs5',
 'zzw-22g@62j-shx-tjv',
 'zzw-22d@62j-sj3-p9z']

Getting all the neighboring keys for each store of focal brand

In [12]:
# Distinct destination PLACEKEYs reachable from any store of the focal brand
all_neib_placekey = (
    neib_distance_km[neib_distance_km['SRC_PLACEKEY'].isin(store_keys_foc_brand)]
    ['DST_PLACEKEY']
    .unique()
    .tolist()
)

Getting the first-degree neighbors of all the stores of the focal brand, keeping only those that lie within a distance of 10 miles (16.0934 km)

In [13]:
# First-degree neighbors: every distance row whose source is a store of the focal brand and
# whose destination is at most 16.0934 km (10 miles) away.
# The former `DST_PLACEKEY.isin(all_neib_placekey)` condition was removed: all_neib_placekey is
# built from exactly these source rows, so the condition was true for every row it was applied to.
focal_stores_first_degree_neib = neib_distance_km[
    neib_distance_km['SRC_PLACEKEY'].isin(store_keys_foc_brand)
    & (neib_distance_km['Distance_Km'] <= 16.0934)
].reset_index(drop=True)
focal_stores_first_degree_neib

Unnamed: 0,SRC_PLACEKEY,DST_PLACEKEY,Distance_Km,DST_BRAND
0,222-222@62j-t2f-sbk,222-222@62j-ps8-2ff,14.341612,Whole Foods Market
1,222-222@62j-t2f-sbk,222-222@62j-psf-q75,14.619830,McDonald's
2,222-222@62j-t2f-sbk,222-222@62j-psr-3qz,10.413629,Public Storage
3,222-222@62j-t2f-sbk,222-222@62j-psr-f2k,9.777480,Stop & Shop
4,222-222@62j-t2f-sbk,222-222@62j-psr-g6k,9.526847,CVS
...,...,...,...,...
22877,zzy-223@62k-p98-4qf,zzy-224@62k-p8d-rtv,11.155713,Chipotle Mexican Grill
22878,zzy-223@62k-p98-4qf,zzy-224@62k-p8d-ty9,11.248436,Sunoco
22879,zzy-223@62k-p98-4qf,zzy-224@62k-pdh-fvf,15.001134,Dunkin'
22880,zzy-223@62k-p98-4qf,zzy-225@62k-nrz-249,14.790310,Popeyes Louisiana Kitchen


In [14]:
def neib_stores_brand_count(group_df):
    """Count first- and second-degree neighboring stores and brands of one focal store.

    Meant to be used with ``groupby('SRC_PLACEKEY').apply``: ``group_df`` holds the
    first-degree neighbor rows of a single focal store.

    Besides its argument, the function reads two notebook-level frames:
    ``neib_distance_km`` (columns SRC_PLACEKEY, DST_PLACEKEY, DST_BRAND) and
    ``brands_visits`` (columns PLACEKEY, brand_visitation).

    Parameters
    ----------
    group_df : pandas.DataFrame
        First-degree neighbor rows of one focal store; must contain the columns
        ``DST_PLACEKEY`` and ``DST_BRAND``.

    Returns
    -------
    pandas.Series
        ``First_Deg_Neib_Store_Count``, ``First_Deg_Neib_Brand_Count``,
        ``Second_Deg_Neib_Store_Count`` and ``Second_Deg_Neib_Brand_Count``.
    """
    # group_df already is this store's first-degree neighborhood, so its keys are taken
    # directly instead of re-filtering the global distance tables for the same rows.
    # NOTE(review): if the distance table contains a zero-distance self row for the focal
    # store, that store is counted among its own first-degree neighbors - confirm intended.
    first_deg_placekeys = group_df['DST_PLACEKEY'].to_list()

    first_deg_neib_store_count = group_df['DST_PLACEKEY'].nunique()
    first_deg_neib_brand_count = group_df['DST_BRAND'].nunique()

    # Brands of the first-degree neighbors, as named in the visitation data
    first_deg_brands = brands_visits[brands_visits['PLACEKEY'].isin(first_deg_placekeys)]['brand_visitation'].unique().tolist()

    # Second-degree neighbors: destinations reachable from a first-degree neighbor whose brand
    # is not already a first-degree brand. No distance cut-off is applied here; the radius is
    # whatever filter was applied to neib_distance_km before this function is called.
    second_neib_df = neib_distance_km[neib_distance_km['SRC_PLACEKEY'].isin(first_deg_placekeys) &
                                      (~neib_distance_km['DST_BRAND'].isin(first_deg_brands))]

    second_deg_neib_store_count = second_neib_df['DST_PLACEKEY'].nunique()
    second_deg_neib_brand_count = second_neib_df['DST_BRAND'].nunique()

    return pd.Series({'First_Deg_Neib_Store_Count': first_deg_neib_store_count,
                      'First_Deg_Neib_Brand_Count': first_deg_neib_brand_count,
                      'Second_Deg_Neib_Store_Count': second_deg_neib_store_count,
                      'Second_Deg_Neib_Brand_Count': second_deg_neib_brand_count})

In [15]:
# One row of neighbor counts per focal store.
# include_groups=False keeps the grouping column out of each group handed to the function
# (it never reads SRC_PLACEKEY from the group) and avoids the pandas deprecation warning about
# apply operating on the grouping columns. reset_index() turns the group key back into a column.
focal_store_first_second_degree_neib = (
    focal_stores_first_degree_neib
    .groupby('SRC_PLACEKEY')
    .apply(neib_stores_brand_count, include_groups=False)
    .reset_index()
)
focal_store_first_second_degree_neib

  focal_store_first_second_degree_neib = focal_stores_first_degree_neib.groupby('SRC_PLACEKEY').apply(neib_stores_brand_count).reset_index()


Unnamed: 0,SRC_PLACEKEY,First_Deg_Neib_Store_Count,First_Deg_Neib_Brand_Count,Second_Deg_Neib_Store_Count,Second_Deg_Neib_Brand_Count
0,222-222@62j-t2f-sbk,1667,497,362,271
1,228-222@62k-phc-qxq,1667,536,471,297
2,229-222@62j-sj3-qfz,2501,644,264,173
3,229-222@62k-qps-tsq,589,265,96,73
4,22f-222@62j-shx-fcq,2441,639,273,177
5,zzw-222@62j-sbz-whq,1513,490,429,309
6,zzw-225@62k-3q6-35z,255,162,53,37
7,zzw-227@62j-shr-8sq,2231,609,374,209
8,zzw-22d@62j-sj3-p9z,2492,640,280,180
9,zzw-22g@62j-shx-tjv,2465,623,320,192


### Calculations for daily visits and social media likes

In [16]:
# Rows of the focal brand dated strictly after 2019-03-01, reduced to the visit count and the
# three localized review columns, then relabelled for the summary table.
foc_brand_visits_reviews = (
    brand_visit_local_reviews
    .loc[lambda frame: frame['brand_visitation'] == foc_brand]
    .loc[lambda frame: frame['date'] > datetime.date(year=2019, month=3, day=1)]
    .loc[:, ['visits_by_day', 'localized_fb_reviews_60_days',
             'localized_ig_reviews_60_days', 'localized_tw_reviews_60_days']]
    .reset_index(drop=True)
    .rename(columns={
        'visits_by_day': 'Daily Visits',
        'localized_fb_reviews_60_days': 'Daily Facebook Likes',
        'localized_tw_reviews_60_days': 'Daily Twitter Likes',
        'localized_ig_reviews_60_days': 'Daily Instagram Likes',
    })
)
foc_brand_visits_reviews.head()

Unnamed: 0,Daily Visits,Daily Facebook Likes,Daily Instagram Likes,Daily Twitter Likes
0,52,0.028083,470.696092,0.463366
1,3,0.001589,26.633224,0.026218
2,3,0.005601,93.875523,0.092414
3,91,0.053774,901.3105,0.887275
4,8,0.00505,84.646188,0.083328


In [24]:
# Descriptive statistics with every cell rendered as a string with thousands separators and two decimals
foc_brand_visits_reviews.describe().map(lambda value: '{:,.2f}'.format(value))

Unnamed: 0,Daily Visits,Daily Facebook Likes,Daily Instagram Likes,Daily Twitter Likes
count,4316.0,4316.0,4316.0,4316.0
mean,133.41,15.5,1485.43,1.44
std,275.86,117.18,3456.26,6.2
min,0.0,0.0,0.0,0.0
25%,7.0,0.18,78.2,0.05
50%,20.0,0.9,215.75,0.16
75%,67.0,4.44,762.71,0.61
max,3070.0,5346.33,63385.44,171.6


In [25]:
# One row per metric, one column per statistic (count removed), values formatted as strings
daily_visits_likes = (
    foc_brand_visits_reviews
    .describe()
    .drop(index='count')
    .map('{:,.2f}'.format)
    .T
)
daily_visits_likes

Unnamed: 0,mean,std,min,25%,50%,75%,max
Daily Visits,133.41,275.86,0.0,7.0,20.0,67.0,3070.0
Daily Facebook Likes,15.5,117.18,0.0,0.18,0.9,4.44,5346.33
Daily Instagram Likes,1485.43,3456.26,0.0,78.2,215.75,762.71,63385.44
Daily Twitter Likes,1.44,6.2,0.0,0.05,0.16,0.61,171.6


### Calculating the first-degree and second-degree neighbor counts for each focal store

In [100]:
# Human-readable labels for the four neighbor-count columns.
# NOTE(review): this cell's execution counter (100) is out of sequence with its neighbors, and
# the summary output further down still shows the old column names - re-run the notebook top
# to bottom so the displayed outputs match the code.
neighbor_count_labels = {
    'First_Deg_Neib_Store_Count': 'Number of first-degree neighboring stores',
    'First_Deg_Neib_Brand_Count': 'Number of first-degree neighboring brands',
    'Second_Deg_Neib_Store_Count': 'Number of second-degree neighboring stores',
    'Second_Deg_Neib_Brand_Count': 'Number of second-degree neighboring brands',
}
focal_store_first_second_degree_neib = focal_store_first_second_degree_neib.rename(
    columns=neighbor_count_labels
)

focal_store_first_second_degree_neib

Unnamed: 0,SRC_PLACEKEY,Number of first-degree neighboring stores,Number of first-degree neighboring brands,Number of second-degree neighboring stores,Number of second-degree neighboring brands
0,223-22w@62j-sj3-mhq,2487,641,276,177
1,225-222@62k-prs-dgk,372,164,43,39
2,zzw-222@62j-srj-f4v,721,354,751,439
3,zzw-223@629-2rt-fpv,689,254,24,24
4,zzw-224@62k-nzs-nbk,647,297,635,334
5,zzw-226@62j-pth-rhq,990,374,558,369
6,zzw-227@62j-sxw-6hq,2463,673,139,114
7,zzw-22c@62j-shx-jy9,2467,623,320,192
8,zzw-22h@62k-p76-d7q,1773,575,233,173
9,zzw-22j@62j-sbz-wp9,1494,485,450,314


In [26]:
# One row per neighbor-count column, one column per statistic (count removed), values formatted as strings
first_second_deg_counts = (
    focal_store_first_second_degree_neib
    .describe()
    .drop(index='count')
    .map('{:,.2f}'.format)
    .T
)
first_second_deg_counts

Unnamed: 0,mean,std,min,25%,50%,75%,max
First_Deg_Neib_Store_Count,1525.47,789.46,255.0,806.0,1667.0,2336.0,2501.0
First_Deg_Neib_Brand_Count,466.73,162.41,162.0,347.5,497.0,616.0,644.0
Second_Deg_Neib_Store_Count,337.8,201.61,24.0,248.0,320.0,450.0,753.0
Second_Deg_Neib_Brand_Count,219.6,122.78,24.0,174.0,192.0,303.0,440.0


### Merging both the statistics and storing them in a folder

In [27]:
# Output folder for this focal brand's tables: foc_brand_summary_statistics/<focal brand name>
dir_path = os.path.join('foc_brand_summary_statistics', foc_brand)
# exist_ok=True lets the cell be re-run without failing when the folder already exists
os.makedirs(dir_path, exist_ok=True)

In [28]:
# Stack the visit/like statistics on top of the neighbor-count statistics and relabel the
# statistic columns for the exported table.
summary_column_labels = {'mean': 'Mean', 'std': 'STDEV', 'min': 'Min', 'max': 'Max'}
foc_summary_statistics = pd.concat([daily_visits_likes, first_second_deg_counts], axis=0)
foc_summary_statistics = foc_summary_statistics.rename(columns=summary_column_labels)

# Write the combined table as LaTeX into the focal brand's output folder
summary_tex_path = os.path.join(dir_path, 'summ_stats.tex')
foc_summary_statistics.to_latex(summary_tex_path)
foc_summary_statistics

Unnamed: 0,Mean,STDEV,Min,25%,50%,75%,Max
Daily Visits,133.41,275.86,0.0,7.0,20.0,67.0,3070.0
Daily Facebook Likes,15.5,117.18,0.0,0.18,0.9,4.44,5346.33
Daily Instagram Likes,1485.43,3456.26,0.0,78.2,215.75,762.71,63385.44
Daily Twitter Likes,1.44,6.2,0.0,0.05,0.16,0.61,171.6
First_Deg_Neib_Store_Count,1525.47,789.46,255.0,806.0,1667.0,2336.0,2501.0
First_Deg_Neib_Brand_Count,466.73,162.41,162.0,347.5,497.0,616.0,644.0
Second_Deg_Neib_Store_Count,337.8,201.61,24.0,248.0,320.0,450.0,753.0
Second_Deg_Neib_Brand_Count,219.6,122.78,24.0,174.0,192.0,303.0,440.0
