In [1]:
import pickle
import pandas as pd
import numpy as np
import datetime
import os

### Reading all the focal brands

In [2]:
# Load the pickled table of focal brands (one row per brand).
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open('../top_brands.pickle', 'rb') as brands_fh:
    focal_brands = pickle.load(brands_fh)

focal_brands

Unnamed: 0,SUB_CATEGORY,BRANDS,Total_Spend,Total_Visits,Unique_PLACEKEY_Count
0,"Cosmetics, Beauty Supplies, and Perfume Stores",Sephora,1913415.32,702927,15
1,"Cosmetics, Beauty Supplies, and Perfume Stores",ULTA Beauty,1339844.15,404546,20
2,Full-Service Restaurants,Olive Garden,1058844.15,125651,15
3,Full-Service Restaurants,The Cheesecake Factory,974090.35,473745,5
4,Warehouse Clubs and Supercenters,Target,41977753.97,2053582,50
5,Warehouse Clubs and Supercenters,Walmart,33473235.0,2815949,47
6,Women's Clothing Stores,Anthropologie,840473.95,327685,7
7,Women's Clothing Stores,Victoria's Secret,662200.98,295606,14


### Read the visitation data of all the brands

In [3]:
brands_visits = pd.read_csv('../data/revision_visits_revenue_2019.csv')
# Parse the ISO-formatted dates in a single vectorized pass; `.dt.date` keeps the values as
# plain `datetime.date` objects, the same type the per-row strptime produced.
brands_visits['date'] = pd.to_datetime(brands_visits['date'], format='%Y-%m-%d').dt.date
brands_visits = brands_visits.rename(columns={'brand': 'brand_visitation'})
# Dropping unused columns. (The lower-cased `brand_standard` helper column is no longer
# created: it was dropped here without ever being read.)
brands_visits = brands_visits.drop(columns=['spend_by_day', 'lat', 'lon'])
# Sort chronologically and use the date as the index
brands_visits = brands_visits.sort_values('date').set_index('date')
brands_visits.head()

Unnamed: 0_level_0,PLACEKEY,visits_by_day,brand_visitation
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-01-01,222-223@62k-9nt-vcq,0,CITGO
2019-01-01,222-222@62j-srj-8y9,54,Lifetime Fitness
2019-01-01,223-223@62j-sy9-mtv,83,Stop & Shop
2019-01-01,zzw-222@62j-ptq-7yv,2,Visionworks
2019-01-01,223-222@62k-rdk-87q,13,CVS


### Reading info about Visits and Local Reviews

In [4]:
# Columns present in the pickled frame that the analysis below does not read.
unused_review_columns = ['spend_by_day', 'lat', 'lon', 'brand_standard', 'ID', 'Name',
                         'Genre', 'Type', 'Classification', 'Status', 'Name_Standard']

# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open('../brand_visit_local_reviews.pickle', 'rb') as reviews_fh:
    brand_visit_local_reviews = (
        pickle.load(reviews_fh)
        .drop(columns=unused_review_columns)
        .sort_values('date')   # chronological order
        .fillna(0)             # missing values are replaced with 0
    )

brand_visit_local_reviews.head()

Unnamed: 0,date,PLACEKEY,visits_by_day,brand_visitation,visits_past_60_days,visits_past_3_days,total_visits_across_stores_60_days,proportion_of_visits_60_days,localized_fb_reviews_60_days,localized_ig_reviews_60_days,localized_tw_reviews_60_days
2768343,2019-01-01,zzw-225@62j-sgb-ygk,15,The Container Store,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2830684,2019-01-01,222-222@62j-sc2-yn5,6,Staples,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2790725,2019-01-01,zzw-222@62j-t2c-f4v,11,Burger King,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2830715,2019-01-01,222-222@62j-sdt-45f,12,Staples,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2830746,2019-01-01,229-223@62j-r5p-66k,10,Staples,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Reading Distance for all the brands

In [5]:
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open('../neib_distance_km_brand.pkl', 'rb') as file:
    neib_distance_km = pickle.load(file)

# Neighbor radius used throughout the notebook: 16.0934 km, i.e. 10 miles (1 mile = 1.60934 km).
# NOTE(review): earlier text called this "1 mile" — confirm that 10 miles is the intended radius.
MAX_NEIGHBOR_DISTANCE_KM = 16.0934

# Store pairs farther apart than the radius are not needed for the first- or second-degree
# neighbor calculations, so they are dropped once here.
neib_distance_km = neib_distance_km[neib_distance_km['Distance_Km'] <= MAX_NEIGHBOR_DISTANCE_KM]
neib_distance_km.head()

Unnamed: 0,SRC_PLACEKEY,DST_PLACEKEY,Distance_Km,DST_BRAND
0,222-222@628-zxy-rc5,222-222@628-zxy-rc5,0.0,CVS
1,222-222@628-zxy-rc5,222-222@628-zxy-tn5,0.255103,McDonald's
2,222-222@628-zxy-rc5,222-222@628-zy4-89z,3.765942,United States Postal Service (USPS)
3,222-222@628-zxy-rc5,222-222@628-zz4-c5z,3.790174,Wendy's
4,222-222@628-zxy-rc5,222-222@628-zzp-f2k,5.166696,Hilton Garden Inn


### Reading Spatial Distance

In [6]:
# Pairwise store-to-store distances in kilometres (From_PLACEKEY, To_PLACEKEY, Distance_km).
distance_results_path = '../data/distance_results.csv'
distance_results = pd.read_csv(distance_results_path)
distance_results.head()

Unnamed: 0,From_PLACEKEY,To_PLACEKEY,Distance_km
0,zzw-223@62j-pth-zs5,zzw-222@62j-sgj-q2k,27.251127
1,zzw-223@62j-pth-zs5,223-222@62k-phk-vfz,36.776232
2,zzw-223@62j-pth-zs5,222-223@62k-r7z-m49,53.38983
3,zzw-223@62j-pth-zs5,zzw-223@62j-pth-zs5,0.0
4,zzw-223@62j-pth-zs5,22c-222@62k-pq5-grk,100.139331


### Performing First Degree Neighbor Calculations

In [7]:
# Brand names in the order they appear in the focal_brands table.
focal_brands_list = focal_brands.loc[:, 'BRANDS'].to_list()
focal_brands_list

['Sephora',
 'ULTA Beauty',
 'Olive Garden',
 'The Cheesecake Factory',
 'Target',
 'Walmart',
 'Anthropologie',
 "Victoria's Secret"]

Select a focal brand and then extract all the PlaceKeys for this focal brand

In [101]:
# Position of the brand to analyse within focal_brands_list; change it to rerun the
# cells below for a different focal brand.
focal_brand_position = 7
foc_brand = focal_brands_list[focal_brand_position]
foc_brand

"Victoria's Secret"

Getting all the store keys for this specific focal brand

In [102]:
# Distinct PLACEKEYs of every store that belongs to the selected focal brand.
is_focal_brand_row = brand_visit_local_reviews['brand_visitation'].eq(foc_brand)
store_keys_foc_brand = brand_visit_local_reviews.loc[is_focal_brand_row, 'PLACEKEY'].unique().tolist()
store_keys_foc_brand

['zzw-22j@62j-sbz-wp9',
 '223-22w@62j-sj3-mhq',
 '225-222@62k-prs-dgk',
 'zzy-222@62k-r6j-zpv',
 'zzw-226@62j-pth-rhq',
 'zzw-222@62j-srj-f4v',
 'zzw-22z@62k-p4d-7wk',
 'zzw-22c@62j-shx-jy9',
 'zzw-223@629-2rt-fpv',
 'zzy-222@62j-sxw-6hq',
 'zzw-227@62j-sxw-6hq',
 'zzw-22h@62k-p76-d7q',
 'zzw-224@62k-nzs-nbk',
 'zzy-22f@62j-shz-vs5']

Getting the PlaceKeys of all neighbors of each store of the focal brand

In [103]:
# Distinct destination PLACEKEYs of all distance rows that start at a focal-brand store.
starts_at_focal_store = neib_distance_km['SRC_PLACEKEY'].isin(store_keys_foc_brand)
all_neib_placekey = neib_distance_km.loc[starts_at_focal_store, 'DST_PLACEKEY'].unique().tolist()

Getting the first-degree neighbors of every store of the focal brand, keeping only those that lie within 10 miles (16.0934 km)

In [104]:
# Build the three row filters separately, then combine them:
#   1. the row starts at a store of the focal brand,
#   2. the row ends at one of the neighbor keys collected for those stores,
#   3. the two stores are at most 16.0934 km apart.
src_is_focal_store = neib_distance_km['SRC_PLACEKEY'].isin(store_keys_foc_brand)
dst_is_known_neighbor = neib_distance_km['DST_PLACEKEY'].isin(all_neib_placekey)
within_max_distance = neib_distance_km['Distance_Km'].le(16.0934)

focal_stores_first_degree_neib = (
    neib_distance_km
    .loc[src_is_focal_store & dst_is_known_neighbor & within_max_distance]
    .reset_index(drop=True)
)
focal_stores_first_degree_neib

Unnamed: 0,SRC_PLACEKEY,DST_PLACEKEY,Distance_Km,DST_BRAND
0,223-22w@62j-sj3-mhq,222-222@62j-qp2-brk,4.816846,FedEx
1,223-22w@62j-sj3-mhq,222-222@62j-sc8-6p9,14.605301,Papa Gino's
2,223-22w@62j-sj3-mhq,222-222@62j-scc-d35,15.892491,Courtyard by Marriott
3,223-22w@62j-sj3-mhq,222-222@62j-scc-mkz,14.638740,Mobil
4,223-22w@62j-sj3-mhq,222-222@62j-scc-snq,14.290026,Staples
...,...,...,...,...
20008,zzy-22f@62j-shz-vs5,zzy-22g@62j-sj3-mhq,2.459383,Saint Laurent
20009,zzy-22f@62j-shz-vs5,zzy-22g@62j-sxw-6c5,11.452817,Hot Topic
20010,zzy-22f@62j-shz-vs5,zzy-22h@62j-sxw-6hq,11.500742,Hannoush Jewelers
20011,zzy-22f@62j-shz-vs5,zzy-22j@62j-sj3-mhq,2.401164,Christian Louboutin


In [105]:
def neib_stores_brand_count(group_df, distances=None, visits=None, first_degree=None,
                            max_distance_km=16.0934):
    """Count first- and second-degree neighboring stores and brands of one focal store.

    Written to be used with ``groupby('SRC_PLACEKEY').apply(...)``: the focal store's
    PLACEKEY is read from ``group_df.name``, which pandas sets to the group key.

    Parameters
    ----------
    group_df : pandas.DataFrame
        First-degree neighbor rows of a single focal store. Must provide the
        ``DST_PLACEKEY`` and ``DST_BRAND`` columns and a ``name`` attribute.
    distances : pandas.DataFrame, optional
        Store-pair table with ``SRC_PLACEKEY``, ``DST_PLACEKEY``, ``Distance_Km`` and
        ``DST_BRAND``. Defaults to the notebook-level ``neib_distance_km``.
    visits : pandas.DataFrame, optional
        Table with ``PLACEKEY`` and ``brand_visitation`` used to look up the brand of a
        store. Defaults to the notebook-level ``brands_visits``.
    first_degree : pandas.DataFrame, optional
        First-degree neighbor table with ``SRC_PLACEKEY`` and ``DST_PLACEKEY``.
        Defaults to the notebook-level ``focal_stores_first_degree_neib``.
    max_distance_km : float, optional
        Largest distance at which two stores count as neighbors. The default,
        16.0934 km, equals 10 miles.

    Returns
    -------
    pandas.Series
        ``First_Deg_Neib_Store_Count``, ``First_Deg_Neib_Brand_Count``,
        ``Second_Deg_Neib_Store_Count`` and ``Second_Deg_Neib_Brand_Count``.
    """
    # Fall back to the notebook-level frames so existing one-argument calls keep working.
    if distances is None:
        distances = neib_distance_km
    if visits is None:
        visits = brands_visits
    if first_degree is None:
        first_degree = focal_stores_first_degree_neib

    foc_store = group_df.name

    # NOTE(review): if the distance table contains self-pairs (SRC == DST, distance 0),
    # the focal store itself is included in these counts - confirm that this is intended.
    first_deg_neib_store_count = group_df['DST_PLACEKEY'].nunique()
    first_deg_neib_brand_count = group_df['DST_BRAND'].nunique()

    within_range = distances['Distance_Km'] <= max_distance_km

    # Every store within range of the focal store, and the brands of those stores.
    nearby_store_keys = distances.loc[(distances['SRC_PLACEKEY'] == foc_store) & within_range,
                                      'DST_PLACEKEY'].to_list()
    # Assumes `brand_visitation` and `DST_BRAND` spell brand names identically - TODO confirm.
    nearby_brands = visits.loc[visits['PLACEKEY'].isin(nearby_store_keys),
                               'brand_visitation'].unique().tolist()
    first_degree_keys = first_degree.loc[first_degree['SRC_PLACEKEY'] == foc_store,
                                         'DST_PLACEKEY'].to_list()

    # Second-degree neighbors: stores within range of any first-degree neighbor whose brand
    # is not already present among the focal store's first-degree neighbors. The distance
    # bound is applied here explicitly instead of relying on `distances` being pre-filtered.
    second_neib_df = distances[distances['SRC_PLACEKEY'].isin(first_degree_keys)
                               & within_range
                               & ~distances['DST_BRAND'].isin(nearby_brands)]

    second_deg_neib_store_count = second_neib_df['DST_PLACEKEY'].nunique()
    second_deg_neib_brand_count = second_neib_df['DST_BRAND'].nunique()

    return pd.Series({'First_Deg_Neib_Store_Count': first_deg_neib_store_count,
                      'First_Deg_Neib_Brand_Count': first_deg_neib_brand_count,
                      'Second_Deg_Neib_Store_Count': second_deg_neib_store_count,
                      'Second_Deg_Neib_Brand_Count': second_deg_neib_brand_count})

In [106]:
# Select only the columns the aggregation reads, so the grouping key is not passed into
# the applied function (pandas >= 2.2 deprecates `apply` operating on grouping columns).
# The focal store's key is still available to the function as `group_df.name`, and
# `reset_index()` turns it back into the SRC_PLACEKEY column.
focal_store_first_second_degree_neib = (
    focal_stores_first_degree_neib
    .groupby('SRC_PLACEKEY')[['DST_PLACEKEY', 'DST_BRAND']]
    .apply(neib_stores_brand_count)
    .reset_index()
)
focal_store_first_second_degree_neib

  focal_store_first_second_degree_neib = focal_stores_first_degree_neib.groupby('SRC_PLACEKEY').apply(neib_stores_brand_count).reset_index()


Unnamed: 0,SRC_PLACEKEY,First_Deg_Neib_Store_Count,First_Deg_Neib_Brand_Count,Second_Deg_Neib_Store_Count,Second_Deg_Neib_Brand_Count
0,223-22w@62j-sj3-mhq,2487,641,276,177
1,225-222@62k-prs-dgk,372,164,43,39
2,zzw-222@62j-srj-f4v,721,354,751,439
3,zzw-223@629-2rt-fpv,689,254,24,24
4,zzw-224@62k-nzs-nbk,647,297,635,334
5,zzw-226@62j-pth-rhq,990,374,558,369
6,zzw-227@62j-sxw-6hq,2463,673,139,114
7,zzw-22c@62j-shx-jy9,2467,623,320,192
8,zzw-22h@62k-p76-d7q,1773,575,233,173
9,zzw-22j@62j-sbz-wp9,1494,485,450,314


### Calculations for daily visits and social media likes

In [107]:
# Only rows dated strictly after this day are kept.
analysis_start = datetime.date(year=2019, month=3, day=1)

# Source column -> label used in the summary table (dict order is the output column order).
# NOTE(review): the source columns are named *_reviews_60_days but are labelled
# "Daily ... Likes" - confirm the labels describe the data correctly.
metric_labels = {'visits_by_day': 'Daily Visits',
                 'localized_fb_reviews_60_days': 'Daily Facebook Likes',
                 'localized_ig_reviews_60_days': 'Daily Instagram Likes',
                 'localized_tw_reviews_60_days': 'Daily Twitter Likes'}

# Restrict to the focal brand first, then apply the date cut-off to those rows only.
focal_brand_rows = brand_visit_local_reviews.loc[brand_visit_local_reviews['brand_visitation'] == foc_brand]
after_start = focal_brand_rows['date'] > analysis_start
foc_brand_visits_reviews = (
    focal_brand_rows
    .loc[after_start, list(metric_labels)]
    .reset_index(drop=True)
    .rename(columns=metric_labels)
)
foc_brand_visits_reviews.head()

Unnamed: 0,Daily Visits,Daily Facebook Likes,Daily Instagram Likes,Daily Twitter Likes
0,3,14.729789,244.353481,0.11927
1,15,41.088358,681.617605,0.332699
2,47,157.658075,2615.400656,1.276584
3,70,193.178714,3204.654984,1.5642
4,126,579.818052,9618.641573,4.694883


In [108]:
# Descriptive statistics of the daily metrics, rendered as strings with thousands
# separators and two decimal places.
two_decimals = '{:,.2f}'.format
foc_brand_visits_reviews.describe().map(two_decimals)

Unnamed: 0,Daily Visits,Daily Facebook Likes,Daily Instagram Likes,Daily Twitter Likes
count,3833.0,3833.0,3833.0,3833.0
mean,65.76,130.47,4546.9,1.66
std,101.82,466.87,7771.83,5.41
min,0.0,0.0,0.0,0.0
25%,14.0,9.32,883.03,0.21
50%,32.0,24.99,2036.0,0.53
75%,62.0,76.65,4291.63,1.26
max,1439.0,14074.44,92192.23,198.14


In [109]:
# One row per metric, one column per statistic; the observation count is left out.
daily_visits_likes = (
    foc_brand_visits_reviews
    .describe()
    .map('{:,.2f}'.format)
    .drop(index='count')
    .T
)
daily_visits_likes

Unnamed: 0,mean,std,min,25%,50%,75%,max
Daily Visits,65.76,101.82,0.0,14.0,32.0,62.0,1439.0
Daily Facebook Likes,130.47,466.87,0.0,9.32,24.99,76.65,14074.44
Daily Instagram Likes,4546.9,7771.83,0.0,883.03,2036.0,4291.63,92192.23
Daily Twitter Likes,1.66,5.41,0.0,0.21,0.53,1.26,198.14


### Summary statistics for the number of first-degree and second-degree neighbors of each focal store

In [110]:
# Readable labels for the four neighbor-count columns, as used in the summary table.
neighbor_count_labels = {
    'First_Deg_Neib_Store_Count': 'Number of first-degree neighboring stores',
    'First_Deg_Neib_Brand_Count': 'Number of first-degree neighboring brands',
    'Second_Deg_Neib_Store_Count': 'Number of second-degree neighboring stores',
    'Second_Deg_Neib_Brand_Count': 'Number of second-degree neighboring brands',
}
focal_store_first_second_degree_neib = focal_store_first_second_degree_neib.rename(columns=neighbor_count_labels)

focal_store_first_second_degree_neib

Unnamed: 0,SRC_PLACEKEY,Number of first-degree neighboring stores,Number of first-degree neighboring brands,Number of second-degree neighboring stores,Number of second-degree neighboring brands
0,223-22w@62j-sj3-mhq,2487,641,276,177
1,225-222@62k-prs-dgk,372,164,43,39
2,zzw-222@62j-srj-f4v,721,354,751,439
3,zzw-223@629-2rt-fpv,689,254,24,24
4,zzw-224@62k-nzs-nbk,647,297,635,334
5,zzw-226@62j-pth-rhq,990,374,558,369
6,zzw-227@62j-sxw-6hq,2463,673,139,114
7,zzw-22c@62j-shx-jy9,2467,623,320,192
8,zzw-22h@62k-p76-d7q,1773,575,233,173
9,zzw-22j@62j-sbz-wp9,1494,485,450,314


In [111]:
# One row per neighbor-count column, one column per statistic; values are formatted as
# strings with two decimals and the observation count is left out.
first_second_deg_counts = (
    focal_store_first_second_degree_neib
    .describe()
    .map('{:,.2f}'.format)
    .drop(index='count')
    .T
)
first_second_deg_counts

Unnamed: 0,mean,std,min,25%,50%,75%,max
Number of first-degree neighboring stores,1429.5,884.12,372.0,657.5,1242.0,2456.5,2487.0
Number of first-degree neighboring brands,447.86,185.97,164.0,279.75,429.5,634.25,673.0
Number of second-degree neighboring stores,313.21,218.01,24.0,151.25,275.5,426.0,751.0
Number of second-degree neighboring brands,203.21,123.01,24.0,118.5,177.5,297.0,439.0


### Merging both the statistics and storing them in a folder

In [112]:
# Each focal brand gets its own sub-folder under the summary-statistics root;
# the folder is created if it does not exist yet.
summary_root = 'foc_brand_summary_statistics'
dir_path = os.path.join(summary_root, foc_brand)
os.makedirs(dir_path, exist_ok=True)

In [113]:
# Column headers used in the exported table; the percentile columns keep their names.
stat_labels = {'mean': 'Mean', 'std': 'STDEV', 'min': 'Min', 'max': 'Max'}

# Stack the daily-metric rows on top of the neighbor-count rows.
stacked_statistics = pd.concat([daily_visits_likes, first_second_deg_counts], axis=0)
foc_summary_statistics = stacked_statistics.rename(columns=stat_labels)

# Write the combined table as LaTeX into the focal brand's folder.
latex_path = os.path.join(dir_path, 'summ_stats.tex')
foc_summary_statistics.to_latex(latex_path)
foc_summary_statistics

Unnamed: 0,Mean,STDEV,Min,25%,50%,75%,Max
Daily Visits,65.76,101.82,0.0,14.0,32.0,62.0,1439.0
Daily Facebook Likes,130.47,466.87,0.0,9.32,24.99,76.65,14074.44
Daily Instagram Likes,4546.9,7771.83,0.0,883.03,2036.0,4291.63,92192.23
Daily Twitter Likes,1.66,5.41,0.0,0.21,0.53,1.26,198.14
Number of first-degree neighboring stores,1429.5,884.12,372.0,657.5,1242.0,2456.5,2487.0
Number of first-degree neighboring brands,447.86,185.97,164.0,279.75,429.5,634.25,673.0
Number of second-degree neighboring stores,313.21,218.01,24.0,151.25,275.5,426.0,751.0
Number of second-degree neighboring brands,203.21,123.01,24.0,118.5,177.5,297.0,439.0
