In [1]:
import logging
import pickle
import pandas as pd
import numpy as np
from tqdm import tqdm
import datetime
from geopy.distance import geodesic

In [2]:
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)

In [3]:
logging.info("Starting the Step 3 Calculations")

2024-11-22 15:29:09,997 - INFO - Starting the Step 3 Calculations


### Reading all the focal brands

In [4]:
with open('top_brands.pickle', 'rb') as file:
    focal_brands = pickle.load(file)

focal_brands

Unnamed: 0,SUB_CATEGORY,BRANDS,Total_Spend,Total_Visits,Unique_PLACEKEY_Count
0,"Cosmetics, Beauty Supplies, and Perfume Stores",Sephora,1913415.32,702927,15
1,"Cosmetics, Beauty Supplies, and Perfume Stores",ULTA Beauty,1339844.15,404546,20
2,Full-Service Restaurants,Olive Garden,1058844.15,125651,15
3,Full-Service Restaurants,The Cheesecake Factory,974090.35,473745,5
4,Warehouse Clubs and Supercenters,Target,41977753.97,2053582,50
5,Warehouse Clubs and Supercenters,Walmart,33473235.0,2815949,47
6,Women's Clothing Stores,Anthropologie,840473.95,327685,7
7,Women's Clothing Stores,Victoria's Secret,662200.98,295606,14


### Reading the visitation data for all the brands

In [5]:
brands_visits = pd.read_csv('data/revision_visits_revenue_2019.csv')
brands_visits['brand_standard'] = brands_visits['brand'].apply(lambda x: x.strip().lower()) # For comparison with catalog.tsv
brands_visits['date'] = brands_visits['date'].apply(lambda x: datetime.datetime.strptime(x, '%Y-%m-%d').date())
brands_visits = brands_visits.rename(columns={'brand': 'brand_visitation'})
# Dropping unused columns
brands_visits = brands_visits.drop(columns=['spend_by_day', 'lat', 'lon', 'brand_standard'])
# Setting date as the index since we will group by date for calculation of metrics
brands_visits = brands_visits.sort_values('date').set_index('date')
brands_visits.head()

Unnamed: 0_level_0,PLACEKEY,visits_by_day,brand_visitation
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-01-01,222-223@62k-9nt-vcq,0,CITGO
2019-01-01,222-222@62j-srj-8y9,54,Lifetime Fitness
2019-01-01,223-223@62j-sy9-mtv,83,Stop & Shop
2019-01-01,zzw-222@62j-ptq-7yv,2,Visionworks
2019-01-01,223-222@62k-rdk-87q,13,CVS


### Reading the Data for Local Reviews of all brands having social data

In [6]:
with open('brand_visit_local_reviews.pickle', 'rb') as file:
    brand_visit_local_reviews = pickle.load(file)

brand_visit_local_reviews = brand_visit_local_reviews.drop(columns=['spend_by_day', 'lat', 'lon', 'brand_standard', 'ID', 'Name',
                                                                    'Genre', 'Type', 'Classification', 'Status', 'Name_Standard'])

brand_visit_local_reviews = brand_visit_local_reviews.sort_values('date').set_index('date')
brand_visit_local_reviews = brand_visit_local_reviews.fillna(0)
brand_visit_local_reviews.head()

Unnamed: 0_level_0,PLACEKEY,visits_by_day,brand_visitation,visits_past_60_days,visits_past_3_days,total_visits_across_stores_60_days,proportion_of_visits_60_days,localized_fb_reviews_60_days,localized_ig_reviews_60_days,localized_tw_reviews_60_days
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-01-01,zzw-225@62j-sgb-ygk,15,The Container Store,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-01-01,222-222@62j-sc2-yn5,6,Staples,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-01-01,zzw-222@62j-t2c-f4v,11,Burger King,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-01-01,222-222@62j-sdt-45f,12,Staples,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-01-01,229-223@62j-r5p-66k,10,Staples,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
brand_visit_local_reviews_list = brand_visit_local_reviews['brand_visitation'].unique().tolist()

### Reading the neib distance for all the neighbors

In [9]:
with open('neib_distance_km_brand.pkl', 'rb') as file:
    neib_distance_km = pickle.load(file)

# Excluding all the brands having more than 1 mile distance since we don't need them anyways for second-degree neighbor calculations
neib_distance_km = neib_distance_km[neib_distance_km['Distance_Km']<=16.0934]
neib_distance_km.head()

Unnamed: 0,SRC_PLACEKEY,DST_PLACEKEY,Distance_Km,DST_BRAND
0,222-222@628-zxy-rc5,222-222@628-zxy-rc5,0.0,CVS
1,222-222@628-zxy-rc5,222-222@628-zxy-tn5,0.255103,McDonald's
2,222-222@628-zxy-rc5,222-222@628-zy4-89z,3.765942,United States Postal Service (USPS)
3,222-222@628-zxy-rc5,222-222@628-zz4-c5z,3.790174,Wendy's
4,222-222@628-zxy-rc5,222-222@628-zzp-f2k,5.166696,Hilton Garden Inn


### Reading Spatial Distance

In [10]:
distance_results = pd.read_csv('data/distance_results.csv')
distance_results.head()

Unnamed: 0,From_PLACEKEY,To_PLACEKEY,Distance_km
0,zzw-223@62j-pth-zs5,zzw-222@62j-sgj-q2k,27.251127
1,zzw-223@62j-pth-zs5,223-222@62k-phk-vfz,36.776232
2,zzw-223@62j-pth-zs5,222-223@62k-r7z-m49,53.38983
3,zzw-223@62j-pth-zs5,zzw-223@62j-pth-zs5,0.0
4,zzw-223@62j-pth-zs5,22c-222@62k-pq5-grk,100.139331


Checking if only the focal brands are present in the FROM_Placekey

In [11]:
from_place_key_distance_brands = brands_visits[brands_visits['PLACEKEY'].isin(distance_results['From_PLACEKEY'].to_list())]['brand_visitation'].unique()
from_place_key_distance_brands

array(['Target', 'Walmart', 'Anthropologie', 'Olive Garden',
       "Victoria's Secret", 'ULTA Beauty', 'Sephora',
       'The Cheesecake Factory'], dtype=object)

Checking the unique brands present in TO_Placekey

In [12]:
to_place_key_distance_brands = brands_visits[brands_visits['PLACEKEY'].isin(distance_results['To_PLACEKEY'].to_list())]['brand_visitation'].unique()

### Reading Travel Time

In [13]:
with open('data/travel_time.pkl', 'rb') as file:
    travel_time_dict = pickle.load(file)
    
travel_time_keys = list(travel_time_dict.keys())
from_keys = [key[0] for key in travel_time_keys]
to_keys = [key[1] for key in travel_time_keys]
time_minutes = list(travel_time_dict.values())
time_minutes = [int(time_inst.split(' ')[0]) for time_inst in time_minutes]

travel_time = pd.DataFrame({'From_PLACEKEY': from_keys, 'To_PLACEKEY': to_keys, 'Time_mins': time_minutes})
travel_time.head()

Unnamed: 0,From_PLACEKEY,To_PLACEKEY,Time_mins
0,zzw-224@62k-p96-s5z,zzw-223@62k-ns4-pn5,20
1,zzw-224@62k-p96-s5z,zzy-222@62k-pd8-975,20
2,zzw-224@62k-p96-s5z,237-222@62k-p8v-z4v,16
3,zzw-224@62k-p96-s5z,222-222@62k-p8v-2p9,12
4,zzw-224@62k-p96-s5z,229-222@62k-p76-d9z,14


Checking if only the focal brands are present in the FROM_Placekey

In [14]:
from_place_key_time_brands = brands_visits[brands_visits['PLACEKEY'].isin(travel_time['From_PLACEKEY'].to_list())]['brand_visitation'].unique()
from_place_key_time_brands

array(['Target', 'Walmart', 'Anthropologie', 'Olive Garden',
       "Victoria's Secret", 'ULTA Beauty', 'Sephora',
       'The Cheesecake Factory'], dtype=object)

Checking the unique brands present in TO_Placekey

In [15]:
to_place_key_time_brands = brands_visits[brands_visits['PLACEKEY'].isin(travel_time['To_PLACEKEY'].to_list())]['brand_visitation'].unique()

### Comparing the brands in distance values and time values

In [16]:
from_place_key_distance_brands.sort() == from_place_key_time_brands.sort()

True

In [17]:
to_place_key_distance_brands.sort() == to_place_key_time_brands.sort()

True

It seems like both distance and time data have same brands in both the placekey and tokey values.

In [18]:
print(len(to_place_key_distance_brands))
print(len(to_place_key_time_brands))

418
418


Since the total brands are only 418 which is close to what the brands we have containing local reviews (427), let's see if these are the same brands which are present in thos brands

In [19]:
local_reviews_brands = brand_visit_local_reviews['brand_visitation'].unique()

In [20]:
print('Total Constant brands between to_placekey and brands having local reviews:', len(set(local_reviews_brands).intersection(set(to_place_key_distance_brands))))

Total Constant brands between to_placekey and brands having local reviews: 223


Since almost half of the brands don't have local reviews, so we are only going to consider the Neighboring brands from distance df for all the focal brands

Let's see if the to_placekey contains any focal brand or not

In [21]:
set(from_place_key_distance_brands).intersection(set(to_place_key_distance_brands))

{'Anthropologie',
 'Olive Garden',
 'Sephora',
 'Target',
 'The Cheesecake Factory',
 'ULTA Beauty',
 "Victoria's Secret",
 'Walmart'}

So, now its clear that to_placekey also contains the focal brands. Now, we can go ahead and simply consider all the to_brands as the neighboring brands and from_key as the focal brands

### Performing First Degree Neighbor Calculations

Let's see all the focal brands we have

In [22]:
focal_brands_list = focal_brands['BRANDS'].tolist()
focal_brands_list

['Sephora',
 'ULTA Beauty',
 'Olive Garden',
 'The Cheesecake Factory',
 'Target',
 'Walmart',
 'Anthropologie',
 "Victoria's Secret"]

Select a focal brand and then extract all the PlaceKeys for this focal brand

In [44]:
foc_brand = focal_brands_list[5]
foc_brand

'Walmart'

Getting all the store keys for this specific focal brand

In [45]:
store_keys_foc_brand = brand_visit_local_reviews[brand_visit_local_reviews['brand_visitation'] == foc_brand]['PLACEKEY'].unique().tolist()
store_keys_foc_brand

['222-222@62k-rdm-4gk',
 '222-222@62k-r3z-dqf',
 'zzw-223@62k-qz8-swk',
 '222-222@62k-pg4-qxq',
 '222-225@62k-qbv-gx5',
 'zzw-222@65y-xy5-7wk',
 'zzw-224@62k-9dz-syv',
 'zzw-224@62k-r9f-rx5',
 '222-222@62k-26t-zs5',
 'zzw-222@62k-85y-nwk',
 'zzw-225@62k-nvh-g8v',
 'zzw-222@629-2h4-rff',
 'zzw-223@629-2px-mx5',
 '222-223@62k-pq4-v9f',
 '222-224@62k-c36-28v',
 'zzw-222@62k-r6t-p9z',
 '222-222@62k-pcd-q75',
 '222-222@65y-yd8-kpv',
 'zzw-222@62k-362-jn5',
 'zzw-223@62k-c4w-xt9',
 '222-223@62k-qwp-8gk',
 '222-222@62j-src-52k',
 '222-226@62k-pqf-wkz',
 '222-222@62j-sth-zpv',
 '222-223@62k-p7f-qpv',
 '222-222@62k-jf5-g49',
 'zzw-223@62k-f2m-94v',
 'zzw-223@62j-t24-x5z',
 '222-222@62j-ptf-ht9',
 '222-223@62k-p8n-hyv',
 'zzw-222@62k-f5p-pqf',
 'zzw-223@62j-t3n-5mk',
 '222-222@629-2g2-v4v',
 'zzw-224@62k-p96-s5z',
 '222-223@62k-qgf-ghq',
 '222-224@62k-2hd-66k',
 '222-223@62j-sxw-nkf',
 'zzw-222@62j-rjt-ct9',
 'zzw-223@62j-t4y-zcq',
 'zzw-223@62k-nry-z75',
 '222-226@62k-dwd-j7q',
 'zzw-224@62k-r8

Getting all the neighboring keys for each store of focal brand

In [46]:
all_neib_placekey = distance_results[distance_results['From_PLACEKEY'].isin(store_keys_foc_brand)]['To_PLACEKEY'].unique().tolist()
all_neib_placekey

['zzw-222@62j-sgj-q2k',
 '223-222@62k-phk-vfz',
 '222-223@62k-r7z-m49',
 'zzw-223@62j-pth-zs5',
 '22c-222@62k-pq5-grk',
 '222-222@62k-qww-g8v',
 '223-222@62k-ny3-hnq',
 'zzw-223@62j-stx-cyv',
 '222-222@62k-pgd-psq',
 '224-222@62k-nr8-rzf',
 'zzw-222@62j-ptp-yn5',
 '24c-222@62j-rk6-t9z',
 'zzw-222@62k-qwn-rhq',
 '223-222@62k-r84-qmk',
 '226-222@62j-rjp-z75',
 '222-223@62k-p8n-hyv',
 'zzw-222@62j-smd-z9f',
 '228-222@628-zzx-t9z',
 '22f-222@62k-p67-j35',
 'zzy-223@62j-y9n-f9f',
 'zzw-222@62j-scj-ygk',
 'zzy-225@629-4n4-gkz',
 '27d-222@62j-shy-whq',
 '223-222@62k-qbv-f9f',
 '22b-222@62j-t2f-snq',
 'zzy-222@62k-p9m-xbk',
 'zzy-222@62j-sj2-75z',
 'zzw-222@62k-qv7-bhq',
 '223-224@62k-nsg-6p9',
 '22k-222@62j-pss-4jv',
 '223-223@62j-sjx-rhq',
 '224-22s@62j-sbz-vxq',
 '224-223@62j-sym-gkz',
 'zzy-222@62j-ptn-4sq',
 '224-222@62k-3p3-z2k',
 'zzw-227@62k-pzn-m6k',
 '22d-222@62j-shy-whq',
 'zzw-222@62k-3q5-s89',
 'zzw-222@62j-sth-xbk',
 '223-222@62k-3p8-zmk',
 '222-222@629-2g2-tn5',
 '224-223@62j-sh

Getting Unique Neighbors for all stores of the focal brand

In [47]:
unique_neib_brands_foc = brands_visits[brands_visits['PLACEKEY'].isin(all_neib_placekey)]['brand_visitation'].unique().tolist()

In [48]:
len(unique_neib_brands_foc)

418

1. For each Unique Neighboring Brand, get all the placekeys
2. Filter them as per Neib PlaceKeys for all the focal stores
3. For each focal store, calculate first neib metrics (with local reviews if it is present in local_reviews, else only calculate visits from brand_visits data)

In [50]:
unique_neib = unique_neib_brands_foc[362]
unique_neib

'SpeeDee Oil Change & Auto Service'

In [51]:
unique_neib_placekeys = brands_visits[brands_visits['brand_visitation'] == unique_neib]['PLACEKEY'].unique().tolist()
unique_neib_placekeys

['222-226@62k-prt-gzf',
 '225-223@62k-85z-7nq',
 '222-222@62k-f55-5mk',
 '223-222@62k-nyq-92k']

In [52]:
unique_neib_placekeys

['222-226@62k-prt-gzf',
 '225-223@62k-85z-7nq',
 '222-222@62k-f55-5mk',
 '223-222@62k-nyq-92k']

In [53]:
focal_stores_first_degree_neib = distance_results[(distance_results['From_PLACEKEY'].isin(store_keys_foc_brand)) &
                                                (distance_results['To_PLACEKEY'].isin(unique_neib_placekeys)) &
                                                (distance_results['Distance_km']<=16.0934)]
focal_stores_first_degree_neib

Unnamed: 0,From_PLACEKEY,To_PLACEKEY,Distance_km
28504,222-225@62k-qbv-gx5,222-226@62k-prt-gzf,5.406999
84910,zzw-222@62k-f5p-pqf,222-222@62k-f55-5mk,1.828422
504356,222-222@62k-jd2-9xq,223-222@62k-nyq-92k,9.11784
581208,222-223@62k-pq4-v9f,222-226@62k-prt-gzf,6.562793
1415244,zzw-222@62k-85y-nwk,225-223@62k-85z-7nq,1.991561


In [157]:
store_keys_foc_brand_filtered = focal_stores_first_degree_neib['From_PLACEKEY'].unique().tolist()
foc_store = store_keys_foc_brand_filtered[4]
foc_store

'zzw-222@62k-85y-nwk'

Calculating all the brands for the stores which can be categorized as the first degreen neighbors. All the stores of such brands need to be excluded from second degree neighbor calculation

In [158]:
foc_store_all_first_degree_neibs = distance_results[(distance_results['From_PLACEKEY'] == foc_store) & (distance_results['Distance_km'] <= 16.0934)]['To_PLACEKEY'].to_list()
foc_store_all_first_degree_neibs_brands = brands_visits[brands_visits['PLACEKEY'].isin(foc_store_all_first_degree_neibs)]['brand_visitation'].unique().tolist()

In [159]:
foc_store_first_degree_neibs = focal_stores_first_degree_neib[focal_stores_first_degree_neib['From_PLACEKEY'] == foc_store]['To_PLACEKEY'].to_list()
foc_store_first_degree_neibs

['225-223@62k-85z-7nq']

In [160]:
foc_store_distance = distance_results[(distance_results['From_PLACEKEY'] == foc_store) & (distance_results['To_PLACEKEY'].isin(foc_store_first_degree_neibs))]
foc_store_distance

Unnamed: 0,From_PLACEKEY,To_PLACEKEY,Distance_km
1415244,zzw-222@62k-85y-nwk,225-223@62k-85z-7nq,1.991561


In [161]:
spatial_distance_avg = np.average(foc_store_distance['Distance_km'].to_list())
spatial_distance_avg

np.float64(1.9915605708401896)

In [162]:
foc_store_time = travel_time[(travel_time['From_PLACEKEY'] == foc_store) & (travel_time['To_PLACEKEY'].isin(foc_store_first_degree_neibs))]
foc_store_time

Unnamed: 0,From_PLACEKEY,To_PLACEKEY,Time_mins
42773,zzw-222@62k-85y-nwk,225-223@62k-85z-7nq,4


In [163]:
travel_distance_avg = np.average(foc_store_time['Time_mins'].to_list())
travel_distance_avg

np.float64(4.0)

If the unique neighbour in question is has the local reviews, then we need to calculate local neib metrics else only simple visits will be needed

In [164]:
def calculate_first_neib_mean_reviews_visits(group_df):
    
    inv_visits = 0
    inv_visits_exp = 0
    
    num_reviews_fb_neibmean = 0
    num_reviews_ig_neibmean = 0
    num_reviews_tw_neibmean = 0

    num_reviews_fb_neibmean_exp = 0
    num_reviews_ig_neibmean_exp = 0
    num_reviews_tw_neibmean_exp = 0
    
    foc_store_first_degree_neibs_time = foc_store_time['To_PLACEKEY'].to_list()
    
    for neib_store in foc_store_first_degree_neibs_time:
        neib_store_time = foc_store_time[(foc_store_time['From_PLACEKEY'] == foc_store) & (foc_store_time['To_PLACEKEY'] == neib_store)]['Time_mins'].values[0]
        neib_store_time = neib_store_time/60 #Converting from minutes to hours

        neib_store_reviews = group_df[group_df['PLACEKEY'] == neib_store][['localized_fb_reviews_60_days',
                                                                            'localized_ig_reviews_60_days',
                                                                            'localized_tw_reviews_60_days',
                                                                            'visits_by_day']]
        
        fb_reviews = neib_store_reviews['localized_fb_reviews_60_days']
        if (len(fb_reviews) != 0):
            num_reviews_fb_neibmean += (1/neib_store_time) * fb_reviews.values[0]
            num_reviews_fb_neibmean_exp += (1/np.exp(neib_store_time)) * fb_reviews.values[0]
        
        ig_reviews = neib_store_reviews['localized_ig_reviews_60_days']
        if (len(ig_reviews) != 0):
            num_reviews_ig_neibmean += (1/neib_store_time) * ig_reviews.values[0]
            num_reviews_ig_neibmean_exp += (1/np.exp(neib_store_time)) * ig_reviews.values[0]
        
        tw_reviews = neib_store_reviews['localized_tw_reviews_60_days']
        if (len(tw_reviews) != 0):
            num_reviews_tw_neibmean += (1/neib_store_time) * tw_reviews.values[0]
            num_reviews_tw_neibmean_exp += (1/np.exp(neib_store_time)) * tw_reviews.values[0]
        
        visits = neib_store_reviews['visits_by_day']
        if (len(visits) != 0):
            inv_visits += (1/neib_store_time) * visits.values[0]
            inv_visits_exp += (1/np.exp(neib_store_time)) * visits.values[0]
    
    return pd.Series([foc_store, inv_visits, num_reviews_fb_neibmean, num_reviews_ig_neibmean, num_reviews_tw_neibmean,
                      inv_visits_exp, num_reviews_fb_neibmean_exp, num_reviews_ig_neibmean_exp, num_reviews_tw_neibmean_exp])

In [165]:
def calculate_first_neib_visits(group_df):
    
    inv_visits = 0
    inv_visits_exp = 0
    
    num_reviews_fb_neibmean = 0
    num_reviews_ig_neibmean = 0
    num_reviews_tw_neibmean = 0

    num_reviews_fb_neibmean_exp = 0
    num_reviews_ig_neibmean_exp = 0
    num_reviews_tw_neibmean_exp = 0
    
    foc_store_first_degree_neibs_time = foc_store_time['To_PLACEKEY'].to_list()
    
    for neib_store in foc_store_first_degree_neibs_time:
        neib_store_time = foc_store_time[(foc_store_time['From_PLACEKEY'] == foc_store) & (foc_store_time['To_PLACEKEY'] == neib_store)]['Time_mins'].values[0]
        neib_store_time = neib_store_time/60 #Converting from minutes to hours
        
        visits = group_df[group_df['PLACEKEY'] == neib_store]['visits_by_day']
        if (len(visits) != 0):
            inv_visits += (1/neib_store_time) * visits.values[0]
            inv_visits_exp += (1/np.exp(neib_store_time)) * visits.values[0]
    
    return pd.Series([foc_store, inv_visits, num_reviews_fb_neibmean, num_reviews_ig_neibmean, num_reviews_tw_neibmean,
                      inv_visits_exp, num_reviews_fb_neibmean_exp, num_reviews_ig_neibmean_exp, num_reviews_tw_neibmean_exp])

In [166]:
if unique_neib in brand_visit_local_reviews_list:
    first_neib_metrics = brand_visit_local_reviews[brand_visit_local_reviews['PLACEKEY'].isin(foc_store_first_degree_neibs)][['PLACEKEY', 'visits_by_day','localized_fb_reviews_60_days','localized_ig_reviews_60_days', 
                                                                                                                                'localized_tw_reviews_60_days']].groupby('date').apply(calculate_first_neib_mean_reviews_visits)
else:
    first_neib_metrics = brands_visits[brands_visits['PLACEKEY'].isin(foc_store_first_degree_neibs)][['PLACEKEY', 'visits_by_day']].groupby('date').apply(calculate_first_neib_visits)

In [167]:
first_neib_metrics = first_neib_metrics.rename(columns={0:'focal_store', 1:'inv_visits', 2: 'num_reviews_fb_neibmean', 3:'num_reviews_ig_neibmean', 4:'num_reviews_tw_neibmean',
                                                        5:'inv_visits_exp', 6:'num_reviews_fb_neibmean_exp', 7: 'num_reviews_ig_neibmean_exp', 8: 'num_reviews_tw_neibmean_exp'})
first_neib_metrics.head()

Unnamed: 0_level_0,focal_store,inv_visits,num_reviews_fb_neibmean,num_reviews_ig_neibmean,num_reviews_tw_neibmean,inv_visits_exp,num_reviews_fb_neibmean_exp,num_reviews_ig_neibmean_exp,num_reviews_tw_neibmean_exp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019-01-01,zzw-222@62k-85y-nwk,60.0,0,0,0,3.742028,0,0,0
2019-01-02,zzw-222@62k-85y-nwk,75.0,0,0,0,4.677535,0,0,0
2019-01-03,zzw-222@62k-85y-nwk,105.0,0,0,0,6.548549,0,0,0
2019-01-04,zzw-222@62k-85y-nwk,135.0,0,0,0,8.419563,0,0,0
2019-01-05,zzw-222@62k-85y-nwk,135.0,0,0,0,8.419563,0,0,0


### Calculating metrics for second degree brands

In [168]:
foc_store

'zzw-222@62k-85y-nwk'

In [169]:
foc_store_first_degree_neibs

['225-223@62k-85z-7nq']

In [170]:
# Excluding first degree neighboring brand, all the first degree neighbors brand stores, and calculating the second degree neighbors (less than 1 mile)
second_neib_df = neib_distance_km[neib_distance_km['SRC_PLACEKEY'].isin(foc_store_first_degree_neibs) &
                                (~neib_distance_km['DST_BRAND'].isin(foc_store_all_first_degree_neibs_brands)) &
                                (neib_distance_km['DST_BRAND'] != unique_neib)]
second_neib_df

Unnamed: 0,SRC_PLACEKEY,DST_PLACEKEY,Distance_Km,DST_BRAND
44999607,225-223@62k-85z-7nq,222-222@62k-9hr-qxq,12.124441,United States Postal Service (USPS)
45002272,225-223@62k-85z-7nq,224-222@62k-856-mtv,2.537286,United States Postal Service (USPS)
45002816,225-223@62k-85z-7nq,225-222@62k-863-m6k,4.043825,United States Postal Service (USPS)
45006351,225-223@62k-85z-7nq,zzw-222@62k-f5s-nbk,15.532825,United States Postal Service (USPS)


In [173]:
second_neib_df_local_reviews = second_neib_df[second_neib_df['DST_BRAND'].isin(brand_visit_local_reviews_list)]
second_neib_df_local_reviews

Unnamed: 0,SRC_PLACEKEY,DST_PLACEKEY,Distance_Km,DST_BRAND


In [174]:
first_deg_stores = second_neib_df['SRC_PLACEKEY'].unique().tolist()
second_deg_stores = second_neib_df['DST_PLACEKEY'].unique().tolist()

In [175]:
first_deg_stores_local_reviews = second_neib_df_local_reviews['SRC_PLACEKEY'].unique().tolist()
second_deg_stores_local_reviews = second_neib_df_local_reviews['DST_PLACEKEY'].unique().tolist()

In [None]:
def calculate_second_neib_visits(group_df):
    
    inv_visits_secondneibmean = 0
    inv_visits_secondneibmean_exp = 0
    
    # Return these zeros in case there are no second degree neighboring stores which have reviews
    num_reviews_fb_secondneibmean = 0
    num_reviews_ig_secondneibmean = 0
    num_reviews_tw_secondneibmean = 0
    num_reviews_fb_secondneibmean_exp = 0
    num_reviews_ig_secondneibmean_exp = 0
    num_reviews_tw_secondneibmean_exp = 0
    
    for first_deg_store in first_deg_stores:
        inv_visits = 0
        inv_visits_exp = 0
        first_deg_store_dist = foc_store_distance[(foc_store_distance['From_PLACEKEY'] == foc_store) & (foc_store_distance['To_PLACEKEY'] == first_deg_store)]['Distance_km'].values[0]
        first_deg_store_dist = first_deg_store_dist/16.0934 #Divide by 10 miles to keep the value of distance between 0 and 1 
        
        second_deg_store_placekey = second_neib_df[second_neib_df['SRC_PLACEKEY'] == first_deg_store]['DST_PLACEKEY'].to_list()
        second_deg_store_distance = second_neib_df[second_neib_df['SRC_PLACEKEY'] == first_deg_store]['Distance_Km'].to_list()        
        
        for i in range(len(second_deg_store_placekey)):
            visits = group_df[group_df['PLACEKEY'] == second_deg_store_placekey[i]]['visits_by_day']
            if (len(visits) != 0):
                inv_visits += (1/second_deg_store_distance[i]) * visits.values[0]
                inv_visits_exp += (1/np.exp(second_deg_store_distance[i])) * visits.values[0]
        
        inv_visits_secondneibmean += (1/first_deg_store_dist) * inv_visits
        inv_visits_secondneibmean_exp += (1/np.exp(first_deg_store_dist)) * inv_visits_exp
    
    
    return pd.Series([foc_store, inv_visits_secondneibmean, inv_visits_secondneibmean_exp,
                    num_reviews_fb_secondneibmean, num_reviews_ig_secondneibmean, num_reviews_tw_secondneibmean,
                    num_reviews_fb_secondneibmean_exp, num_reviews_ig_secondneibmean_exp, num_reviews_tw_secondneibmean_exp])


In [177]:
second_neib_brand_stores = brands_visits[brands_visits['PLACEKEY'].isin(second_deg_stores)][['PLACEKEY', 'visits_by_day']]
second_neib_metrics_visits = second_neib_brand_stores.groupby('date').apply(calculate_second_neib_visits)
second_neib_metrics_visits = second_neib_metrics_visits.rename(columns={0: 'focal_store', 1:'inv_visits_secondneibmean', 2:'inv_visits_secondneibmean_exp',
                                                                        3:'num_reviews_fb_secondneibmean', 4:'num_reviews_ig_secondneibmean',
                                                                        5: 'num_reviews_tw_secondneibmean', 6: 'num_reviews_fb_secondneibmean_exp',
                                                                        7: 'num_reviews_ig_secondneibmean_exp', 8: 'num_reviews_tw_secondneibmean_exp'}).reset_index()

if len(second_neib_df_local_reviews) != 0: # only take the first 4 columns if second neighbors exist having local reviews
    second_neib_metrics_visits = second_neib_metrics_visits.iloc[:,0:4]
    
second_neib_metrics_visits.head()

Unnamed: 0,date,focal_store,inv_visits_secondneibmean,inv_visits_secondneibmean_exp,num_reviews_fb_secondneibmean,num_reviews_ig_secondneibmean,num_reviews_tw_secondneibmean,num_reviews_fb_secondneibmean_exp,num_reviews_ig_secondneibmean_exp,num_reviews_tw_secondneibmean_exp
0,2019-01-01,zzw-222@62k-85y-nwk,0.230222,0.010793,0,0,0,0,0,0
1,2019-01-02,zzw-222@62k-85y-nwk,0.842027,0.037164,0,0,0,0,0,0
2,2019-01-03,zzw-222@62k-85y-nwk,2.185898,0.104316,0,0,0,0,0,0
3,2019-01-04,zzw-222@62k-85y-nwk,1.559884,0.071936,0,0,0,0,0,0
4,2019-01-05,zzw-222@62k-85y-nwk,1.831507,0.09113,0,0,0,0,0,0


In [None]:
def calculate_second_neib_mean_reviews(group_df):
    
    num_reviews_fb_secondneibmean = 0
    num_reviews_ig_secondneibmean = 0
    num_reviews_tw_secondneibmean = 0
    num_reviews_fb_secondneibmean_exp = 0
    num_reviews_ig_secondneibmean_exp = 0
    num_reviews_tw_secondneibmean_exp = 0
    
    for first_deg_store in first_deg_stores_local_reviews:
        
        num_reviews_fb_neibmean = 0
        num_reviews_ig_neibmean = 0
        num_reviews_tw_neibmean = 0
        num_reviews_fb_neibmean_exp = 0
        num_reviews_ig_neibmean_exp = 0
        num_reviews_tw_neibmean_exp = 0
        
        first_deg_store_dist = foc_store_distance[(foc_store_distance['From_PLACEKEY'] == foc_store) & (foc_store_distance['To_PLACEKEY'] == first_deg_store)]['Distance_km'].values[0]
        first_deg_store_dist = first_deg_store_dist/16.0934 #Divide by 10 miles to keep the value of distance between 0 and 1
        
        second_deg_store_placekey = second_neib_df_local_reviews[second_neib_df_local_reviews['SRC_PLACEKEY'] == first_deg_store]['DST_PLACEKEY'].to_list()
        second_deg_store_distance = second_neib_df_local_reviews[second_neib_df_local_reviews['SRC_PLACEKEY'] == first_deg_store]['Distance_Km'].to_list()
        
        for i in range(len(second_deg_store_placekey)):
            second_neib_brand_stores_local_reviews = group_df[group_df['PLACEKEY'] == second_deg_store_placekey[i]][['localized_fb_reviews_60_days',
                                                                                                                 'localized_ig_reviews_60_days',
                                                                                                                 'localized_tw_reviews_60_days']]
            fb_reviews = second_neib_brand_stores_local_reviews['localized_fb_reviews_60_days']
            if (len(fb_reviews) != 0):
                num_reviews_fb_neibmean += (1/second_deg_store_distance[i]) * fb_reviews.values[0]
                num_reviews_fb_neibmean_exp += (1/np.exp(second_deg_store_distance[i])) * fb_reviews.values[0]
            
            ig_reviews = second_neib_brand_stores_local_reviews['localized_ig_reviews_60_days']
            if (len(ig_reviews) != 0):
                num_reviews_ig_neibmean += (1/second_deg_store_distance[i]) * ig_reviews.values[0]
                num_reviews_ig_neibmean_exp += (1/np.exp(second_deg_store_distance[i])) * ig_reviews.values[0]
            
            tw_reviews = second_neib_brand_stores_local_reviews['localized_tw_reviews_60_days']
            if (len(tw_reviews) != 0):
                num_reviews_tw_neibmean += (1/second_deg_store_distance[i]) * tw_reviews.values[0]
                num_reviews_tw_neibmean_exp += (1/np.exp(second_deg_store_distance[i])) * tw_reviews.values[0]

        num_reviews_fb_secondneibmean += (1/first_deg_store_dist) * num_reviews_fb_neibmean
        num_reviews_ig_secondneibmean += (1/first_deg_store_dist) * num_reviews_ig_neibmean
        num_reviews_tw_secondneibmean += (1/first_deg_store_dist) * num_reviews_tw_neibmean
        
        num_reviews_fb_secondneibmean_exp += (1/np.exp(first_deg_store_dist)) * num_reviews_fb_neibmean_exp
        num_reviews_ig_secondneibmean_exp += (1/np.exp(first_deg_store_dist)) * num_reviews_ig_neibmean_exp
        num_reviews_tw_secondneibmean_exp += (1/np.exp(first_deg_store_dist)) * num_reviews_tw_neibmean_exp
    
    return pd.Series([foc_store, num_reviews_fb_secondneibmean, num_reviews_ig_secondneibmean, num_reviews_tw_secondneibmean, 
                      num_reviews_fb_secondneibmean_exp, num_reviews_ig_secondneibmean_exp, num_reviews_tw_secondneibmean_exp])

In [179]:
second_neib_brand_stores_local_reviews = brand_visit_local_reviews[brand_visit_local_reviews['PLACEKEY'].isin(second_deg_stores_local_reviews)][['PLACEKEY',
                                                                                                                                                 'localized_fb_reviews_60_days',
                                                                                                                                                 'localized_ig_reviews_60_days', 
                                                                                                                                                 'localized_tw_reviews_60_days']]
second_neib_metrics_local_reviews = second_neib_brand_stores_local_reviews.groupby('date').apply(calculate_second_neib_mean_reviews)

second_neib_metrics_local_reviews = second_neib_metrics_local_reviews.rename(columns={0: 'focal_store', 1:'num_reviews_fb_secondneibmean', 2:'num_reviews_ig_secondneibmean',
                                                                                      3: 'num_reviews_tw_secondneibmean', 4: 'num_reviews_fb_secondneibmean_exp',
                                                                                      5: 'num_reviews_ig_secondneibmean_exp', 6: 'num_reviews_tw_secondneibmean_exp'}).reset_index()
second_neib_metrics_local_reviews.head()

Unnamed: 0,date,PLACEKEY,localized_fb_reviews_60_days,localized_ig_reviews_60_days,localized_tw_reviews_60_days


In [180]:
if len(second_neib_df_local_reviews) != 0: # merge both the visits if local reviews data exist, else, just take the values with zero reviews stored in second_neib_metrics_visits
    second_neib_metrics = pd.merge(left=second_neib_metrics_visits, right=second_neib_metrics_local_reviews, how='inner', on=['date', 'focal_store'])
else:
    second_neib_metrics = second_neib_metrics_visits
    
second_neib_metrics.head()

Unnamed: 0,date,focal_store,inv_visits_secondneibmean,inv_visits_secondneibmean_exp,num_reviews_fb_secondneibmean,num_reviews_ig_secondneibmean,num_reviews_tw_secondneibmean,num_reviews_fb_secondneibmean_exp,num_reviews_ig_secondneibmean_exp,num_reviews_tw_secondneibmean_exp
0,2019-01-01,zzw-222@62k-85y-nwk,0.230222,0.010793,0,0,0,0,0,0
1,2019-01-02,zzw-222@62k-85y-nwk,0.842027,0.037164,0,0,0,0,0,0
2,2019-01-03,zzw-222@62k-85y-nwk,2.185898,0.104316,0,0,0,0,0,0
3,2019-01-04,zzw-222@62k-85y-nwk,1.559884,0.071936,0,0,0,0,0,0
4,2019-01-05,zzw-222@62k-85y-nwk,1.831507,0.09113,0,0,0,0,0,0


### Extracting metrics for the specific focal store in terms of local reviews

In [181]:
brand_visit_local_reviews

Unnamed: 0_level_0,PLACEKEY,visits_by_day,brand_visitation,visits_past_60_days,visits_past_3_days,total_visits_across_stores_60_days,proportion_of_visits_60_days,localized_fb_reviews_60_days,localized_ig_reviews_60_days,localized_tw_reviews_60_days
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-01-01,zzw-225@62j-sgb-ygk,15,The Container Store,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000
2019-01-01,222-222@62j-sc2-yn5,6,Staples,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000
2019-01-01,zzw-222@62j-t2c-f4v,11,Burger King,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000
2019-01-01,222-222@62j-sdt-45f,12,Staples,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000
2019-01-01,229-223@62j-r5p-66k,10,Staples,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...
2019-12-31,zzw-22k@629-2rt-cyv,598,PacSun,41529.0,2262.0,224813.0,0.184727,20.135228,794.879687,0.369454
2019-12-31,222-222@62k-9mh-3bk,3,Dunkin',330.0,14.0,1647756.0,0.000200,0.000000,0.211287,0.005007
2019-12-31,223-222@62k-jf9-54v,11,Ace Hardware,363.0,23.0,25639.0,0.014158,0.000000,0.000000,0.000000
2019-12-31,zzw-223@62j-t2f-t35,21,Pottery Barn,1311.0,108.0,48763.0,0.026885,1.532453,19.195989,0.026885


In [182]:
# Extracting Local Review Information for the focal store
focal_store_information = brand_visit_local_reviews[brand_visit_local_reviews['PLACEKEY'] == foc_store][['PLACEKEY', 'brand_visitation', 'visits_by_day', 'visits_past_60_days',
                                                                                                         'localized_fb_reviews_60_days', 'localized_ig_reviews_60_days', 'localized_tw_reviews_60_days']]

In [183]:
# Combining distance metrics with the focal store info
focal_store_information['spatial_distance_km'] = spatial_distance_avg
focal_store_information['travel_distance_min'] = travel_distance_avg

In [184]:
focal_store_information

Unnamed: 0_level_0,PLACEKEY,brand_visitation,visits_by_day,visits_past_60_days,localized_fb_reviews_60_days,localized_ig_reviews_60_days,localized_tw_reviews_60_days,spatial_distance_km,travel_distance_min
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019-01-01,zzw-222@62k-85y-nwk,Walmart,58,0.0,0.000000,0.000000,0.000000,1.991561,4.0
2019-01-02,zzw-222@62k-85y-nwk,Walmart,51,0.0,0.000000,0.000000,0.000000,1.991561,4.0
2019-01-03,zzw-222@62k-85y-nwk,Walmart,64,0.0,0.000000,0.000000,0.000000,1.991561,4.0
2019-01-04,zzw-222@62k-85y-nwk,Walmart,69,0.0,0.000000,0.000000,0.000000,1.991561,4.0
2019-01-05,zzw-222@62k-85y-nwk,Walmart,63,0.0,0.000000,0.000000,0.000000,1.991561,4.0
...,...,...,...,...,...,...,...,...,...
2019-12-27,zzw-222@62k-85y-nwk,Walmart,80,4670.0,0.525877,4.099891,0.652477,1.991561,4.0
2019-12-28,zzw-222@62k-85y-nwk,Walmart,88,4660.0,1.438992,0.204181,0.311133,1.991561,4.0
2019-12-29,zzw-222@62k-85y-nwk,Walmart,92,4679.0,0.349762,0.252606,0.194312,1.991561,4.0
2019-12-30,zzw-222@62k-85y-nwk,Walmart,31,4703.0,0.194903,5.242902,0.292355,1.991561,4.0


In [185]:
# Combining first and second degree neighbor metrics with the focal store info
focal_store_information = pd.merge(left=focal_store_information, right=first_neib_metrics, how='inner', on=['date']).drop('focal_store', axis=1)
focal_store_information = pd.merge(left=focal_store_information, right=second_neib_metrics, how='inner', on=['date']).drop('focal_store', axis=1)
focal_store_information = focal_store_information.sort_values('date').fillna(0)

In [186]:
focal_store_information.head()

Unnamed: 0,date,PLACEKEY,brand_visitation,visits_by_day,visits_past_60_days,localized_fb_reviews_60_days,localized_ig_reviews_60_days,localized_tw_reviews_60_days,spatial_distance_km,travel_distance_min,...,num_reviews_ig_neibmean_exp,num_reviews_tw_neibmean_exp,inv_visits_secondneibmean,inv_visits_secondneibmean_exp,num_reviews_fb_secondneibmean,num_reviews_ig_secondneibmean,num_reviews_tw_secondneibmean,num_reviews_fb_secondneibmean_exp,num_reviews_ig_secondneibmean_exp,num_reviews_tw_secondneibmean_exp
0,2019-01-01,zzw-222@62k-85y-nwk,Walmart,58,0.0,0.0,0.0,0.0,1.991561,4.0,...,0,0,0.230222,0.010793,0,0,0,0,0,0
1,2019-01-02,zzw-222@62k-85y-nwk,Walmart,51,0.0,0.0,0.0,0.0,1.991561,4.0,...,0,0,0.842027,0.037164,0,0,0,0,0,0
2,2019-01-03,zzw-222@62k-85y-nwk,Walmart,64,0.0,0.0,0.0,0.0,1.991561,4.0,...,0,0,2.185898,0.104316,0,0,0,0,0,0
3,2019-01-04,zzw-222@62k-85y-nwk,Walmart,69,0.0,0.0,0.0,0.0,1.991561,4.0,...,0,0,1.559884,0.071936,0,0,0,0,0,0
4,2019-01-05,zzw-222@62k-85y-nwk,Walmart,63,0.0,0.0,0.0,0.0,1.991561,4.0,...,0,0,1.831507,0.09113,0,0,0,0,0,0
