In [37]:
import pickle
import pandas as pd
import numpy as np
from tqdm import tqdm
import datetime
from geopy.distance import geodesic

### Reading all the focal brands

In [38]:
# Load the pickled table of focal brands and display it.
# NOTE(review): pickle.load can execute arbitrary code while loading; only open
# pickle files that come from a trusted source.
with open('top_brands.pickle', 'rb') as file:
    focal_brands = pickle.load(file)

focal_brands

Unnamed: 0,SUB_CATEGORY,BRANDS,Total_Spend,Total_Visits,Unique_PLACEKEY_Count
0,"Cosmetics, Beauty Supplies, and Perfume Stores",Sephora,1913415.32,702927,15
1,"Cosmetics, Beauty Supplies, and Perfume Stores",ULTA Beauty,1339844.15,404546,20
2,Full-Service Restaurants,Olive Garden,1058844.15,125651,15
3,Full-Service Restaurants,The Cheesecake Factory,974090.35,473745,5
4,Warehouse Clubs and Supercenters,Target,41977753.97,2053582,50
5,Warehouse Clubs and Supercenters,Walmart,33473235.0,2815949,47
6,Women's Clothing Stores,Anthropologie,840473.95,327685,7
7,Women's Clothing Stores,Victoria's Secret,662200.98,295606,14


### Reading the visitation data for all the brands

In [39]:
# Load the 2019 daily visits / revenue table.
brands_visits = pd.read_csv('data/revision_visits_revenue_2019.csv')

# Normalised brand name for comparison with catalog.tsv. The vectorised .str accessor
# propagates a missing brand as NaN instead of raising AttributeError on a float.
brands_visits['brand_standard'] = brands_visits['brand'].str.strip().str.lower()

# Parse the ISO dates in one vectorised pass; .dt.date keeps plain datetime.date
# objects in the column, as the row-wise strptime version did.
brands_visits['date'] = pd.to_datetime(brands_visits['date'], format='%Y-%m-%d').dt.date

# The raw 'brand' column is referred to as 'brand_visitation' in the rest of the notebook.
brands_visits = brands_visits.rename(columns={'brand': 'brand_visitation'})
brands_visits.head()

Unnamed: 0,date,PLACEKEY,visits_by_day,spend_by_day,brand_visitation,lat,lon,brand_standard
0,2019-06-01,zzw-222@62j-sgj-q2k,5,0.0,Orangetheory Fitness,42.350592,-71.153024,orangetheory fitness
1,2019-06-02,zzw-222@62j-sgj-q2k,1,0.0,Orangetheory Fitness,42.350592,-71.153024,orangetheory fitness
2,2019-06-03,zzw-222@62j-sgj-q2k,6,859.0,Orangetheory Fitness,42.350592,-71.153024,orangetheory fitness
3,2019-06-04,zzw-222@62j-sgj-q2k,6,30.0,Orangetheory Fitness,42.350592,-71.153024,orangetheory fitness
4,2019-06-05,zzw-222@62j-sgj-q2k,8,193.0,Orangetheory Fitness,42.350592,-71.153024,orangetheory fitness


### Reading the Data for Local Reviews of all brands having social data

In [40]:
# Load the pickled table of stores that carry localized review data.
# NOTE(review): pickle.load can execute arbitrary code while loading; only open
# pickle files that come from a trusted source.
with open('brand_visit_local_reviews.pickle', 'rb') as file:
    brand_visit_local_reviews = pickle.load(file)

# Not the last expression of the cell, so this preview is evaluated but never displayed.
brand_visit_local_reviews.head()
# Distinct brands that have local-review rows; used later to decide which
# first-degree metric function is applied.
brand_visit_local_reviews_list = brand_visit_local_reviews['brand_visitation'].unique().tolist()

### Reading Spatial Distance

In [41]:
# Pairwise store distances: From_PLACEKEY -> To_PLACEKEY with Distance_km.
distance_results = pd.read_csv('data/distance_results.csv')
distance_results.head()

Unnamed: 0,From_PLACEKEY,To_PLACEKEY,Distance_km
0,zzw-223@62j-pth-zs5,zzw-222@62j-sgj-q2k,27.251127
1,zzw-223@62j-pth-zs5,223-222@62k-phk-vfz,36.776232
2,zzw-223@62j-pth-zs5,222-223@62k-r7z-m49,53.38983
3,zzw-223@62j-pth-zs5,zzw-223@62j-pth-zs5,0.0
4,zzw-223@62j-pth-zs5,22c-222@62k-pq5-grk,100.139331


Checking if only the focal brands are present in the FROM_Placekey

In [42]:
# Brands whose stores appear as an origin (From_PLACEKEY) in the distance table.
from_place_key_distance_brands = (
    brands_visits
    .loc[brands_visits['PLACEKEY'].isin(distance_results['From_PLACEKEY'].to_list()), 'brand_visitation']
    .unique()
)
from_place_key_distance_brands

array(['ULTA Beauty', 'Walmart', 'Target', 'Sephora',
       'The Cheesecake Factory', 'Olive Garden', "Victoria's Secret",
       'Anthropologie'], dtype=object)

Checking the unique brands present in TO_Placekey

In [43]:
# Brands whose stores appear as a destination (To_PLACEKEY) in the distance table.
to_place_key_distance_brands = (
    brands_visits
    .loc[brands_visits['PLACEKEY'].isin(distance_results['To_PLACEKEY'].to_list()), 'brand_visitation']
    .unique()
)

### Reading Travel Time

In [44]:
# NOTE(review): pickle.load can execute arbitrary code while loading; only open
# pickle files that come from a trusted source.
with open('data/travel_time.pkl', 'rb') as file:
    travel_time_dict = pickle.load(file)


def _duration_text_to_minutes(duration_text):
    """Convert a duration string such as '20 mins' or '1 hour 5 mins' to whole minutes.

    The previous implementation kept only the first whitespace-separated token, so a
    value like '1 hour 5 mins' would silently become 1. Here the text is read as
    (amount, unit) pairs and day / hour / minute units are summed.
    NOTE(review): assumes the values are '<n> <unit>' duration text — TODO confirm
    against the contents of travel_time.pkl.

    If no recognised unit is found, the original behaviour (integer value of the
    first space-separated token) is used, so plain inputs parse exactly as before.
    """
    minutes_per_unit = (('day', 1440), ('hour', 60), ('min', 1))
    tokens = duration_text.split()
    total_minutes = 0
    recognised_pairs = 0
    for amount_text, unit_text in zip(tokens[0::2], tokens[1::2]):
        for unit_prefix, unit_minutes in minutes_per_unit:
            if unit_text.lower().startswith(unit_prefix):
                total_minutes += int(amount_text) * unit_minutes
                recognised_pairs += 1
                break
        else:
            # Unknown unit: fall back to the historical first-token parse.
            return int(duration_text.split(' ')[0])
    if recognised_pairs == 0:
        return int(duration_text.split(' ')[0])
    return total_minutes


# The dict keys are (from_placekey, to_placekey) pairs; the values are duration strings.
travel_time_keys = list(travel_time_dict.keys())
from_keys = [key[0] for key in travel_time_keys]
to_keys = [key[1] for key in travel_time_keys]
time_minutes = [_duration_text_to_minutes(time_inst) for time_inst in travel_time_dict.values()]

travel_time = pd.DataFrame({'From_PLACEKEY': from_keys, 'To_PLACEKEY': to_keys, 'Time_mins': time_minutes})
travel_time.head()

Unnamed: 0,From_PLACEKEY,To_PLACEKEY,Time_mins
0,zzw-224@62k-p96-s5z,zzw-223@62k-ns4-pn5,20
1,zzw-224@62k-p96-s5z,zzy-222@62k-pd8-975,20
2,zzw-224@62k-p96-s5z,237-222@62k-p8v-z4v,16
3,zzw-224@62k-p96-s5z,222-222@62k-p8v-2p9,12
4,zzw-224@62k-p96-s5z,229-222@62k-p76-d9z,14


Checking if only the focal brands are present in the FROM_Placekey

In [45]:
# Brands whose stores appear as an origin (From_PLACEKEY) in the travel-time table.
from_place_key_time_brands = (
    brands_visits
    .loc[brands_visits['PLACEKEY'].isin(travel_time['From_PLACEKEY'].to_list()), 'brand_visitation']
    .unique()
)
from_place_key_time_brands

array(['ULTA Beauty', 'Walmart', 'Target', 'Sephora',
       'The Cheesecake Factory', 'Olive Garden', "Victoria's Secret",
       'Anthropologie'], dtype=object)

Checking the unique brands present in TO_Placekey

In [46]:
# Brands whose stores appear as a destination (To_PLACEKEY) in the travel-time table.
to_place_key_time_brands = (
    brands_visits
    .loc[brands_visits['PLACEKEY'].isin(travel_time['To_PLACEKEY'].to_list()), 'brand_visitation']
    .unique()
)

### Comparing the brands in distance values and time values

In [47]:
# ndarray.sort() sorts in place and returns None, so comparing the two return values
# was always `None == None` -> True regardless of content. Both arrays come from
# .unique(), so comparing them as sets checks that they hold the same brands.
set(from_place_key_distance_brands) == set(from_place_key_time_brands)

True

In [48]:
# ndarray.sort() sorts in place and returns None, so comparing the two return values
# was always `None == None` -> True regardless of content. Both arrays come from
# .unique(), so comparing them as sets checks that they hold the same brands.
set(to_place_key_distance_brands) == set(to_place_key_time_brands)

True

It seems that the distance data and the travel-time data contain the same brands in both the From_PLACEKEY and To_PLACEKEY columns.

In [49]:
# Number of destination brands in the distance table and in the travel-time table, one per line.
print(len(to_place_key_distance_brands), len(to_place_key_time_brands), sep='\n')

418
418


Since there are only 418 brands in total, which is close to the number of brands that have local reviews (427), let's check whether these are the same brands.

In [50]:
# Distinct brands present in the local-reviews table (as a NumPy array).
local_reviews_brands = brand_visit_local_reviews['brand_visitation'].unique()

In [51]:
# Count the brands that are both a distance-table destination and present in the local-reviews data.
print('Total Constant brands between to_placekey and brands having local reviews:', len(set(local_reviews_brands) & set(to_place_key_distance_brands)))

Total Constant brands between to_placekey and brands having local reviews: 223


Since almost half of the brands don't have local reviews, we are only going to consider the neighboring brands from the distance DataFrame for all the focal brands.

Let's see if the to_placekey contains any focal brand or not

In [52]:
# Brands that occur both as an origin and as a destination in the distance table.
set(from_place_key_distance_brands) & set(to_place_key_distance_brands)

{'Anthropologie',
 'Olive Garden',
 'Sephora',
 'Target',
 'The Cheesecake Factory',
 'ULTA Beauty',
 "Victoria's Secret",
 'Walmart'}

So, now it's clear that To_PLACEKEY also contains the focal brands. We can therefore simply treat all the To_PLACEKEY brands as the neighboring brands and the From_PLACEKEY brands as the focal brands.

### Performing First Degree Neighbor Calculations

Let's see all the focal brands we have

In [53]:
# Plain Python list of focal brand names, in the row order of `focal_brands`.
focal_brands_list = focal_brands['BRANDS'].tolist()
focal_brands_list

['Sephora',
 'ULTA Beauty',
 'Olive Garden',
 'The Cheesecake Factory',
 'Target',
 'Walmart',
 'Anthropologie',
 "Victoria's Secret"]

Select a focal brand and then extract all the PlaceKeys for this focal brand

In [54]:
# Work through a single focal brand: the first entry of the list.
foc_brand = focal_brands_list[0]

Getting all the store keys for this specific focal brand

In [55]:
# Every distinct PLACEKEY of the selected focal brand, taken from the local-reviews table.
store_keys_foc_brand = (
    brand_visit_local_reviews
    .loc[brand_visit_local_reviews['brand_visitation'] == foc_brand, 'PLACEKEY']
    .unique()
    .tolist()
)
store_keys_foc_brand

['22f-222@62j-shx-fcq',
 '229-222@62j-sj3-qfz',
 '228-222@62k-phc-qxq',
 'zzy-223@62k-p98-4qf',
 'zzw-22g@62j-shx-tjv',
 'zzw-227@62j-shr-8sq',
 'zzw-225@62k-3q6-35z',
 'zzw-22d@62j-sj3-p9z',
 'zzw-222@62j-sbz-whq',
 '229-222@62k-qps-tsq',
 'zzw-22k@62j-pth-zs5',
 '222-222@62j-t2f-sbk',
 'zzw-22p@62j-srj-ffz',
 'zzw-22m@629-2rt-cyv',
 'zzw-22t@62k-p73-2p9']

Getting all the neighboring keys for each store of focal brand

In [56]:
# Distinct destination PLACEKEYs reachable from any store of the focal brand in the distance table.
all_neib_placekey = (
    distance_results
    .loc[distance_results['From_PLACEKEY'].isin(store_keys_foc_brand), 'To_PLACEKEY']
    .unique()
    .tolist()
)
all_neib_placekey

['zzw-222@62j-sgj-q2k',
 '223-222@62k-phk-vfz',
 '222-223@62k-r7z-m49',
 'zzw-223@62j-pth-zs5',
 '22c-222@62k-pq5-grk',
 '222-222@62k-qww-g8v',
 '223-222@62k-ny3-hnq',
 'zzw-223@62j-stx-cyv',
 '222-222@62k-pgd-psq',
 '224-222@62k-nr8-rzf',
 'zzw-222@62j-ptp-yn5',
 '24c-222@62j-rk6-t9z',
 'zzw-222@62k-qwn-rhq',
 '223-222@62k-r84-qmk',
 '226-222@62j-rjp-z75',
 '222-223@62k-p8n-hyv',
 'zzw-222@62j-smd-z9f',
 '228-222@628-zzx-t9z',
 '22f-222@62k-p67-j35',
 'zzy-223@62j-y9n-f9f',
 'zzw-222@62j-scj-ygk',
 'zzy-225@629-4n4-gkz',
 '27d-222@62j-shy-whq',
 '223-222@62k-qbv-f9f',
 '22b-222@62j-t2f-snq',
 'zzy-222@62k-p9m-xbk',
 'zzy-222@62j-sj2-75z',
 'zzw-222@62k-qv7-bhq',
 '223-224@62k-nsg-6p9',
 '22k-222@62j-pss-4jv',
 '223-223@62j-sjx-rhq',
 '224-22s@62j-sbz-vxq',
 '224-223@62j-sym-gkz',
 'zzy-222@62j-ptn-4sq',
 '224-222@62k-3p3-z2k',
 'zzw-227@62k-pzn-m6k',
 '22d-222@62j-shy-whq',
 'zzw-222@62k-3q5-s89',
 'zzw-222@62j-sth-xbk',
 '223-222@62k-3p8-zmk',
 '222-222@629-2g2-tn5',
 '224-223@62j-sh

Getting Unique Neighbors for all stores of the focal brand

In [57]:
# Distinct brands operating at any of the neighbouring PLACEKEYs.
unique_neib_brands_foc = (
    brands_visits
    .loc[brands_visits['PLACEKEY'].isin(all_neib_placekey), 'brand_visitation']
    .unique()
    .tolist()
)

In [58]:
# Number of distinct neighbouring brands found for the focal brand's stores.
len(unique_neib_brands_foc)

418

1. For each Unique Neighboring Brand, get all the placekeys
2. Filter them as per Neib PlaceKeys for all the focal stores
3. For each focal store, calculate first neib metrics (with local reviews if it is present in local_reviews, else only calculate visits from brand_visits data)

In [59]:
# Work through a single neighbouring brand: the first entry of the list.
unique_neib = unique_neib_brands_foc[0]
unique_neib

'Orangetheory Fitness'

In [60]:
# Every distinct PLACEKEY of the selected neighbouring brand in the visits table.
unique_neib_placekeys = (
    brands_visits
    .loc[brands_visits['brand_visitation'] == unique_neib, 'PLACEKEY']
    .unique()
    .tolist()
)
unique_neib_placekeys

['zzw-222@62j-sgj-q2k',
 'zzw-227@62k-phd-99f',
 'zzw-223@62j-srz-2tv',
 '228-222@62j-sx7-vj9',
 'zzy-222@62k-pg8-8vz',
 'zzw-223@62j-sj4-nwk',
 'zzw-222@62j-sj3-2kz',
 '225-222@62j-sj9-7bk',
 'zzw-226@62j-sqs-jsq',
 '22g-223@62j-sy7-fs5',
 'zzw-224@62k-rdq-33q',
 'zzw-222@62k-p9p-2p9',
 'zzw-222@62j-t4w-2kz',
 '222-222@62j-ptq-54v',
 '222-222@62k-2ww-j35',
 'zzw-223@62j-sr7-k2k',
 'zzw-223@62j-qnn-btv',
 '225-222@62k-r5r-qxq',
 'zzw-222@62j-sbz-x5z',
 '22b-222@62k-jd3-xt9',
 'zzw-223@62j-sf9-g8v',
 'zzw-224@62k-rfs-sh5',
 'zzw-223@62k-rc5-jn5',
 '226-222@62j-t3n-5mk',
 '23r-222@62j-ssc-28v',
 'zzw-223@62j-qsz-qvf']

In [61]:
# Distance rows from a focal-brand store to a store of the neighbouring brand that lie
# within 16.0934 km (10 miles).
focal_stores_first_degree_neib = distance_results.loc[
    distance_results['From_PLACEKEY'].isin(store_keys_foc_brand)
    & distance_results['To_PLACEKEY'].isin(unique_neib_placekeys)
    & distance_results['Distance_km'].le(16.0934)
]
focal_stores_first_degree_neib

Unnamed: 0,From_PLACEKEY,To_PLACEKEY,Distance_km
112268,22f-222@62j-shx-fcq,zzw-222@62j-sgj-q2k,8.029469
112492,22f-222@62j-shx-fcq,zzw-223@62j-sj4-nwk,1.613781
112517,22f-222@62j-shx-fcq,zzw-222@62j-sj3-2kz,2.208878
112762,22f-222@62j-shx-fcq,225-222@62j-sj9-7bk,3.942750
113547,22f-222@62j-shx-fcq,22g-223@62j-sy7-fs5,13.278776
...,...,...,...
1278262,zzw-22t@62k-p73-2p9,zzw-227@62k-phd-99f,12.619351
1278352,zzw-22t@62k-p73-2p9,zzw-223@62j-sj4-nwk,14.892433
1279638,zzw-22t@62k-p73-2p9,zzw-222@62k-p9p-2p9,3.560544
1284345,zzw-22t@62k-p73-2p9,zzw-223@62j-qnn-btv,8.875992


In [62]:
# NOTE(review): this rebinds `store_keys_foc_brand` (previously every store of the focal
# brand) to only those focal stores that have at least one first-degree neighbour row.
# The cell that builds `focal_stores_first_degree_neib` reads this name, so re-running
# it after this cell uses the narrowed list.
store_keys_foc_brand = focal_stores_first_degree_neib['From_PLACEKEY'].unique().tolist()
# Work through a single focal store: the first one in the narrowed list.
foc_store = store_keys_foc_brand[0]
foc_store

'22f-222@62j-shx-fcq'

In [63]:
# Destination PLACEKEYs of the first-degree neighbour rows belonging to the selected focal store.
foc_store_first_degree_neibs = (
    focal_stores_first_degree_neib
    .loc[focal_stores_first_degree_neib['From_PLACEKEY'] == foc_store, 'To_PLACEKEY']
    .to_list()
)
foc_store_first_degree_neibs

['zzw-222@62j-sgj-q2k',
 'zzw-223@62j-sj4-nwk',
 'zzw-222@62j-sj3-2kz',
 '225-222@62j-sj9-7bk',
 '22g-223@62j-sy7-fs5',
 'zzw-222@62k-p9p-2p9',
 'zzw-223@62j-sf9-g8v']

In [64]:
# Distance rows from the selected focal store to each of its first-degree neighbour stores.
foc_store_distance = distance_results.loc[
    distance_results['From_PLACEKEY'].eq(foc_store)
    & distance_results['To_PLACEKEY'].isin(foc_store_first_degree_neibs)
]
foc_store_distance

Unnamed: 0,From_PLACEKEY,To_PLACEKEY,Distance_km
112268,22f-222@62j-shx-fcq,zzw-222@62j-sgj-q2k,8.029469
112492,22f-222@62j-shx-fcq,zzw-223@62j-sj4-nwk,1.613781
112517,22f-222@62j-shx-fcq,zzw-222@62j-sj3-2kz,2.208878
112762,22f-222@62j-shx-fcq,225-222@62j-sj9-7bk,3.94275
113547,22f-222@62j-shx-fcq,22g-223@62j-sy7-fs5,13.278776
113778,22f-222@62j-shx-fcq,zzw-222@62k-p9p-2p9,13.136242
119987,22f-222@62j-shx-fcq,zzw-223@62j-sf9-g8v,14.313863


In [65]:
# Mean Distance_km from the focal store to its first-degree neighbour stores.
spatial_distance_avg = np.average(foc_store_distance['Distance_km'].to_list())
spatial_distance_avg

np.float64(8.074822689108732)

In [66]:
# Travel-time rows from the selected focal store to each of its first-degree neighbour stores.
foc_store_time = travel_time.loc[
    travel_time['From_PLACEKEY'].eq(foc_store)
    & travel_time['To_PLACEKEY'].isin(foc_store_first_degree_neibs)
]
foc_store_time

Unnamed: 0,From_PLACEKEY,To_PLACEKEY,Time_mins
57545,22f-222@62j-shx-fcq,zzw-222@62j-sgj-q2k,18
57591,22f-222@62j-shx-fcq,zzw-223@62j-sj4-nwk,11
57597,22f-222@62j-shx-fcq,zzw-222@62j-sj3-2kz,12
57646,22f-222@62j-shx-fcq,225-222@62j-sj9-7bk,10
57812,22f-222@62j-shx-fcq,22g-223@62j-sy7-fs5,22
57852,22f-222@62j-shx-fcq,zzw-222@62k-p9p-2p9,22
59241,22f-222@62j-shx-fcq,zzw-223@62j-sf9-g8v,22


In [67]:
# Mean travel time (Time_mins) from the focal store to its first-degree neighbour stores.
# NOTE: despite the name `travel_distance_avg`, this value is in minutes, not a distance.
travel_distance_avg = np.average(foc_store_time['Time_mins'].to_list())
travel_distance_avg

np.float64(16.714285714285715)

If the unique neighbour in question has local reviews, we need to calculate the local neighbour review metrics; otherwise only the simple visit metrics are needed.

In [68]:
def calculate_first_neib_mean_reviews_visits(group_df, neib_stores=None, store_time=None, focal_store=None):
    """Sum travel-time-weighted visits and localized review counts over first-degree neighbours.

    Written for ``groupby('date').apply(...)``: ``group_df`` is the slice of rows for one
    group. For each neighbour store, the value in the first matching row of every metric
    column is weighted by ``1 / t`` and by ``1 / exp(t)``, where ``t`` is the neighbour's
    ``Time_mins`` from the focal store, and accumulated across neighbours. NaN values
    and neighbours without a row in ``group_df`` contribute nothing.

    Parameters
    ----------
    group_df : pandas.DataFrame
        Must contain ``PLACEKEY``, ``visits_by_day``, ``localized_fb_reviews_60_days``,
        ``localized_ig_reviews_60_days`` and ``localized_tw_reviews_60_days``.
    neib_stores : iterable, optional
        Neighbour PLACEKEYs. Defaults to the notebook-level ``foc_store_first_degree_neibs``.
    store_time : pandas.DataFrame, optional
        Table with ``From_PLACEKEY``, ``To_PLACEKEY`` and ``Time_mins``. Defaults to the
        notebook-level ``foc_store_time``.
    focal_store : optional
        PLACEKEY of the focal store. Defaults to the notebook-level ``foc_store``.

    Returns
    -------
    pandas.Series
        Nine positional values: focal store, then the ``1 / t`` weighted sums for
        visits, fb, ig and tw reviews, then the ``1 / exp(t)`` weighted sums in the
        same order.
    """
    # Fall back to the notebook-level state so existing `.apply(func)` calls keep working.
    if neib_stores is None:
        neib_stores = foc_store_first_degree_neibs
    if store_time is None:
        store_time = foc_store_time
    if focal_store is None:
        focal_store = foc_store

    # Order matters: it fixes the positions of the values in the returned Series.
    metric_columns = (
        'visits_by_day',
        'localized_fb_reviews_60_days',
        'localized_ig_reviews_60_days',
        'localized_tw_reviews_60_days',
    )
    inv_totals = dict.fromkeys(metric_columns, 0)
    exp_totals = dict.fromkeys(metric_columns, 0)

    for neib_store in neib_stores:
        time_rows = store_time.loc[
            (store_time['From_PLACEKEY'] == focal_store) & (store_time['To_PLACEKEY'] == neib_store),
            'Time_mins',
        ]
        # A neighbour without a travel-time row used to raise IndexError; skip it instead.
        if time_rows.empty:
            continue

        # Filter the group once per neighbour rather than once per metric.
        neib_rows = group_df[group_df['PLACEKEY'] == neib_store]
        if neib_rows.empty:
            continue

        # NOTE(review): a travel time of 0 makes the 1/t weight infinite — confirm how
        # zero-minute neighbours should be weighted.
        neib_store_time = time_rows.values[0]
        inv_weight = 1 / neib_store_time
        exp_weight = 1 / np.exp(neib_store_time)

        for column in metric_columns:
            value = neib_rows[column].values[0]
            if not np.isnan(value):
                inv_totals[column] += inv_weight * value
                exp_totals[column] += exp_weight * value

    return pd.Series(
        [focal_store]
        + [inv_totals[column] for column in metric_columns]
        + [exp_totals[column] for column in metric_columns]
    )

In [69]:
def calculate_first_neib_visits(group_df, neib_stores=None, store_time=None, focal_store=None):
    """Sum travel-time-weighted visits over first-degree neighbours (no review data).

    Written for ``groupby('date').apply(...)``: ``group_df`` is the slice of rows for one
    group. For each neighbour store, ``visits_by_day`` from the first matching row is
    weighted by ``1 / t`` and by ``1 / exp(t)``, where ``t`` is the neighbour's
    ``Time_mins`` from the focal store. The review positions of the result are always 0,
    so the output has the same layout as the review-aware variant.

    Parameters
    ----------
    group_df : pandas.DataFrame
        Must contain ``PLACEKEY`` and ``visits_by_day``.
    neib_stores : iterable, optional
        Neighbour PLACEKEYs. Defaults to the notebook-level ``foc_store_first_degree_neibs``.
    store_time : pandas.DataFrame, optional
        Table with ``From_PLACEKEY``, ``To_PLACEKEY`` and ``Time_mins``. Defaults to the
        notebook-level ``foc_store_time``.
    focal_store : optional
        PLACEKEY of the focal store. Defaults to the notebook-level ``foc_store``.

    Returns
    -------
    pandas.Series
        Nine positional values: focal store, ``1 / t`` weighted visits, three zeros,
        ``1 / exp(t)`` weighted visits, three zeros.
    """
    # Fall back to the notebook-level state so existing `.apply(func)` calls keep working.
    if neib_stores is None:
        neib_stores = foc_store_first_degree_neibs
    if store_time is None:
        store_time = foc_store_time
    if focal_store is None:
        focal_store = foc_store

    inv_visits = 0
    inv_visits_exp = 0

    for neib_store in neib_stores:
        time_rows = store_time.loc[
            (store_time['From_PLACEKEY'] == focal_store) & (store_time['To_PLACEKEY'] == neib_store),
            'Time_mins',
        ]
        # A neighbour without a travel-time row used to raise IndexError; skip it instead.
        if time_rows.empty:
            continue

        visits = group_df.loc[group_df['PLACEKEY'] == neib_store, 'visits_by_day']
        if visits.empty or np.isnan(visits.values[0]):
            continue

        # NOTE(review): a travel time of 0 makes the 1/t weight infinite — confirm how
        # zero-minute neighbours should be weighted.
        neib_store_time = time_rows.values[0]
        inv_visits += (1 / neib_store_time) * visits.values[0]
        inv_visits_exp += (1 / np.exp(neib_store_time)) * visits.values[0]

    # Review metrics are not available for these brands, so their slots stay 0.
    return pd.Series([focal_store, inv_visits, 0, 0, 0, inv_visits_exp, 0, 0, 0])

In [70]:
# Re-display the neighbouring brand currently being processed.
unique_neib

'Orangetheory Fitness'

In [71]:
if unique_neib in brand_visit_local_reviews_list:
    first_neib_metrics = brand_visit_local_reviews[brand_visit_local_reviews['PLACEKEY'].isin(foc_store_first_degree_neibs)][['date', 'PLACEKEY', 'visits_by_day','localized_fb_reviews_60_days','localized_ig_reviews_60_days', 
                                                                                                                                'localized_tw_reviews_60_days']].groupby('date').apply(calculate_first_neib_mean_reviews_visits)
else:
    first_neib_metrics = brands_visits[brands_visits['PLACEKEY'].isin(foc_store_first_degree_neibs)][['date', 'PLACEKEY', 'visits_by_day']].groupby('date').apply(calculate_first_neib_visits)

  'localized_tw_reviews_60_days']].groupby('date').apply(calculate_first_neib_mean_reviews_visits)


In [72]:
# Give the nine positional columns (0..8) produced by the metric functions their names.
first_neib_metrics = first_neib_metrics.rename(columns=dict(enumerate([
    'focal_store',
    'inv_visits',
    'num_reviews_fb_neibmean',
    'num_reviews_ig_neibmean',
    'num_reviews_tw_neibmean',
    'inv_visits_exp',
    'num_reviews_fb_neibmean_exp',
    'num_reviews_ig_neibmean_exp',
    'num_reviews_tw_neibmean_exp',
])))
first_neib_metrics.head()

Unnamed: 0_level_0,focal_store,inv_visits,num_reviews_fb_neibmean,num_reviews_ig_neibmean,num_reviews_tw_neibmean,inv_visits_exp,num_reviews_fb_neibmean_exp,num_reviews_ig_neibmean_exp,num_reviews_tw_neibmean_exp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019-01-01,22f-222@62j-shx-fcq,4.092424,0.0,0.0,0.0,0.001298,0.0,0.0,0.0
2019-01-02,22f-222@62j-shx-fcq,18.14596,0.0,0.0,0.0,0.007412,0.0,0.0,0.0
2019-01-03,22f-222@62j-shx-fcq,21.456061,0.0,0.0,0.0,0.008796,0.0,0.0,0.0
2019-01-04,22f-222@62j-shx-fcq,16.95404,0.0,0.0,0.0,0.006558,0.0,0.0,0.0
2019-01-05,22f-222@62j-shx-fcq,6.266162,0.0,0.0,0.0,0.001989,0.0,0.0,0.0


In [73]:
# Preview the first rows of the visits table.
brands_visits.head()

Unnamed: 0,date,PLACEKEY,visits_by_day,spend_by_day,brand_visitation,lat,lon,brand_standard
0,2019-06-01,zzw-222@62j-sgj-q2k,5,0.0,Orangetheory Fitness,42.350592,-71.153024,orangetheory fitness
1,2019-06-02,zzw-222@62j-sgj-q2k,1,0.0,Orangetheory Fitness,42.350592,-71.153024,orangetheory fitness
2,2019-06-03,zzw-222@62j-sgj-q2k,6,859.0,Orangetheory Fitness,42.350592,-71.153024,orangetheory fitness
3,2019-06-04,zzw-222@62j-sgj-q2k,6,30.0,Orangetheory Fitness,42.350592,-71.153024,orangetheory fitness
4,2019-06-05,zzw-222@62j-sgj-q2k,8,193.0,Orangetheory Fitness,42.350592,-71.153024,orangetheory fitness


### Calculating metrics for second degree brands

Extracting all the unique store placekeys to calculate the distance on run time for second neighbor

In [74]:
# One row per PLACEKEY: the lat, lon and brand taken from the first visits row of each
# store (the visits table repeats these values on every daily row of a store).
unique_brand_placekeys = brands_visits[['PLACEKEY', 'lat', 'lon', 'brand_visitation']].groupby(['PLACEKEY'])[['lat', 'lon','brand_visitation']].apply(lambda x: x.iloc[0]).reset_index()
unique_brand_placekeys

Unnamed: 0,PLACEKEY,lat,lon,brand_visitation
0,222-222@628-zxy-rc5,42.061720,-72.631550,CVS
1,222-222@628-zxy-tn5,42.059753,-72.633141,McDonald's
2,222-222@628-zy4-89z,42.066704,-72.676556,United States Postal Service (USPS)
3,222-222@628-zz4-c5z,42.067520,-72.676677,Wendy's
4,222-222@628-zzp-f2k,42.091974,-72.584124,Hilton Garden Inn
...,...,...,...,...
10709,zzy-22n@62k-3k7-qs5,41.454268,-70.601068,The Black Dog
10710,zzy-22q@62k-3k5-fvf,41.457741,-70.557234,Budget Rent A Car
10711,zzy-22r@62k-3k7-qs5,41.454266,-70.601066,The Black Dog
10712,zzy-22v@62k-3k5-j35,41.457723,-70.558321,Phillips 66


In [75]:
def calculate_distance_km(df_row):
    """Return one (source, destination, brand, distance) record for a candidate store row.

    Reads the notebook-level ``lat_src``, ``lon_src`` and ``first_deg_neib`` that the loop
    below sets for the first-degree neighbour currently being processed.

    Parameters
    ----------
    df_row : pandas.Series
        A row of ``unique_brand_placekeys`` with ``lat``, ``lon``, ``PLACEKEY`` and
        ``brand_visitation``.

    Returns
    -------
    pandas.Series
        ``[first_deg_neib, destination PLACEKEY, destination brand, geodesic distance in km]``.
    """
    lat_dst = df_row['lat']
    lon_dst = df_row['lon']
    placekey_dst = df_row['PLACEKEY']
    dst_brand = df_row['brand_visitation']

    dist_km = geodesic((lat_src, lon_src), (lat_dst, lon_dst)).km

    return pd.Series([first_deg_neib, placekey_dst, dst_brand, dist_km])


# Build one frame per first-degree neighbour and concatenate once at the end;
# concatenating inside the loop re-copies the accumulated frame on every iteration.
second_neib_columns = {0: 'SRC_PLACEKEY', 1: 'DST_PLACEKEY', 2: 'DST_BRAND', 3: 'Distance_Km'}
second_neib_frames = []
for first_deg_neib in foc_store_first_degree_neibs:
    # Coordinates of the first-degree neighbour; read by calculate_distance_km via globals.
    lat_src, lon_src = unique_brand_placekeys[unique_brand_placekeys['PLACEKEY'] == first_deg_neib][['lat', 'lon']].values[0]
    second_neib_frames.append(
        unique_brand_placekeys.apply(calculate_distance_km, axis=1).rename(columns=second_neib_columns)
    )

if second_neib_frames:
    second_neib_df = pd.concat(second_neib_frames, axis=0)
else:
    # No first-degree neighbours: keep an empty frame with the expected columns
    # instead of None, so downstream filters still work.
    second_neib_df = pd.DataFrame(columns=list(second_neib_columns.values()))

In [76]:
# Keep only candidate second-degree neighbours: drop every store whose brand equals the
# first-degree neighbouring brand (`unique_neib`) and keep stores within 16.0934 km of a
# first-degree neighbour store.
# NOTE(review): the original comment said "less than 1 mile", but 16.0934 km is 10 miles
# (1 mile is about 1.609 km) — confirm which radius is intended.
second_neib_df = second_neib_df[(second_neib_df['DST_BRAND'] != unique_neib) &
                                (second_neib_df['Distance_Km']<=16.0934)]
second_neib_df

Unnamed: 0,SRC_PLACEKEY,DST_PLACEKEY,DST_BRAND,Distance_Km
219,zzw-222@62j-sgj-q2k,222-222@62j-qp2-brk,FedEx,10.823571
275,zzw-222@62j-sgj-q2k,222-222@62j-sbt-389,Supercuts,13.092979
276,zzw-222@62j-sgj-q2k,222-222@62j-sbv-pn5,Bertucci's,12.424380
277,zzw-222@62j-sgj-q2k,222-222@62j-sbz-tvz,Extended Stay America,15.638401
278,zzw-222@62j-sgj-q2k,222-222@62j-sbz-ty9,AMC Entertainment,15.535075
...,...,...,...,...
10701,zzw-223@62j-sf9-g8v,zzy-22g@62j-shz-vs5,Pepper Palace,14.392946
10702,zzw-223@62j-sf9-g8v,zzy-22g@62j-sj3-mhq,Saint Laurent,14.476479
10703,zzw-223@62j-sf9-g8v,zzy-22g@62j-sxw-6c5,Hot Topic,11.325673
10704,zzw-223@62j-sf9-g8v,zzy-22h@62j-sxw-6hq,Hannoush Jewelers,11.269029


In [77]:
# Second-degree candidates whose brand has local-review data.
second_neib_df_local_reviews = second_neib_df.loc[
    second_neib_df['DST_BRAND'].isin(brand_visit_local_reviews_list)
]
second_neib_df_local_reviews

Unnamed: 0,SRC_PLACEKEY,DST_PLACEKEY,DST_BRAND,Distance_Km
219,zzw-222@62j-sgj-q2k,222-222@62j-qp2-brk,FedEx,10.823571
275,zzw-222@62j-sgj-q2k,222-222@62j-sbt-389,Supercuts,13.092979
276,zzw-222@62j-sgj-q2k,222-222@62j-sbv-pn5,Bertucci's,12.424380
277,zzw-222@62j-sgj-q2k,222-222@62j-sbz-tvz,Extended Stay America,15.638401
279,zzw-222@62j-sgj-q2k,222-222@62j-sbz-vxq,Swarovski,15.551711
...,...,...,...,...
10697,zzw-223@62j-sf9-g8v,zzy-22d@62j-sxw-6hq,Spencer's,11.319749
10698,zzw-223@62j-sf9-g8v,zzy-22f@62j-shz-vs5,Victoria's Secret,14.468369
10699,zzw-223@62j-sf9-g8v,zzy-22f@62j-sj3-mhq,Lucky Brand,14.558010
10703,zzw-223@62j-sf9-g8v,zzy-22g@62j-sxw-6c5,Hot Topic,11.325673


In [78]:
# Distinct source (first-degree) and destination (second-degree) stores among all candidates.
first_deg_stores = second_neib_df['SRC_PLACEKEY'].unique().tolist()
second_deg_stores = second_neib_df['DST_PLACEKEY'].unique().tolist()

In [79]:
# Same as above, restricted to candidates whose brand has local-review data.
first_deg_stores_local_reviews = second_neib_df_local_reviews['SRC_PLACEKEY'].unique().tolist()
second_deg_stores_local_reviews = second_neib_df_local_reviews['DST_PLACEKEY'].unique().tolist()

In [80]:
def calculate_second_neib_visits(group_df, first_degree_stores=None, focal_distances=None,
                                 second_degree_edges=None, focal_store=None):
    """Sum distance-weighted visits of second-degree neighbours for one group of rows.

    Written for ``groupby(...).apply(...)``. For each first-degree store, the visits of
    its first three second-degree rows are weighted by the inverse (and inverse
    exponential) of the first-to-second-degree distance. That partial sum is then
    weighted by the inverse (and inverse exponential) of the focal-to-first-degree
    distance and accumulated.

    Parameters
    ----------
    group_df : pandas.DataFrame
        Must contain ``PLACEKEY`` and ``visits_by_day``.
    first_degree_stores : iterable, optional
        First-degree PLACEKEYs. Defaults to the notebook-level ``first_deg_stores``.
    focal_distances : pandas.DataFrame, optional
        Table with ``From_PLACEKEY``, ``To_PLACEKEY`` and ``Distance_km``. Defaults to
        the notebook-level ``foc_store_distance``.
    second_degree_edges : pandas.DataFrame, optional
        Table with ``SRC_PLACEKEY``, ``DST_PLACEKEY`` and ``Distance_Km``. Defaults to
        the notebook-level ``second_neib_df``.
    focal_store : optional
        PLACEKEY of the focal store. Defaults to the notebook-level ``foc_store``.

    Returns
    -------
    pandas.Series
        ``[focal_store, inverse-distance weighted visits, inverse-exp weighted visits]``.
    """
    # Fall back to the notebook-level state so existing `.apply(func)` calls keep working.
    if first_degree_stores is None:
        first_degree_stores = first_deg_stores
    if focal_distances is None:
        focal_distances = foc_store_distance
    if second_degree_edges is None:
        second_degree_edges = second_neib_df
    if focal_store is None:
        focal_store = foc_store

    inv_visits_secondneibmean = 0
    inv_visits_secondneibmean_exp = 0

    for first_deg_store in first_degree_stores:
        dist_rows = focal_distances.loc[
            (focal_distances['From_PLACEKEY'] == focal_store) & (focal_distances['To_PLACEKEY'] == first_deg_store),
            'Distance_km',
        ]
        # A first-degree store without a focal distance row used to raise IndexError; skip it.
        if dist_rows.empty:
            continue
        first_deg_store_dist = dist_rows.values[0]

        # NOTE(review): this takes the first three rows in table order, not the three
        # nearest stores — confirm whether sorting by Distance_Km is intended.
        second_deg_neighbors = second_degree_edges.loc[
            second_degree_edges['SRC_PLACEKEY'] == first_deg_store,
            ['DST_PLACEKEY', 'Distance_Km'],
        ].iloc[0:3]

        inv_visits = 0
        inv_visits_exp = 0
        for second_neib_store, second_neib_store_dist in zip(
                second_deg_neighbors['DST_PLACEKEY'], second_deg_neighbors['Distance_Km']):
            visits = group_df.loc[group_df['PLACEKEY'] == second_neib_store, 'visits_by_day']
            if visits.empty or np.isnan(visits.values[0]):
                continue
            # NOTE(review): a distance of exactly 0 (co-located stores) makes this
            # inverse-distance weight undefined — confirm how such pairs should be handled.
            inv_visits += (1 / second_neib_store_dist) * visits.values[0]
            inv_visits_exp += (1 / np.exp(second_neib_store_dist)) * visits.values[0]

        inv_visits_secondneibmean += (1 / first_deg_store_dist) * inv_visits
        inv_visits_secondneibmean_exp += (1 / np.exp(first_deg_store_dist)) * inv_visits_exp

    return pd.Series([focal_store, inv_visits_secondneibmean, inv_visits_secondneibmean_exp])

In [81]:
def calculate_second_neib_mean_reviews(group_df):
    """Distance-weighted localized review totals over second-degree neighbors.

    Written to be passed to ``groupby('date').apply``: ``group_df`` is one
    group and must expose ``PLACEKEY`` plus the three
    ``localized_{fb,ig,tw}_reviews_60_days`` columns.

    Reads the notebook globals ``foc_store``, ``first_deg_stores_local_reviews``,
    ``foc_store_distance`` and ``second_neib_df_local_reviews``.

    For every first-degree store, the first three rows listed for it in
    ``second_neib_df_local_reviews`` are used. Each neighbor's review value is
    weighted by ``1/d`` (and, separately, by ``1/exp(d)``) with ``d`` the
    neighbor distance; the per-store sum is then weighted the same way by the
    focal-store -> first-degree-store distance and accumulated. Neighbors that
    have no row in ``group_df``, or a missing value for a platform, contribute
    nothing for that platform.

    Returns
    -------
    pd.Series
        ``[foc_store, fb, ig, tw, fb_exp, ig_exp, tw_exp]`` where the first
        three numbers use inverse-distance weights and the last three use
        exponential-decay weights.
    """
    # The three platforms are handled identically, so they share one code path.
    review_columns = (
        'localized_fb_reviews_60_days',
        'localized_ig_reviews_60_days',
        'localized_tw_reviews_60_days',
    )
    platform_slots = range(len(review_columns))

    # Running totals across all first-degree stores, one slot per platform.
    inverse_totals = [0 for _ in platform_slots]
    exponential_totals = [0 for _ in platform_slots]

    # Loop-invariant part of the focal -> first-degree distance lookup.
    starts_at_focal = foc_store_distance['From_PLACEKEY'] == foc_store

    for first_deg_store in first_deg_stores_local_reviews:
        ends_at_first_deg = foc_store_distance['To_PLACEKEY'] == first_deg_store
        first_deg_store_dist = foc_store_distance[starts_at_focal & ends_at_first_deg]['Distance_km'].values[0]

        # Only the first three neighbor rows of this store take part.
        second_deg_neighbors = second_neib_df_local_reviews[
            second_neib_df_local_reviews['SRC_PLACEKEY'] == first_deg_store
        ][['DST_PLACEKEY', 'Distance_Km']].iloc[0:3]

        inverse_sums = [0 for _ in platform_slots]
        exponential_sums = [0 for _ in platform_slots]

        for _, row in second_deg_neighbors.iterrows():
            second_neib_store_dist = row['Distance_Km']

            # One lookup per neighbor instead of one per platform.
            store_rows = group_df[group_df['PLACEKEY'] == row['DST_PLACEKEY']]
            if len(store_rows) == 0:
                continue

            for slot, column in enumerate(review_columns):
                reviews = store_rows[column].values[0]
                # pd.notna also copes with None / pd.NA, unlike np.isnan.
                if pd.notna(reviews):
                    inverse_sums[slot] += (1/second_neib_store_dist) * reviews
                    exponential_sums[slot] += (1/np.exp(second_neib_store_dist)) * reviews

        for slot in platform_slots:
            inverse_totals[slot] += (1/first_deg_store_dist) * inverse_sums[slot]
            exponential_totals[slot] += (1/np.exp(first_deg_store_dist)) * exponential_sums[slot]

    return pd.Series([foc_store, *inverse_totals, *exponential_totals])

In [82]:
# Per-date second-degree neighbor visit metrics for the focal store.
# The value columns are selected *after* groupby so that the grouping column ('date') is not
# handed to the applied function; recent pandas versions warn when apply operates on grouping
# columns. calculate_second_neib_visits only reads 'PLACEKEY' and 'visits_by_day'.
second_neib_metrics_visits = (
    brands_visits[brands_visits['PLACEKEY'].isin(second_deg_stores)]
    .groupby('date')[['PLACEKEY', 'visits_by_day']]
    .apply(calculate_second_neib_visits)
    # The applied function returns an unlabeled Series, so the columns arrive as 0, 1, 2.
    .rename(columns={0: 'focal_store', 1: 'inv_visits_secondneibmean', 2: 'inv_visits_secondneibmean_exp'})
    .reset_index()
)
second_neib_metrics_visits.head()

  second_neib_metrics_visits = brands_visits[brands_visits['PLACEKEY'].isin(second_deg_stores)][['date', 'PLACEKEY', 'visits_by_day']].groupby('date').apply(calculate_second_neib_visits)


Unnamed: 0,date,focal_store,inv_visits_secondneibmean,inv_visits_secondneibmean_exp
0,2019-01-01,22f-222@62j-shx-fcq,2.323285,2e-06
1,2019-01-02,22f-222@62j-shx-fcq,2.967207,3e-06
2,2019-01-03,22f-222@62j-shx-fcq,2.530818,2e-06
3,2019-01-04,22f-222@62j-shx-fcq,3.136467,3e-06
4,2019-01-05,22f-222@62j-shx-fcq,3.117654,2e-06


In [83]:
# Per-date second-degree neighbor local-review metrics for the focal store.
# The value columns are selected *after* groupby so that the grouping column ('date') is not
# handed to the applied function; recent pandas versions warn when apply operates on grouping
# columns. calculate_second_neib_mean_reviews only reads 'PLACEKEY' and the three review columns.
second_neib_metrics_local_reviews = (
    brand_visit_local_reviews[brand_visit_local_reviews['PLACEKEY'].isin(second_deg_stores_local_reviews)]
    .groupby('date')[['PLACEKEY', 'localized_fb_reviews_60_days', 'localized_ig_reviews_60_days',
                      'localized_tw_reviews_60_days']]
    .apply(calculate_second_neib_mean_reviews)
    # The applied function returns an unlabeled Series, so the columns arrive as 0..6.
    .rename(columns={0: 'focal_store', 1: 'num_reviews_fb_secondneibmean', 2: 'num_reviews_ig_secondneibmean',
                     3: 'num_reviews_tw_secondneibmean', 4: 'num_reviews_fb_secondneibmean_exp',
                     5: 'num_reviews_ig_secondneibmean_exp', 6: 'num_reviews_tw_secondneibmean_exp'})
    .reset_index()
)
second_neib_metrics_local_reviews.head()

  'localized_tw_reviews_60_days']].groupby('date').apply(calculate_second_neib_mean_reviews)


Unnamed: 0,date,focal_store,num_reviews_fb_secondneibmean,num_reviews_ig_secondneibmean,num_reviews_tw_secondneibmean,num_reviews_fb_secondneibmean_exp,num_reviews_ig_secondneibmean_exp,num_reviews_tw_secondneibmean_exp
0,2019-01-01,22f-222@62j-shx-fcq,0.0,0.0,0.0,0.0,0.0,0.0
1,2019-01-02,22f-222@62j-shx-fcq,0.0,0.0,0.0,0.0,0.0,0.0
2,2019-01-03,22f-222@62j-shx-fcq,0.0,0.0,0.0,0.0,0.0,0.0
3,2019-01-04,22f-222@62j-shx-fcq,0.0,0.0,0.0,0.0,0.0,0.0
4,2019-01-05,22f-222@62j-shx-fcq,0.0,0.0,0.0,0.0,0.0,0.0


In [84]:
# Join the visit-based and review-based second-degree metrics; only (date, focal_store)
# pairs present in both tables are kept.
second_neib_metrics = second_neib_metrics_visits.merge(
    second_neib_metrics_local_reviews,
    how='inner',
    on=['date', 'focal_store'],
)
second_neib_metrics.head()

Unnamed: 0,date,focal_store,inv_visits_secondneibmean,inv_visits_secondneibmean_exp,num_reviews_fb_secondneibmean,num_reviews_ig_secondneibmean,num_reviews_tw_secondneibmean,num_reviews_fb_secondneibmean_exp,num_reviews_ig_secondneibmean_exp,num_reviews_tw_secondneibmean_exp
0,2019-01-01,22f-222@62j-shx-fcq,2.323285,2e-06,0.0,0.0,0.0,0.0,0.0,0.0
1,2019-01-02,22f-222@62j-shx-fcq,2.967207,3e-06,0.0,0.0,0.0,0.0,0.0,0.0
2,2019-01-03,22f-222@62j-shx-fcq,2.530818,2e-06,0.0,0.0,0.0,0.0,0.0,0.0
3,2019-01-04,22f-222@62j-shx-fcq,3.136467,3e-06,0.0,0.0,0.0,0.0,0.0,0.0
4,2019-01-05,22f-222@62j-shx-fcq,3.117654,2e-06,0.0,0.0,0.0,0.0,0.0,0.0


### Extracting local review metrics for the specific focal store

In [104]:
# Local-review rows of the focal store only, restricted to the columns used downstream
focal_store_columns = [
    'date', 'PLACEKEY', 'brand_visitation', 'visits_past_60_days',
    'localized_fb_reviews_60_days', 'localized_ig_reviews_60_days', 'localized_tw_reviews_60_days',
]
is_focal_store = brand_visit_local_reviews['PLACEKEY'] == foc_store
focal_store_information = brand_visit_local_reviews[is_focal_store][focal_store_columns]

In [105]:
# Attach the focal store's average spatial and travel distances as two new columns
focal_store_information = focal_store_information.assign(
    spatial_distance_km=spatial_distance_avg,
    travel_distance_min=travel_distance_avg,
)

In [106]:
# Attach first- and second-degree neighbor metrics by date. Each metrics table carries its own
# 'focal_store' column, which is dropped right after its merge; the result is ordered by date
# and remaining missing values are replaced with 0.
focal_store_information = (
    focal_store_information
    .merge(first_neib_metrics, how='inner', on=['date'])
    .drop(columns='focal_store')
    .merge(second_neib_metrics, how='inner', on=['date'])
    .drop(columns='focal_store')
    .sort_values('date')
    .fillna(0)
)

In [110]:
# Preview the first five rows of the assembled focal-store table
focal_store_information.head(n=5)

Unnamed: 0,date,PLACEKEY,brand_visitation,visits_past_60_days,localized_fb_reviews_60_days,localized_ig_reviews_60_days,localized_tw_reviews_60_days,spatial_distance_km,travel_distance_min,inv_visits,...,num_reviews_ig_neibmean_exp,num_reviews_tw_neibmean_exp,inv_visits_secondneibmean,inv_visits_secondneibmean_exp,num_reviews_fb_secondneibmean,num_reviews_ig_secondneibmean,num_reviews_tw_secondneibmean,num_reviews_fb_secondneibmean_exp,num_reviews_ig_secondneibmean_exp,num_reviews_tw_secondneibmean_exp
0,2019-06-01,22f-222@62j-shx-fcq,Sephora,2920.0,0.260525,471.749962,0.140282,8.074823,16.714286,8.491414,...,0.034701,0.000157,4.740543,4e-06,0.656824,1.763014,0.016851,1.623136e-07,4.14953e-07,5.594145e-09
1,2019-06-02,22f-222@62j-shx-fcq,Sephora,2960.0,0.040382,603.831924,0.161528,8.074823,16.714286,6.507071,...,0.00091,6.3e-05,3.780383,3e-06,0.393202,0.989201,0.015314,9.268239e-08,2.382469e-07,3.00787e-09
2,2019-06-03,22f-222@62j-shx-fcq,Sephora,2976.0,0.0,240.186623,1.49952,8.074823,16.714286,24.071212,...,0.010059,0.000219,4.15469,4e-06,0.863331,2.250215,0.020142,2.508454e-07,5.716898e-07,5.745106e-09
3,2019-06-04,22f-222@62j-shx-fcq,Sephora,2971.0,0.888904,314.44979,1.515177,8.074823,16.714286,24.357576,...,0.007584,0.000125,3.841029,3e-06,1.584309,1.413015,0.021184,4.945317e-07,4.210581e-07,1.007626e-08
4,2019-06-05,22f-222@62j-shx-fcq,Sephora,2975.0,13.025057,356.517891,2.937221,8.074823,16.714286,30.594444,...,0.014708,0.00025,3.694549,3e-06,1.137817,1.565798,0.038435,4.066385e-07,4.930174e-07,2.685654e-08


In [109]:
# Stack the focal-store table on top of itself (row-wise); the result is only displayed, not stored.
# NOTE(review): this duplicates every row and looks like a scratch check of row-wise
# concatenation - confirm whether the cell is still needed.
pd.concat([focal_store_information] * 2, axis=0)

Unnamed: 0,date,PLACEKEY,brand_visitation,visits_past_60_days,localized_fb_reviews_60_days,localized_ig_reviews_60_days,localized_tw_reviews_60_days,spatial_distance_km,travel_distance_min,inv_visits,...,num_reviews_ig_neibmean_exp,num_reviews_tw_neibmean_exp,inv_visits_secondneibmean,inv_visits_secondneibmean_exp,num_reviews_fb_secondneibmean,num_reviews_ig_secondneibmean,num_reviews_tw_secondneibmean,num_reviews_fb_secondneibmean_exp,num_reviews_ig_secondneibmean_exp,num_reviews_tw_secondneibmean_exp
0,2019-06-01,22f-222@62j-shx-fcq,Sephora,2920.0,0.260525,471.749962,0.140282,8.074823,16.714286,8.491414,...,0.034701,0.000157,4.740543,0.000004,0.656824,1.763014,0.016851,1.623136e-07,4.149530e-07,5.594145e-09
1,2019-06-02,22f-222@62j-shx-fcq,Sephora,2960.0,0.040382,603.831924,0.161528,8.074823,16.714286,6.507071,...,0.000910,0.000063,3.780383,0.000003,0.393202,0.989201,0.015314,9.268239e-08,2.382469e-07,3.007870e-09
2,2019-06-03,22f-222@62j-shx-fcq,Sephora,2976.0,0.000000,240.186623,1.499520,8.074823,16.714286,24.071212,...,0.010059,0.000219,4.154690,0.000004,0.863331,2.250215,0.020142,2.508454e-07,5.716898e-07,5.745106e-09
3,2019-06-04,22f-222@62j-shx-fcq,Sephora,2971.0,0.888904,314.449790,1.515177,8.074823,16.714286,24.357576,...,0.007584,0.000125,3.841029,0.000003,1.584309,1.413015,0.021184,4.945317e-07,4.210581e-07,1.007626e-08
4,2019-06-05,22f-222@62j-shx-fcq,Sephora,2975.0,13.025057,356.517891,2.937221,8.074823,16.714286,30.594444,...,0.014708,0.000250,3.694549,0.000003,1.137817,1.565798,0.038435,4.066385e-07,4.930174e-07,2.685654e-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
359,2019-09-26,22f-222@62j-shx-fcq,Sephora,4103.0,2.213264,453.936009,1.432112,8.074823,16.714286,27.712626,...,0.008245,0.000190,3.235006,0.000003,0.353875,1.727271,0.164260,8.795016e-08,4.335150e-07,1.446142e-08
360,2019-09-27,22f-222@62j-shx-fcq,Sephora,4058.0,5.382134,2088.397233,0.947256,8.074823,16.714286,23.551515,...,0.000064,0.000763,4.314808,0.000004,1.178193,1.330206,0.022079,3.754920e-07,2.810594e-07,4.218458e-09
361,2019-09-28,22f-222@62j-shx-fcq,Sephora,4066.0,0.387743,829.424881,0.646238,8.074823,16.714286,11.797475,...,0.009377,0.002003,3.972602,0.000003,0.576251,1.332856,0.035709,1.356046e-07,2.706052e-07,5.330718e-09
362,2019-09-29,22f-222@62j-shx-fcq,Sephora,4099.0,0.129457,499.488625,0.906200,8.074823,16.714286,8.132828,...,0.013149,0.000190,3.969941,0.000003,0.890105,1.189965,0.013742,3.318988e-07,2.520394e-07,3.194848e-09
