In [1]:
import pickle
import pandas as pd
import numpy as np
from tqdm import tqdm

### Reading all the focal brands

In [2]:
# Unpickle the focal-brand table and show it in full as the cell output.
with open('top_brands.pickle', 'rb') as brands_file:
    focal_brands = pickle.load(brands_file)

focal_brands

Unnamed: 0,SUB_CATEGORY,BRANDS,Total_Spend,Total_Visits,Unique_PLACEKEY_Count
0,"Cosmetics, Beauty Supplies, and Perfume Stores",Sephora,1913415.32,702927,15
1,"Cosmetics, Beauty Supplies, and Perfume Stores",ULTA Beauty,1339844.15,404546,20
2,Full-Service Restaurants,Olive Garden,1058844.15,125651,15
3,Full-Service Restaurants,The Cheesecake Factory,974090.35,473745,5
4,Warehouse Clubs and Supercenters,Target,41977753.97,2053582,50
5,Warehouse Clubs and Supercenters,Walmart,33473235.0,2815949,47
6,Women's Clothing Stores,Anthropologie,840473.95,327685,7
7,Women's Clothing Stores,Victoria's Secret,662200.98,295606,14


### Reading the social data

In [3]:
# Unpickle the social data and preview its first rows.
with open('social_data.pickle', 'rb') as social_file:
    social_data = pickle.load(social_file)

social_data.head()

Unnamed: 0,id,num_review_fb,num_review_ig,num_review_tw,date,brand
1406,5308,866.0,1005.0,6.0,2019-01-01,Acura
1416,5343,2.0,1087.0,0.0,2019-01-01,Aldo
1422,5352,2.0,1.0,0.0,2019-01-01,Allen Edmonds
1434,5381,1136.0,6.0,16.0,2019-01-01,Amtrak
1436,5387,551.0,2926.0,2.0,2019-01-01,Anthropologie


### Reading the local review data for all brands that have social data

In [4]:
# Unpickle the per-store visit / local-review table and preview its first rows.
with open('brand_visit_local_reviews.pickle', 'rb') as reviews_file:
    brand_visit_local_reviews = pickle.load(reviews_file)

brand_visit_local_reviews.head()

Unnamed: 0,date,PLACEKEY,visits_by_day,spend_by_day,brand_visitation,lat,lon,brand_standard,ID,Name,...,Classification,Status,Name_Standard,visits_past_60_days,visits_past_3_days,total_visits_across_stores_60_days,proportion_of_visits_60_days,localized_fb_reviews_60_days,localized_ig_reviews_60_days,localized_tw_reviews_60_days
47220,2019-06-01,223-222@62j-ptj-6c5,44,1878.06,Acura,42.547978,-70.948969,acura,5308.0,Acura,...,Automobile Makes,Honda Motor Company,acura,1692.0,99.0,12432.0,0.1361,4.219112,112.282819,0.680502
47221,2019-06-02,223-222@62j-ptj-6c5,24,0.0,Acura,42.547978,-70.948969,acura,5308.0,Acura,...,Automobile Makes,Honda Motor Company,acura,1707.0,117.0,12466.0,0.136932,3.560244,1.643189,0.54773
47222,2019-06-03,223-222@62j-ptj-6c5,27,196.55,Acura,42.547978,-70.948969,acura,5308.0,Acura,...,Automobile Makes,Honda Motor Company,acura,1696.0,99.0,12409.0,0.136675,4.510275,70.524297,0.5467
47223,2019-06-04,223-222@62j-ptj-6c5,42,0.0,Acura,42.547978,-70.948969,acura,5308.0,Acura,...,Automobile Makes,Honda Motor Company,acura,1697.0,95.0,12474.0,0.136043,9.250922,120.806157,0.816258
47224,2019-06-05,223-222@62j-ptj-6c5,32,2209.06,Acura,42.547978,-70.948969,acura,5308.0,Acura,...,Automobile Makes,Honda Motor Company,acura,1710.0,93.0,12503.0,0.136767,0.820603,1.094137,0.95737


### Read the travel times

In [5]:
# Unpickle the travel-time mapping; it is unpacked into keys and values further down.
with open('data/travel_time.pkl', 'rb') as travel_time_file:
    travel_time_dict = pickle.load(travel_time_file)

In [6]:
# Minutes represented by each unit word that may follow a number in a duration string.
_MINUTES_PER_UNIT = {
    'min': 1, 'mins': 1, 'minute': 1, 'minutes': 1,
    'hour': 60, 'hours': 60, 'hr': 60, 'hrs': 60,
    'day': 1440, 'days': 1440,
}


def _duration_to_minutes(duration_text):
    """Convert a duration string to a whole number of minutes.

    Strings made of "<number> <unit>" pairs (e.g. '20 mins', '1 hour 5 mins')
    are summed unit by unit. Any other layout falls back to the leading
    integer, so single-unit minute strings give exactly the same result as
    taking ``int(text.split(' ')[0])``.

    NOTE(review): assumes the pickled values are human-readable durations
    expressed in minutes/hours/days -- confirm against the file that produced
    'data/travel_time.pkl'.
    """
    tokens = duration_text.split(' ')
    if len(tokens) >= 2 and len(tokens) % 2 == 0:
        try:
            return sum(
                int(amount) * _MINUTES_PER_UNIT[unit.lower()]
                for amount, unit in zip(tokens[0::2], tokens[1::2])
            )
        except (KeyError, ValueError):
            # Unrecognised unit or non-integer amount: use the leading number below.
            pass
    return int(tokens[0])


# Each key is indexed as a pair: element 0 is the origin, element 1 the destination.
travel_time_keys = list(travel_time_dict.keys())
from_keys = [key[0] for key in travel_time_keys]
to_keys = [key[1] for key in travel_time_keys]

# Without unit-aware parsing, '1 hour 5 mins' would be stored as 1 instead of 65.
time_minutes = [_duration_to_minutes(time_inst) for time_inst in travel_time_dict.values()]

In [7]:
# One row per (origin, destination) pair together with its travel time in minutes.
travel_time = pd.DataFrame(
    {
        'From_PLACEKEY': from_keys,
        'To_PLACEKEY': to_keys,
        'Time_mins': time_minutes,
    }
)
travel_time.head()

Unnamed: 0,From_PLACEKEY,To_PLACEKEY,Time_mins
0,zzw-224@62k-p96-s5z,zzw-223@62k-ns4-pn5,20
1,zzw-224@62k-p96-s5z,zzy-222@62k-pd8-975,20
2,zzw-224@62k-p96-s5z,237-222@62k-p8v-z4v,16
3,zzw-224@62k-p96-s5z,222-222@62k-p8v-2p9,12
4,zzw-224@62k-p96-s5z,229-222@62k-p76-d9z,14


### Read the distance results

In [8]:
# Read the distance table from CSV and preview its first rows.
distance_results_path = 'data/distance_results.csv'
distance_results = pd.read_csv(distance_results_path)
distance_results.head()

Unnamed: 0,From_PLACEKEY,To_PLACEKEY,Distance_km
0,zzw-223@62j-pth-zs5,zzw-222@62j-sgj-q2k,27.251127
1,zzw-223@62j-pth-zs5,223-222@62k-phk-vfz,36.776232
2,zzw-223@62j-pth-zs5,222-223@62k-r7z-m49,53.38983
3,zzw-223@62j-pth-zs5,zzw-223@62j-pth-zs5,0.0
4,zzw-223@62j-pth-zs5,22c-222@62k-pq5-grk,100.139331


### Read the first and second neighbors for each store of all focal brands

In [None]:
# Unpickle the neighbor table, then replace its positional column labels
# (0, 1, 2) with descriptive names before previewing it.
with open('focal_brands_first_second_degree_neighbors.pickle', 'rb') as neighbors_file:
    focal_stores_first_second_neib = pickle.load(neighbors_file)

neighbor_column_names = {
    0: 'foc_brand_name',
    1: 'first_neighbor',
    2: 'second_neighbor',
}
focal_stores_first_second_neib = focal_stores_first_second_neib.rename(columns=neighbor_column_names)
focal_stores_first_second_neib.head()

### Measure the travel time from each focal store to its first-degree neighbors

In [11]:
def get_first_neighbor_time(neighbor_row):
    """Look up the travel time from a focal store to each of its first neighbors.

    Parameters
    ----------
    neighbor_row : pandas.Series
        One row of the neighbor table. The row label (``neighbor_row.name``)
        is used as the origin ``From_PLACEKEY`` and
        ``neighbor_row['first_neighbor']`` holds the destination placekeys.

    Returns
    -------
    list
        One entry per neighbor, in the same order: the ``Time_mins`` of the
        first matching row in the module-level ``travel_time`` frame, or
        ``np.nan`` when that (origin, destination) pair is not present.
    """
    focal_store = neighbor_row.name
    first_neighbors_list = neighbor_row['first_neighbor']

    # Filter the travel-time table once for this focal store instead of
    # re-scanning the whole frame for every neighbor.
    from_focal = travel_time[travel_time['From_PLACEKEY'] == focal_store]

    # setdefault keeps the first occurrence of a destination, matching the
    # "first matching row" semantics of taking .iloc[0] on the filtered frame.
    minutes_by_destination = {}
    for destination, minutes in zip(from_focal['To_PLACEKEY'].to_numpy(),
                                    from_focal['Time_mins'].to_numpy()):
        minutes_by_destination.setdefault(destination, minutes)

    # A dict lookup with an explicit NaN default replaces the former bare
    # `except:`, which also swallowed KeyboardInterrupt and genuine bugs.
    return [
        minutes_by_destination.get(neighbor, np.nan)
        for neighbor in tqdm(first_neighbors_list)
    ]

In [None]:
# Compute the list of travel times for every row, then store it as a new column.
first_neighbor_times = focal_stores_first_second_neib.apply(
    get_first_neighbor_time,
    axis=1,
)
focal_stores_first_second_neib.loc[:, 'first_neighbor_time'] = first_neighbor_times

### Measure the travel time from each first-degree neighbor to its second-degree neighbors

In [16]:
def get_second_neighbor_time(neighbor_row):
    """Look up travel times from each first neighbor to its second neighbors.

    Parameters
    ----------
    neighbor_row : pandas.Series
        One row of the neighbor table. ``neighbor_row['second_neighbor']`` is
        a dict mapping each first-neighbor placekey to the collection of its
        second-neighbor placekeys.

    Returns
    -------
    dict
        Maps each first-neighbor placekey to a list with one entry per second
        neighbor, in order: the ``Time_mins`` of the first matching row in the
        module-level ``travel_time`` frame, or ``np.nan`` when that
        (origin, destination) pair is not present.
    """
    second_neib_dict = neighbor_row['second_neighbor']
    second_neib_time_dict = {}

    for first_neib in tqdm(list(second_neib_dict.keys())):
        # Filter the travel-time table once per first neighbor instead of
        # re-scanning the whole frame for every second neighbor.
        from_first = travel_time[travel_time['From_PLACEKEY'] == first_neib]

        # setdefault keeps the first occurrence of a destination, matching the
        # "first matching row" semantics of taking .iloc[0] on the filtered frame.
        minutes_by_destination = {}
        for destination, minutes in zip(from_first['To_PLACEKEY'].to_numpy(),
                                        from_first['Time_mins'].to_numpy()):
            minutes_by_destination.setdefault(destination, minutes)

        # A dict lookup with an explicit NaN default replaces the former bare
        # `except:`, which also swallowed KeyboardInterrupt and genuine bugs.
        second_neib_time_dict[first_neib] = [
            minutes_by_destination.get(second_neib, np.nan)
            for second_neib in second_neib_dict[first_neib]
        ]

    return second_neib_time_dict

In [None]:
# Compute the per-first-neighbor travel-time dict for every row, then store it as a new column.
second_neighbor_times = focal_stores_first_second_neib.apply(
    get_second_neighbor_time,
    axis=1,
)
focal_stores_first_second_neib.loc[:, 'second_neighbor_time'] = second_neighbor_times

In [None]:
# Persist the neighbor table, now including the travel-time columns, as a pickle.
with open('focal_brands_first_second_degree_neighbors_time.pickle', 'wb') as output_file:
    pickle.dump(focal_stores_first_second_neib, output_file)