In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import pickle
import datetime
import numpy as np
import random
import seaborn as sns

Specifying all the focal brands

In [2]:
focal_brands = ['Sephora',
 'ULTA Beauty',
 'Olive Garden',
 'The Cheesecake Factory',
 'Target',
 'Walmart',
 'Anthropologie',
 "Victoria's Secret"]

Reading the social brands catalog to get visits later for each store

In [3]:
brands_visits = pd.read_csv('../data/revision_visits_revenue_2019.csv')
brands_visits['brand_standard'] = brands_visits['brand'].apply(lambda x: x.strip().lower()) # For comparison with catalog.tsv
brands_visits['date'] = brands_visits['date'].apply(lambda x: datetime.datetime.strptime(x, '%Y-%m-%d').date())
brands_visits = brands_visits.rename(columns={'brand': 'brand_visitation'})
brands_visits.head()

Unnamed: 0,date,PLACEKEY,visits_by_day,spend_by_day,brand_visitation,lat,lon,brand_standard
0,2019-06-01,zzw-222@62j-sgj-q2k,5,0.0,Orangetheory Fitness,42.350592,-71.153024,orangetheory fitness
1,2019-06-02,zzw-222@62j-sgj-q2k,1,0.0,Orangetheory Fitness,42.350592,-71.153024,orangetheory fitness
2,2019-06-03,zzw-222@62j-sgj-q2k,6,859.0,Orangetheory Fitness,42.350592,-71.153024,orangetheory fitness
3,2019-06-04,zzw-222@62j-sgj-q2k,6,30.0,Orangetheory Fitness,42.350592,-71.153024,orangetheory fitness
4,2019-06-05,zzw-222@62j-sgj-q2k,8,193.0,Orangetheory Fitness,42.350592,-71.153024,orangetheory fitness


Reading Travel Time

In [4]:
with open('../data/travel_time.pkl', 'rb') as file:
    travel_time_dict = pickle.load(file)
    
travel_time_keys = list(travel_time_dict.keys())
from_keys = [key[0] for key in travel_time_keys]
to_keys = [key[1] for key in travel_time_keys]
time_minutes = list(travel_time_dict.values())
time_minutes = [int(time_inst.split(' ')[0]) for time_inst in time_minutes]

travel_time = pd.DataFrame({'From_PLACEKEY': from_keys, 'To_PLACEKEY': to_keys, 'Time_mins': time_minutes})
travel_time.head()

Unnamed: 0,From_PLACEKEY,To_PLACEKEY,Time_mins
0,zzw-224@62k-p96-s5z,zzw-223@62k-ns4-pn5,20
1,zzw-224@62k-p96-s5z,zzy-222@62k-pd8-975,20
2,zzw-224@62k-p96-s5z,237-222@62k-p8v-z4v,16
3,zzw-224@62k-p96-s5z,222-222@62k-p8v-2p9,12
4,zzw-224@62k-p96-s5z,229-222@62k-p76-d9z,14


Reading the statistics of the specific focal brand using the results of Part 2

In [5]:
brand = focal_brands[0]
print(brand)
focal_brand_path = os.path.join('../part2_r_statistics', brand)
focal_brand_path

Sephora


'../part2_r_statistics/Sephora'

Reading all the neighboring brands results for the selected focal brand

In [6]:
file_list = os.listdir(focal_brand_path)
result_file_list = [file for file in file_list if file.find('_result') != -1]

In [7]:
result_df = pd.DataFrame()

for result_file in result_file_list:
    result_file_path = os.path.join(focal_brand_path, result_file)
    tmp_res_df = pd.read_csv(result_file_path, skiprows=1, float_precision="round_trip")
    # Removing all the records for ols and m_olsExp
    if 'filename' in tmp_res_df['filename'].tolist(): #Wrong input
        continue
    result_df = pd.concat([result_df, tmp_res_df], ignore_index=True)

In [8]:
result_df = result_df[~result_df['type'].isin(['ols', 'm_olsExp'])].reset_index(drop=True)
result_df = result_df.replace('FALSE', False).replace('False', False)
# result_df.iloc[:,3:] = result_df.iloc[:,3:].astype('float64', copy=True)

In [9]:
result_df

Unnamed: 0,tmp,filename,type,X_Estimate,X_Std. Error,X_t value,X_Pr(>|t|),IV_firststage_reviews_tw_Estimate,IV_firststage_reviews_tw_Std. Error,IV_firststage_reviews_tw_t value,...,HausWutest_Df,HausWutest_F,HausWutest_Pr(>F),Sargan_result_rsq,Sargan_result_adjrsq,Sargan_pvalue_rsq,Sargan_pvalue_adjrsq,num_sig_variables,Y_r.squared,Y_adj.r.squared
0,tmp,Brooks Brothers,fe_reviews_reviews,0.069505,0.019034,3.651686,2.663556e-04,-0.000100211019005055,0.000190635514929389,-0.52566815287368,...,-1,2.34354613255589,0.125940823615684,4.23449349278573,-353.500987164209,0.120362561387644,1,2,0.671032,0.625286
1,tmp,Brooks Brothers,fe_reviews_visits,0.068299,0.003906,17.487215,2.233359e-64,False,False,False,...,-1,1.9329373522152,0.164572372068733,1.21490380085773e-28,-355.731304347826,1,1,1,0.671424,0.625732
2,tmp,Brooks Brothers,fe_exp_reviews_reviews,1.541672,0.234746,6.567409,6.312742e-11,0.377923078481934,0.318135845031044,1.18792988713691,...,-1,14.7189627428136,0.000128155117934805,0.74492433429816,-357.468891520661,0.689035719250415,1,2,0.727018,0.689058
3,tmp,Brooks Brothers,fe_exp_reviews_visits,1.064166,0.030081,35.376507,3.701041e-219,False,False,False,...,-1,37.0578552273081,1.34043757762807e-09,2.74781634341386e-28,-355.731304347826,1,1,1,0.735753,0.699007
4,tmp,Torrid,fe_reviews_reviews,0.060262,0.021028,2.865750,4.187351e-03,0.00498750665721031,0.00258175309132841,1.93182945106654,...,-1,14.496389999924,0.000143060081773808,3.21982428292259,-346.092335329809,0.199905176680324,1,0,0.109297,0.019754
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1479,tmp,REI,fe_exp_reviews_visits,17.982587,11.619486,1.547623,1.218157e-01,False,False,False,...,-1,159.888003382137,9.11891570094413e-36,3.13423717663725e-28,-347.234455958549,1,1,0,0.011222,-0.091762
1480,tmp,B,fe_reviews_reviews,1.637189,1.288917,1.270205,2.041267e-01,-0.000207663876637634,0.000482982056227962,-0.429961887734437,...,-1,4.07898337207887,0.0435235342635979,9.96297734472914,-343.774750838493,0.00686383694371739,1,1,0.044036,-0.075134
1481,tmp,B,fe_reviews_visits,4.949395,1.881332,2.630792,8.569535e-03,False,False,False,...,-1,1470.55007618526,9.57750888570686e-255,1.75098878080951e-28,-352.441507967353,1,1,1,0.036950,-0.083103
1482,tmp,B,fe_exp_reviews_reviews,-1.193309,3.944105,-0.302555,7.622535e-01,-0.00426440753458848,0.00068360771342525,-6.23809159967119,...,-1,0.309792452704997,0.577856108473467,2.8353034355311,-351.778482684411,0.242282296608164,1,3,0.007549,-0.116170


In [10]:
sum(result_df.groupby('filename')['type'].nunique() == 1)

0

In [11]:
sum(result_df.groupby('filename')['type'].nunique() == 2)

8

In [12]:
sum(result_df.groupby('filename')['type'].nunique() == 3)

6

In [13]:
sum(result_df.groupby('filename')['type'].nunique() == 4)

362

Only getting those brands who have values for all the four unique models

In [14]:
# count_list = result_df['filename'].value_counts()
count_list = result_df.groupby('filename')['type'].nunique()
valid_brands = count_list[count_list == 4].index.to_list() # having the values of all the four models
len(valid_brands)

362

In [15]:
result_df = result_df[result_df['filename'].isin(valid_brands)]
result_df

Unnamed: 0,tmp,filename,type,X_Estimate,X_Std. Error,X_t value,X_Pr(>|t|),IV_firststage_reviews_tw_Estimate,IV_firststage_reviews_tw_Std. Error,IV_firststage_reviews_tw_t value,...,HausWutest_Df,HausWutest_F,HausWutest_Pr(>F),Sargan_result_rsq,Sargan_result_adjrsq,Sargan_pvalue_rsq,Sargan_pvalue_adjrsq,num_sig_variables,Y_r.squared,Y_adj.r.squared
0,tmp,Brooks Brothers,fe_reviews_reviews,0.069505,0.019034,3.651686,2.663556e-04,-0.000100211019005055,0.000190635514929389,-0.52566815287368,...,-1,2.34354613255589,0.125940823615684,4.23449349278573,-353.500987164209,0.120362561387644,1,2,0.671032,0.625286
1,tmp,Brooks Brothers,fe_reviews_visits,0.068299,0.003906,17.487215,2.233359e-64,False,False,False,...,-1,1.9329373522152,0.164572372068733,1.21490380085773e-28,-355.731304347826,1,1,1,0.671424,0.625732
2,tmp,Brooks Brothers,fe_exp_reviews_reviews,1.541672,0.234746,6.567409,6.312742e-11,0.377923078481934,0.318135845031044,1.18792988713691,...,-1,14.7189627428136,0.000128155117934805,0.74492433429816,-357.468891520661,0.689035719250415,1,2,0.727018,0.689058
3,tmp,Brooks Brothers,fe_exp_reviews_visits,1.064166,0.030081,35.376507,3.701041e-219,False,False,False,...,-1,37.0578552273081,1.34043757762807e-09,2.74781634341386e-28,-355.731304347826,1,1,1,0.735753,0.699007
4,tmp,Torrid,fe_reviews_reviews,0.060262,0.021028,2.865750,4.187351e-03,0.00498750665721031,0.00258175309132841,1.93182945106654,...,-1,14.496389999924,0.000143060081773808,3.21982428292259,-346.092335329809,0.199905176680324,1,0,0.109297,0.019754
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1479,tmp,REI,fe_exp_reviews_visits,17.982587,11.619486,1.547623,1.218157e-01,False,False,False,...,-1,159.888003382137,9.11891570094413e-36,3.13423717663725e-28,-347.234455958549,1,1,0,0.011222,-0.091762
1480,tmp,B,fe_reviews_reviews,1.637189,1.288917,1.270205,2.041267e-01,-0.000207663876637634,0.000482982056227962,-0.429961887734437,...,-1,4.07898337207887,0.0435235342635979,9.96297734472914,-343.774750838493,0.00686383694371739,1,1,0.044036,-0.075134
1481,tmp,B,fe_reviews_visits,4.949395,1.881332,2.630792,8.569535e-03,False,False,False,...,-1,1470.55007618526,9.57750888570686e-255,1.75098878080951e-28,-352.441507967353,1,1,1,0.036950,-0.083103
1482,tmp,B,fe_exp_reviews_reviews,-1.193309,3.944105,-0.302555,7.622535e-01,-0.00426440753458848,0.00068360771342525,-6.23809159967119,...,-1,0.309792452704997,0.577856108473467,2.8353034355311,-351.778482684411,0.242282296608164,1,3,0.007549,-0.116170


Extract the significant neighboring brands whose p-values for all the models are significant (<0.05) i.e., X_Pr(>|t|)

In [16]:
def filter_brands_pvalue(brand_pvalue):
    pvalues = brand_pvalue.values
    
    if (pvalues[0] < 0.05) and (pvalues[1] < 0.05) and (pvalues[2] < 0.05) and (pvalues[3] < 0.05):
        return True
    else:
        return False

In [17]:
significant_brands = result_df.groupby('filename')['X_Pr(>|t|)'].apply(filter_brands_pvalue)
significant_brands_list = significant_brands [significant_brands == True].index.to_list()

In [18]:
significant_brands_list

['Aubuchon Hardware',
 'Bath & Body Works',
 'Best Friends Pet Care',
 'Boston Market',
 'Brooks Brothers',
 "Chico's",
 "Dave & Buster's",
 'Everything but Water',
 'Free People',
 'Gap',
 "Jordan's Furniture",
 'Nordstrom',
 'Olive Garden',
 'Primark',
 'Skechers',
 "Spencer's",
 'Tous Les Jours',
 'Town Fair Tire',
 'Wegmans Food Markets',
 'Windsor',
 'Zumiez',
 'lululemon athletica']

### Performing the calculations for WIC and WIBC

Firstly Calculating the Inverse and Inverse-Exp of the Time_mins

In [66]:
travel_time['Time_mins'] = travel_time['Time_mins']/60 # converting from minutes to hours
travel_time['Time_mins_inv'] = 1/travel_time['Time_mins']
travel_time['Time_mins_inv_exp'] = 1/np.exp(travel_time['Time_mins'])
travel_time = travel_time.rename(columns={'From_PLACEKEY': 'Focal_Stores', 'To_PLACEKEY': 'Neib_Stores'})
travel_time

Unnamed: 0,Focal_Stores,Neib_Stores,Time_mins,Time_mins_inv,Time_mins_inv_exp
0,zzw-224@62k-p96-s5z,zzw-223@62k-ns4-pn5,0.005556,180.000000,0.994460
1,zzw-224@62k-p96-s5z,zzy-222@62k-pd8-975,0.005556,180.000000,0.994460
2,zzw-224@62k-p96-s5z,237-222@62k-p8v-z4v,0.004444,225.000000,0.995565
3,zzw-224@62k-p96-s5z,222-222@62k-p8v-2p9,0.003333,300.000000,0.996672
4,zzw-224@62k-p96-s5z,229-222@62k-p76-d9z,0.003889,257.142857,0.996119
...,...,...,...,...,...
139637,zzy-22f@62j-shz-vs5,22c-222@62j-sgs-w49,0.008056,124.137931,0.991977
139638,zzy-22f@62j-shz-vs5,22k-222@62j-sjr-z9f,0.005833,171.428571,0.994184
139639,zzy-22f@62j-shz-vs5,228-222@62j-sj3-v75,0.003889,257.142857,0.996119
139640,zzy-22f@62j-shz-vs5,22g-222@62j-sxw-33q,0.005278,189.473684,0.994736


1. Extracting all the stores of the significant brands and focal store
2. Calculating the Average Visits of each store to be used as average sales
3. Dropping all the duplicate rows to have a clean dataframe

In [99]:
brands_visits_focal_sig_neib = brands_visits[(brands_visits['brand_visitation'].isin(significant_brands_list)) | (brands_visits['brand_visitation']==brand)]
brands_visits_focal_sig_neib.loc[:,'avg_visits'] = brands_visits_focal_sig_neib.groupby('PLACEKEY')['visits_by_day'].transform('median')
brands_visits_focal_sig_neib = brands_visits_focal_sig_neib.drop(columns=['date', 'visits_by_day', 'spend_by_day'])
brands_visits_focal_sig_neib = brands_visits_focal_sig_neib.drop_duplicates().reset_index(drop=True)
brands_visits_focal_sig_neib

Extracting the time_mins with all the focal stores belonging to brand under consideration, and all of the significant brands

In [100]:
focal_stores = brands_visits_focal_sig_neib[brands_visits_focal_sig_neib['brand_visitation'] == brand]['PLACEKEY'].unique().tolist()
sig_neib_stores = brands_visits_focal_sig_neib[brands_visits_focal_sig_neib['brand_visitation'].isin(significant_brands_list)]['PLACEKEY'].unique().tolist()
travel_time_filtered = travel_time[(travel_time['Focal_Stores'].isin(focal_stores)) & (travel_time['Neib_Stores'].isin(sig_neib_stores))]
travel_time_filtered

Unnamed: 0,Focal_Stores,Neib_Stores,Time_mins,Time_mins_inv,Time_mins_inv_exp
615,zzw-22m@629-2rt-cyv,zzw-222@629-2fq-qvf,0.004444,225.000000,0.995565
616,zzw-22m@629-2rt-cyv,zzw-229@629-2rt-fzz,0.001111,900.000000,0.998890
635,zzw-22m@629-2rt-cyv,zzw-223@629-22b-3h5,0.005833,171.428571,0.994184
798,zzw-22m@629-2rt-cyv,222-222@629-2rt-cqz,0.001111,900.000000,0.998890
804,zzw-22m@629-2rt-cyv,224-222@629-2pw-zvf,0.006111,163.636364,0.993908
...,...,...,...,...,...
135484,zzw-22p@62j-srj-ffz,zzw-22s@62j-srm-vs5,0.000278,3600.000000,0.999722
135500,zzw-22p@62j-srj-ffz,zzy-222@62j-srm-7nq,0.001111,900.000000,0.998890
135566,zzw-22p@62j-srj-ffz,222-222@62j-sr5-92k,0.004444,225.000000,0.995565
135576,zzw-22p@62j-srj-ffz,222-222@62j-srm-6ff,0.001389,720.000000,0.998612


Merging by Focal Stores to get the average visits of the focal stores

In [101]:
travel_time_filtered = pd.merge(left=travel_time_filtered, right=brands_visits_focal_sig_neib, how='left', left_on='Focal_Stores', right_on='PLACEKEY')
travel_time_filtered = travel_time_filtered.drop(columns=['PLACEKEY', 'brand_standard']).rename(columns={'brand_visitation':'Foc_Brand', 'lat':'Foc_lat', 'lon':'Foc_lon', 'avg_visits': 'Foc_avg_visits'})
travel_time_filtered

Unnamed: 0,Focal_Stores,Neib_Stores,Time_mins,Time_mins_inv,Time_mins_inv_exp,Foc_Brand,Foc_lat,Foc_lon,Foc_avg_visits
0,zzw-22m@629-2rt-cyv,zzw-222@629-2fq-qvf,0.004444,225.000000,0.995565,Sephora,42.168174,-72.641450,29.146409
1,zzw-22m@629-2rt-cyv,zzw-229@629-2rt-fzz,0.001111,900.000000,0.998890,Sephora,42.168174,-72.641450,29.146409
2,zzw-22m@629-2rt-cyv,zzw-223@629-22b-3h5,0.005833,171.428571,0.994184,Sephora,42.168174,-72.641450,29.146409
3,zzw-22m@629-2rt-cyv,222-222@629-2rt-cqz,0.001111,900.000000,0.998890,Sephora,42.168174,-72.641450,29.146409
4,zzw-22m@629-2rt-cyv,224-222@629-2pw-zvf,0.006111,163.636364,0.993908,Sephora,42.168174,-72.641450,29.146409
...,...,...,...,...,...,...,...,...,...
404,zzw-22p@62j-srj-ffz,zzw-22s@62j-srm-vs5,0.000278,3600.000000,0.999722,Sephora,42.301064,-71.383703,788.653846
405,zzw-22p@62j-srj-ffz,zzy-222@62j-srm-7nq,0.001111,900.000000,0.998890,Sephora,42.301064,-71.383703,788.653846
406,zzw-22p@62j-srj-ffz,222-222@62j-sr5-92k,0.004444,225.000000,0.995565,Sephora,42.301064,-71.383703,788.653846
407,zzw-22p@62j-srj-ffz,222-222@62j-srm-6ff,0.001389,720.000000,0.998612,Sephora,42.301064,-71.383703,788.653846


Merging by Neighboring Stores to get the average visits of the neighboring stores

In [102]:
travel_time_filtered = pd.merge(left=travel_time_filtered, right=brands_visits_focal_sig_neib, how='left', left_on='Neib_Stores', right_on='PLACEKEY')
travel_time_filtered = travel_time_filtered.drop(columns=['PLACEKEY','lat', 'lon', 'brand_standard']).rename(columns={'brand_visitation':'Neib_Brand', 'avg_visits': 'Neib_avg_visits'})
travel_time_filtered

Unnamed: 0,Focal_Stores,Neib_Stores,Time_mins,Time_mins_inv,Time_mins_inv_exp,Foc_Brand,Foc_lat,Foc_lon,Foc_avg_visits,Neib_Brand,Neib_avg_visits
0,zzw-22m@629-2rt-cyv,zzw-222@629-2fq-qvf,0.004444,225.000000,0.995565,Sephora,42.168174,-72.641450,29.146409,Town Fair Tire,10.172702
1,zzw-22m@629-2rt-cyv,zzw-229@629-2rt-fzz,0.001111,900.000000,0.998890,Sephora,42.168174,-72.641450,29.146409,Bath & Body Works,27.309392
2,zzw-22m@629-2rt-cyv,zzw-223@629-22b-3h5,0.005833,171.428571,0.994184,Sephora,42.168174,-72.641450,29.146409,Chico's,2.516129
3,zzw-22m@629-2rt-cyv,222-222@629-2rt-cqz,0.001111,900.000000,0.998890,Sephora,42.168174,-72.641450,29.146409,Zumiez,9.281768
4,zzw-22m@629-2rt-cyv,224-222@629-2pw-zvf,0.006111,163.636364,0.993908,Sephora,42.168174,-72.641450,29.146409,Town Fair Tire,3.108359
...,...,...,...,...,...,...,...,...,...,...,...
404,zzw-22p@62j-srj-ffz,zzw-22s@62j-srm-vs5,0.000278,3600.000000,0.999722,Sephora,42.301064,-71.383703,788.653846,Windsor,788.653846
405,zzw-22p@62j-srj-ffz,zzy-222@62j-srm-7nq,0.001111,900.000000,0.998890,Sephora,42.301064,-71.383703,788.653846,Spencer's,788.653846
406,zzw-22p@62j-srj-ffz,222-222@62j-sr5-92k,0.004444,225.000000,0.995565,Sephora,42.301064,-71.383703,788.653846,Best Friends Pet Care,1.470199
407,zzw-22p@62j-srj-ffz,222-222@62j-srm-6ff,0.001389,720.000000,0.998612,Sephora,42.301064,-71.383703,788.653846,Town Fair Tire,2.284211


#### Performing Neib Centrality Calculations by taking each neighboring brand

In [79]:
def centrality_calc_reviews(group_df):
    neib_brand = group_df.name
    neib_result_df = result_df [result_df['filename'] == neib_brand]
    
    dist_l = group_df['Time_mins_inv'].values
    dist2_l = group_df['Time_mins_inv_exp'].values
    average_sales = group_df['Neib_avg_visits'].values
    est_reviews = neib_result_df[neib_result_df['type'] == 'fe_reviews_reviews']['X_Estimate'].values[0]
    est_reviews_exp = neib_result_df[neib_result_df['type'] == 'fe_exp_reviews_reviews']['X_Estimate'].values[0]

    weighted_influence = np.sum(np.array(dist_l).dot(np.array(average_sales))*float(est_reviews))
    weighted_influence_exp = np.sum(np.array(dist2_l).dot(np.array(average_sales))*float(est_reviews_exp))
    influence = np.sum(np.array(dist_l)*float(est_reviews))
    influence_exp = np.sum(np.array(dist2_l)*float(est_reviews_exp))
    weighted_no_influence = np.sum(np.array(dist_l).dot(np.array(average_sales)))
    no_influence = np.sum(np.array(dist_l))
    avg_visits = np.mean(average_sales)
    dist_inv = np.mean([1.0 / d for d in np.array(dist_l)])
    num_stores = group_df['Neib_Stores'].nunique()
    
    return pd.Series({'weighted_influence': weighted_influence, 'weighted_influence_exp': weighted_influence_exp, 'influence': influence, 'influence_exp':influence_exp,
                      'weighted_no_influence': weighted_no_influence, 'no_influence': no_influence, 'avg_visits':avg_visits, 'dist_inv': dist_inv, 'num_stores': num_stores})

In [80]:
neib_centrality_reviews = travel_time_filtered.groupby('Neib_Brand')[['Time_mins_inv', 'Time_mins_inv_exp', 'Neib_avg_visits', 'Neib_Stores']].apply(centrality_calc_reviews).reset_index()
neib_centrality_reviews

Unnamed: 0,Neib_Brand,weighted_influence,weighted_influence_exp,influence,influence_exp,weighted_no_influence,no_influence,avg_visits,dist_inv,num_stores
0,Aubuchon Hardware,192907.969549,6509.116399,27562.54797,932.100356,9961.463,1423.286445,6.983117,0.005159,6.0
1,Bath & Body Works,199054.014852,6270.43101,1708.752133,74.636902,2632031.0,22594.309671,83.886105,0.004256,15.0
2,Best Friends Pet Care,-38801.336321,-2933.773589,-4425.815476,-280.445258,12916.94,1473.351648,10.467815,0.003778,3.0
3,Boston Market,-270335.951194,-2343.565473,-16575.415512,-176.177015,107350.3,6582.089597,13.28905,0.00495,8.0
4,Brooks Brothers,82739.112062,2844.702252,636.086212,24.559988,1190407.0,9151.673254,115.726246,0.00434,4.0
5,Chico's,87263.69854,1416.994389,19502.332619,371.684873,48564.59,10853.571429,3.810096,0.002465,5.0
6,Dave & Buster's,62214.628086,1401.204761,255.48385,11.114516,1764068.0,7244.130601,125.811266,0.003778,3.0
7,Everything but Water,47167.970785,828.311277,174.23835,5.104181,1433987.0,5297.142857,162.056231,0.002,3.0
8,Free People,53173.006278,976.160528,267.834593,14.697712,1502880.0,7570.066356,66.226003,0.004185,4.0
9,Gap,77477.212555,1820.49575,3244.613537,97.983961,543255.6,22750.61802,18.569752,0.004248,13.0


In [29]:
def centrality_calc_visits(group_df):
    neib_brand = group_df.name
    neib_result_df = result_df [result_df['filename'] == neib_brand]
    
    dist_l = group_df['Time_mins_inv'].values
    dist2_l = group_df['Time_mins_inv_exp'].values
    average_sales = group_df['Neib_avg_visits'].values
    est_visits = neib_result_df[neib_result_df['type'] == 'fe_reviews_visits']['X_Estimate'].values[0]
    est_visits_exp = neib_result_df[neib_result_df['type'] == 'fe_exp_reviews_visits']['X_Estimate'].values[0]

    weighted_influence = np.sum(np.array(dist_l).dot(np.array(average_sales))*float(est_visits))
    weighted_influence_exp = np.sum(np.array(dist2_l).dot(np.array(average_sales))*float(est_visits_exp))
    influence = np.sum(np.array(dist_l)*float(est_visits))
    influence_exp = np.sum(np.array(dist2_l)*float(est_visits_exp))
    weighted_no_influence = np.sum(np.array(dist_l).dot(np.array(average_sales)))
    no_influence = np.sum(np.array(dist_l))
    avg_visits = np.mean(average_sales)
    dist_inv = np.mean([1.0 / d for d in np.array(dist_l)])
    num_stores = group_df['Neib_Stores'].nunique()
    
    return pd.Series({'weighted_influence': weighted_influence, 'weighted_influence_exp': weighted_influence_exp, 'influence': influence, 'influence_exp':influence_exp,
                      'weighted_no_influence': weighted_no_influence, 'no_influence': no_influence, 'avg_visits':avg_visits, 'dist_inv': dist_inv, 'num_stores': num_stores})

In [None]:
neib_centrality_visits = travel_time_filtered.groupby('Neib_Brand')[['Time_mins_inv', 'Time_mins_inv_exp', 'Neib_avg_visits', 'Neib_Stores']].apply(centrality_calc_visits).reset_index()
neib_centrality_visits

#### Performing Inner Centrality Calculations by taking each focal store

In [31]:
def inner_centrality_calc_reviews(group_df):
        
    dist_l = group_df['Time_mins_inv'].values
    dist2_l = group_df['Time_mins_inv_exp'].values
    average_sales = group_df['Neib_avg_visits'].values
    
    est_reviews = []
    est_exp_reviews = []
    neib_brand_list = group_df['Neib_Brand'].values.tolist()
    for neib_brand in neib_brand_list:
        neib_result_df = result_df[result_df['filename'] == neib_brand]
        est_reviews.append(neib_result_df[neib_result_df['type'] == 'fe_reviews_reviews']['X_Estimate'].values[0])
        est_exp_reviews.append(neib_result_df[neib_result_df['type'] == 'fe_exp_reviews_reviews']['X_Estimate'].values[0])

    weighted_influence = np.sum(np.array(est_reviews) * np.array(dist_l) * np.array(average_sales))
    weighted_influence_exp = np.sum(np.array(est_exp_reviews) * np.array(dist2_l) * np.array(average_sales))
    influence = np.sum(np.array(est_reviews) * np.array(dist_l))
    influence_exp = np.sum(np.array(est_exp_reviews) * np.array(dist2_l))
    dist_inv = np.mean([1.0 / d for d in np.array(dist_l)])
    avg_visits = np.mean(average_sales)
    weighted_no_influence = np.sum(np.array(dist_l) * np.array(average_sales))
    no_influence = np.sum(np.array(dist_l))
    num_stores = group_df['Neib_Stores'].nunique()
    focal_lat = group_df['Foc_lat'].values[0]
    focal_lon = group_df['Foc_lon'].values[0]    


    return pd.Series({'weighted_influence': weighted_influence, 'weighted_influence_exp': weighted_influence_exp, 'influence': influence, 'influence_exp': influence_exp,
                      'weighted_no_influence': weighted_no_influence, 'no_influence': no_influence, 'avg_visits':avg_visits, 'dist_inv': dist_inv, 'num_stores': num_stores,
                      'focal_lat': focal_lat, 'focal_lon': focal_lon})

In [None]:
inner_centrality_reviews = travel_time_filtered.groupby('Focal_Stores')[['Time_mins_inv', 'Time_mins_inv_exp', 'Neib_avg_visits', 'Neib_Brand', 'Neib_Stores', 'Foc_lat', 'Foc_lon']].apply(inner_centrality_calc_reviews).reset_index()
inner_centrality_reviews

In [33]:
def inner_centrality_calc_visits(group_df):
        
    dist_l = group_df['Time_mins_inv'].values
    dist2_l = group_df['Time_mins_inv_exp'].values
    average_sales = group_df['Neib_avg_visits'].values
    
    est_visits = []
    est_exp_visits = []
    neib_brand_list = group_df['Neib_Brand'].values.tolist()
    for neib_brand in neib_brand_list:
        neib_result_df = result_df[result_df['filename'] == neib_brand]
        est_visits.append(neib_result_df[neib_result_df['type'] == 'fe_reviews_visits']['X_Estimate'].values[0])
        est_exp_visits.append(neib_result_df[neib_result_df['type'] == 'fe_exp_reviews_visits']['X_Estimate'].values[0])

    weighted_influence = np.sum(np.array(est_visits) * np.array(dist_l) * np.array(average_sales))
    weighted_influence_exp = np.sum(np.array(est_exp_visits) * np.array(dist2_l) * np.array(average_sales))
    influence = np.sum(np.array(est_visits) * np.array(dist_l))
    influence_exp = np.sum(np.array(est_exp_visits) * np.array(dist2_l))
    dist_inv = np.mean([1.0 / d for d in np.array(dist_l)])
    avg_visits = np.mean(average_sales)
    weighted_no_influence = np.sum(np.array(dist_l) * np.array(average_sales))
    no_influence = np.sum(np.array(dist_l))
    num_stores = group_df['Neib_Stores'].nunique()
    focal_lat = group_df['Foc_lat'].values[0]
    focal_lon = group_df['Foc_lon'].values[0]    


    return pd.Series({'weighted_influence': weighted_influence, 'weighted_influence_exp': weighted_influence_exp, 'influence': influence, 'influence_exp': influence_exp,
                      'weighted_no_influence': weighted_no_influence, 'no_influence': no_influence, 'avg_visits':avg_visits, 'dist_inv': dist_inv, 'num_stores': num_stores,
                      'focal_lat': focal_lat, 'focal_lon': focal_lon})

In [None]:
np.sum(np.array(est_exp_visits) * np.array(dist2_l))

In [None]:
inner_centrality_visits = travel_time_filtered.groupby('Focal_Stores')[['Time_mins_inv', 'Time_mins_inv_exp', 'Neib_avg_visits', 'Neib_Brand', 'Neib_Stores', 'Foc_lat', 'Foc_lon']].apply(inner_centrality_calc_visits).reset_index()
inner_centrality_visits

### Aggregating the WIBC and MIBC values for all the focal stores

In [35]:
dir_path = os.path.join('foc_brand_inner_centrality', brand)
os.makedirs(dir_path, exist_ok=True)

In [None]:
tmp_inner_centrality_reviews = inner_centrality_reviews[['Focal_Stores', 'focal_lat', 'focal_lon', 'weighted_influence', 'influence', 'weighted_influence_exp', 'influence_exp']].rename(columns={
    'weighted_influence': 'WIBC_reviews',
    'weighted_influence_exp': 'WIBC_reviews_exp',
    'influence': 'MIBC_reviews',
    'influence_exp': 'MIBC_reviews_exp'
})

tmp_inner_centrality_visits = inner_centrality_visits[['Focal_Stores', 'weighted_influence', 'influence', 'weighted_influence_exp', 'influence_exp']].rename(columns={
    'weighted_influence': 'WIBC_visits',
    'weighted_influence_exp': 'WIBC_visits_exp',
    'influence': 'MIBC_visits',
    'influence_exp': 'MIBC_visits_exp'
})

foc_brand_centrality_info = pd.merge(left=tmp_inner_centrality_reviews, right=tmp_inner_centrality_visits, how='left', on='Focal_Stores')
foc_brand_centrality_info.insert(0, 'Focal_Brand', brand)
foc_brand_centrality_info.to_latex(os.path.join(dir_path, 'wibc_mibc_info.tex'))
foc_brand_centrality_info.head()

### Calculating the visits for all the businesses

For Reviews Model

In [None]:
visits_all_business_pure_reviews = travel_time_filtered
visits_all_business_pure_reviews['Neib_est_review'] = visits_all_business_pure_reviews['Neib_Brand'].apply(lambda x: result_df[(result_df['filename'] == x) & (result_df['type'] == 'fe_reviews_reviews')]['X_Estimate'].values[0])
visits_all_business_pure_reviews['Neib_est_exp_review'] = visits_all_business_pure_reviews['Neib_Brand'].apply(lambda x: result_df[(result_df['filename'] == x) & (result_df['type'] == 'fe_exp_reviews_reviews')]['X_Estimate'].values[0])
visits_all_business_pure_reviews['num_inf'] = visits_all_business_pure_reviews['Neib_est_review'] * visits_all_business_pure_reviews['Time_mins_inv']
visits_all_business_pure_reviews['num_inf_exp'] = visits_all_business_pure_reviews['Neib_est_exp_review'] * visits_all_business_pure_reviews['Time_mins_inv_exp']
visits_all_business_pure_reviews['num_inf_visits'] = visits_all_business_pure_reviews['Neib_est_review'] * visits_all_business_pure_reviews['Time_mins_inv'] * visits_all_business_pure_reviews['Neib_avg_visits']
visits_all_business_pure_reviews['num_inf_visits_exp'] = visits_all_business_pure_reviews['Neib_est_exp_review'] * visits_all_business_pure_reviews['Time_mins_inv_exp'] * visits_all_business_pure_reviews['Neib_avg_visits']
visits_all_business_pure_reviews['weighted_visits'] = visits_all_business_pure_reviews['Time_mins_inv'] * visits_all_business_pure_reviews['Neib_avg_visits']
visits_all_business_pure_reviews.head()

In [None]:
visits_all_business_pure_reviews = visits_all_business_pure_reviews.sort_values('num_inf_visits', ascending=False)

ranked_dic_reviews = visits_all_business_pure_reviews.groupby('Neib_Brand').agg(distance=('Time_mins', 'mean'),
                                                   avg_visits=('Neib_avg_visits', 'sum'),
                                                   weighted_visits=('weighted_visits', 'sum'),
                                                   num_inf = ('num_inf', 'sum'),
                                                   num_inf_visits = ('num_inf_visits','sum')
                                                )
ranked_dic_reviews

In [None]:
visits_all_business_pure_reviews = visits_all_business_pure_reviews.sort_values('num_inf_visits_exp', ascending=False)

ranked_dic_reviews_exp = visits_all_business_pure_reviews.groupby('Neib_Brand').agg(distance=('Time_mins', 'mean'),
                                                   avg_visits=('Neib_avg_visits', 'sum'),
                                                   weighted_visits=('weighted_visits', 'sum'),
                                                   num_inf = ('num_inf_exp', 'sum'),
                                                   num_inf_visits = ('num_inf_visits_exp','sum')
                                                )
ranked_dic_reviews_exp

For Visits Model

In [None]:
visits_all_business_pure_visits = travel_time_filtered
visits_all_business_pure_visits['Neib_est_visits'] = visits_all_business_pure_visits['Neib_Brand'].apply(lambda x: result_df[(result_df['filename'] == x) & (result_df['type'] == 'fe_reviews_visits')]['X_Estimate'].values[0])
visits_all_business_pure_visits['Neib_est_exp_visits'] = visits_all_business_pure_visits['Neib_Brand'].apply(lambda x: result_df[(result_df['filename'] == x) & (result_df['type'] == 'fe_exp_reviews_visits')]['X_Estimate'].values[0])
visits_all_business_pure_visits['num_inf'] = visits_all_business_pure_visits['Neib_est_visits'] * visits_all_business_pure_visits['Time_mins_inv']
visits_all_business_pure_visits['num_inf_exp'] = visits_all_business_pure_visits['Neib_est_exp_visits'] * visits_all_business_pure_visits['Time_mins_inv_exp']
visits_all_business_pure_visits['num_inf_visits'] = visits_all_business_pure_visits['Neib_est_visits'] * visits_all_business_pure_visits['Time_mins_inv'] * visits_all_business_pure_visits['Neib_avg_visits']
visits_all_business_pure_visits['num_inf_visits_exp'] = visits_all_business_pure_visits['Neib_est_exp_visits'] * visits_all_business_pure_visits['Time_mins_inv_exp'] * visits_all_business_pure_visits['Neib_avg_visits']
visits_all_business_pure_visits['weighted_visits'] = visits_all_business_pure_visits['Time_mins_inv'] * visits_all_business_pure_visits['Neib_avg_visits']
visits_all_business_pure_visits.head()

In [None]:
visits_all_business_pure_visits = visits_all_business_pure_visits.sort_values('num_inf_visits', ascending=False)

ranked_dic_visits = visits_all_business_pure_visits.groupby('Neib_Brand').agg(distance=('Time_mins', 'mean'),
                                                   avg_visits=('Neib_avg_visits', 'sum'),
                                                   weighted_visits=('weighted_visits', 'sum'),
                                                   num_inf = ('num_inf', 'sum'),
                                                   num_inf_visits = ('num_inf_visits','sum')
                                                )
ranked_dic_visits

In [None]:
visits_all_business_pure_visits = visits_all_business_pure_visits.sort_values('num_inf_visits_exp', ascending=False)

ranked_dic_visits_exp = visits_all_business_pure_visits.groupby('Neib_Brand').agg(distance=('Time_mins', 'mean'),
                                                   avg_visits=('Neib_avg_visits', 'sum'),
                                                   weighted_visits=('weighted_visits', 'sum'),
                                                   num_inf = ('num_inf_exp', 'sum'),
                                                   num_inf_visits = ('num_inf_visits_exp','sum')
                                                )
ranked_dic_visits_exp

### Example 1 Web Design

Create the folder to save the results

In [43]:
dir_path = os.path.join('example1_web_design', brand)
os.makedirs(dir_path, exist_ok=True)

Linear Reviews

In [None]:
k = 7
top_selected_w = neib_centrality_reviews.sort_values('weighted_influence', ascending =False)[:k]['Neib_Brand'].values
top_selected = neib_centrality_reviews.sort_values('influence', ascending =False)[:k]['Neib_Brand'].values
most_avg_visits = ranked_dic_reviews.sort_values('avg_visits', ascending =False).index[:k].values
closest_distance = ranked_dic_reviews.sort_values('distance', ascending = True).index[:k].values
closest_weighted = ranked_dic_reviews.sort_values('weighted_visits', ascending =False).index[:k].values
num_inf = ranked_dic_reviews.sort_values('num_inf', ascending = False).index[:k].values
num_inf_visits = ranked_dic_reviews.sort_values('num_inf_visits', ascending =False).index[:k].values

visits_rage = [0.0109, 0.0509] # mean is 3.09 #https://martech.org/report-cost-to-drive-store-visits-varies-widely-by-category-mobile-most-efficient-channel/
exmple_1_web_design = []


def compute_visits(selected, current_visits_rate):
    sub_g = visits_all_business_pure_reviews[visits_all_business_pure_reviews.Neib_Brand.isin(list(selected))]    
    return sub_g.num_inf_visits.sum() * current_visits_rate

for rand in range(100):
    current_visits_rate = random.uniform(visits_rage[0], visits_rage[1]) 
    top_cent_visits = compute_visits(top_selected, current_visits_rate)
    top_w_cent_visits = compute_visits(top_selected_w, current_visits_rate)
    random_selected = random.choices(visits_all_business_pure_reviews.Neib_Brand.unique(), k = k)
    random_visits = compute_visits(random_selected, current_visits_rate)
    most_avg_visits_inf = compute_visits(most_avg_visits, current_visits_rate)
    closest_distance_inf = compute_visits(closest_distance, current_visits_rate)
    closest_weighted_inf =compute_visits(closest_weighted, current_visits_rate)
    exmple_1_web_design.append([top_cent_visits, top_w_cent_visits, random_visits, most_avg_visits_inf, closest_weighted_inf, closest_distance_inf])
    
exmple_1_web_design = pd.DataFrame(exmple_1_web_design, columns = ['top_cent_visits', 'top_w_cent_visits', 'random_visits', 'most_avg_visits_inf', 'closest_weighted_inf', "closest_distance_inf"])

exmple_1_web_design.mean()
exmple_1_web_design['type'] = 'example_1_web_design'
exmple_1_web_design

In [None]:
cnt = 0 
DAY = 30 
plt.figure(figsize = (8, 5))

data = exmple_1_web_design
data_sub = data.melt(value_vars = data.columns[:-1])
data_sub['value'] = data_sub['value'].astype('float')

data_sub['Increased visits'] = DAY * data_sub['value']
sns.barplot(data = data_sub, x = 'variable', y = 'Increased visits')
plt.ylabel('Increased visits', fontsize = 12, fontweight = 'bold')
plt.title('{}'.format(brand), fontsize = 12, fontweight = 'bold')
plt.xticks(range(6), ['MIC', 'WIC', 'random', 'avg visits', 'weighted visits', 'closest'], fontsize = 12, rotation = 30, fontweight = 'bold')
plt.xlabel('')
plt.yticks(fontsize = 12, fontweight = 'bold')
plt.tight_layout()
plt.savefig(os.path.join(dir_path, 'reviews.png'))


Exponential Reviews

In [None]:
k = 7
top_selected_w = neib_centrality_reviews.sort_values('weighted_influence_exp', ascending =False)[:k]['Neib_Brand'].values
top_selected = neib_centrality_reviews.sort_values('influence_exp', ascending =False)[:k]['Neib_Brand'].values
most_avg_visits = ranked_dic_reviews_exp.sort_values('avg_visits', ascending =False).index[:k].values
closest_distance = ranked_dic_reviews_exp.sort_values('distance', ascending = True).index[:k].values
closest_weighted = ranked_dic_reviews_exp.sort_values('weighted_visits', ascending =False).index[:k].values
num_inf = ranked_dic_reviews_exp.sort_values('num_inf', ascending = False).index[:k].values
num_inf_visits = ranked_dic_reviews_exp.sort_values('num_inf_visits', ascending =False).index[:k].values

visits_rage = [0.0109, 0.0509] # mean is 3.09 #https://martech.org/report-cost-to-drive-store-visits-varies-widely-by-category-mobile-most-efficient-channel/
exmple_1_web_design = []


def compute_visits(selected, current_visits_rate):
    sub_g = visits_all_business_pure_reviews[visits_all_business_pure_reviews.Neib_Brand.isin(list(selected))]    
    return sub_g.num_inf_visits_exp.sum() * current_visits_rate

for rand in range(100):
    current_visits_rate = random.uniform(visits_rage[0], visits_rage[1]) 
    top_cent_visits = compute_visits(top_selected, current_visits_rate)
    top_w_cent_visits = compute_visits(top_selected_w, current_visits_rate)
    random_selected = random.choices(visits_all_business_pure_reviews.Neib_Brand.unique(), k = k)
    random_visits = compute_visits(random_selected, current_visits_rate)
    most_avg_visits_inf = compute_visits(most_avg_visits, current_visits_rate)
    closest_distance_inf = compute_visits(closest_distance, current_visits_rate)
    closest_weighted_inf =compute_visits(closest_weighted, current_visits_rate)
    exmple_1_web_design.append([top_cent_visits, top_w_cent_visits, random_visits, most_avg_visits_inf, closest_weighted_inf, closest_distance_inf])
    
exmple_1_web_design = pd.DataFrame(exmple_1_web_design, columns = ['top_cent_visits', 'top_w_cent_visits', 'random_visits', 'most_avg_visits_inf', 'closest_weighted_inf', "closest_distance_inf"])

exmple_1_web_design.mean()
exmple_1_web_design['type'] = 'example_1_web_design'
exmple_1_web_design

In [None]:
cnt = 0 
DAY = 30 
plt.figure(figsize = (8, 5))

data = exmple_1_web_design
data_sub = data.melt(value_vars = data.columns[:-1])
data_sub['value'] = data_sub['value'].astype('float')

data_sub['Increased visits'] = DAY * data_sub['value']
sns.barplot(data = data_sub, x = 'variable', y = 'Increased visits')
plt.ylabel('Increased visits', fontsize = 12, fontweight = 'bold')
plt.title('{}'.format(brand), fontsize = 12, fontweight = 'bold')
plt.xticks(range(6), ['MIC', 'WIC', 'random', 'avg visits', 'weighted visits', 'closest'], fontsize = 12, rotation = 30, fontweight = 'bold')
plt.xlabel('')
plt.yticks(fontsize = 12, fontweight = 'bold')
plt.tight_layout()
plt.savefig(os.path.join(dir_path, 'reviews_exp.png'))


Linear Visits

In [None]:
k = 7
top_selected_w = neib_centrality_visits.sort_values('weighted_influence', ascending =False)[:k]['Neib_Brand'].values
top_selected = neib_centrality_visits.sort_values('influence', ascending =False)[:k]['Neib_Brand'].values
most_avg_visits = ranked_dic_visits.sort_values('avg_visits', ascending =False).index[:k].values
closest_distance = ranked_dic_visits.sort_values('distance', ascending = True).index[:k].values
closest_weighted = ranked_dic_visits.sort_values('weighted_visits', ascending =False).index[:k].values
num_inf = ranked_dic_visits.sort_values('num_inf', ascending = False).index[:k].values
num_inf_visits = ranked_dic_visits.sort_values('num_inf_visits', ascending =False).index[:k].values

visits_rage = [0.0109, 0.0509] # mean is 3.09 #https://martech.org/report-cost-to-drive-store-visits-varies-widely-by-category-mobile-most-efficient-channel/
exmple_1_web_design = []


def compute_visits(selected, current_visits_rate):
    sub_g = visits_all_business_pure_visits[visits_all_business_pure_visits.Neib_Brand.isin(list(selected))]    
    return sub_g.num_inf_visits.sum() * current_visits_rate

for rand in range(100):
    current_visits_rate = random.uniform(visits_rage[0], visits_rage[1]) 
    top_cent_visits = compute_visits(top_selected, current_visits_rate)
    top_w_cent_visits = compute_visits(top_selected_w, current_visits_rate)
    random_selected = random.choices(visits_all_business_pure_visits.Neib_Brand.unique(), k = k)
    random_visits = compute_visits(random_selected, current_visits_rate)
    most_avg_visits_inf = compute_visits(most_avg_visits, current_visits_rate)
    closest_distance_inf = compute_visits(closest_distance, current_visits_rate)
    closest_weighted_inf =compute_visits(closest_weighted, current_visits_rate)
    exmple_1_web_design.append([top_cent_visits, top_w_cent_visits, random_visits, most_avg_visits_inf, closest_weighted_inf, closest_distance_inf])
    
exmple_1_web_design = pd.DataFrame(exmple_1_web_design, columns = ['top_cent_visits', 'top_w_cent_visits', 'random_visits', 'most_avg_visits_inf', 'closest_weighted_inf', "closest_distance_inf"])

exmple_1_web_design.mean()
exmple_1_web_design['type'] = 'example_1_web_design'
exmple_1_web_design

In [None]:
cnt = 0 
DAY = 30 
plt.figure(figsize = (8, 5))

data = exmple_1_web_design
data_sub = data.melt(value_vars = data.columns[:-1])
data_sub['value'] = data_sub['value'].astype('float')

data_sub['Increased visits'] = DAY * data_sub['value']
sns.barplot(data = data_sub, x = 'variable', y = 'Increased visits')
plt.ylabel('Increased visits', fontsize = 12, fontweight = 'bold')
plt.title('{}'.format(brand), fontsize = 12, fontweight = 'bold')
plt.xticks(range(6), ['MIC', 'WIC', 'random', 'avg visits', 'weighted visits', 'closest'], fontsize = 12, rotation = 30, fontweight = 'bold')
plt.xlabel('')
plt.yticks(fontsize = 12, fontweight = 'bold')
plt.tight_layout()
plt.savefig(os.path.join(dir_path, 'visits.png'))

Exponential Visits

In [None]:
k = 7
top_selected_w = neib_centrality_visits.sort_values('weighted_influence_exp', ascending =False)[:k]['Neib_Brand'].values
top_selected = neib_centrality_visits.sort_values('influence_exp', ascending =False)[:k]['Neib_Brand'].values
most_avg_visits = ranked_dic_visits_exp.sort_values('avg_visits', ascending =False).index[:k].values
closest_distance = ranked_dic_visits_exp.sort_values('distance', ascending = True).index[:k].values
closest_weighted = ranked_dic_visits_exp.sort_values('weighted_visits', ascending =False).index[:k].values
num_inf = ranked_dic_visits_exp.sort_values('num_inf', ascending = False).index[:k].values
num_inf_visits = ranked_dic_visits_exp.sort_values('num_inf_visits', ascending =False).index[:k].values

visits_rage = [0.0109, 0.0509] # mean is 3.09 #https://martech.org/report-cost-to-drive-store-visits-varies-widely-by-category-mobile-most-efficient-channel/
exmple_1_web_design = []


def compute_visits(selected, current_visits_rate):
    sub_g = visits_all_business_pure_visits[visits_all_business_pure_visits.Neib_Brand.isin(list(selected))]    
    return sub_g.num_inf_visits_exp.sum() * current_visits_rate

for rand in range(100):
    current_visits_rate = random.uniform(visits_rage[0], visits_rage[1]) 
    top_cent_visits = compute_visits(top_selected, current_visits_rate)
    top_w_cent_visits = compute_visits(top_selected_w, current_visits_rate)
    random_selected = random.choices(visits_all_business_pure_visits.Neib_Brand.unique(), k = k)
    random_visits = compute_visits(random_selected, current_visits_rate)
    most_avg_visits_inf = compute_visits(most_avg_visits, current_visits_rate)
    closest_distance_inf = compute_visits(closest_distance, current_visits_rate)
    closest_weighted_inf =compute_visits(closest_weighted, current_visits_rate)
    exmple_1_web_design.append([top_cent_visits, top_w_cent_visits, random_visits, most_avg_visits_inf, closest_weighted_inf, closest_distance_inf])
    
exmple_1_web_design = pd.DataFrame(exmple_1_web_design, columns = ['top_cent_visits', 'top_w_cent_visits', 'random_visits', 'most_avg_visits_inf', 'closest_weighted_inf', "closest_distance_inf"])

exmple_1_web_design.mean()
exmple_1_web_design['type'] = 'example_1_web_design'
exmple_1_web_design

In [None]:
cnt = 0 
DAY = 30 
plt.figure(figsize = (8, 5))

data = exmple_1_web_design
data_sub = data.melt(value_vars = data.columns[:-1])
data_sub['value'] = data_sub['value'].astype('float')

data_sub['Increased visits'] = DAY * data_sub['value']
sns.barplot(data = data_sub, x = 'variable', y = 'Increased visits')
plt.ylabel('Increased visits', fontsize = 12, fontweight = 'bold')
plt.title('{}'.format(brand), fontsize = 12, fontweight = 'bold')
plt.xticks(range(6), ['MIC', 'WIC', 'random', 'avg visits', 'weighted visits', 'closest'], fontsize = 12, rotation = 30, fontweight = 'bold')
plt.xlabel('')
plt.yticks(fontsize = 12, fontweight = 'bold')
plt.tight_layout()
plt.savefig(os.path.join(dir_path, 'visits_exp.png'))

### Example 2 Choose Partner

Create the folder to save the results

In [None]:
dir_path = os.path.join('example2_choose_partner', brand)
os.makedirs(dir_path, exist_ok=True)

Linear Reviews

In [None]:
visits_rage = [0.0203, 0.0603]#[0.1, 0.2]
k = 1

top_selected_w = neib_centrality_reviews.sort_values('weighted_influence', ascending =False)[:k]['Neib_Brand'].values
top_selected = neib_centrality_reviews.sort_values('influence', ascending =False)[:k]['Neib_Brand'].values
most_avg_visits = ranked_dic_reviews.sort_values('avg_visits', ascending =False).index[:k].values
closest_distance = ranked_dic_reviews.sort_values('distance', ascending = True).index[:k].values
closest_weighted = ranked_dic_reviews.sort_values('weighted_visits', ascending =False).index[:k].values

example_2_partner_choice = []

def compute_visits(selected, current_visits_rate):
    sub_g = visits_all_business_pure_reviews[visits_all_business_pure_reviews.Neib_Brand.isin(list(selected))]    
    return sub_g.num_inf_visits.sum() * current_visits_rate

for rand in range(100):
    current_visits_rate = random.uniform(visits_rage[0], visits_rage[1]) 
    top_cent_visits = compute_visits(top_selected, current_visits_rate)
    top_w_cent_visits = compute_visits(top_selected_w, current_visits_rate)
    random_selected = random.choices(visits_all_business_pure_reviews.Neib_Brand.unique(), k = k)
    random_visits = compute_visits(random_selected, current_visits_rate)
    most_avg_visits_inf = compute_visits(most_avg_visits, current_visits_rate)
    closest_distance_inf = compute_visits(closest_distance, current_visits_rate)
    closest_weighted_inf =compute_visits(closest_weighted, current_visits_rate)
    example_2_partner_choice.append([top_cent_visits, top_w_cent_visits, random_visits, most_avg_visits_inf, closest_weighted_inf, closest_distance_inf])


example_2_partner_choice = pd.DataFrame(example_2_partner_choice, columns = ['top_cent_visits', 'top_w_cent_visits', 'random_visits', 'most_avg_visits_inf', 'closest_weighted_inf', "closest_distance_inf"])
example_2_partner_choice.mean()
example_2_partner_choice['type'] = 'example_2_partner_choice'
example_2_partner_choice

In [None]:
cnt = 0 
DAY = 30 
plt.figure(figsize = (8, 5))

data = example_2_partner_choice
data_sub = data.melt(value_vars = data.columns[:-1])
data_sub['value'] = data_sub['value'].astype('float')

data_sub['Increased visits'] = DAY * data_sub['value']#%/ 3200 * 100
sns.barplot(data = data_sub, x = 'variable', y = 'Increased visits')
plt.ylabel('Increased visits', fontsize = 12, fontweight = 'bold')
plt.title('{}'.format(brand), fontsize = 12, fontweight = 'bold')
plt.xticks(range(6), ['MIC', 'WIC', 'random', 'avg visits', 'weighted visits', 'closest'], fontsize = 12, rotation = 30, fontweight = 'bold')
plt.xlabel('')
plt.yticks(fontsize = 12, fontweight = 'bold')
plt.tight_layout()
plt.savefig(os.path.join(dir_path, 'reviews.png'))

Exponential Reviews

In [None]:
visits_rage = [0.0203, 0.0603]#[0.1, 0.2]
k = 1

top_selected_w = neib_centrality_reviews.sort_values('weighted_influence_exp', ascending =False)[:k]['Neib_Brand'].values
top_selected = neib_centrality_reviews.sort_values('influence_exp', ascending =False)[:k]['Neib_Brand'].values
most_avg_visits = ranked_dic_reviews_exp.sort_values('avg_visits', ascending =False).index[:k].values
closest_distance = ranked_dic_reviews_exp.sort_values('distance', ascending = True).index[:k].values
closest_weighted = ranked_dic_reviews_exp.sort_values('weighted_visits', ascending =False).index[:k].values

example_2_partner_choice = []

def compute_visits(selected, current_visits_rate):
    sub_g = visits_all_business_pure_reviews[visits_all_business_pure_reviews.Neib_Brand.isin(list(selected))]    
    return sub_g.num_inf_visits_exp.sum() * current_visits_rate

for rand in range(100):
    current_visits_rate = random.uniform(visits_rage[0], visits_rage[1]) 
    top_cent_visits = compute_visits(top_selected, current_visits_rate)
    top_w_cent_visits = compute_visits(top_selected_w, current_visits_rate)
    random_selected = random.choices(visits_all_business_pure_reviews.Neib_Brand.unique(), k = k)
    random_visits = compute_visits(random_selected, current_visits_rate)
    most_avg_visits_inf = compute_visits(most_avg_visits, current_visits_rate)
    closest_distance_inf = compute_visits(closest_distance, current_visits_rate)
    closest_weighted_inf =compute_visits(closest_weighted, current_visits_rate)
    example_2_partner_choice.append([top_cent_visits, top_w_cent_visits, random_visits, most_avg_visits_inf, closest_weighted_inf, closest_distance_inf])


example_2_partner_choice = pd.DataFrame(example_2_partner_choice, columns = ['top_cent_visits', 'top_w_cent_visits', 'random_visits', 'most_avg_visits_inf', 'closest_weighted_inf', "closest_distance_inf"])
example_2_partner_choice.mean()
example_2_partner_choice['type'] = 'example_2_partner_choice'
example_2_partner_choice

In [None]:
cnt = 0 
DAY = 30 
plt.figure(figsize = (8, 5))

data = example_2_partner_choice
data_sub = data.melt(value_vars = data.columns[:-1])
data_sub['value'] = data_sub['value'].astype('float')

data_sub['Increased visits'] = DAY * data_sub['value']#%/ 3200 * 100
sns.barplot(data = data_sub, x = 'variable', y = 'Increased visits')
plt.ylabel('Increased visits', fontsize = 12, fontweight = 'bold')
plt.title('{}'.format(brand), fontsize = 12, fontweight = 'bold')
plt.xticks(range(6), ['MIC', 'WIC', 'random', 'avg visits', 'weighted visits', 'closest'], fontsize = 12, rotation = 30, fontweight = 'bold')
plt.xlabel('')
plt.yticks(fontsize = 12, fontweight = 'bold')
plt.tight_layout()
plt.savefig(os.path.join(dir_path, 'reviews_exp.png'))

Linear Visits

In [None]:
visits_rage = [0.0203, 0.0603]#[0.1, 0.2]
k = 1

top_selected_w = neib_centrality_visits.sort_values('weighted_influence', ascending =False)[:k]['Neib_Brand'].values
top_selected = neib_centrality_visits.sort_values('influence', ascending =False)[:k]['Neib_Brand'].values
most_avg_visits = ranked_dic_visits.sort_values('avg_visits', ascending =False).index[:k].values
closest_distance = ranked_dic_visits.sort_values('distance', ascending = True).index[:k].values
closest_weighted = ranked_dic_visits.sort_values('weighted_visits', ascending =False).index[:k].values

example_2_partner_choice = []

def compute_visits(selected, current_visits_rate):
    sub_g = visits_all_business_pure_visits[visits_all_business_pure_visits.Neib_Brand.isin(list(selected))]    
    return sub_g.num_inf_visits.sum() * current_visits_rate

for rand in range(100):
    current_visits_rate = random.uniform(visits_rage[0], visits_rage[1]) 
    top_cent_visits = compute_visits(top_selected, current_visits_rate)
    top_w_cent_visits = compute_visits(top_selected_w, current_visits_rate)
    random_selected = random.choices(visits_all_business_pure_visits.Neib_Brand.unique(), k = k)
    random_visits = compute_visits(random_selected, current_visits_rate)
    most_avg_visits_inf = compute_visits(most_avg_visits, current_visits_rate)
    closest_distance_inf = compute_visits(closest_distance, current_visits_rate)
    closest_weighted_inf =compute_visits(closest_weighted, current_visits_rate)
    example_2_partner_choice.append([top_cent_visits, top_w_cent_visits, random_visits, most_avg_visits_inf, closest_weighted_inf, closest_distance_inf])


example_2_partner_choice = pd.DataFrame(example_2_partner_choice, columns = ['top_cent_visits', 'top_w_cent_visits', 'random_visits', 'most_avg_visits_inf', 'closest_weighted_inf', "closest_distance_inf"])
example_2_partner_choice.mean()
example_2_partner_choice['type'] = 'example_2_partner_choice'
example_2_partner_choice

In [None]:
cnt = 0 
DAY = 30 
plt.figure(figsize = (8, 5))

data = example_2_partner_choice
data_sub = data.melt(value_vars = data.columns[:-1])
data_sub['value'] = data_sub['value'].astype('float')

data_sub['Increased visits'] = DAY * data_sub['value']#%/ 3200 * 100
sns.barplot(data = data_sub, x = 'variable', y = 'Increased visits')
plt.ylabel('Increased visits', fontsize = 12, fontweight = 'bold')
plt.title('{}'.format(brand), fontsize = 12, fontweight = 'bold')
plt.xticks(range(6), ['MIC', 'WIC', 'random', 'avg visits', 'weighted visits', 'closest'], fontsize = 12, rotation = 30, fontweight = 'bold')
plt.xlabel('')
plt.yticks(fontsize = 12, fontweight = 'bold')
plt.tight_layout()
plt.savefig(os.path.join(dir_path, 'visits.png'))

Exponential Visits

In [None]:
visits_rage = [0.0203, 0.0603]#[0.1, 0.2]
k = 1

top_selected_w = neib_centrality_visits.sort_values('weighted_influence_exp', ascending =False)[:k]['Neib_Brand'].values
top_selected = neib_centrality_visits.sort_values('influence_exp', ascending =False)[:k]['Neib_Brand'].values
most_avg_visits = ranked_dic_visits_exp.sort_values('avg_visits', ascending =False).index[:k].values
closest_distance = ranked_dic_visits_exp.sort_values('distance', ascending = True).index[:k].values
closest_weighted = ranked_dic_visits_exp.sort_values('weighted_visits', ascending =False).index[:k].values

example_2_partner_choice = []

def compute_visits(selected, current_visits_rate):
    sub_g = visits_all_business_pure_visits[visits_all_business_pure_visits.Neib_Brand.isin(list(selected))]    
    return sub_g.num_inf_visits_exp.sum() * current_visits_rate

for rand in range(100):
    current_visits_rate = random.uniform(visits_rage[0], visits_rage[1]) 
    top_cent_visits = compute_visits(top_selected, current_visits_rate)
    top_w_cent_visits = compute_visits(top_selected_w, current_visits_rate)
    random_selected = random.choices(visits_all_business_pure_visits.Neib_Brand.unique(), k = k)
    random_visits = compute_visits(random_selected, current_visits_rate)
    most_avg_visits_inf = compute_visits(most_avg_visits, current_visits_rate)
    closest_distance_inf = compute_visits(closest_distance, current_visits_rate)
    closest_weighted_inf =compute_visits(closest_weighted, current_visits_rate)
    example_2_partner_choice.append([top_cent_visits, top_w_cent_visits, random_visits, most_avg_visits_inf, closest_weighted_inf, closest_distance_inf])


example_2_partner_choice = pd.DataFrame(example_2_partner_choice, columns = ['top_cent_visits', 'top_w_cent_visits', 'random_visits', 'most_avg_visits_inf', 'closest_weighted_inf', "closest_distance_inf"])
example_2_partner_choice.mean()
example_2_partner_choice['type'] = 'example_2_partner_choice'
example_2_partner_choice

In [None]:
cnt = 0 
DAY = 30 
plt.figure(figsize = (8, 5))

data = example_2_partner_choice
data_sub = data.melt(value_vars = data.columns[:-1])
data_sub['value'] = data_sub['value'].astype('float')

data_sub['Increased visits'] = DAY * data_sub['value']#%/ 3200 * 100
sns.barplot(data = data_sub, x = 'variable', y = 'Increased visits')
plt.ylabel('Increased visits', fontsize = 12, fontweight = 'bold')
plt.title('{}'.format(brand), fontsize = 12, fontweight = 'bold')
plt.xticks(range(6), ['MIC', 'WIC', 'random', 'avg visits', 'weighted visits', 'closest'], fontsize = 12, rotation = 30, fontweight = 'bold')
plt.xlabel('')
plt.yticks(fontsize = 12, fontweight = 'bold')
plt.tight_layout()
plt.savefig(os.path.join(dir_path, 'visits_exp.png'))

### Example 4 Inbound Centrality Budget Allocations

Create the folder to save the results

In [None]:
dir_path = os.path.join('example4_inbound_centrality_budget_allocations', brand)
os.makedirs(dir_path, exist_ok=True)

Linear Reviews

In [None]:
visits_rage = [0.0203, 0.0603] 
k = 5
example_4_budget_allocation = []
avg_visits = visits_all_business_pure_reviews.sort_values('Neib_avg_visits', ascending = False)[:k]
weighted_visits = visits_all_business_pure_reviews.sort_values('weighted_visits', ascending = False)[:k]
closest = visits_all_business_pure_reviews.sort_values('Time_mins_inv', ascending = True)[:k]

for rand in range(100):
    current_visits_rate = random.uniform(visits_rage[0], visits_rage[1]) 
    estimate_based =  visits_all_business_pure_reviews['num_inf_visits'][:k].sum() * current_visits_rate  #(df['num_inf_visits'][:k].sum() * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    random_based = np.sum(random.choices(visits_all_business_pure_reviews['num_inf_visits'].values, k = k)) * current_visits_rate #(np.sum(random.choices(df['num_inf_visits'].values, k = k)) * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    avg_visits_profit = np.sum(avg_visits['num_inf_visits']) * current_visits_rate #(avg_visits['num_inf_visits'].sum() * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    weighted_visits_profit = np.sum(weighted_visits['num_inf_visits'])  * current_visits_rate #(weighted_visits['num_inf_visits'].sum() * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    closest_profit = np.sum(closest['num_inf_visits']) * current_visits_rate  #(closest['num_inf_visits'].sum() * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    example_4_budget_allocation.append([estimate_based, random_based, avg_visits_profit, weighted_visits_profit, closest_profit])


example_4_budget_allocation = pd.DataFrame(example_4_budget_allocation, columns = ['top', 'random', 'avg_visits_profit', 'weighted_visits_profit', 'closest_profit'])
example_4_budget_allocation['type'] = 'example_4_budget_allocation'
example_4_budget_allocation

In [None]:
cnt = 0 
DAY = 30
plt.figure(figsize = (8, 5))
data = example_4_budget_allocation      
data_sub = data.melt(value_vars = data.columns[:-1])    

data_sub['Revenue'] = DAY * data_sub['value']
sns.barplot(data = data_sub, x = 'variable', y = 'Revenue')
plt.title('{}'.format(brand), fontsize = 12, fontweight = 'bold')
plt.xticks(range(5), [ 'WIC+WIBC', 'random', 'average visits', 'weighted visits', 'closest'], fontsize = 12, fontweight = 'bold', rotation = 30)
plt.xlabel('')
plt.yticks(fontsize = 12, fontweight = 'bold')
plt.ylabel('Increased visits', fontsize = 12, fontweight = 'bold')
plt.tight_layout()
plt.savefig(os.path.join(dir_path, 'reviews.png'))

Exponential Reviews

In [None]:
visits_rage = [0.0203, 0.0603] 
k = 5
example_4_budget_allocation = []
avg_visits = visits_all_business_pure_reviews.sort_values('Neib_avg_visits', ascending = False)[:k]
weighted_visits = visits_all_business_pure_reviews.sort_values('weighted_visits', ascending = False)[:k]
closest = visits_all_business_pure_reviews.sort_values('Time_mins_inv_exp', ascending = True)[:k]

for rand in range(100):
    current_visits_rate = random.uniform(visits_rage[0], visits_rage[1]) 
    estimate_based =  visits_all_business_pure_reviews['num_inf_visits_exp'][:k].sum() * current_visits_rate  #(df['num_inf_visits'][:k].sum() * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    random_based = np.sum(random.choices(visits_all_business_pure_reviews['num_inf_visits_exp'].values, k = k)) * current_visits_rate #(np.sum(random.choices(df['num_inf_visits'].values, k = k)) * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    avg_visits_profit = np.sum(avg_visits['num_inf_visits_exp']) * current_visits_rate #(avg_visits['num_inf_visits'].sum() * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    weighted_visits_profit = np.sum(weighted_visits['num_inf_visits_exp'])  * current_visits_rate #(weighted_visits['num_inf_visits'].sum() * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    closest_profit = np.sum(closest['num_inf_visits_exp']) * current_visits_rate  #(closest['num_inf_visits'].sum() * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    example_4_budget_allocation.append([estimate_based, random_based, avg_visits_profit, weighted_visits_profit, closest_profit])


example_4_budget_allocation = pd.DataFrame(example_4_budget_allocation, columns = ['top', 'random', 'avg_visits_profit', 'weighted_visits_profit', 'closest_profit'])
example_4_budget_allocation['type'] = 'example_4_budget_allocation'
example_4_budget_allocation

In [None]:
cnt = 0 
plt.figure(figsize = (8, 5))
data = example_4_budget_allocation      
data_sub = data.melt(value_vars = data.columns[:-1])    

data_sub['Revenue'] = DAY * data_sub['value']
sns.barplot(data = data_sub, x = 'variable', y = 'Revenue')
plt.title('{}'.format(brand), fontsize = 12, fontweight = 'bold')
plt.xticks(range(5), [ 'WIC+WIBC', 'random', 'average visits', 'weighted visits', 'closest'], fontsize = 12, fontweight = 'bold', rotation = 30)
plt.xlabel('')
plt.yticks(fontsize = 12, fontweight = 'bold')
plt.ylabel('Increased visits', fontsize = 12, fontweight = 'bold')
plt.tight_layout()
plt.savefig(os.path.join(dir_path, 'reviews_exp.png'))

Linear Visits

In [None]:
visits_all_business_pure_visits.head()

In [None]:
visits_rage = [0.0203, 0.0603]
k = 5
example_4_budget_allocation = []
avg_visits = visits_all_business_pure_visits.sort_values('Neib_avg_visits', ascending = False)[:k]
weighted_visits = visits_all_business_pure_visits.sort_values('weighted_visits', ascending = False)[:k]
closest = visits_all_business_pure_visits.sort_values('Time_mins_inv', ascending = True)[:k]

for rand in range(100):
    current_visits_rate = random.uniform(visits_rage[0], visits_rage[1]) 
    estimate_based =  visits_all_business_pure_visits['num_inf_visits'][:k].sum() * current_visits_rate  #(df['num_inf_visits'][:k].sum() * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    random_based = np.sum(random.choices(visits_all_business_pure_visits['num_inf_visits'].values, k = k)) * current_visits_rate #(np.sum(random.choices(df['num_inf_visits'].values, k = k)) * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    avg_visits_profit = np.sum(avg_visits['num_inf_visits']) * current_visits_rate #(avg_visits['num_inf_visits'].sum() * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    weighted_visits_profit = np.sum(weighted_visits['num_inf_visits'])  * current_visits_rate #(weighted_visits['num_inf_visits'].sum() * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    closest_profit = np.sum(closest['num_inf_visits']) * current_visits_rate  #(closest['num_inf_visits'].sum() * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    example_4_budget_allocation.append([estimate_based, random_based, avg_visits_profit, weighted_visits_profit, closest_profit])


example_4_budget_allocation = pd.DataFrame(example_4_budget_allocation, columns = ['top', 'random', 'avg_visits_profit', 'weighted_visits_profit', 'closest_profit'])
example_4_budget_allocation['type'] = 'example_4_budget_allocation'
example_4_budget_allocation

In [None]:
cnt = 0 
plt.figure(figsize = (8, 5))
data = example_4_budget_allocation      
data_sub = data.melt(value_vars = data.columns[:-1])    

data_sub['Revenue'] = DAY * data_sub['value']
sns.barplot(data = data_sub, x = 'variable', y = 'Revenue')
plt.title('{}'.format(brand), fontsize = 12, fontweight = 'bold')
plt.xticks(range(5), [ 'WIC+WIBC', 'random', 'average visits', 'weighted visits', 'closest'], fontsize = 12, fontweight = 'bold', rotation = 30)
plt.xlabel('')
plt.yticks(fontsize = 12, fontweight = 'bold')
plt.ylabel('Increased visits', fontsize = 12, fontweight = 'bold')
plt.tight_layout()
plt.savefig(os.path.join(dir_path, 'visits.png'))

Exponential Visits

In [None]:
visits_rage = [0.0203, 0.0603]
k = 5
example_4_budget_allocation = []
avg_visits = visits_all_business_pure_visits.sort_values('Neib_avg_visits', ascending = False)[:k]
weighted_visits = visits_all_business_pure_visits.sort_values('weighted_visits', ascending = False)[:k]
closest = visits_all_business_pure_visits.sort_values('Time_mins_inv_exp', ascending = True)[:k]

for rand in range(100):
    current_visits_rate = random.uniform(visits_rage[0], visits_rage[1]) 
    estimate_based =  visits_all_business_pure_visits['num_inf_visits_exp'][:k].sum() * current_visits_rate  #(df['num_inf_visits'][:k].sum() * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    random_based = np.sum(random.choices(visits_all_business_pure_visits['num_inf_visits_exp'].values, k = k)) * current_visits_rate #(np.sum(random.choices(df['num_inf_visits'].values, k = k)) * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    avg_visits_profit = np.sum(avg_visits['num_inf_visits_exp']) * current_visits_rate #(avg_visits['num_inf_visits'].sum() * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    weighted_visits_profit = np.sum(weighted_visits['num_inf_visits_exp'])  * current_visits_rate #(weighted_visits['num_inf_visits'].sum() * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    closest_profit = np.sum(closest['num_inf_visits_exp']) * current_visits_rate  #(closest['num_inf_visits'].sum() * current_visits_rate * consumer_spending - k * budget ) / k * budget  * 100 
    example_4_budget_allocation.append([estimate_based, random_based, avg_visits_profit, weighted_visits_profit, closest_profit])


example_4_budget_allocation = pd.DataFrame(example_4_budget_allocation, columns = ['top', 'random', 'avg_visits_profit', 'weighted_visits_profit', 'closest_profit'])
example_4_budget_allocation['type'] = 'example_4_budget_allocation'
example_4_budget_allocation

In [None]:
cnt = 0 
plt.figure(figsize = (8, 5))
data = example_4_budget_allocation
data_sub = data.melt(value_vars = data.columns[:-1])    

data_sub['Revenue'] = DAY * data_sub['value']
sns.barplot(data = data_sub, x = 'variable', y = 'Revenue')
plt.title('{}'.format(brand), fontsize = 12, fontweight = 'bold')
plt.xticks(range(5), [ 'WIC+WIBC', 'random', 'average visits', 'weighted visits', 'closest'], fontsize = 12, fontweight = 'bold', rotation = 30)
plt.xlabel('')
plt.yticks(fontsize = 12, fontweight = 'bold')
plt.ylabel('Increased visits', fontsize = 12, fontweight = 'bold')
plt.tight_layout()
plt.savefig(os.path.join(dir_path, 'visits_exp.png'))

### Example 3: Inbound Centrality for Local Marketing Partnerships

Creating a folder to save results

In [None]:
dir_path = os.path.join('example4_inbound_centrality_local_marketing', brand)
os.makedirs(dir_path, exist_ok=True)

Linear Reviews

In [None]:
focal_store_chosen_num = 1
#%%
# https://coresight.com/research/going-head-to-head-in-beauty-retailing-a-comparison-of-sephora-and-ulta/
ranked_dic = visits_all_business_pure_reviews.groupby('Focal_Stores').agg(num_neighbors=('Neib_Stores', 'count'),
                                                   focal_avg_visits=('Foc_avg_visits', 'mean'),
                                                   neighbor_avg_visits=('Neib_avg_visits', 'sum'),
                                                   weighted_visits = ('weighted_visits', 'sum')
                                                )

#%%

top_selected_w = inner_centrality_reviews.sort_values('weighted_influence', ascending =False)[:focal_store_chosen_num].Focal_Stores.values
top_selected = inner_centrality_reviews.sort_values('influence', ascending =False)[:focal_store_chosen_num].Focal_Stores.values
num_stores = ranked_dic.sort_values('num_neighbors', ascending =False).index[:focal_store_chosen_num]
focal_avg_visits = ranked_dic.sort_values('focal_avg_visits', ascending =False).index[:focal_store_chosen_num]
avg_visits = ranked_dic.sort_values('neighbor_avg_visits', ascending =False).index[:focal_store_chosen_num]
weighted_visits = ranked_dic.sort_values('weighted_visits', ascending =False).index[:focal_store_chosen_num]


def compute_visits_eg3(store_id, current_visits_rate, num_neighboring_store = 3):
    sub_g = visits_all_business_pure_reviews[visits_all_business_pure_reviews.Focal_Stores.isin(store_id)]
    #sub_g = sub_g.sort_values('weighted_visits', ascending = False).reset_index()#[:num_neighboring_store]
    visits = 0 
    for i in range(sub_g.shape[0]):
        visits += sub_g['num_inf_visits'].values[i] 
    return visits * current_visits_rate


### a single collaborator store 
example_3_local_partner_choice = []
for rand in range(100):
    current_visits_rate = random.uniform(visits_rage[0], visits_rage[1]) 
    random_selected = random.choice(ranked_dic.index)
    rand_profit = compute_visits_eg3([random_selected], current_visits_rate)
    top_selected_w_profit = compute_visits_eg3(top_selected_w, current_visits_rate)
    top_selected_profit = compute_visits_eg3(top_selected, current_visits_rate)
    num_stores_profit = compute_visits_eg3(num_stores, current_visits_rate)
    focal_avg_visits_profit = compute_visits_eg3(focal_avg_visits, current_visits_rate)
    avg_visits_prift = compute_visits_eg3(avg_visits, current_visits_rate)
    closest_weighted_profit = compute_visits_eg3(weighted_visits, current_visits_rate)
    example_3_local_partner_choice.append([rand_profit, top_selected_profit, top_selected_w_profit,  num_stores_profit, focal_avg_visits_profit, avg_visits_prift, closest_weighted_profit])


example_3_local_partner_choice = pd.DataFrame( example_3_local_partner_choice , columns =['rand_profit', 'top_selected_profit', 'top_selected_w_profit', 'num_stores_profit', 'focal_avg_visits_profit', "avg_visits_prift", "closest_weighted_profit"])
example_3_local_partner_choice['type'] = 'example_3_local_partner_choice'

example_3_local_partner_choice

In [None]:
cnt = 0 
plt.figure(figsize = (8, 5))
data = example_3_local_partner_choice
data_sub = data.melt(value_vars = data.columns[:-1])
data_sub['Revenue'] = DAY * (data_sub['value']) #/ 4000 * 100
order = ['top_selected_profit', 'top_selected_w_profit', 'rand_profit', 'num_stores_profit', 
            'focal_avg_visits_profit',  'closest_weighted_profit']
sns.barplot(data = data_sub, x = 'variable', y = 'Revenue', order=order)
plt.title('{}'.format(brand), fontsize = 12, fontweight = 'bold')
plt.yticks(fontsize = 12, fontweight = 'bold')
plt.xlabel('')
plt.ylabel('Increased visits', fontsize = 12, fontweight = 'bold')
plt.xticks(range(6), [ 'MIBC', 'WIBC', 'random', '# neighbors', 'focal visits', 'neighbor visits'], fontsize = 12, fontweight = 'bold', rotation = 30)
plt.tight_layout()
plt.savefig(os.path.join(dir_path, 'reviews.png'))


Exponential Reviews

In [None]:
focal_store_chosen_num = 1
#%%
# https://coresight.com/research/going-head-to-head-in-beauty-retailing-a-comparison-of-sephora-and-ulta/
ranked_dic = visits_all_business_pure_reviews.groupby('Focal_Stores').agg(num_neighbors=('Neib_Stores', 'count'),
                                                   focal_avg_visits=('Foc_avg_visits', 'mean'),
                                                   neighbor_avg_visits=('Neib_avg_visits', 'sum'),
                                                   weighted_visits = ('weighted_visits', 'sum')
                                                )

top_selected_w = inner_centrality_reviews.sort_values('weighted_influence_exp', ascending =False)[:focal_store_chosen_num].Focal_Stores.values
top_selected = inner_centrality_reviews.sort_values('influence_exp', ascending =False)[:focal_store_chosen_num].Focal_Stores.values
num_stores = ranked_dic.sort_values('num_neighbors', ascending =False).index[:focal_store_chosen_num]
focal_avg_visits = ranked_dic.sort_values('focal_avg_visits', ascending =False).index[:focal_store_chosen_num]
avg_visits = ranked_dic.sort_values('neighbor_avg_visits', ascending =False).index[:focal_store_chosen_num]
weighted_visits = ranked_dic.sort_values('weighted_visits', ascending =False).index[:focal_store_chosen_num]


def compute_visits_eg3(store_id, current_visits_rate, num_neighboring_store = 3):
    sub_g = visits_all_business_pure_reviews[visits_all_business_pure_reviews.Focal_Stores.isin(store_id)]
    #sub_g = sub_g.sort_values('weighted_visits', ascending = False).reset_index()#[:num_neighboring_store]
    visits = 0 
    for i in range(sub_g.shape[0]):
        visits += sub_g['num_inf_visits_exp'].values[i] 
    return visits * current_visits_rate


### a single collaborator store 
example_3_local_partner_choice = []
for rand in range(100):
    current_visits_rate = random.uniform(visits_rage[0], visits_rage[1]) 
    random_selected = random.choice(ranked_dic.index)
    rand_profit = compute_visits_eg3([random_selected], current_visits_rate)
    top_selected_w_profit = compute_visits_eg3(top_selected_w, current_visits_rate)
    top_selected_profit = compute_visits_eg3(top_selected, current_visits_rate)
    num_stores_profit = compute_visits_eg3(num_stores, current_visits_rate)
    focal_avg_visits_profit = compute_visits_eg3(focal_avg_visits, current_visits_rate)
    avg_visits_prift = compute_visits_eg3(avg_visits, current_visits_rate)
    closest_weighted_profit = compute_visits_eg3(weighted_visits, current_visits_rate)
    example_3_local_partner_choice.append([rand_profit, top_selected_profit, top_selected_w_profit,  num_stores_profit, focal_avg_visits_profit, avg_visits_prift, closest_weighted_profit])


example_3_local_partner_choice = pd.DataFrame( example_3_local_partner_choice , columns =['rand_profit', 'top_selected_profit', 'top_selected_w_profit', 'num_stores_profit', 'focal_avg_visits_profit', "avg_visits_prift", "closest_weighted_profit"])
example_3_local_partner_choice['type'] = 'example_3_local_partner_choice'

example_3_local_partner_choice

In [None]:
cnt = 0 
plt.figure(figsize = (8, 5))
data = example_3_local_partner_choice
data_sub = data.melt(value_vars = data.columns[:-1])
data_sub['Revenue'] = DAY * (data_sub['value']) #/ 4000 * 100
order = ['top_selected_profit', 'top_selected_w_profit', 'rand_profit', 'num_stores_profit', 
            'focal_avg_visits_profit',  'closest_weighted_profit']
sns.barplot(data = data_sub, x = 'variable', y = 'Revenue', order=order)
plt.title('{}'.format(brand), fontsize = 12, fontweight = 'bold')
plt.yticks(fontsize = 12, fontweight = 'bold')
plt.xlabel('')
plt.ylabel('Increased visits', fontsize = 12, fontweight = 'bold')
plt.xticks(range(6), [ 'MIBC', 'WIBC', 'random', '# neighbors', 'focal visits', 'neighbor visits'], fontsize = 12, fontweight = 'bold', rotation = 30)
plt.tight_layout()
plt.savefig(os.path.join(dir_path, 'reviews_exp.png'))


Linear Visits

In [None]:
focal_store_chosen_num = 1
#%%
# https://coresight.com/research/going-head-to-head-in-beauty-retailing-a-comparison-of-sephora-and-ulta/
ranked_dic = visits_all_business_pure_visits.groupby('Focal_Stores').agg(num_neighbors=('Neib_Stores', 'count'),
                                                   focal_avg_visits=('Foc_avg_visits', 'mean'),
                                                   neighbor_avg_visits=('Neib_avg_visits', 'sum'),
                                                   weighted_visits = ('weighted_visits', 'sum')
                                                )

top_selected_w = inner_centrality_visits.sort_values('weighted_influence', ascending =False)[:focal_store_chosen_num].Focal_Stores.values
top_selected = inner_centrality_visits.sort_values('influence', ascending =False)[:focal_store_chosen_num].Focal_Stores.values
num_stores = ranked_dic.sort_values('num_neighbors', ascending =False).index[:focal_store_chosen_num]
focal_avg_visits = ranked_dic.sort_values('focal_avg_visits', ascending =False).index[:focal_store_chosen_num]
avg_visits = ranked_dic.sort_values('neighbor_avg_visits', ascending =False).index[:focal_store_chosen_num]
weighted_visits = ranked_dic.sort_values('weighted_visits', ascending =False).index[:focal_store_chosen_num]


def compute_visits_eg3(store_id, current_visits_rate, num_neighboring_store = 3):
    sub_g = visits_all_business_pure_visits[visits_all_business_pure_visits.Focal_Stores.isin(store_id)]
    #sub_g = sub_g.sort_values('weighted_visits', ascending = False).reset_index()#[:num_neighboring_store]
    visits = 0 
    for i in range(sub_g.shape[0]):
        visits += sub_g['num_inf_visits'].values[i] 
    return visits * current_visits_rate


### a single collaborator store 
example_3_local_partner_choice = []
for rand in range(100):
    current_visits_rate = random.uniform(visits_rage[0], visits_rage[1]) 
    random_selected = random.choice(ranked_dic.index)
    rand_profit = compute_visits_eg3([random_selected], current_visits_rate)
    top_selected_w_profit = compute_visits_eg3(top_selected_w, current_visits_rate)
    top_selected_profit = compute_visits_eg3(top_selected, current_visits_rate)
    num_stores_profit = compute_visits_eg3(num_stores, current_visits_rate)
    focal_avg_visits_profit = compute_visits_eg3(focal_avg_visits, current_visits_rate)
    avg_visits_prift = compute_visits_eg3(avg_visits, current_visits_rate)
    closest_weighted_profit = compute_visits_eg3(weighted_visits, current_visits_rate)
    example_3_local_partner_choice.append([rand_profit, top_selected_profit, top_selected_w_profit,  num_stores_profit, focal_avg_visits_profit, avg_visits_prift, closest_weighted_profit])


example_3_local_partner_choice = pd.DataFrame( example_3_local_partner_choice , columns =['rand_profit', 'top_selected_profit', 'top_selected_w_profit', 'num_stores_profit', 'focal_avg_visits_profit', "avg_visits_prift", "closest_weighted_profit"])
example_3_local_partner_choice['type'] = 'example_3_local_partner_choice'

example_3_local_partner_choice

In [None]:
cnt = 0 
plt.figure(figsize = (8, 5))
data = example_3_local_partner_choice
data_sub = data.melt(value_vars = data.columns[:-1])
data_sub['Revenue'] = DAY * (data_sub['value']) #/ 4000 * 100
order = ['top_selected_profit', 'top_selected_w_profit', 'rand_profit', 'num_stores_profit', 
            'focal_avg_visits_profit',  'closest_weighted_profit']
sns.barplot(data = data_sub, x = 'variable', y = 'Revenue', order=order)
plt.title('{}'.format(brand), fontsize = 12, fontweight = 'bold')
plt.yticks(fontsize = 12, fontweight = 'bold')
plt.xlabel('')
plt.ylabel('Increased visits', fontsize = 12, fontweight = 'bold')
plt.xticks(range(6), [ 'MIBC', 'WIBC', 'random', '# neighbors', 'focal visits', 'neighbor visits'], fontsize = 12, fontweight = 'bold', rotation = 30)
plt.tight_layout()
plt.savefig(os.path.join(dir_path, 'visits.png'))

Exponential Visits

In [None]:
focal_store_chosen_num = 1
#%%
# https://coresight.com/research/going-head-to-head-in-beauty-retailing-a-comparison-of-sephora-and-ulta/
ranked_dic = visits_all_business_pure_visits.groupby('Focal_Stores').agg(num_neighbors=('Neib_Stores', 'count'),
                                                   focal_avg_visits=('Foc_avg_visits', 'mean'),
                                                   neighbor_avg_visits=('Neib_avg_visits', 'sum'),
                                                   weighted_visits = ('weighted_visits', 'sum')
                                                )

top_selected_w = inner_centrality_visits.sort_values('weighted_influence_exp', ascending =False)[:focal_store_chosen_num].Focal_Stores.values
top_selected = inner_centrality_visits.sort_values('influence_exp', ascending =False)[:focal_store_chosen_num].Focal_Stores.values
num_stores = ranked_dic.sort_values('num_neighbors', ascending =False).index[:focal_store_chosen_num]
focal_avg_visits = ranked_dic.sort_values('focal_avg_visits', ascending =False).index[:focal_store_chosen_num]
avg_visits = ranked_dic.sort_values('neighbor_avg_visits', ascending =False).index[:focal_store_chosen_num]
weighted_visits = ranked_dic.sort_values('weighted_visits', ascending =False).index[:focal_store_chosen_num]


def compute_visits_eg3(store_id, current_visits_rate, num_neighboring_store = 3):
    sub_g = visits_all_business_pure_visits[visits_all_business_pure_visits.Focal_Stores.isin(store_id)]
    #sub_g = sub_g.sort_values('weighted_visits', ascending = False).reset_index()#[:num_neighboring_store]
    visits = 0 
    for i in range(sub_g.shape[0]):
        visits += sub_g['num_inf_visits_exp'].values[i] 
    return visits * current_visits_rate


### a single collaborator store 
example_3_local_partner_choice = []
for rand in range(100):
    current_visits_rate = random.uniform(visits_rage[0], visits_rage[1]) 
    random_selected = random.choice(ranked_dic.index)
    rand_profit = compute_visits_eg3([random_selected], current_visits_rate)
    top_selected_w_profit = compute_visits_eg3(top_selected_w, current_visits_rate)
    top_selected_profit = compute_visits_eg3(top_selected, current_visits_rate)
    num_stores_profit = compute_visits_eg3(num_stores, current_visits_rate)
    focal_avg_visits_profit = compute_visits_eg3(focal_avg_visits, current_visits_rate)
    avg_visits_prift = compute_visits_eg3(avg_visits, current_visits_rate)
    closest_weighted_profit = compute_visits_eg3(weighted_visits, current_visits_rate)
    example_3_local_partner_choice.append([rand_profit, top_selected_profit, top_selected_w_profit,  num_stores_profit, focal_avg_visits_profit, avg_visits_prift, closest_weighted_profit])


example_3_local_partner_choice = pd.DataFrame( example_3_local_partner_choice , columns =['rand_profit', 'top_selected_profit', 'top_selected_w_profit', 'num_stores_profit', 'focal_avg_visits_profit', "avg_visits_prift", "closest_weighted_profit"])
example_3_local_partner_choice['type'] = 'example_3_local_partner_choice'

example_3_local_partner_choice

In [None]:
cnt = 0 
plt.figure(figsize = (8, 5))
data = example_3_local_partner_choice
data_sub = data.melt(value_vars = data.columns[:-1])
data_sub['Revenue'] = DAY * (data_sub['value']) #/ 4000 * 100
order = ['top_selected_profit', 'top_selected_w_profit', 'rand_profit', 'num_stores_profit', 
            'focal_avg_visits_profit',  'closest_weighted_profit']
sns.barplot(data = data_sub, x = 'variable', y = 'Revenue', order=order)
plt.title('{}'.format(brand), fontsize = 12, fontweight = 'bold')
plt.yticks(fontsize = 12, fontweight = 'bold')
plt.xlabel('')
plt.ylabel('Increased visits', fontsize = 12, fontweight = 'bold')
plt.xticks(range(6), [ 'MIBC', 'WIBC', 'random', '# neighbors', 'focal visits', 'neighbor visits'], fontsize = 12, fontweight = 'bold', rotation = 30)
plt.tight_layout()
plt.savefig(os.path.join(dir_path, 'visits_exp.png'))