In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import pickle
import datetime
import numpy as np
import random
import seaborn as sns

Specifying all the focal brands

In [None]:
# Focal brands covered by this analysis; a later cell selects one of them by position.
focal_brands = [
    'Sephora',
    'ULTA Beauty',
    'Olive Garden',
    'The Cheesecake Factory',
    'Target',
    'Walmart',
    'Anthropologie',
    "Victoria's Secret",
]

Reading the social brands catalog to get visits later for each store

In [None]:
# Load the CSV and derive the helper columns used for matching and date handling.
brands_visits = pd.read_csv('../data/revision_visits_revenue_2019.csv')

# Trimmed, lower-cased brand name -- for comparison with catalog.tsv
brands_visits['brand_standard'] = [
    raw_name.strip().lower() for raw_name in brands_visits['brand']
]

# Convert 'YYYY-MM-DD' strings into datetime.date objects
brands_visits['date'] = [
    datetime.datetime.strptime(date_text, '%Y-%m-%d').date()
    for date_text in brands_visits['date']
]

# Keep the original spelling under a distinct column name
brands_visits = brands_visits.rename(columns={'brand': 'brand_visitation'})
brands_visits.head()

Reading Travel Time

In [None]:
# NOTE(review): pickle.load can execute arbitrary code -- only load pickle files from a trusted source.
with open('../data/travel_time.pkl', 'rb') as pkl_file:
    travel_time_dict = pickle.load(pkl_file)

# Each key is indexed as (from, to); each value is a string whose first
# space-separated token is parsed as an integer number of minutes.
# NOTE(review): any text after the first token is ignored -- confirm the values never
# carry a larger unit first (e.g. hours).
travel_time_keys = list(travel_time_dict)
from_keys = [key_pair[0] for key_pair in travel_time_keys]
to_keys = [key_pair[1] for key_pair in travel_time_keys]
time_minutes = [int(duration_text.split(' ')[0]) for duration_text in travel_time_dict.values()]

travel_time = pd.DataFrame(
    {
        'From_PLACEKEY': from_keys,
        'To_PLACEKEY': to_keys,
        'Time_mins': time_minutes,
    }
)
travel_time.head()

Reading the statistics of the specific focal brand using the results of Part 2

In [None]:
# Position 1 of focal_brands is 'ULTA Beauty'; change the index to analyse another focal brand.
brand = focal_brands[1]
# Directory for the selected brand under the Part 2 statistics folder.
focal_brand_path = os.path.join('../part2_r_statistics', brand)
focal_brand_path

Reading all the neighboring brands results for the selected focal brand

In [None]:
# os.listdir returns entries in arbitrary order; sorting keeps the order in which
# result files are read (and hence the row order of the combined table) reproducible.
file_list = sorted(os.listdir(focal_brand_path))
# Keep only the entries whose name contains '_result'.
result_file_list = [file_name for file_name in file_list if '_result' in file_name]

In [None]:
# Read every result file and combine them into one table.
# Frames are collected in a list and concatenated once, instead of growing a
# DataFrame inside the loop (which re-copies all previous rows on every iteration).
result_frames = []

for result_file in result_file_list:
    result_file_path = os.path.join(focal_brand_path, result_file)
    # skiprows=1 drops the first line of the file, so the next line supplies the column names.
    tmp_res_df = pd.read_csv(result_file_path, skiprows=1, float_precision="round_trip")
    # Wrong input: the header text shows up again as a data row -- skip the whole file.
    if (tmp_res_df['filename'] == 'filename').any():
        continue
    result_frames.append(tmp_res_df)

# pd.concat raises on an empty list, so fall back to an empty frame when nothing was usable.
result_df = pd.concat(result_frames, ignore_index=True) if result_frames else pd.DataFrame()

In [None]:
# Drop the rows of the 'ols' and 'm_olsExp' model types, renumber the index, and
# turn the text markers 'FALSE' / 'False' into the boolean False.
# Numeric columns are not cast here; later cells convert individual values with float().
excluded_types = ['ols', 'm_olsExp']
keep_mask = ~result_df['type'].isin(excluded_types)
result_df = (
    result_df.loc[keep_mask]
    .reset_index(drop=True)
    .replace('FALSE', False)
    .replace('False', False)
)

In [None]:
# Display the combined results table.
result_df

In [None]:
# Number of distinct 'filename' values that appear in exactly one row.
sum(result_df['filename'].value_counts() == 1)

In [None]:
# Number of distinct 'filename' values that appear in exactly two rows.
sum(result_df['filename'].value_counts() == 2)

In [None]:
# Number of distinct 'filename' values that appear in exactly three rows.
sum(result_df['filename'].value_counts() == 3)

In [None]:
# Number of distinct 'filename' values that appear in exactly four rows.
sum(result_df['filename'].value_counts() == 4)

Keep only those brands that have results for all four models

In [None]:
# Rows per 'filename'; a filename with exactly four rows has a value for each of the four models.
count_list = result_df['filename'].value_counts()
valid_brands = [
    filename for filename, row_count in count_list.items() if row_count == 4
]
len(valid_brands)

In [None]:
# Keep only the rows whose 'filename' is one of the valid brands.
has_all_models = result_df['filename'].isin(valid_brands)
result_df = result_df.loc[has_all_models]
result_df

Extract the neighboring brands whose coefficient p-value, i.e., X_Pr(>|t|), is significant (< 0.05) in all four models

In [None]:
def filter_brands_pvalue(brand_pvalue, alpha=0.05):
    """Tell whether every p-value of one brand is below the significance level.

    Intended as the function passed to ``groupby(...).apply`` on a p-value column.

    Parameters
    ----------
    brand_pvalue : pandas.Series
        The p-values of one brand, one entry per model.
    alpha : float, optional
        Significance threshold; a p-value counts as significant when it is
        strictly below ``alpha``. Defaults to 0.05.

    Returns
    -------
    bool
        True when the group is non-empty and all of its p-values are below
        ``alpha``; False otherwise (including when any p-value is NaN, since
        comparisons with NaN are False).
    """
    pvalues = brand_pvalue.values

    # An empty group carries no evidence, so it is never reported as significant.
    if len(pvalues) == 0:
        return False

    # Check every entry instead of four hard-coded positions, so groups with fewer
    # rows do not raise IndexError and extra rows are not silently ignored.
    return bool((pvalues < alpha).all())

In [None]:
# One boolean per 'filename': True when filter_brands_pvalue accepts its X_Pr(>|t|) values.
significant_brands = result_df.groupby('filename')['X_Pr(>|t|)'].apply(filter_brands_pvalue)
# Names of the brands flagged True.
significant_brands_list = [
    filename for filename, is_significant in significant_brands.items() if is_significant
]

In [None]:
# Display the names of the brands that passed the significance filter.
significant_brands_list

### Making Figures D1 and D2

1. Plot the X_Estimate of each significant neighboring brand for each of the four models
2. Mark each neighboring brand with stars according to its p-value in each model \
-> *** means p_value <= 0.01 \
-> ** means 0.01 < p_value <= 0.05 \
-> * means 0.05 < p_value <= 0.1

In [None]:
def get_pvalue_stars(p_value):
    """Map a p-value to its significance marker.

    Returns '***' for p <= 0.01, '**' for 0.01 < p <= 0.05, '*' for
    0.05 < p <= 0.1, and an empty string otherwise (NaN also yields '').
    """
    # Thresholds are checked from strictest to loosest; the first match wins.
    star_levels = ((0.01, '***'), (0.05, '**'), (0.1, '*'))
    for upper_limit, marker in star_levels:
        if p_value <= upper_limit:
            return marker
    return ''

In [None]:
# Rows of the significant brands only. The explicit .copy() makes coeff_est_df an
# independent frame, so adding a column below does not write into a slice of result_df
# (which triggers pandas' SettingWithCopyWarning).
coeff_est_df = result_df[result_df['filename'].isin(significant_brands_list)].copy()
# Star marker for the p-value of each row.
coeff_est_df['X_pvalue_stars'] = coeff_est_df['X_Pr(>|t|)'].apply(get_pvalue_stars)
coeff_est_df

In [None]:
# One frame per model type, each ordered from the largest to the smallest X_Estimate.
lin_reviews, lin_visits, exp_reviews, exp_visits = (
    coeff_est_df[coeff_est_df['type'] == model_type].sort_values('X_Estimate', ascending=False)
    for model_type in (
        'fe_reviews_reviews',
        'fe_reviews_visits',
        'fe_exp_reviews_reviews',
        'fe_exp_reviews_visits',
    )
)

In [None]:
def prepare_first_stage_results_review(row):
    """Build the first-stage table entries of one result row for the reviews instruments.

    For each of Twitter, Facebook and Instagram the entry is the text
    ``<estimate><stars>(<std. error>)``, with estimate and standard error in
    two-decimal scientific notation and the stars taken from get_pvalue_stars.

    Returns a pandas.Series indexed by 'Twitter Likes', 'Facebook Likes',
    'Instagram Likes', 'Num Sig' (how many of the three p-values are <= 0.05)
    and 'Weka Instrument (F-stats)' (the row's 'WaldTest_F' value, unchanged).
    """
    # (estimate, standard error, p-value) column names: Twitter, Facebook, Instagram.
    platform_columns = (
        ('IV_firststage_reviews_tw_Estimate', 'IV_firststage_reviews_tw_Std. Error', 'IV_firststage_reviews_tw_Pr(>|t|)'),
        ('IV_firststage_reviews_fb_Estimate', 'IV_firststage_reviews_fb_Std. Error', 'IV_firststage_reviews_fb_Pr(>|t|)'),
        ('IV_firststage_reviews_ig_Estimate', 'IV_firststage_reviews_ig_Std. Error', 'IV_firststage_reviews_ig_Pr(>|t|)'),
    )

    table_entries = []
    num_sig = 0

    for estimate_col, std_err_col, p_value_col in platform_columns:
        estimate_text = format(float(row[estimate_col]), '.2e')
        std_err_text = format(float(row[std_err_col]), '.2e')
        p_value = float(row[p_value_col])

        table_entries.append(f"{estimate_text}{get_pvalue_stars(p_value)}({std_err_text})")

        # Count the instruments that are significant at the 5% level.
        if p_value <= 0.05:
            num_sig += 1

    # WaldTest F-statistic, passed through as stored in the row.
    wald_f_stat = row['WaldTest_F']

    # NOTE(review): 'Weka Instrument' is probably meant to read 'Weak Instrument'; the label
    # is left untouched because it is part of the returned table.
    return pd.Series(
        table_entries + [num_sig, wald_f_stat],
        index=['Twitter Likes', 'Facebook Likes', 'Instagram Likes', 'Num Sig', 'Weka Instrument (F-stats)'],
    )

In [None]:
def prepare_first_stage_results_visits(row):
    """Build the first-stage table entry of one result row for the visits instrument.

    The 'Visits' entry is the text ``<estimate><stars>(<std. error>)``, with
    estimate and standard error in two-decimal scientific notation and the
    stars taken from get_pvalue_stars.

    Returns a pandas.Series indexed by 'Visits' and 'Weka Instrument (F-stats)'
    (the row's 'WaldTest_F' value, unchanged).
    """
    estimate_text = format(float(row['IV_firststage_visits_Estimate']), '.2e')
    std_err_text = format(float(row['IV_firststage_visits_Std. Error']), '.2e')
    p_value = float(row['IV_firststage_visits_Pr(>|t|)'])

    visits_entry = f"{estimate_text}{get_pvalue_stars(p_value)}({std_err_text})"

    # NOTE(review): 'Weka Instrument' is probably meant to read 'Weak Instrument'; the label
    # is left untouched because it is part of the returned table.
    return pd.Series(
        [visits_entry, row['WaldTest_F']],
        index=['Visits', 'Weka Instrument (F-stats)'],
    )

In [None]:
# First-stage table for the linear reviews model: one row per brand, in lin_reviews order.
first_stage_linear_reviews = (
    lin_reviews
    .apply(prepare_first_stage_results_review, axis=1, result_type='expand')
    .reset_index(drop=True)
)
first_stage_linear_reviews

In [None]:
# First-stage table for the linear visits model: one row per brand, in lin_visits order.
first_stage_linear_visits = (
    lin_visits
    .apply(prepare_first_stage_results_visits, axis=1, result_type='expand')
    .reset_index(drop=True)
)
first_stage_linear_visits

In [None]:
# First-stage table for the exponential reviews model: one row per brand, in exp_reviews order.
first_stage_exp_reviews = (
    exp_reviews
    .apply(prepare_first_stage_results_review, axis=1, result_type='expand')
    .reset_index(drop=True)
)
first_stage_exp_reviews

In [None]:
# First-stage table for the exponential visits model: one row per brand, in exp_visits order.
first_stage_exp_visits = (
    exp_visits
    .apply(prepare_first_stage_results_visits, axis=1, result_type='expand')
    .reset_index(drop=True)
)
first_stage_exp_visits

In [None]:
# Horizontal bar chart of the X_Estimate of every brand in lin_reviews, with 95%
# confidence-interval error bars and significance stars next to each bar.
custom_colors = {
    'positive': '#6488ea',  # Blue bars for estimates >= 0
    'star': '#922b05',      # Dark red for the significance stars
    'negative': '#3d9973'   # Green bars for estimates < 0
}

# 1.96 is the two-sided critical value of the standard normal distribution
# that corresponds to the 95% confidence level.
confidence_level = 0.95
z_critical = 1.96
lin_reviews['Lower Bound'] = lin_reviews['X_Estimate'] - z_critical * lin_reviews['X_Std. Error']
lin_reviews['Upper Bound'] = lin_reviews['X_Estimate'] + z_critical * lin_reviews['X_Std. Error']

# Create figure and axes
fig, ax = plt.subplots(figsize=(8, 8))

# Constant horizontal gap (in data units) between the bar end and the stars
star_offset = 0.1

# Font size of the stars
star_fontsize = 15

# One bar, one error bar and (optionally) one star label per brand
for idx, row in lin_reviews.iterrows():
    biz_biz = row['filename']
    estimate = row['X_Estimate']
    lower_bound = row['Lower Bound']
    upper_bound = row['Upper Bound']

    # Bar colour follows the sign of the estimate
    color = custom_colors['positive'] if estimate >= 0 else custom_colors['negative']

    # xerr takes the distances from the estimate down to the lower and up to the upper bound
    ax.barh(biz_biz, estimate, xerr=[[estimate - lower_bound], [upper_bound - estimate]], color=color)

    # Number of stars comes from the X_pvalue_stars column
    stars = row['X_pvalue_stars'].count('*')
    if stars > 0:
        # Stars go just beyond the bar end: to the right of non-negative bars, to the left of negative ones
        if estimate >= 0:
            star_x, ha = estimate + star_offset, 'left'
        else:
            star_x, ha = estimate - star_offset, 'right'
        ax.text(star_x, biz_biz, '*' * stars, va='center', ha=ha, color=custom_colors['star'], fontsize=star_fontsize)

# Dashed reference line at zero
ax.axvline(x=0, linestyle='--', color='gray')

# Add labels and title
ax.set_xlabel('Estimate')
ax.set_title('Linear Reviews')
ax.set_xscale('symlog')

# Fit the layout only after labels, title and scale are set so that they are taken into account
fig.tight_layout()

# Show the plot
plt.show()

In [None]:
# Horizontal bar chart of the X_Estimate of every brand in lin_visits, with 95%
# confidence-interval error bars and significance stars next to each bar.
custom_colors = {
    'positive': '#6488ea',  # Blue bars for estimates >= 0
    'star': '#922b05',      # Dark red for the significance stars
    'negative': '#3d9973'   # Green bars for estimates < 0
}

# 1.96 is the two-sided critical value of the standard normal distribution
# that corresponds to the 95% confidence level.
confidence_level = 0.95
z_critical = 1.96
lin_visits['Lower Bound'] = lin_visits['X_Estimate'] - z_critical * lin_visits['X_Std. Error']
lin_visits['Upper Bound'] = lin_visits['X_Estimate'] + z_critical * lin_visits['X_Std. Error']

# Create figure and axes
fig, ax = plt.subplots(figsize=(8, 8))

# Constant horizontal gap (in data units) between the bar end and the stars
star_offset = 0.1

# Font size of the stars
star_fontsize = 15

# One bar, one error bar and (optionally) one star label per brand
for idx, row in lin_visits.iterrows():
    biz_biz = row['filename']
    estimate = row['X_Estimate']
    lower_bound = row['Lower Bound']
    upper_bound = row['Upper Bound']

    # Bar colour follows the sign of the estimate
    color = custom_colors['positive'] if estimate >= 0 else custom_colors['negative']

    # xerr takes the distances from the estimate down to the lower and up to the upper bound
    ax.barh(biz_biz, estimate, xerr=[[estimate - lower_bound], [upper_bound - estimate]], color=color)

    # Number of stars comes from the X_pvalue_stars column
    stars = row['X_pvalue_stars'].count('*')
    if stars > 0:
        # Stars go just beyond the bar end: to the right of non-negative bars, to the left of negative ones
        if estimate >= 0:
            star_x, ha = estimate + star_offset, 'left'
        else:
            star_x, ha = estimate - star_offset, 'right'
        ax.text(star_x, biz_biz, '*' * stars, va='center', ha=ha, color=custom_colors['star'], fontsize=star_fontsize)

# Dashed reference line at zero
ax.axvline(x=0, linestyle='--', color='gray')

# Add labels and title
ax.set_xlabel('Estimate')
ax.set_title('Linear Visits')
ax.set_xscale('symlog')
# NOTE(review): called without arguments, so no explicit limits are given; the other
# three plots omit this call -- confirm whether specific x-limits were intended here.
ax.set_xlim()

# Fit the layout only after labels, title and scale are set so that they are taken into account
fig.tight_layout()

# Show the plot
plt.show()

In [None]:
# Horizontal bar chart of the X_Estimate of every brand in exp_reviews, with 95%
# confidence-interval error bars and significance stars next to each bar.
custom_colors = {
    'positive': '#6488ea',  # Blue bars for estimates >= 0
    'star': '#922b05',      # Dark red for the significance stars
    'negative': '#3d9973'   # Green bars for estimates < 0
}

# 1.96 is the two-sided critical value of the standard normal distribution
# that corresponds to the 95% confidence level.
confidence_level = 0.95
z_critical = 1.96
exp_reviews['Lower Bound'] = exp_reviews['X_Estimate'] - z_critical * exp_reviews['X_Std. Error']
exp_reviews['Upper Bound'] = exp_reviews['X_Estimate'] + z_critical * exp_reviews['X_Std. Error']

# Create figure and axes
fig, ax = plt.subplots(figsize=(8, 8))

# Constant horizontal gap (in data units) between the bar end and the stars
star_offset = 0.1

# Font size of the stars
star_fontsize = 15

# One bar, one error bar and (optionally) one star label per brand
for idx, row in exp_reviews.iterrows():
    biz_biz = row['filename']
    estimate = row['X_Estimate']
    lower_bound = row['Lower Bound']
    upper_bound = row['Upper Bound']

    # Bar colour follows the sign of the estimate
    color = custom_colors['positive'] if estimate >= 0 else custom_colors['negative']

    # xerr takes the distances from the estimate down to the lower and up to the upper bound
    ax.barh(biz_biz, estimate, xerr=[[estimate - lower_bound], [upper_bound - estimate]], color=color)

    # Number of stars comes from the X_pvalue_stars column
    stars = row['X_pvalue_stars'].count('*')
    if stars > 0:
        # Stars go just beyond the bar end: to the right of non-negative bars, to the left of negative ones
        if estimate >= 0:
            star_x, ha = estimate + star_offset, 'left'
        else:
            star_x, ha = estimate - star_offset, 'right'
        ax.text(star_x, biz_biz, '*' * stars, va='center', ha=ha, color=custom_colors['star'], fontsize=star_fontsize)

# Dashed reference line at zero
ax.axvline(x=0, linestyle='--', color='gray')

# Add labels and title
ax.set_xlabel('Estimate')
ax.set_title('Exponential Reviews')
ax.set_xscale('symlog')

# Fit the layout only after labels, title and scale are set so that they are taken into account
fig.tight_layout()

# Show the plot
plt.show()

In [None]:
# Horizontal bar chart of the X_Estimate of every brand in exp_visits, with 95%
# confidence-interval error bars and significance stars next to each bar.
custom_colors = {
    'positive': '#6488ea',  # Blue bars for estimates >= 0
    'star': '#922b05',      # Dark red for the significance stars
    'negative': '#3d9973'   # Green bars for estimates < 0
}

# 1.96 is the two-sided critical value of the standard normal distribution
# that corresponds to the 95% confidence level.
confidence_level = 0.95
z_critical = 1.96
exp_visits['Lower Bound'] = exp_visits['X_Estimate'] - z_critical * exp_visits['X_Std. Error']
exp_visits['Upper Bound'] = exp_visits['X_Estimate'] + z_critical * exp_visits['X_Std. Error']

# Create figure and axes
fig, ax = plt.subplots(figsize=(8, 8))

# Constant horizontal gap (in data units) between the bar end and the stars
star_offset = 0.1

# Font size of the stars
star_fontsize = 15

# One bar, one error bar and (optionally) one star label per brand
for idx, row in exp_visits.iterrows():
    biz_biz = row['filename']
    estimate = row['X_Estimate']
    lower_bound = row['Lower Bound']
    upper_bound = row['Upper Bound']

    # Bar colour follows the sign of the estimate
    color = custom_colors['positive'] if estimate >= 0 else custom_colors['negative']

    # xerr takes the distances from the estimate down to the lower and up to the upper bound
    ax.barh(biz_biz, estimate, xerr=[[estimate - lower_bound], [upper_bound - estimate]], color=color)

    # Number of stars comes from the X_pvalue_stars column
    stars = row['X_pvalue_stars'].count('*')
    if stars > 0:
        # Stars go just beyond the bar end: to the right of non-negative bars, to the left of negative ones
        if estimate >= 0:
            star_x, ha = estimate + star_offset, 'left'
        else:
            star_x, ha = estimate - star_offset, 'right'
        ax.text(star_x, biz_biz, '*' * stars, va='center', ha=ha, color=custom_colors['star'], fontsize=star_fontsize)

# Dashed reference line at zero
ax.axvline(x=0, linestyle='--', color='gray')

# Add labels and title
ax.set_xlabel('Estimate')
ax.set_title('Exponential Visits')
ax.set_xscale('symlog')

# Fit the layout only after labels, title and scale are set so that they are taken into account
fig.tight_layout()

# Show the plot
plt.show()