# Import libraries

In [119]:
import os

import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff  # stray text from the original line, kept as a note: 33CFA5 d94865

# Create the interactive plot for the evolution of friends' reviews influence on mobility according to weekdays

In [120]:
# Read the two weekday-study tables (bad reviews first, then good reviews)
df_bad, df_good = map(
    pd.read_csv,
    ('./for_plotting/bad_reviews_week.csv', './for_plotting/good_reviews_week.csv'),
)
# Preview the first rows of the good-review table
df_good.head()

Unnamed: 0,mon,tue,wed,thu,fri,sat,sun,allWeekDays,bucket
0,0.036301,0.043127,0.035008,0.036259,0.036784,0.033494,0.035211,0.036605,0.1
1,0.066043,0.067536,0.064058,0.061815,0.064771,0.053868,0.057454,0.06236,0.33053
2,0.078054,0.08105,0.080477,0.077344,0.076375,0.069528,0.069827,0.076178,1.0925
3,0.085617,0.085395,0.085262,0.084203,0.081403,0.074329,0.077653,0.081993,3.611038
4,0.084256,0.083751,0.082604,0.080948,0.078963,0.074947,0.078276,0.080513,11.935557


In [121]:
# Expand the abbreviated weekday headers to full day names, in place, in both tables
weekday_labels = {'mon':'Monday','tue':'Tuesday','wed':'Wednesday','thu':'Thursday','fri':'Friday','sat':'Saturday','sun':'Sunday'}
for frame in (df_good, df_bad):
    frame.rename(columns=weekday_labels, inplace=True)

In [122]:
def set_visible(idx,N):
    """
        Build the trace-visibility mask that shows a single pair of traces
        idx: index of the pair to show; negative values count from the end,
             like ordinary Python indexing
        N: number of pairs (the mask therefore covers 2*N traces)
        Return: list of 2*N Python booleans.
                True at positions 2*idx and 2*idx+1, False otherwise
        Raise: IndexError if idx is outside the range -N..N-1
    """
    # Same bounds as indexing into a mask of length 2*N
    if not -N <= idx < N:
        raise IndexError("pair index {} is out of range for {} pairs".format(idx, N))
    # Map a negative index onto the pair it designates
    pair = idx % N
    # Position p belongs to pair p // 2, so exactly two entries are True
    return [pos // 2 == pair for pos in range(2 * N)]

In [123]:
def list_dict(col_names,N):
    """
        Build the dropdown buttons of the weekday interactive plot
        col_names: column names of the dataframe; each name becomes a button
                   label and the title displayed once that button is selected
        N: number of trace pairs (set_visible builds masks of 2*N entries)
        Return: list of button dictionaries for `updatemenus`; the first button
                shows every trace, each following button shows one pair
    """
    # First button: show everything. The mask uses real booleans and has the
    # same 2*N length as the per-column masks (it used to be N floats).
    buttons = [dict(label="Plot all weekdays",
                    method="update",
                    args=[{"visible": [True] * (2 * N)},
                          {"title": "Plot all weekdays"}])]
    # One button per column, showing only that column's pair of traces
    for idx, name in enumerate(col_names):
        buttons.append(dict(label=name,
                            method="update",
                            args=[{"visible": set_visible(idx, N)},
                                  {"title": name}]))

    return buttons

In [124]:
# Distance groups used as x values for every trace
bucket = df_good['bucket'].to_numpy()
# Columns to plot: every column of the table except the last one
col_names = df_bad.columns[:-1]

In [125]:
# Start from an empty figure
fig = go.Figure()

# For each column add the good-review curve (red) and then the bad-review
# curve (blue); the dropdown masks rely on this pairwise ordering
for column in col_names:
    for frame, suffix, colour in ((df_good, " good review", "#FF0000"),
                                  (df_bad, " bad review", "#0000FF")):
        fig.add_trace(go.Scatter(x=bucket,
                                 y=frame[column].values,
                                 name=column+suffix,
                                 line=dict(color=colour)))

# Dropdown menu, with the first button pre-selected
weekday_menu = dict(active=0,
                    buttons=list_dict(col_names,len(col_names)))
fig.update_layout(updatemenus=[weekday_menu])

# Axis titles; distances are shown on a logarithmic x axis
fig.update_xaxes(title_text="Distance travelled in km by weekday in logarithmic scale", type="log")
fig.update_yaxes(title_text="Probability of travelling")
# Figure title
fig.update_layout(title_text="Evolution of friends' reviews influence on mobility according to weekdays")
fig.show()

In [126]:
# Export the figure as an HTML page and open it in the browser
fig.write_html('evolution_weekday.html', auto_open=True)

# Create the interactive plot for the evolution of friends' reviews influence on mobility according to categories

In [127]:
# Read the two category-study tables (bad reviews first, then good reviews)
df_bad_cat, df_good_cat = map(
    pd.read_csv,
    ('./for_plotting/bad_reviews_cat.csv', './for_plotting/good_reviews_cat.csv'),
)
# Preview the first rows of the good-review table
df_good_cat.head()

Unnamed: 0,Construction,Computers,Home,Merchant,Manufacturing,Travel,Sport,Education,Legal,BusinessSupport,PersonalCare,Entertainment,RealEstate,Automotive,Food,Miscellaneous,Health,All Categories
0,0.011936,0.014444,0.014615,0.035347,0.017167,0.033902,0.045535,0.034448,0.011161,0.035795,0.031081,0.048737,0.021817,0.019601,0.042929,0.026024,0.026828,0.036605
1,0.032927,0.027411,0.039514,0.067379,0.029827,0.073211,0.043014,0.085977,0.031363,0.071918,0.060781,0.070728,0.048294,0.066355,0.064546,0.072068,0.03681,0.06236
2,0.043876,0.047142,0.050519,0.082257,0.049288,0.090685,0.054936,0.085361,0.042151,0.085851,0.070101,0.091973,0.062287,0.065293,0.079677,0.082501,0.045037,0.076178
3,0.04422,0.053504,0.051243,0.083429,0.059445,0.078351,0.062517,0.070358,0.043982,0.080017,0.063602,0.100211,0.063559,0.043501,0.089292,0.066551,0.04917,0.081993
4,0.042705,0.045966,0.041418,0.076462,0.060533,0.067588,0.064907,0.066298,0.044245,0.07545,0.061568,0.098514,0.053847,0.041706,0.091174,0.06631,0.050503,0.080513


In [128]:
# Columns to plot: every column of the bad-review category table.
# These names label the traces and dropdown buttons; the x values of the
# category figure come from `bucket`, not from these columns.
col_names_cat = df_bad_cat.columns

In [129]:
def list_dict_cat(col_names_cat,N):
    """
        Build the dropdown buttons of the category interactive plot
        col_names_cat: column names of the category dataframe; each name
                        becomes a button label and the title displayed once
                        that button is selected
        N: number of trace pairs (set_visible builds masks of 2*N entries)
        Return: list of button dictionaries for `updatemenus`; the first button
                shows every trace, each following button shows one pair
    """
    # First button: show everything. The mask uses real booleans and has the
    # same 2*N length as the per-category masks (it used to be N floats).
    buttons = [dict(label="Plot all categories",
                    method="update",
                    args=[{"visible": [True] * (2 * N)},
                          {"title": "Plot all categories"}])]

    # One button per category, showing only that category's pair of traces
    for idx, name in enumerate(col_names_cat):
        buttons.append(dict(label=name,
                            method="update",
                            args=[{"visible": set_visible(idx, N)},
                                  {"title": name}]))
    return buttons

In [130]:
# Start from an empty figure
fig = go.Figure()

# For each category add the good-review curve (red) and then the bad-review
# curve (blue); the dropdown masks rely on this pairwise ordering
for category in col_names_cat:
    for frame, suffix, colour in ((df_good_cat, " good review", "#FF0000"),
                                  (df_bad_cat, " bad review", "#0000FF")):
        fig.add_trace(go.Scatter(x=bucket,
                                 y=frame[category].values,
                                 name=category+suffix,
                                 line=dict(color=colour)))

# Dropdown menu, with the first button pre-selected
category_menu = dict(active=0,
                     buttons=list_dict_cat(col_names_cat,len(col_names_cat)))
fig.update_layout(updatemenus=[category_menu])

# Axis titles; distances are shown on a logarithmic x axis
fig.update_xaxes(title_text="Distance travelled in km by category in logarithmic scale", type="log")
fig.update_yaxes(title_text="Probability of travelling")
# Figure title
fig.update_layout(title_text="Evolution of friends' reviews influence on mobility according to categories")
fig.show()

In [131]:
# Export the figure as an HTML page and open it in the browser
fig.write_html('evolution_category.html', auto_open=True)

# Create the interactive plot for the probability of giving good ratings study

In [37]:
# Load the table for the probability-of-giving-good-ratings study
df_mean_rating = pd.read_csv('./data/prob_give_good_review.csv')

In [38]:
# Values plotted on the y axis of the figure below (despite the name `x_`).
# NOTE(review): the column is called 'review_id' but is used as the probability
# axis - presumably it holds one probability per distance bin; confirm against the CSV.
x_ = df_mean_rating['review_id']

In [39]:
# Edges (in km) of the 14 consecutive distance bins
distance_edges = [5.0, 8.154, 13.299, 21.689, 35.372, 57.688, 94.083, 153.438,
                  250.24, 408.112, 665.584, 1085.491, 1770.311, 2887.173, 4708.646]

# x position of each bin: the midpoint of its two edges.
# The previous formula, -(lo - hi)/2, produced half the bin width instead of a
# distance; the midpoint matches the positions used for the
# "go to good rated business" plot, which is built on the same edges.
y_ = [(lo + hi) / 2 for lo, hi in zip(distance_edges[:-1], distance_edges[1:])]

In [54]:
# Single curve: distance positions (`y_`) on the x axis, probabilities (`x_`) on the y axis
rating_curve = go.Scatter(x=y_,
                          y=x_,
                          line=dict(color="#33CFA5"))
fig = go.Figure(data=[rating_curve])

# Axis titles; distances are shown on a logarithmic x axis
fig.update_xaxes(title_text="Distance travelled in km in logarithmic scale", type="log")
fig.update_yaxes(title_text="Probability of giving good ratings")
# Figure title
fig.update_layout(title_text="Probability of giving good ratings (stars >=5) depending on distance")
fig.show()

In [59]:
# Export the figure as an HTML page and open it in the browser
fig.write_html('give_good_ratings.html', auto_open=True)

# Create the interactive plot for the probability of visiting good rated business study

In [132]:
# Load the table for the probability-of-visiting-good-rated-business study
df_to_good = pd.read_csv('./data/prob_go__to_good_reviewed_place.csv')

In [133]:
# Values plotted on the y axis of the figure below (despite the name `x2`).
# NOTE(review): the column is called 'review_id' but is used as the probability
# axis - presumably it holds one probability per distance bin; confirm against the CSV.
x2 = df_to_good['review_id'].values
# Hard-coded x positions (distance in km), one per bin, 14 values in increasing order.
# NOTE(review): these look like midpoints of consecutive distance-bin edges
# (e.g. 6.577 is about (5.0 + 8.154) / 2) - confirm the source of the numbers.
y2 = np.array([   6.57721204,   10.72667525,   17.49397179,   28.53065297,
         46.53020873,   75.88541092,  123.76036444,  201.83889923,
        329.17599607,  536.84813386,  875.53747015, 1427.90076611,
       2328.74167854, 3797.90944447])

In [165]:
# Single curve: distance positions (`y2`) on the x axis, probabilities (`x2`) on the y axis
visit_curve = go.Scatter(x=y2,
                         y=x2,
                         line=dict(color="#33CFA5"))
fig = go.Figure(data=[visit_curve])

# Axis titles; distances are shown on a logarithmic x axis
fig.update_xaxes(title_text="Distance travelled in km in logarithmic scale", type="log")
fig.update_yaxes(title_text="Probability to go to good rated business")
# Figure title
fig.update_layout(title_text="Probability to go to good rated business (stars >=5) depending on distance")
fig.show()

In [135]:
# Export the figure as an HTML page and open it in the browser
fig.write_html('go_to_good_rated_business.html', auto_open=True)

# Create the interactive plot for the word-of-mouth study (influence of friends' recommendations)

In [154]:
# Load the table for the friends' recommendation study
# (the plotting cell reads its 'buckets', 'good' and 'bad' columns)
mouth_friend = pd.read_csv('./for_plotting/basic_to_plot.csv')

In [167]:
# Initialize figure
fig = go.Figure()

# Distance buckets shared by both curves
friend_buckets = mouth_friend['buckets'].values

# Add one curve for good reviews (red) and one for bad reviews (blue)
fig.add_trace(
    go.Scatter(x=friend_buckets,
               y=mouth_friend['good'].values,
               name="Good reviews",
               line=dict(color="#FF0000")))

fig.add_trace(
    go.Scatter(x=friend_buckets,
               y=mouth_friend['bad'].values,
               name="Bad reviews",
               line=dict(color="#0000FF")))

# Define axes; distances are shown on a logarithmic x axis
fig.update_xaxes(title_text="Distance travelled in km in logarithmic scale", type="log")
# The y label used to be a copy of the previous figure's
# ("Probability to go to good rated business"), which does not describe a plot
# comparing good-review and bad-review curves; it now matches the other
# mobility figures of this notebook.
# NOTE(review): confirm that 'good'/'bad' hold travel probabilities.
fig.update_yaxes(title_text="Probability of travelling")
# Set title
fig.update_layout(title_text="Influence of a friends' recommendations on mobility")
fig.show()

In [156]:
# Export the figure as an HTML page and open it in the browser
fig.write_html('friends_recomm_influence.html', auto_open=True)

# Create the interactive plot for the influence of weekdays on mobility

In [157]:
# Read the weekday table and expand the abbreviated day headers in one chain
df_week = pd.read_csv('./for_plotting/week_to_plot.csv').rename(
    columns={'mon':'Monday','tue':'Tuesday','wed':'Wednesday','thu':'Thursday','fri':'Friday','sat':'Saturday','sun':'Sunday'})
# Display the table
df_week

Unnamed: 0.1,Unnamed: 0,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday,buckets
0,0,0.149692,0.16864,0.14685,0.132639,0.136902,0.131217,0.13406,0.1
1,1,0.161906,0.166494,0.151695,0.131715,0.134971,0.118988,0.134231,0.33053
2,2,0.160025,0.158375,0.152935,0.138363,0.133366,0.123422,0.133514,1.0925
3,3,0.157914,0.149693,0.150508,0.138613,0.133828,0.126885,0.142559,3.611038
4,4,0.157123,0.145881,0.144928,0.135116,0.132686,0.134833,0.149433,11.935557
5,5,0.160998,0.143543,0.142705,0.132885,0.130325,0.137393,0.152152,39.450577
6,6,0.166597,0.122535,0.138061,0.111204,0.135124,0.164079,0.1624,130.39593
7,7,0.166556,0.153464,0.149818,0.136725,0.133742,0.123467,0.136228,430.997459
8,8,0.162377,0.15723,0.170332,0.130557,0.115583,0.127749,0.136172,1424.575212
9,9,0.155007,0.169389,0.142756,0.141158,0.123757,0.126776,0.141158,4708.646162


In [158]:
# Columns to plot: everything between the first and the last column of the table
col_names_ = df_week.columns[1:-1].copy()
# Distance groups used as x values
buckets = df_week['buckets'].to_numpy()

In [159]:
# Initialize figure
fig = go.Figure()

# Add one trace per weekday
for day in col_names_:
    fig.add_trace(
        go.Scatter(x=buckets,
                   y=df_week[day].values,
                   name=day))

# Build the dropdown buttons here instead of calling list_dict/set_visible:
# those helpers build masks for two traces per column (good + bad review),
# whereas this figure has exactly one trace per weekday, so their masks
# switched on the wrong traces (or none at all for the later weekdays).
n_days = len(col_names_)
weekday_buttons = [dict(label="Plot all weekdays",
                        method="update",
                        args=[{"visible": [True] * n_days},
                              {"title": "Plot all weekdays"}])]
for day_idx, day in enumerate(col_names_):
    weekday_buttons.append(dict(label=day,
                                method="update",
                                # Only the trace of the selected weekday stays visible
                                args=[{"visible": [pos == day_idx for pos in range(n_days)]},
                                      {"title": day}]))

# Add layout
fig.update_layout(
    updatemenus=[
        dict(
            active=0,
            buttons=weekday_buttons,
        )
    ])

# Define axes; distances are shown on a logarithmic x axis
fig.update_xaxes(title_text="Distance travelled in km by weekday in logarithmic scale", type="log")
fig.update_yaxes(title_text="Probability of travelling")
# Set title
fig.update_layout(title_text="Influence of weekdays on mobility")
fig.show()

In [160]:
# Export the figure as an HTML page and open it in the browser
fig.write_html('weekday_influence.html', auto_open=True)

# Create the interactive plot for treated and control model

In [168]:
# Read the two check-in tables (good first, then bad)
df_goodCheckins, df_badCheckins = map(
    pd.read_csv,
    ('./data/good_checkins.csv', './data/bad_checkins.csv'),
)
# Preview the first three rows of the bad check-ins
df_badCheckins.head(3)

Unnamed: 0,review_id,user_id,business_id,stars_x,friends,review_count,stars_y,BusinessSupport,Manufacturing,Automotive,...,PersonalCare,Entertainment,Computers,Education,Construction,Travel,Miscellaneous,Sport,Merchant,RealEstate
0,UmFMZ8PyXZTY2QcwzsfQYA,nIJD_7ZXHq-FX8byPMOkMQ,lbrU8StCq3yDfr-QMnGrmQ,1,"B5vnnBub9sscTix_tPAwUw, FKFWX9kiyTvJY8_P9j_Rmw...",30,1.5,False,False,False,...,True,False,False,False,False,False,False,False,False,False
1,T0JLHmc_1Nt_Uv2DNmzhhA,nIJD_7ZXHq-FX8byPMOkMQ,jJDnxINrCKstFyeH3F8Cfw,2,"B5vnnBub9sscTix_tPAwUw, FKFWX9kiyTvJY8_P9j_Rmw...",30,2.0,False,False,True,...,False,False,False,False,False,False,False,False,False,False
2,-LMhqBpTytjSzNB-2g99-Q,rhE4gLtM0gb3XLwSz6HPCw,jJDnxINrCKstFyeH3F8Cfw,4,"BqUm1rHPSxlCWr-3HhNQyw, 6p1NDChnIpGl29Ce3cMKWg...",193,2.0,False,False,True,...,False,False,False,False,False,False,False,False,False,False


In [169]:
# Columns to plot: everything from the seventh column of the good check-ins onwards.
# Note that this rebinds `col_names`, which earlier held the weekday columns.
col_names = df_goodCheckins.columns[6:]
col_names

Index(['BusinessSupport', 'Manufacturing', 'Automotive', 'Home', 'Legal',
       'Food', 'Health', 'PersonalCare', 'Entertainment', 'Computers',
       'Education', 'Construction', 'Travel', 'Miscellaneous', 'Sport',
       'Merchant', 'RealEstate'],
      dtype='object')

In [170]:
def set_visible(idx,N):
    """
        Visibility mask selecting one pair of traces
        idx: index of the pair to show
        N: number of pairs (the mask covers 2*N traces)
        Return: list of 2*N booleans.
                True at positions 2*idx and 2*idx+1, False everywhere else
    """
    # Start with every trace hidden
    mask = np.zeros(2*N, dtype=bool)
    # Switch on the two traces of the requested pair
    mask[[2*idx, 2*idx+1]] = True
    return list(mask)

def list_dict_(col_names,N):
    """
        Build the dropdown buttons of an interactive plot, one per column
        col_names: column names; each name becomes a button label and the
                   title displayed once that button is selected
        N: number of trace pairs, forwarded to set_visible
        Return: list of button dictionaries for `updatemenus`
                (there is no "show everything" button)
    """
    # Each button keeps only the pair of traces of its own column visible
    return [dict(label=name,
                 method="update",
                 args=[{"visible": set_visible(position,N)},
                       {"title": name}])
            for position, name in enumerate(col_names)]

In [174]:
fig = go.Figure()
for cat_idx, cat1 in enumerate(col_names):
    # Number of reviews per business, restricted to rows flagged for this category
    treated_1 = df_goodCheckins[df_goodCheckins[cat1]].groupby('business_id')['review_id'].count()
    control_1 = df_badCheckins[df_badCheckins[cat1]].groupby('business_id')['review_id'].count()

    # Density curves only (no histogram, no rug), for businesses with at most 50 reviews
    fig_ = ff.create_distplot([treated_1[treated_1 <= 50], control_1[control_1 <= 50]], ['Treated', 'Control'], bin_size=.2,  show_hist=False,show_rug=False)

    # Only the first category starts visible so that the initial view matches
    # the pre-selected dropdown entry (active=0); previously every category
    # was drawn at once until a button was clicked
    show_initially = cat_idx == 0

    fig.add_trace(
        go.Scatter(x=fig_.data[0].x,
                   y=fig_.data[0].y,
                   name="Treated",
                   visible=show_initially,
                   line=dict(color="#FF0000")))

    fig.add_trace(
        go.Scatter(x=fig_.data[1].x,
                   y=fig_.data[1].y,
                   name="Control",
                   visible=show_initially,
                   line=dict(color="#0000FF")))

# Add layout: one dropdown entry per category, each showing its Treated/Control pair
fig.update_layout(
    updatemenus=[
        dict(
            active=0,
            buttons=list_dict_(col_names,len(col_names))
        )
    ])

# Define axes
fig.update_xaxes(title_text="The number of clients")
fig.update_yaxes(title_text="Probability of the number of client the business can have")
# Set title
fig.update_layout(title_text="Influence of ratings on the number of clients that business can have")
fig.show()

In [175]:
# Export the figure as an HTML page and open it in the browser
fig.write_html('treatedControl.html', auto_open=True)

In [176]:
import chart_studio.plotly as py
import chart_studio

# Credentials come from the environment so that the API key is never stored
# in the notebook.
# NOTE(review): the key that used to be hard-coded here has been exposed in
# the file and should be revoked and regenerated.
username = os.environ.get('CHART_STUDIO_USERNAME', 'PM_EPFL')
api_key = os.environ.get('CHART_STUDIO_API_KEY')
if not api_key:
    raise RuntimeError("Set the CHART_STUDIO_API_KEY environment variable before uploading the figure")
chart_studio.tools.set_credentials_file(username=username, api_key=api_key)

# Upload the current figure to Chart Studio and open it in the browser
py.plot(fig, filename = 'treatedControl', auto_open=True)

'https://plotly.com/~PM_EPFL/5/'