# Panel Regression - Review Variance

In [4]:
# global imports
from toolbox import ToolBox
from tqdm import tqdm
from models import *
import pandas as pd
import numpy as np
import datetime
from dateutil.relativedelta import relativedelta
from datetime import date
import math
import matplotlib.pyplot as plt
import seaborn as sns
from linearmodels.panel import PanelOLS
import statsmodels.api as sm

tqdm.pandas()

## Loading the dataset from the database

In [5]:
tb = ToolBox()

In [6]:
# Load the cleaned user reviews (served from the local cache when available)
# and keep only the rows that actually carry a review text.
df = tb.load_data_sql(use_cache=True, table='user_reviews_clean')
users = df.loc[df['review'].notnull()]
users.head()

Loaded from cache


Unnamed: 0,id,date,grade,game,review,sentiment
0,128002,2013-01-19,30,007-legends-pc,"Oh, my dear God! What is it with developers th...",-0.9191
1,128003,2014-06-23,30,007-legends-pc,Seriously if you want to have a HUGE laugh wit...,0.9424
2,128004,2014-12-11,0,007-legends-pc,godawful port of the xbox 360 version and ps3....,-0.3869
3,128005,2012-11-03,0,007-legends-pc,Agree. Worst game ever. Its a full copy of Cal...,0.5496
4,128006,2012-11-04,0,007-legends-pc,DO NOT BUY THIS GAME for the PC (or for any ot...,-0.9612


## Data cleaning and preprocessing
### User preprocessing

In [7]:
def user_preprocessing(df, timeframe, n_periods):
    """Build a game x period panel of user-review counts with lagged review statistics.

    The earliest review date of a game is used as a proxy for its release date.
    Each review is assigned to a period counted from that date, and the result
    holds one row per game for every period 0 .. n_periods - 1 (periods without
    reviews get a count of 0).

    Parameters
    ----------
    df : pandas.DataFrame
        User reviews. The columns 'game', 'date', 'grade' and 'sentiment' are
        read; 'date' must be datetime-like or convertible by ``pd.to_datetime``.
        The frame is not modified.
    timeframe : str
        Name of the period column: 'week' (weeks since first review) or
        '2weeks' (two-week blocks since first review).
    n_periods : int
        Number of periods, starting at 0, to keep per game.

    Returns
    -------
    pandas.DataFrame
        Sorted by game and period, with the columns 'game', <timeframe>,
        'nr_reviews', 'user_avg_grade' and 'user_sentiment_var'. The last two
        are lagged by one period within each game, so they are NaN in period 0.
        Note that 'user_sentiment_var' holds the sample standard deviation of
        the sentiment scores (a period with a single review yields NaN).

    Raises
    ------
    KeyError
        If `timeframe` does not name a period column.
    """
    # earliest review date of a game as proxy for its release date
    review_dates = pd.to_datetime(df['date'])
    release_dates = review_dates.groupby(df['game'].values).transform('min')

    # whole weeks elapsed between release and review (vectorised, no row-wise apply);
    # assign() works on a copy, so the caller's frame stays untouched
    preprocessed_df = df.assign(
        release_date=release_dates,
        week=(review_dates - release_dates).dt.days // 7,
    )

    if timeframe == '2weeks':
        preprocessed_df['2weeks'] = preprocessed_df['week'] // 2

    if timeframe not in preprocessed_df.columns:
        raise KeyError("unknown timeframe {!r}; expected 'week' or '2weeks'".format(timeframe))

    # number of reviews per game and period; counting with groupby().size()
    # yields int64, so busy periods cannot overflow a narrow integer type
    in_window = preprocessed_df[preprocessed_df[timeframe] < n_periods]
    panel_index = pd.MultiIndex.from_product(
        [sorted(df['game'].dropna().unique()), range(n_periods)],
        names=['game', timeframe],
    )
    reviews_per_timeframe = (
        in_window.groupby(['game', timeframe]).size()
        .reindex(panel_index, fill_value=0)   # add the empty periods as 0
        .rename('nr_reviews')
        .reset_index()
    )

    # average grade and sentiment dispersion per game and period
    reviews_grouped = (
        preprocessed_df.groupby(['game', timeframe], as_index=False)
        .agg({'grade': 'mean', 'sentiment': 'std'})
        .rename(columns={'grade': 'user_avg_grade', 'sentiment': 'user_sentiment_var'})
    )

    # attach the statistics to the full panel (periods without reviews stay NaN)
    user_reviews = pd.merge(reviews_per_timeframe, reviews_grouped, how='left', on=['game', timeframe])

    # lag the time-variant independent variables by one period within each game
    lagged_columns = ['user_avg_grade', 'user_sentiment_var']
    user_reviews[lagged_columns] = user_reviews.groupby('game')[lagged_columns].shift(1)

    return user_reviews

#### Assign user panel dataset
##### 2 weeks time frame

In [8]:
# 5 two-week periods: covers the first 10 weeks after each game's first review
user_reviews_2weeks = user_preprocessing(users, '2weeks', 5)

In [9]:
user_reviews_2weeks.head()

Unnamed: 0,game,2weeks,nr_reviews,user_avg_grade,user_sentiment_var
0,007-legends-pc,0,7,,
1,007-legends-pc,1,1,1.428571,0.549131
2,007-legends-pc,2,0,100.0,
3,007-legends-pc,3,1,,
4,007-legends-pc,4,0,70.0,


##### One week time frame

In [10]:
# 10 one-week periods: the same 10-week window as the two-week panel, at finer resolution
user_reviews_week = user_preprocessing(users, 'week', 10)

In [11]:
user_reviews_week.head()

Unnamed: 0,game,week,nr_reviews,user_avg_grade,user_sentiment_var
0,007-legends-pc,0,6,,
1,007-legends-pc,1,1,0.0,0.578638
2,007-legends-pc,2,1,10.0,
3,007-legends-pc,3,0,100.0,
4,007-legends-pc,4,0,,


### Critic preprocessing

In [12]:
# NOTE(review): unpickling can execute arbitrary code, so only load a pickle file
# that was produced locally / comes from a trusted source.
# The path is relative to the notebook's working directory.
critics = pd.read_pickle('critic_review_clean.pkl')

In [13]:
def critic_preprocessing(df):
    """Collapse critic reviews to one row per game.

    For every game the mean grade ('critic_avg_grade'), the sample standard
    deviation of the sentiment scores ('critic_sentiment_var') and the number
    of non-null reviews ('critic_volume') are returned next to the 'game' key.
    """
    aggregations = {'grade': 'mean', 'sentiment': 'std', 'review': 'count'}
    column_names = {
        'grade': 'critic_avg_grade',
        'sentiment': 'critic_sentiment_var',
        'review': 'critic_volume',
    }

    per_game = df.groupby('game', as_index=False).agg(aggregations)
    return per_game.rename(columns=column_names)

#### Assign critics datasets

In [14]:
critic_reviews = critic_preprocessing(critics)

In [15]:
critic_reviews.head()

Unnamed: 0,game,critic_avg_grade,critic_sentiment_var,critic_volume
0,007-legends-pc,26.8,0.351563,5
1,007-legends-playstation-3,42.318182,0.61941,22
2,007-legends-wii-u,40.666667,0.080168,3
3,007-legends-xbox-360,46.44898,0.669014,49
4,007-quantum-of-solace-ds,64.3,0.41152,10


### Create final panel dataset
#### Panel df two weeks

In [16]:
panel_df_2weeks = pd.merge(user_reviews_2weeks, critic_reviews, how='left', on='game')

In [17]:
panel_df_2weeks.head()

Unnamed: 0,game,2weeks,nr_reviews,user_avg_grade,user_sentiment_var,critic_avg_grade,critic_sentiment_var,critic_volume
0,007-legends-pc,0,7,,,26.8,0.351563,5.0
1,007-legends-pc,1,1,1.428571,0.549131,26.8,0.351563,5.0
2,007-legends-pc,2,0,100.0,,26.8,0.351563,5.0
3,007-legends-pc,3,1,,,26.8,0.351563,5.0
4,007-legends-pc,4,0,70.0,,26.8,0.351563,5.0


#### Panel df two weeks enough reviews

In [18]:
# per game: the smallest number of reviews observed in any single period
panel_df_2weeks_grouped = (
    panel_df_2weeks
    .groupby('game', as_index=False)
    .agg({'nr_reviews': 'min'})
    .rename(columns={'nr_reviews': 'min_reviews'})
)
panel_df_2weeks_grouped.head()

Unnamed: 0,game,min_reviews
0,007-legends-pc,0
1,007-legends-playstation-3,0
2,007-legends-wii-u,0
3,007-legends-xbox-360,0
4,007-quantum-of-solace-pc,0


In [19]:
# games that have at least 2 reviews in every single period
games_enough_reviews = panel_df_2weeks_grouped.loc[
    panel_df_2weeks_grouped['min_reviews'] > 1, 'game'
].tolist()
len(games_enough_reviews)

483

In [20]:
# restrict the two-week panel to the games with at least 2 reviews in every period
panel_df_2weeks_enough_reviews = panel_df_2weeks.loc[
    panel_df_2weeks['game'].isin(games_enough_reviews)
]

In [21]:
panel_df_2weeks_enough_reviews.head()

Unnamed: 0,game,2weeks,nr_reviews,user_avg_grade,user_sentiment_var,critic_avg_grade,critic_sentiment_var,critic_volume
55,1-2-switch-switch,0,36,,,58.190476,0.48737,65.0
56,1-2-switch-switch,1,12,63.888889,0.503946,58.190476,0.48737,65.0
57,1-2-switch-switch,2,2,53.333333,0.698784,58.190476,0.48737,65.0
58,1-2-switch-switch,3,2,0.0,0.329582,58.190476,0.48737,65.0
59,1-2-switch-switch,4,2,55.0,0.014991,58.190476,0.48737,65.0


#### Panel df week

In [22]:
panel_df_week = pd.merge(user_reviews_week, critic_reviews, how='left', on='game')

In [23]:
panel_df_week.head()

Unnamed: 0,game,week,nr_reviews,user_avg_grade,user_sentiment_var,critic_avg_grade,critic_sentiment_var,critic_volume
0,007-legends-pc,0,6,,,26.8,0.351563,5.0
1,007-legends-pc,1,1,0.0,0.578638,26.8,0.351563,5.0
2,007-legends-pc,2,1,10.0,,26.8,0.351563,5.0
3,007-legends-pc,3,0,100.0,,26.8,0.351563,5.0
4,007-legends-pc,4,0,,,26.8,0.351563,5.0


## Summary statistics
### 2 weeks timeframe

In [24]:
panel_df_2weeks.describe() # look especially at count, mean values and std

Unnamed: 0,nr_reviews,user_avg_grade,user_sentiment_var,critic_avg_grade,critic_sentiment_var,critic_volume
count,65340.0,21916.0,9391.0,59275.0,56765.0,59300.0
mean,1.256382,69.814345,0.516605,70.00417,0.477669,22.776644
std,4.290756,26.460174,0.369603,14.067308,0.138777,19.947632
min,0.0,0.0,0.0,0.0,0.0,1.0
25%,0.0,56.666667,0.131986,62.421053,0.404505,7.0
50%,0.0,78.888889,0.559013,72.354167,0.489651,17.0
75%,1.0,90.0,0.800868,80.0,0.562654,32.0
max,191.0,100.0,1.409759,100.0,1.310481,129.0


### 2 weeks timeframe enough reviews

In [25]:
panel_df_2weeks_enough_reviews.describe()

Unnamed: 0,nr_reviews,user_avg_grade,user_sentiment_var,critic_avg_grade,critic_sentiment_var,critic_volume
count,2415.0,1932.0,1932.0,2400.0,2395.0,2400.0
mean,8.652174,66.433882,0.602992,79.783589,0.482034,46.945833
std,11.060424,21.833124,0.312272,9.638023,0.107202,25.513509
min,2.0,0.0,0.000354,32.958333,0.11232,1.0
25%,3.0,52.5,0.369304,75.695135,0.407905,27.0
50%,5.0,70.323887,0.686604,81.59404,0.488107,43.0
75%,9.0,83.75,0.814974,86.377589,0.564899,65.25
max,176.0,100.0,1.39201,98.954545,0.786171,129.0


### week timeframe

In [26]:
panel_df_week.describe()

Unnamed: 0,nr_reviews,user_avg_grade,user_sentiment_var,critic_avg_grade,critic_sentiment_var,critic_volume
count,130680.0,30151.0,11859.0,118550.0,113530.0,118600.0
mean,0.628191,69.356333,0.510027,70.00417,0.477669,22.776644
std,2.509015,27.454927,0.392297,14.067248,0.138776,19.947547
min,0.0,0.0,0.0,0.0,0.0,1.0
25%,0.0,55.0,0.109298,62.421053,0.404505,7.0
50%,0.0,80.0,0.516613,72.354167,0.489651,17.0
75%,0.0,90.0,0.820215,80.0,0.562654,32.0
max,153.0,100.0,1.409759,100.0,1.310481,129.0


Remarks:
* mean nr_reviews: very low --> data sparsity
* count of user_sentiment_var: very low compared to entire dataset

### Summary statistics per period

In [27]:
def sum_stats_per_period(df, timeframe):
    """Mean and standard deviation of every numeric variable, per period.

    Parameters
    ----------
    df : pandas.DataFrame
        Panel dataset containing the period column `timeframe`.
    timeframe : str
        Name of the period column to group by (e.g. 'week' or '2weeks').

    Returns
    -------
    pandas.DataFrame
        One row per numeric variable; the columns are 'mean_<period>' for all
        periods followed by 'std_<period>' for all periods.
    """
    # only numeric variables can be averaged; selecting them explicitly keeps the
    # string column 'game' out of the aggregation on every pandas version
    value_columns = [col for col in df.select_dtypes(include='number').columns
                     if col != timeframe]
    per_period = df.groupby(timeframe)[value_columns]

    # prefix the period labels instead of a fixed 0..9 mapping, so any number
    # of periods gets unique column names before the join
    mean = per_period.mean().transpose().add_prefix('mean_')
    std = per_period.std().transpose().add_prefix('std_')
    return mean.join(std)

#### 2 weeks timeframe

In [28]:
sum_stats_per_period(panel_df_2weeks, '2weeks')

2weeks,mean_0,mean_1,mean_2,mean_3,mean_4,std_0,std_1,std_2,std_3,std_4
nr_reviews,4.003367,0.868993,0.554484,0.465794,0.389272,8.322542,2.641796,1.541552,1.573429,1.181694
user_avg_grade,,71.083674,68.288503,67.082653,68.426182,,26.111579,25.94611,27.472371,27.340277
user_sentiment_var,,0.501503,0.531683,0.548002,0.510676,,0.349388,0.375434,0.392724,0.400712
critic_avg_grade,70.00417,70.00417,70.00417,70.00417,70.00417,14.067783,14.067783,14.067783,14.067783,14.067783
critic_sentiment_var,0.477669,0.477669,0.477669,0.477669,0.477669,0.138782,0.138782,0.138782,0.138782,0.138782
critic_volume,22.776644,22.776644,22.776644,22.776644,22.776644,19.948304,19.948304,19.948304,19.948304,19.948304


#### 2 weeks timeframe enough reviews

In [29]:
sum_stats_per_period(panel_df_2weeks_enough_reviews, '2weeks')

2weeks,mean_0,mean_1,mean_2,mean_3,mean_4,std_0,std_1,std_2,std_3,std_4
nr_reviews,21.322981,7.720497,5.308489,4.79089,4.118012,17.41569,7.884181,3.17571,4.333571,3.13866
user_avg_grade,,69.750806,65.031285,64.725465,66.22797,,20.237449,21.647385,22.049276,23.008924
user_sentiment_var,,0.605137,0.626807,0.617374,0.562651,,0.236221,0.293911,0.336697,0.364284
critic_avg_grade,79.783589,79.783589,79.783589,79.783589,79.783589,9.646069,9.646069,9.646069,9.646069,9.646069
critic_sentiment_var,0.482034,0.482034,0.482034,0.482034,0.482034,0.107292,0.107292,0.107292,0.107292,0.107292
critic_volume,46.945833,46.945833,46.945833,46.945833,46.945833,25.534805,25.534805,25.534805,25.534805,25.534805


#### week timeframe

In [30]:
sum_stats_per_period(panel_df_week, 'week')

week,mean_0,mean_1,mean_2,mean_3,mean_4,mean_5,mean_6,mean_7,mean_8,mean_9,std_0,std_1,std_2,std_3,std_4,std_5,std_6,std_7,std_8,std_9
nr_reviews,3.179369,0.823998,0.495103,0.37389,0.299357,0.255127,0.240282,0.225513,0.197735,0.191537,6.426677,2.439773,1.525846,1.327954,0.916208,0.858176,0.913723,0.851597,0.683753,0.663562
user_avg_grade,,71.299583,69.365053,68.165507,67.692786,66.638561,66.671392,69.062374,67.228587,66.994494,,26.546406,25.901336,26.856206,28.326701,29.203373,29.087317,28.524324,29.009069,29.164989
user_sentiment_var,,0.491029,0.519194,0.530807,0.516204,0.518394,0.548052,0.484385,0.532323,0.495593,,0.360374,0.375526,0.39746,0.4119,0.422574,0.426936,0.41822,0.434612,0.432362
critic_avg_grade,70.00417,70.00417,70.00417,70.00417,70.00417,70.00417,70.00417,70.00417,70.00417,70.00417,14.067783,14.067783,14.067783,14.067783,14.067783,14.067783,14.067783,14.067783,14.067783,14.067783
critic_sentiment_var,0.477669,0.477669,0.477669,0.477669,0.477669,0.477669,0.477669,0.477669,0.477669,0.477669,0.138782,0.138782,0.138782,0.138782,0.138782,0.138782,0.138782,0.138782,0.138782,0.138782
critic_volume,22.776644,22.776644,22.776644,22.776644,22.776644,22.776644,22.776644,22.776644,22.776644,22.776644,19.948304,19.948304,19.948304,19.948304,19.948304,19.948304,19.948304,19.948304,19.948304,19.948304


Remarks:
* mean nr_reviews: very low, especially after period 1 --> data sparsity
* less sparsity for 2 weeks period with only games with enough reviews in each period, however probably still too sparse for reliable variances

## Correlations

### 2 weeks timeframe

In [31]:
panel_df_2weeks.corr()

Unnamed: 0,nr_reviews,user_avg_grade,user_sentiment_var,critic_avg_grade,critic_sentiment_var,critic_volume
nr_reviews,1.0,-0.040329,0.11888,0.091909,0.022174,0.215805
user_avg_grade,-0.040329,1.0,-0.38081,0.326457,-0.056829,0.093584
user_sentiment_var,0.11888,-0.38081,1.0,-0.096712,0.125443,0.020012
critic_avg_grade,0.091909,0.326457,-0.096712,1.0,-0.134207,0.358971
critic_sentiment_var,0.022174,-0.056829,0.125443,-0.134207,1.0,0.071815
critic_volume,0.215805,0.093584,0.020012,0.358971,0.071815,1.0


### 2 weeks timeframe enough reviews

In [32]:
panel_df_2weeks_enough_reviews.corr()

Unnamed: 0,nr_reviews,user_avg_grade,user_sentiment_var,critic_avg_grade,critic_sentiment_var,critic_volume
nr_reviews,1.0,-0.023055,0.037247,-0.06099,0.059812,0.07222
user_avg_grade,-0.023055,1.0,-0.381537,0.31888,-0.057794,0.246644
user_sentiment_var,0.037247,-0.381537,1.0,-0.107901,0.129541,-0.131933
critic_avg_grade,-0.06099,0.31888,-0.107901,1.0,-0.418916,0.144385
critic_sentiment_var,0.059812,-0.057794,0.129541,-0.418916,1.0,-0.036288
critic_volume,0.07222,0.246644,-0.131933,0.144385,-0.036288,1.0


### week timeframe

In [33]:
panel_df_week.corr()

Unnamed: 0,nr_reviews,user_avg_grade,user_sentiment_var,critic_avg_grade,critic_sentiment_var,critic_volume
nr_reviews,1.0,-0.035275,0.097199,0.078586,0.01896,0.184523
user_avg_grade,-0.035275,1.0,-0.358016,0.293598,-0.056525,0.087698
user_sentiment_var,0.097199,-0.358016,1.0,-0.097016,0.116881,-0.011637
critic_avg_grade,0.078586,0.293598,-0.097016,1.0,-0.134207,0.358971
critic_sentiment_var,0.01896,-0.056525,0.116881,-0.134207,1.0,0.071815
critic_volume,0.184523,0.087698,-0.011637,0.358971,0.071815,1.0


## Panel Regression Analysis

In [34]:
def panel_regression(df, timeframe, cov_type='unadjusted', **cov_config):
    '''
    Run a panel regression with time fixed effects

    nr_reviews is regressed on user_sentiment_var, critic_sentiment_var and
    their interaction, plus user_avg_grade, critic_avg_grade and
    critic_volume, with an intercept and time effects.

    df: panel dataset with a 'game' column, the period column and all
        variables used in the formula; rows with missing values are dropped
        by the estimator
    timeframe: name of the period column, used as the time index
    cov_type: covariance estimator passed to PanelOLS.fit; the default
        'unadjusted' reproduces the original behaviour. Since games are
        observed repeatedly, 'clustered' may be the more appropriate choice
        (e.g. cov_type='clustered', cluster_entity=True).
    cov_config: further keyword options forwarded unchanged to PanelOLS.fit

    Returns the fitted linearmodels results object.
    '''
    # PanelOLS expects an (entity, time) MultiIndex
    data = df.set_index(['game', timeframe])
    formula = ('nr_reviews ~ 1 + user_sentiment_var*critic_sentiment_var '
               '+ user_avg_grade + critic_avg_grade + critic_volume + TimeEffects')
    mod = PanelOLS.from_formula(formula, data)
    return mod.fit(cov_type=cov_type, **cov_config)

### Panel Regression with time frames of two weeks

In [35]:
panel_regression(panel_df_2weeks, '2weeks')

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,nr_reviews,R-squared:,0.0969
Estimator:,PanelOLS,R-squared (Between):,0.0993
No. Observations:,9049,R-squared (Within):,-0.0252
Date:,"Sat, Oct 26 2019",R-squared (Overall):,0.0922
Time:,11:13:20,Log-likelihood,-2.392e+04
Cov. Estimator:,Unadjusted,,
,,F-statistic:,161.69
Entities:,4576,P-value,0.0000
Avg Obs:,1.9775,Distribution:,"F(6,9039)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Intercept,-1.1292,0.3655,-3.0894,0.0020,-1.8457,-0.4127
user_sentiment_var,0.7232,0.3946,1.8327,0.0669,-0.0503,1.4967
critic_sentiment_var,1.1232,0.5003,2.2452,0.0248,0.1425,2.1038
user_sentiment_var:critic_sentiment_var,0.1093,0.7891,0.1384,0.8899,-1.4376,1.6562
user_avg_grade,-0.0185,0.0019,-9.6540,0.0000,-0.0223,-0.0147
critic_avg_grade,0.0358,0.0034,10.515,0.0000,0.0292,0.0425
critic_volume,0.0359,0.0016,22.772,0.0000,0.0329,0.0390


**Remarks**
* A lot of instances were not included in the regression, because of missing values (due to 0 or 1 reviews within time period)
* Three significant control variables at 1% level:
    * user_avg_grade: the higher user_avg_grade, the lower nr_reviews (proxy for performance)
        --> This is not what one would expect, as we would expect that a higher user_avg_grade improves performance. However, it could be that with our proxy for performance (nr_reviews), users leave more reviews when they are extremely dissatisfied with a product.
    * critic_avg_grade: the higher critic_avg_grade, the higher nr_reviews (proxy for performance)
        --> In line with expectations
    * critic_volume: the higher critic_volume, the higher nr_reviews (proxy for performance)
        --> In line with expectations
* Independent variable is significant at 10% level:
    The higher user_sentiment_var, the higher nr_reviews (proxy for performance) --> contrary to what was expected. However, our data are too sparse to calculate reliable sentiment variance variables.
* Moderator is not significant

### Panel regression with time frames of two weeks and only games with enough reviews

In [36]:
panel_regression(panel_df_2weeks_enough_reviews, '2weeks')

0,1,2,3
Dep. Variable:,nr_reviews,R-squared:,0.0275
Estimator:,PanelOLS,R-squared (Between):,0.0481
No. Observations:,1916,R-squared (Within):,-0.0102
Date:,"Sat, Oct 26 2019",R-squared (Overall):,0.0225
Time:,11:13:20,Log-likelihood,-5788.6
Cov. Estimator:,Unadjusted,,
,,F-statistic:,8.9725
Entities:,479,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(6,1906)"
Min Obs:,4.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Intercept,1.6407,1.6920,0.9697,0.3323,-1.6777,4.9592
user_sentiment_var,1.5351,1.6929,0.9068,0.3646,-1.7851,4.8552
critic_sentiment_var,4.4511,2.3676,1.8800,0.0603,-0.1924,9.0945
user_sentiment_var:critic_sentiment_var,-2.6906,3.4545,-0.7789,0.4361,-9.4656,4.0843
user_avg_grade,-0.0192,0.0061,-3.1437,0.0017,-0.0312,-0.0072
critic_avg_grade,0.0180,0.0138,1.3021,0.1930,-0.0091,0.0451
critic_volume,0.0300,0.0046,6.4892,0.0000,0.0210,0.0391


**Remarks**
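* Some instances were not included in the regression because of missing values: the first period of every game is dropped (the lagged user variables are undefined in period 0), as are the 4 games without a critic sentiment variance (483 games x 5 periods = 2415 rows --> 479 games x 4 periods = 1916 observations). Unlike in the full sample, the missing values here are not due to periods with 0 or 1 reviews.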
* Two significant control variables at 1% level:
    * user_avg_grade: the higher user_avg_grade, the lower nr_reviews (proxy for performance)
        --> This is not what one would expect, as we would expect that a higher user_avg_grade improves performance. However, it could be that with our proxy for performance (nr_reviews), users leave more reviews when they are extremely dissatisfied with a product.
    * critic_volume: the higher critic_volume, the higher nr_reviews (proxy for performance)
        --> In line with expectations
* Independent variable is not significant
* Moderator is not significant

### Panel Regression with time frames of one week

In [37]:
panel_regression(panel_df_week, 'week')

0,1,2,3
Dep. Variable:,nr_reviews,R-squared:,0.0749
Estimator:,PanelOLS,R-squared (Between):,0.0047
No. Observations:,11522,R-squared (Within):,-0.0191
Date:,"Sat, Oct 26 2019",R-squared (Overall):,0.0487
Time:,11:13:21,Log-likelihood,-2.889e+04
Cov. Estimator:,Unadjusted,,
,,F-statistic:,155.19
Entities:,4213,P-value,0.0000
Avg Obs:,2.7349,Distribution:,"F(6,11507)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Intercept,-0.6179,0.2921,-2.1156,0.0344,-1.1904,-0.0454
user_sentiment_var,0.3762,0.3009,1.2504,0.2112,-0.2136,0.9660
critic_sentiment_var,1.2533,0.3884,3.2270,0.0013,0.4920,2.0146
user_sentiment_var:critic_sentiment_var,0.4376,0.6010,0.7280,0.4666,-0.7405,1.6156
user_avg_grade,-0.0123,0.0014,-9.0076,0.0000,-0.0150,-0.0096
critic_avg_grade,0.0225,0.0027,8.3275,0.0000,0.0172,0.0277
critic_volume,0.0280,0.0012,23.538,0.0000,0.0257,0.0304


**Remarks**
* A lot of instances were not included in the regression, because of missing values (due to 0 or 1 reviews within time period)
* Three significant control variables at 1% level:
    * user_avg_grade: the higher user_avg_grade, the lower nr_reviews (proxy for performance)
        --> This is not what one would expect, as we would expect that a higher user_avg_grade improves performance. However, it could be that with our proxy for performance (nr_reviews), users leave more reviews when they are extremely dissatisfied with a product.
    * critic_avg_grade: the higher critic_avg_grade, the higher nr_reviews (proxy for performance)
        --> In line with expectations
    * critic_volume: the higher critic_volume, the higher nr_reviews (proxy for performance)
        --> In line with expectations
* Independent variable is not significant
* Moderator is not significant