# Robustness checks - Timeframes

## Robustness check: 4-week timeframe (OLS sample: minimum number of reviews = 10)

In [10]:
# Load the 4-week, 10-review sample that feeds the OLS robustness check
data_OLS <- read.csv("../datasets/final-df-cs-4weeks-10-reviews.csv")

# Linear model for nr_reviews: the `*` term expands to both sentiment
# main effects plus their interaction; the remaining terms enter additively.
OLS_model <- lm(
  formula = nr_reviews ~ user_sentiment_var * critic_sentiment_var +
    user_avg_grade + critic_avg_grade + critic_volume +
    is_action + is_pc,
  data = data_OLS
)
summary(OLS_model)


Call:
lm(formula = nr_reviews ~ user_sentiment_var * critic_sentiment_var + 
    user_avg_grade + critic_avg_grade + critic_volume + is_action + 
    is_pc, data = data_OLS)

Residuals:
   Min     1Q Median     3Q    Max 
-6.719 -3.444 -1.384  1.212 82.605 

Coefficients:
                                         Estimate Std. Error t value Pr(>|t|)
(Intercept)                              -1.37634    9.52085  -0.145  0.88517
user_sentiment_var                       13.37534   12.07726   1.107  0.26915
critic_sentiment_var                     27.41561   18.02157   1.521  0.12946
user_avg_grade                           -0.03560    0.03148  -1.131  0.25924
critic_avg_grade                          0.05043    0.06116   0.825  0.41041
critic_volume                             0.05247    0.02006   2.616  0.00945
is_action                                -0.94355    0.98022  -0.963  0.33668
is_pc                                     1.15991    1.09541   1.059  0.29068
user_sentiment_var:criti

In [12]:
# Load the 4-week sample shared by the count-data models
data_count <- read.csv("../datasets/final-df-count-models-4weeks.csv")

# Poisson GLM with the same right-hand side as the OLS specification
poisson <- glm(
  formula = nr_reviews ~ user_sentiment_var * critic_sentiment_var +
    user_avg_grade + critic_avg_grade + critic_volume +
    is_action + is_pc,
  family = "poisson",
  data = data_count
)

summary(poisson)


Call:
glm(formula = nr_reviews ~ user_sentiment_var * critic_sentiment_var + 
    user_avg_grade + critic_avg_grade + critic_volume + is_action + 
    is_pc, family = "poisson", data = data_count)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-4.8358  -1.6356  -0.7576   0.4771  17.2512  

Coefficients:
                                          Estimate Std. Error z value Pr(>|z|)
(Intercept)                             -1.6326649  0.1121319 -14.560   <2e-16
user_sentiment_var                       0.2353105  0.1178312   1.997   0.0458
critic_sentiment_var                     0.3620923  0.1587386   2.281   0.0225
user_avg_grade                          -0.0108629  0.0005525 -19.663   <2e-16
critic_avg_grade                         0.0283536  0.0010226  27.728   <2e-16
critic_volume                            0.0183656  0.0003777  48.631   <2e-16
is_action                                0.0316885  0.0189582   1.671   0.0946
is_pc                                    

In [13]:
# Negative binomial GLM on the count-model sample, same predictors as the
# Poisson fit. NOTE(review): glm.nb is presumably provided by MASS, loaded
# outside this cell -- confirm.
negbin <- glm.nb(
  formula = nr_reviews ~ user_sentiment_var * critic_sentiment_var +
    user_avg_grade + critic_avg_grade + critic_volume +
    is_action + is_pc,
  data = data_count
)
summary(negbin)


Call:
glm.nb(formula = nr_reviews ~ user_sentiment_var * critic_sentiment_var + 
    user_avg_grade + critic_avg_grade + critic_volume + is_action + 
    is_pc, data = data_count, init.theta = 0.8755224082, link = log)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.2343  -1.1874  -0.4191   0.2499   4.9412  

Coefficients:
                                          Estimate Std. Error z value Pr(>|z|)
(Intercept)                             -1.8394861  0.1979295  -9.294   <2e-16
user_sentiment_var                       0.3908195  0.2109571   1.853   0.0639
critic_sentiment_var                     0.1921213  0.2723399   0.705   0.4805
user_avg_grade                          -0.0112235  0.0011635  -9.646   <2e-16
critic_avg_grade                         0.0290181  0.0019124  15.173   <2e-16
critic_volume                            0.0211318  0.0008747  24.159   <2e-16
is_action                                0.0665111  0.0389179   1.709   0.0874
is_pc              

In [14]:
# Hurdle model on the count-model sample with a negative binomial count
# component (dist = "negbin"), same predictors as the other count models.
# NOTE(review): hurdle is presumably provided by pscl, loaded outside this
# cell -- confirm.
mod_hurdle <- hurdle(
  formula = nr_reviews ~ user_sentiment_var * critic_sentiment_var +
    user_avg_grade + critic_avg_grade + critic_volume +
    is_action + is_pc,
  data = data_count,
  dist = "negbin"
)
summary(mod_hurdle)


Call:
hurdle(formula = nr_reviews ~ user_sentiment_var * critic_sentiment_var + 
    user_avg_grade + critic_avg_grade + critic_volume + is_action + is_pc, 
    data = data_count, dist = "negbin")

Pearson residuals:
    Min      1Q  Median      3Q     Max 
-0.9723 -0.6506 -0.3573  0.2733 15.4219 

Count model coefficients (truncated negbin with log link):
                                         Estimate Std. Error z value Pr(>|z|)
(Intercept)                             -1.745443   0.299419  -5.829 5.56e-09
user_sentiment_var                       0.741161   0.384022   1.930  0.05361
critic_sentiment_var                     0.706318   0.486646   1.451  0.14667
user_avg_grade                          -0.012300   0.001574  -7.816 5.47e-15
critic_avg_grade                         0.027426   0.002253  12.173  < 2e-16
critic_volume                            0.016849   0.001063  15.856  < 2e-16
is_action                                0.059292   0.048448   1.224  0.22101
is_pc           