In [1]:
import pandas as pd
import numpy as np

import statsmodels.formula.api as smf
import imp

import tvshow_functions as f

In [2]:
# Show floats with 4 decimal places in rendered DataFrames.
# The fully qualified key is used because the bare pattern 'precision' is
# resolved by regex and matches more than one option on recent pandas
# (display.precision and styler.format.precision), which raises OptionError.
pd.set_option('display.precision', 4)


In [15]:
# Fetch one frame per show through the project helper, in the order
# (masterchef, bakeoff).
# NOTE(review): the schema returned by f.get_data is not visible here; later
# cells read season, episode, contestant, contestant_id, top, bottom, low and out.
df_m, df_b = (
    f.get_data(show)
    for show in (f.SHOW_MASTERCHEF, f.SHOW_BAKEOFF)
)

# Do good and bad outcomes persist?

Cleaning the data

In [16]:
# Work on a copy so the loaded bake-off frame (df_b) is left untouched and
# this cell can be re-run without compounding changes.
df = df_b.copy()

# Number of contestant rows in each (season, episode) group, broadcast back
# onto every row of that group. The built-in 'size' replaces a per-group
# Python lambda calling len().
df['number'] = df.groupby(['season', 'episode'])['contestant'].transform('size')

# Lag each outcome flag by one row within a contestant; the first row of every
# contestant has no predecessor and therefore gets NaN.
# NOTE(review): shift(1) uses row order, so this assumes rows are already in
# chronological order within each contestant_id -- confirm in f.get_data.
by_contestant = df.groupby('contestant_id')
for outcome in ('top', 'bottom', 'low'):
    df[outcome + '_previous'] = by_contestant[outcome].shift(1)

In [17]:
# Regression sample: keep only rows that have a lagged 'bottom' value, i.e.
# drop rows where bottom_previous is missing.
has_previous = df['bottom_previous'].notnull()
df_reg = df[has_previous]

# Do good and bad outcomes persist?

## General approach

Predicting whether a contestant ends up in a top position

In [22]:
# OLS of the 'top' indicator on the lagged top/low indicators and the
# 'number' column; the `*1` inside I(...) multiplies each flag by one.
top_formula = 'I(top*1) ~ I(top_previous*1) + I(low_previous*1) + number'
top_model = smf.ols(top_formula, data=df_reg)
top_fit = top_model.fit()
top_fit.summary()

0,1,2,3
Dep. Variable:,I(top * 1),R-squared:,0.011
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,1.726
Date:,"Thu, 26 Apr 2018",Prob (F-statistic):,0.161
Time:,17:19:53,Log-Likelihood:,-279.02
No. Observations:,454,AIC:,566.0
Df Residuals:,450,BIC:,582.5
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.4528,0.080,5.644,0.000,0.295,0.610
I(top_previous * 1)[T.1],-0.0573,0.050,-1.141,0.254,-0.156,0.041
I(low_previous * 1)[T.1],-0.0729,0.056,-1.298,0.195,-0.183,0.038
number,-0.0175,0.009,-2.014,0.045,-0.035,-0.000

0,1,2,3
Omnibus:,164.296,Durbin-Watson:,2.168
Prob(Omnibus):,0.0,Jarque-Bera (JB):,88.69
Skew:,0.948,Prob(JB):,5.51e-20
Kurtosis:,1.955,Cond. No.,35.2


Predicting whether a contestant leaves the show ('out')

In [23]:
# OLS of the 'out' indicator on the lagged top/low indicators and the
# 'number' column; the `*1` inside I(...) multiplies each flag by one.
out_formula = 'I(out*1) ~ I(top_previous*1) + I(low_previous*1) + number'
out_model = smf.ols(out_formula, data=df_reg)
out_fit = out_model.fit()
out_fit.summary()

0,1,2,3
Dep. Variable:,I(out * 1),R-squared:,0.042
Model:,OLS,Adj. R-squared:,0.035
Method:,Least Squares,F-statistic:,6.554
Date:,"Thu, 26 Apr 2018",Prob (F-statistic):,0.000241
Time:,17:19:57,Log-Likelihood:,-146.1
No. Observations:,454,AIC:,300.2
Df Residuals:,450,BIC:,316.7
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.1428,0.060,2.386,0.017,0.025,0.260
I(top_previous * 1)[T.1],0.0040,0.037,0.106,0.916,-0.070,0.078
I(low_previous * 1)[T.1],0.1683,0.042,4.016,0.000,0.086,0.251
number,-0.0056,0.006,-0.862,0.389,-0.018,0.007

0,1,2,3
Omnibus:,173.719,Durbin-Watson:,2.124
Prob(Omnibus):,0.0,Jarque-Bera (JB):,427.051
Skew:,2.023,Prob(JB):,1.85e-93
Kurtosis:,5.491,Cond. No.,35.2


In [25]:
## Comparing the results for Bakeoff and Masterchef

Regression results with the bakeoff data

In [26]:
# Run the project's regression helper on the bakeoff frame and display the
# entry stored under the 'table' key.
bakeoff_results = f.get_results(df_b)
bakeoff_results['table']

Unnamed: 0,beta_top,std_top,beta_bottom,std_bottom
Intercept,0.4528,0.0802,0.2707,0.0816
I(top_previous * 1)[T.1],-0.0573,0.0502,0.0474,0.051
I(low_previous * 1)[T.1],-0.0729,0.0562,0.2648,0.0571
number,-0.0175,0.0087,-0.0026,0.0088


Regression results with the masterchef data

In [27]:
# Run the project's regression helper on the masterchef frame and display the
# entry stored under the 'table' key.
masterchef_results = f.get_results(df_m)
masterchef_results['table']

Unnamed: 0,beta_top,std_top,beta_bottom,std_bottom
Intercept,0.3947,0.0443,0.2996,0.0352
I(top_previous * 1)[T.1],-0.0138,0.0298,0.0325,0.0237
I(low_previous * 1)[T.1],-0.0537,0.0473,0.0159,0.0375
number,0.0037,0.0029,-0.0096,0.0023


## Conclusion
It looks