# OPPG 17, fishing experiment

In [2]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
import scipy.stats as stats
fish = pd.read_csv('FishingExperiment.csv', sep=';', decimal=',')

### OPPG 17c)

In [3]:
model_fish = ols('Yield ~ C(Hook, Sum) * C(Lake, Sum)', data=fish).fit()
anova_fish = sm.stats.anova_lm(model_fish)
anova_fish

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
"C(Hook, Sum)",1.0,79.053333,79.053333,30.662616,2.109975e-06
"C(Lake, Sum)",3.0,125.594167,41.864722,16.238175,4.681421e-07
"C(Hook, Sum):C(Lake, Sum)",3.0,99.765,33.255,12.898701,4.889917e-06
Residual,40.0,103.126667,2.578167,,


### H0: sigma2_tb = 0, Ha: sigma2_tb > 0

In [4]:
MS_ab = anova_fish['mean_sq'][2]
MS_e = anova_fish['mean_sq'][3]
df_ab = anova_fish['df'][2]
df_e = anova_fish['df'][3]
alpha = 0.05
F0 = MS_ab/MS_e
f_test = stats.f.sf(F0, df_ab, df_e)
print(f'F0: {F0}\nAlpha: {alpha}\nf_value: {f_test}\nReject: {f_test < alpha}')

F0: 12.898700627060576
Alpha: 0.05
f_value: 4.889916619634598e-06
Reject: True


### H0: sigma2_t = 0, Ha: sigma2_t > 0

In [5]:
MS_ab = anova_fish['mean_sq'][2]
MS_a = anova_fish['mean_sq'][1]
df_ab = anova_fish['df'][2]
df_a = anova_fish['df'][1]
alpha = 0.05
F0 = MS_a/MS_ab
f_test = stats.f.sf(F0, df_a, df_ab)
print(f'F0: {F0}\nAlpha: {alpha}\nf_value: {f_test}\nReject: {f_test < alpha}')

F0: 1.2589000818590363
Alpha: 0.05
f_value: 0.4271949458002132
Reject: False


### H0: sigma2_b = 0, Ha: sigma2_b > 0

In [6]:
MS_ab = anova_fish['mean_sq'][2]
MS_b = anova_fish['mean_sq'][0]
df_ab = anova_fish['df'][2]
df_b = anova_fish['df'][0]
alpha = 0.05
F0 = MS_b/MS_ab
f_test = stats.f.sf(F0, df_b, df_ab)
print(f'F0: {F0}\nAlpha: {alpha}\nf_value: {f_test}\nReject: {f_test < alpha}')

F0: 2.3771863880118276
Alpha: 0.05
f_value: 0.22078369980839863
Reject: False


## oppg 17d)

In [7]:
model_fish.summary()

0,1,2,3
Dep. Variable:,Yield,R-squared:,0.747
Model:,OLS,Adj. R-squared:,0.703
Method:,Least Squares,F-statistic:,16.87
Date:,"Fri, 21 Aug 2020",Prob (F-statistic):,3.86e-10
Time:,10:25:15,Log-Likelihood:,-86.463
No. Observations:,48,AIC:,188.9
Df Residuals:,40,BIC:,203.9
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,9.9292,0.232,42.843,0.000,9.461,10.398
"C(Hook, Sum)[S.Lure]",1.2833,0.232,5.537,0.000,0.815,1.752
"C(Lake, Sum)[S.Lake1]",2.1458,0.401,5.346,0.000,1.335,2.957
"C(Lake, Sum)[S.Lake2]",-2.1875,0.401,-5.449,0.000,-2.999,-1.376
"C(Lake, Sum)[S.Lake3]",0.7542,0.401,1.879,0.068,-0.057,1.565
"C(Hook, Sum)[S.Lure]:C(Lake, Sum)[S.Lake1]",0.7750,0.401,1.931,0.061,-0.036,1.586
"C(Hook, Sum)[S.Lure]:C(Lake, Sum)[S.Lake2]",-0.8250,0.401,-2.055,0.046,-1.636,-0.014
"C(Hook, Sum)[S.Lure]:C(Lake, Sum)[S.Lake3]",1.9000,0.401,4.733,0.000,1.089,2.711

0,1,2,3
Omnibus:,0.193,Durbin-Watson:,2.316
Prob(Omnibus):,0.908,Jarque-Bera (JB):,0.221
Skew:,0.136,Prob(JB):,0.895
Kurtosis:,2.808,Cond. No.,2.0


In [8]:
my = 9.93  #leser det fra summaryen
sigma2_t = (MS_a-MS_ab)/(2*6)
sigma2_b = (MS_b-MS_ab)/(4*6)
sigma2_tb = (MS_ab-MS_e)/6
sigma2 = MS_e
print(f'Mu: {my}\nSigma2_t: {round(sigma2_t,2)}\nSigma2_b: {round(sigma2_b,2)}\nSigma2_tb: {round(sigma2_tb,2)}\nSigma2: {round(sigma2,2)}')

Mu: 9.93
Sigma2_t: 0.72
Sigma2_b: 1.91
Sigma2_tb: 5.11
Sigma2: 2.58


#### Gjennomsnittet er ca 10 hg = 1 kg, estimatene for sigmaene t og b er veldig små, altså er de ikke signifikant forskjellige fra 0. Sigma tb så ser man at det er en sterk interaksjon mellom 'lake' og 'hook', med andre ord så ville man brukt forskjellig krok til forkjellig innsjø

## oppg 17e)

In [35]:
from statsmodels.formula.api import mixedlm


In [42]:
model = mixedlm('Yield ~ C(Hook, Sum) * C(Lake, Sum)', groups=fish['Lake'], data=fish).fit()
model.summary()

0,1,2,3
Model:,MixedLM,Dependent Variable:,Yield
No. Observations:,48,Method:,REML
No. Groups:,4,Scale:,2.5782
Min. group size:,12,Log-Likelihood:,-88.4113
Max. group size:,12,Converged:,Yes
Mean group size:,12.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,9.929,0.836,11.882,0.000,8.291,11.567
"C(Hook, Sum)[S.Lure]",1.283,0.232,5.537,0.000,0.829,1.738
"C(Lake, Sum)[S.Lake1]",2.146,1.447,1.483,0.138,-0.691,4.983
"C(Lake, Sum)[S.Lake2]",-2.187,1.447,-1.511,0.131,-5.024,0.649
"C(Lake, Sum)[S.Lake3]",0.754,1.447,0.521,0.602,-2.083,3.591
"C(Hook, Sum)[S.Lure]:C(Lake, Sum)[S.Lake1]",0.775,0.401,1.931,0.054,-0.012,1.562
"C(Hook, Sum)[S.Lure]:C(Lake, Sum)[S.Lake2]",-0.825,0.401,-2.055,0.040,-1.612,-0.038
"C(Hook, Sum)[S.Lure]:C(Lake, Sum)[S.Lake3]",1.900,0.401,4.733,0.000,1.113,2.687
Group Var,2.578,53877245.752,,,,


In [43]:
model = mixedlm('Yield ~ C(Hook, Sum) * C(Lake, Sum)', groups=fish['Hook'], data=fish).fit()
model.summary()

0,1,2,3
Model:,MixedLM,Dependent Variable:,Yield
No. Observations:,48,Method:,REML
No. Groups:,2,Scale:,2.5782
Min. group size:,24,Log-Likelihood:,-88.4113
Max. group size:,24,Converged:,Yes
Mean group size:,24.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,9.929,1.159,8.569,0.000,7.658,12.200
"C(Hook, Sum)[S.Lure]",1.283,1.159,1.107,0.268,-0.988,3.555
"C(Lake, Sum)[S.Lake1]",2.146,0.401,5.346,0.000,1.359,2.933
"C(Lake, Sum)[S.Lake2]",-2.188,0.401,-5.449,0.000,-2.974,-1.401
"C(Lake, Sum)[S.Lake3]",0.754,0.401,1.879,0.060,-0.033,1.541
"C(Hook, Sum)[S.Lure]:C(Lake, Sum)[S.Lake1]",0.775,0.401,1.931,0.054,-0.012,1.562
"C(Hook, Sum)[S.Lure]:C(Lake, Sum)[S.Lake2]",-0.825,0.401,-2.055,0.040,-1.612,-0.038
"C(Hook, Sum)[S.Lure]:C(Lake, Sum)[S.Lake3]",1.900,0.401,4.733,0.000,1.113,2.687
Group Var,2.578,68149924.254,,,,
