In [7]:
# Base ------------------------
import numpy as np
import pandas as pd
import scipy.stats as ss

# Inference -------------------
import statsmodels.stats.weightstats as smw
from statsmodels.stats.power import TTestPower

In [5]:
pwd  # IPython automagic for %pwd: shows the working directory; keep this cell single-line so the bare name is still treated as a magic

'C:\\Users\\Bea\\Documents\\Python Scripts\\math&stats\\Session Documents'

In [8]:
# Load the anorexia data set from the Excel workbook; the path is relative to
# the notebook's working directory. The last line previews the first rows.
data_path = '../Data Sets/anorexia.xlsx'
anorexia = pd.read_excel(data_path)
anorexia.head()

Unnamed: 0,ID,group,prewt,postwt,difwt
0,101,1,80.5,82.2,1.7
1,201,2,80.7,80.2,-0.5
2,301,3,83.8,95.2,11.4
3,102,1,84.9,85.6,0.7
4,202,2,89.4,80.1,-9.3


**Find the 95% confidence interval of the difference in weight for all the treatments. Interpret it**

In [4]:
# Confidence interval for the mean of `difwt` over the whole sample.
# NOTE(review): the exercise text asks for a 95% CI, while SL = 0.01 produces
# a 99% interval -- confirm which confidence level is intended.

# Data ------------------------------------
SL = 0.01
n = len(anorexia)

# Descriptive Stats object ----------------
des = smw.DescrStatsW(anorexia['difwt'])

# Confidence Interval ---------------------
# Samples with more than 40 rows use the normal-based interval; smaller
# samples use the Student's t interval.
confint = des.zconfint_mean if n > 40 else des.tconfint_mean
low, up = confint(alpha=SL)

print(f'The {1-SL:4.1%} CI for the average difference in weight is [{low:4.2f},{up:4.2f}]')

The 99.0% CI for the average difference in weight is [0.34,5.19]


In [5]:
# Confidence interval for the mean of `difwt` restricted to group 1.
# NOTE(review): SL1 = 0.01 gives a 99% interval although the exercise text
# mentions 95% -- confirm the intended level.

# Data ------------------------------------
SL1 = 0.01
difwt_1 = anorexia.loc[anorexia.group == 1, 'difwt']
n1 = len(difwt_1)

# Descriptive Stats object ----------------
des = smw.DescrStatsW(difwt_1)

# Confidence Interval ---------------------
# Normal-based interval above 40 observations, Student's t otherwise.
confint = des.zconfint_mean if n1 > 40 else des.tconfint_mean
low, up = confint(alpha=SL1)

print(*("{:.2f}".format(bound) for bound in (low, up)))

-0.74 6.76


In [6]:
# Confidence interval for the mean of `difwt` restricted to group 2.
# NOTE(review): SL2 = 0.01 gives a 99% interval although the exercise text
# mentions 95% -- confirm the intended level.

# Data ------------------------------------
SL2 = 0.01
difwt_2 = anorexia.loc[anorexia.group == 2, 'difwt']
n2 = len(difwt_2)

# Descriptive Stats object ----------------
des = smw.DescrStatsW(difwt_2)

# Confidence Interval ---------------------
# Normal-based interval above 40 observations, Student's t otherwise.
confint = des.zconfint_mean if n2 > 40 else des.tconfint_mean
low, up = confint(alpha=SL2)

print(*("{:.2f}".format(bound) for bound in (low, up)))

-4.82 3.92


In [7]:
# Confidence interval for the mean of `difwt` restricted to group 3.
# NOTE(review): SL3 = 0.01 gives a 99% interval although the exercise text
# mentions 95% -- confirm the intended level.

# Data ------------------------------------
SL3 = 0.01
difwt_3 = anorexia.loc[anorexia.group == 3, 'difwt']
n3 = len(difwt_3)

# Descriptive Stats object ----------------
des = smw.DescrStatsW(difwt_3)

# Confidence Interval ---------------------
# Normal-based interval above 40 observations, Student's t otherwise.
confint = des.zconfint_mean if n3 > 40 else des.tconfint_mean
low, up = confint(alpha=SL3)

print(*("{:.2f}".format(bound) for bound in (low, up)))

2.19 12.33


Professor's solution:

In [8]:
# Per-group confidence interval for the mean of `difwt`, one line per group.
# NOTE(review): SL = .01 yields 99% intervals, whereas the exercise text asks
# for 95% -- confirm the intended level.
SL = .01 # significance level
groups = anorexia.group.unique()

for group in groups:
    # Observations of the current group only
    difwt_group = anorexia.loc[anorexia.group == group, 'difwt']
    n = len(difwt_group)

    # Descriptive Stats object ----------------
    des = smw.DescrStatsW(difwt_group)

    # Confidence Interval ---------------------
    # Normal-based interval above 40 observations, Student's t otherwise.
    confint = des.zconfint_mean if n > 40 else des.tconfint_mean
    low, up = confint(alpha=SL)

    print(f'The {1-SL:.0%} CI for the average difference in weight in group {group} is [{low:4.2f},{up:4.2f}]')

The 99% CI for the average difference in weight in group 1 is [-0.74,6.76]
The 99% CI for the average difference in weight in group 2 is [-4.82,3.92]
The 99% CI for the average difference in weight in group 3 is [2.19,12.33]


\begin{equation}
H_0:\ \mu_{\text{difwt}} \le 0, \quad H_1:\ \mu_{\text{difwt}} > 0
\end{equation}

**For each group separately, test whether the average weight increased after the treatment. Then find the probability of detecting, from your sample, an increase (or a decrease, where that is the relevant case) of one unit.**

In [21]:
# One-sided test per group: H0 mean(difwt) = mu0 against H1 mean(difwt) > mu0.
mu0 = 0
SL = .05
groups = anorexia.group.unique()

for group in groups:
    # Observations of the current group only
    difwt_group = anorexia.loc[anorexia.group == group, 'difwt']
    n = len(difwt_group)

    # Descriptive Stats object ----------------
    des = smw.DescrStatsW(difwt_group)

    # Hypothesis test -------------------------
    # z-test above 40 observations, Student's t-test otherwise; only the
    # p-value is used for the decision below.
    if n > 40:
        zstat, pval = des.ztest_mean(mu0, alternative='larger')
    else:
        tstat, pval, dof = des.ttest_mean(mu0, alternative='larger')

    # Decision at significance level SL
    decision = ' reject H0' if pval < SL else ' fail to reject H0'
    print("For group",group,':',f'alpha: {SL:4.2%}',f'p-value: {pval:4.2%}',decision)

For group 1 : alpha: 5.00% p-value: 1.75%  reject H0
For group 2 : alpha: 5.00% p-value: 61.18%  fail to reject H0
For group 3 : alpha: 5.00% p-value: 0.04%  reject H0


In [22]:
# Power of the one-sided, one-sample t-test (H1: mean > mu0) for each group:
# the probability of rejecting H0 at level SL when the true mean is mu1.
# TTestPower (imported in the top import cell) is t-based, since the standard
# deviation is estimated from the sample.
power = TTestPower()

# Data ---------------------------
SL = 0.05
mu0 = 0
mu1 = 1

groups = anorexia.group.unique()

for group in groups:
    # Observations of the current group only
    difwt_group = anorexia.loc[anorexia.group == group, 'difwt']
    std = difwt_group.std()
    n = len(difwt_group)

    # Standardized effect size, kept positive and paired with
    # alternative='larger' so that it matches the direction of the test.
    # By symmetry, the power to detect a one-unit decrease with a 'smaller'
    # test is the same number.
    prob = power.solve_power(effect_size=(mu1 - mu0) / std,
                             nobs=n,
                             alpha=SL,
                             alternative='larger')
    print("For group",group,'the probability is',"{:.2%}".format(prob))

For group 1 the probability is 17.73%
For group 2 the probability is 15.30%
For group 3 the probability is 13.72%
