## Imagine that we have height vs. age data for people fed diet "a" vs. diet "b"

In [1]:
import numpy as np
from scipy import stats
import pandas as pd
from statsmodels.formula.api import ols



In [2]:
# Each diet has three height measurements at each of the ages 1, 2 and 3.
# Diet "a"
age = np.repeat([1, 2, 3], 3)
height = np.array([12, 11, 13, 17, 24, 22, 25, 33, 31])
df_1 = pd.DataFrame({'age': age, 'height': height, 'diet': 'a'})

# Diet "b" -- `age` and `height` are rebound here, so once this cell has run
# they hold the diet "b" measurements only.
age = np.repeat([1, 2, 3], 3)
height = np.array([15, 17, 14, 29, 28, 35, 48, 53, 50])
df_2 = pd.DataFrame({'age': age, 'height': height, 'diet': 'b'})

# Stack both diets into a single long-format frame with a fresh 0..17 index.
df = pd.concat([df_1, df_2], ignore_index=True)
df

Unnamed: 0,age,diet,height
0,1,a,12
1,1,a,11
2,1,a,13
3,2,a,17
4,2,a,24
5,2,a,22
6,3,a,25
7,3,a,33
8,3,a,31
9,1,b,15


### First diet a

In [3]:
# Straight-line fit height = slope * age + intercept, using the diet "a" rows only.
diet_a = df[df.diet == 'a']
coeffs_a, cov_a = np.polyfit(diet_a.age, diet_a.height, 1, cov=True)

# polyfit returns the coefficients highest power first: (slope, intercept).
slope_diet_a, intercept_diet_a = coeffs_a

# The diagonal of the covariance matrix holds the variances of (slope, intercept).
stdev_slope_diet_a, stdev_intercept_diet_a = np.sqrt(np.diag(cov_a))

print("slope = %.2f +/- %.2f" % (slope_diet_a, stdev_slope_diet_a))
print("intercept = %.2f +/- %.2f" % (intercept_diet_a, stdev_intercept_diet_a))

slope = 8.83 +/- 1.45
intercept = 3.22 +/- 3.12


### Then diet b

In [4]:
# Straight-line fit height = slope * age + intercept, using the diet "b" rows only.
diet_b = df[df.diet == 'b']
coeffs_b, cov_b = np.polyfit(diet_b.age, diet_b.height, 1, cov=True)

# polyfit returns the coefficients highest power first: (slope, intercept).
slope_diet_b, intercept_diet_b = coeffs_b

# The diagonal of the covariance matrix holds the variances of (slope, intercept).
stdev_slope_diet_b, stdev_intercept_diet_b = np.sqrt(np.diag(cov_b))

print("slope = %.2f +/- %.2f" % (slope_diet_b, stdev_slope_diet_b))
print("intercept = %.2f +/- %.2f" % (intercept_diet_b, stdev_intercept_diet_b))

slope = 17.50 +/- 1.36
intercept = -2.89 +/- 2.94


### Ratio in growth rates

In [5]:
# Ratio of the two fitted growth rates: diet "a" slope divided by diet "b" slope.
ratio = slope_diet_a / slope_diet_b

# First-order error propagation for a quotient: the relative uncertainties add
# in quadrature. This assumes the two slope estimates are independent.
rel_err_a = stdev_slope_diet_a / slope_diet_a
rel_err_b = stdev_slope_diet_b / slope_diet_b

# Scale by |ratio| so the reported uncertainty stays non-negative even when the
# two slopes have opposite signs (a bare `ratio` would print a negative "+/-").
stdev_ratio = abs(ratio) * np.sqrt(rel_err_a**2 + rel_err_b**2)
print("ratio = %.2f +/- %.2f" % (ratio, stdev_ratio))

ratio = 0.50 +/- 0.09


### Calculate a linear least-squares regression of height on age (here, for the diet "b" measurements)
* ### p-value for the null hypothesis that the slope is zero (a small value means we can reject it)
* ### correlation coefficient

In [6]:
# Regress height on age for the diet "b" rows, selected explicitly from `df`.
# The bare `age` / `height` arrays are assigned once per diet when the data is
# built and end up holding only the diet "b" values, so relying on them here
# would hide which subset is actually being analysed.
diet_b = df[df.diet == 'b']
slope, intercept, r_value, p_value, std_err = stats.linregress(diet_b.age, diet_b.height)

# p_value: two-sided test of the null hypothesis that the slope is zero.
# r_value: correlation coefficient between age and height.
print("p-value = %.2E" % p_value)
print("r_value = %.2f" % r_value)

p-value = 1.26E-06
r_value = 0.99


### Now, calculate an analysis of covariance

http://www.alexsalo.xyz/ancova-comparing-regression-slopes/

http://www.statsmakemecry.com/smmctheblog/stats-soup-anova-ancova-manova-mancova

In [7]:
# ANCOVA expressed as an OLS model. `age * C(diet)` expands to
# age + C(diet) + age:C(diet), so the fit has a separate intercept offset
# (C(diet)) and a separate slope offset (age:C(diet)) for the second diet.
formula = 'height ~ age * C(diet)'
lm = ols(formula=formula, data=df)
fit = lm.fit()
print(fit.summary())

                            OLS Regression Results                            
Dep. Variable:                 height   R-squared:                       0.961
Model:                            OLS   Adj. R-squared:                  0.952
Method:                 Least Squares   F-statistic:                     113.5
Date:                Tue, 21 Mar 2017   Prob (F-statistic):           4.63e-10
Time:                        13:06:19   Log-Likelihood:                -42.472
No. Observations:                  18   AIC:                             92.94
Df Residuals:                      14   BIC:                             96.51
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------
Intercept            3.2222      2.562  

  "anyway, n=%i" % int(n))


In [8]:
# Per-coefficient p-values of the fitted OLS model. The `age:C(diet)[T.b]` entry
# is the interaction term, i.e. the difference in the height-vs-age slope
# between the two diets.
fit.pvalues

Intercept           0.229004
C(diet)[T.b]        0.113762
age                 0.000003
age:C(diet)[T.b]    0.000143
dtype: float64

The interaction term `age:C(diet)[T.b]` has a p-value of about 1.4e-4, so we can reject the hypothesis that height as a function of age has the same slope for diets "a" and "b".