In [19]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy import stats

__Three different assembly methods, referred to as methods A, B and C, have been proposed. <br>
Managers at Chemitech want to determine which assembly method can produce the greatest number of filtration systems per week.__

In [3]:
# Load the Chemitech worksheet into a DataFrame. The displayed result has one
# column per assembly method (A, B, C) and one row per observation.
# NOTE(review): the path is relative, so the workbook must sit in the
# notebook's working directory.
data = pd.read_excel("Chemitech.xlsx")
data

Unnamed: 0,A,B,C
0,58,58,48
1,64,69,57
2,55,71,59
3,66,64,47
4,67,68,49


In [8]:
# Reshape from wide (one column per assembly method) to long format:
# one row per observation, keeping the original row position in `index`,
# the method label in `treatment`, and the measurement in `value`.
data_r = (
    data
    .reset_index()
    .melt(
        id_vars=['index'],
        value_vars=['A', 'B', 'C'],
        var_name='treatment',
        value_name='value',
    )
)
data_r

Unnamed: 0,index,treatment,value
0,0,A,58
1,1,A,64
2,2,A,55
3,3,A,66
4,4,A,67
5,0,B,58
6,1,B,69
7,2,B,71
8,3,B,64
9,4,B,68


In [24]:
# One-way ANOVA: fit `value` against the categorical `treatment` factor and
# build the ANOVA table (typ = 1 requests Type I / sequential sums of squares).
# The F-test in the C(treatment) row tests equality of the treatment means.
model = ols('value ~ C(treatment)', data = data_r).fit()
anova_table = sm.stats.anova_lm(model, typ = 1)
anova_table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(treatment),2.0,520.0,260.0,9.176471,0.003818
Residual,12.0,340.0,28.333333,,


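__Result: <br>
    p = 0.003818, which is below the conventional 0.05 significance level, so we reject the null hypothesis that all three treatment means are equal: at least one assembly method's mean differs from the others.__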

In [10]:
# One-hot encode the treatment label into indicator columns A, B and C.
# dtype=int is requested explicitly: pandas >= 2.0 returns boolean dummies by
# default, which would make the later sm.OLS call fail on an object-dtype
# design matrix and would make the formula API treat A and B as categorical.
# Integer 0/1 indicators behave the same on every pandas version.
just_dummies = pd.get_dummies(data_r['treatment'], dtype=int)
just_dummies

Unnamed: 0,A,B,C
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0
5,0,1,0
6,0,1,0
7,0,1,0
8,0,1,0
9,0,1,0


In [11]:
# Attach the indicator columns to the long table, then discard the raw label
# and the C indicator so that method C becomes the regression baseline.
step_1 = (
    pd.concat([data_r, just_dummies], axis=1)
    .drop(columns=['treatment', 'C'])
)
step_1

Unnamed: 0,index,value,A,B
0,0,58,1,0
1,1,64,1,0
2,2,55,1,0
3,3,66,1,0
4,4,67,1,0
5,0,58,0,1
6,1,69,0,1
7,2,71,0,1
8,3,64,0,1
9,4,68,0,1


In [20]:
# Dummy-variable regression: `value` on an intercept plus the A and B
# indicators. With C's indicator dropped, the intercept estimates the mean of
# method C and each coefficient estimates that method's difference from C.
result = sm.OLS(
    endog=step_1['value'],
    exog=sm.add_constant(step_1[['A', 'B']]),
).fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                  value   R-squared:                       0.605
Model:                            OLS   Adj. R-squared:                  0.539
Method:                 Least Squares   F-statistic:                     9.176
Date:                Sun, 18 Jun 2023   Prob (F-statistic):            0.00382
Time:                        20:27:11   Log-Likelihood:                -44.691
No. Observations:                  15   AIC:                             95.38
Df Residuals:                      12   BIC:                             97.51
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         52.0000      2.380     21.844      0.0



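__Result (Point Estimation): <br>
    Method C is the baseline (its dummy was dropped), so the intercept (52) is the mean for C and the A and B coefficients are differences from C. <br>
    A: 52 + 10 = 62 <br>
    B: 52 + 14 = 66 <br>
    C: 52__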

In [23]:
# ANOVA on the dummy-coded model, with A and B entered as separate terms.
# typ = 1 gives Type I (sequential) sums of squares: each term is assessed
# after the terms listed before it, so the A and B rows depend on the order
# in which they appear in the formula.
# NOTE(review): this rebinds `model` and `anova_table`, replacing the one-way
# ANOVA results assigned to the same names earlier in the notebook.
model = ols('value ~ A+B', data = step_1).fit()
anova_table = sm.stats.anova_lm(model, typ = 1)
anova_table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
A,1.0,30.0,30.0,1.058824,0.323779
B,1.0,490.0,490.0,17.294118,0.001326
Residual,12.0,340.0,28.333333,,
