In [1]:
import numpy as np

In [2]:
from scipy import stats

In [4]:
pip install scipy

Note: you may need to restart the kernel to use updated packages.


# One sample T test

In [6]:
sample_data=np.array([85,86,77,42,50,82,90,75,72,70,80,45,55,68,92,41,54,63,82,88])

In [7]:
hypothesized_mean=75

In [8]:
t_stats,p_value=stats.ttest_1samp(sample_data,hypothesized_mean)

TtestResult(statistic=np.float64(-1.374002016325974), pvalue=np.float64(0.18543474680273658), df=np.int64(19))

In [9]:
t_stats,p_value=stats.ttest_1samp(sample_data,hypothesized_mean)

In [10]:
alpha=0.05

In [13]:
p_value

np.float64(0.18543474680273658)

In [12]:
if p_value < alpha:
    print("Reject the null hypothesis")
else:
    print("Fail to reject null hypothesis")

Fail to reject null hypothesis


In [14]:
# p_value is greater than alpha value so, we have to accept the null hypothesis

# one sample proportion test

In [3]:
pip install statsmodels

Note: you may need to restart the kernel to use updated packages.


In [3]:
import statsmodels.api as sm

In [18]:
hypo_prop=0.4
sample_size=200
n_success=100

In [19]:
z_stats,p_value=sm.stats.proportions_ztest(n_success,sample_size,hypo_prop)

In [20]:
p_value

np.float64(0.004677734981047275)

In [21]:
if p_value < alpha:
    print("Reject the null hypothesis")
else:
    print("Fail to reject null hypothesis")

Reject the null hypothesis


# Two sample T test

In [30]:
method_A_scores=np.array([90,75,80,45,68,52,98,76,71,95])

In [31]:
method_B_scores=np.array([90,80,60,78,55,85,94,84,71,87])

In [32]:
t_stats,p_values=stats.ttest_ind(method_A_scores,method_B_scores)

In [33]:
p_value

np.float64(0.004677734981047275)

In [34]:
t_stats

np.float64(-0.501036470367726)

In [35]:
if p_value < alpha:
    print("Reject the null hypothesis")
else:
    print("Fail to reject null hypothesis")

Reject the null hypothesis


# Two sample proportion test

In [6]:
n_samples_x=100
n_success_x=45
n_samples_y=120
n_success_y=30
z_stats,p_value=sm.stats.proportions_ztest([n_success_x,n_success_y],[n_samples_x,n_samples_y])

In [7]:
z_stats

np.float64(3.116142619904648)

In [8]:
p_value

np.float64(0.0018323362992630889)

In [12]:
alpha=0.05

In [11]:
if p_value < alpha:
    print("Reject the null hypothesis")
else:
    print("Fail to reject null hypothesis")

Reject the null hypothesis


# Paired T Test

In [1]:
import numpy as np

In [2]:
from scipy import stats

In [4]:
before_training=np.array([50,60,65,70,55,74,68,82,90,95])
after_training=np.array([80,75,78,98,55,65,88,64,84,68])

In [5]:
difference=after_training - before_training

In [6]:
difference

array([ 30,  15,  13,  28,   0,  -9,  20, -18,  -6, -27])

In [7]:
t_stats,p_value=stats.ttest_rel(after_training,before_training)

In [8]:
p_value

np.float64(0.47560044636292165)

In [9]:
alpha=0.05

In [10]:
if p_value < alpha:
    print("Reject the null hypothesis")
else:
    print("Fail to reject null hypothesis")

Fail to reject null hypothesis


# Anova Test (Analysis of variance)

## One way anova

In [14]:
fertilizer_a=np.array([50,55,52,48,45,49])
fertilizer_b=np.array([55,59,56,46,48,50])
fertilizer_c=np.array([58,60,55,49,51,53])

In [15]:
stats.f_oneway(fertilizer_a,fertilizer_b,fertilizer_c)

F_onewayResult(statistic=np.float64(1.660617059891107), pvalue=np.float64(0.22311255505031893))

In [16]:
if p_value < alpha:
    print("Reject the null hypothesis")
else:
    print("Fail to reject null hypothesis")

Fail to reject null hypothesis


## Two way anova

In [18]:
import pandas as pd

In [23]:
from statsmodels.stats.anova import anova_lm
from statsmodels.formula.api import ols

In [20]:
data={"Gender":['Male','Male','Male','Male','Male','Male','Male','Male'],
     "Diet":['A','A','B','C','B','A','C','C'],
     "weight_loss":[4.5,6.5,3.5,1.0,2.5,2.9,8.5,5.5]}

In [21]:
df=pd.DataFrame(data)

In [22]:
df

Unnamed: 0,Gender,Diet,weight_loss
0,Male,A,4.5
1,Male,A,6.5
2,Male,B,3.5
3,Male,C,1.0
4,Male,B,2.5
5,Male,A,2.9
6,Male,C,8.5
7,Male,C,5.5


In [29]:
formula="weight_loss~C(Gender)+C(Diet)+C(Gender):C(Diet)" #Linear model

In [27]:
model=ols(formula,df).fit()

In [28]:
anova_lm(model)

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(Gender),0.0,0.0,,,
C(Diet),2.0,5.152083,2.576042,0.362755,0.712672
C(Gender):C(Diet),0.0,0.0,,,
Residual,5.0,35.506667,7.101333,,


In [30]:
anova_res=anova_lm(model)

In [32]:
anova_res["PR(>F)"]

C(Gender)                 NaN
C(Diet)              0.712672
C(Gender):C(Diet)         NaN
Residual                  NaN
Name: PR(>F), dtype: float64

In [33]:
# ols=ordinary least square.