In [1]:
import numpy as np
import pandas as pd
from scipy import stats
from statsmodels.stats.weightstats import ztest
from sklearn import datasets

# t-test

### 1. One-Sample t-testing

In [2]:
# Sample of ten height measurements used by the one-sample t-test below.
Height_of_boys = [
    5.7, 5.8, 6.0, 5.9, 6.2,
    5.6, 6.0, 5.9, 5.8, 6.1,
]

In [3]:
# One-sample t-test: compare the sample mean of the heights against a
# hypothesised population mean of 6.0. The result is the cell's output.
stats.ttest_1samp(Height_of_boys, popmean=6.0)

Ttest_1sampResult(statistic=-1.7320508075688705, pvalue=0.11730680301423944)

###### Since the p-value (0.11730680301423944) is greater than 0.05, we do not have sufficient evidence to reject the null hypothesis that the mean height of the population from which these 10 boys were sampled is 6.0.

### 2. Two-Sample t-testing

In [4]:
# Two groups of ten height measurements each, compared by the two-sample test below.
Height_of_boys1 = [
    5.7, 5.8, 6.0, 5.9, 6.2,
    5.6, 6.0, 5.9, 5.8, 6.1,
]
Height_of_boys2 = [
    6.0, 5.9, 6.2, 5.8, 6.1,
    5.6, 6.0, 5.9, 5.8, 6.1,
]

In [5]:
# Independent two-sample t-test on the two groups of heights.
stats.ttest_ind(Height_of_boys1, Height_of_boys2)

Ttest_indResult(statistic=-0.49656353316141005, pvalue=0.6255065770539847)

###### Since the p-value (0.6255065770539847) is greater than 0.05, we do not have sufficient evidence to reject the null hypothesis that the mean height of the boys is the same in the two groups.

### 3. Paired Samples t-testing

In [6]:
# Two equally sized samples fed to the paired-samples t-test below;
# element i of one list is compared against element i of the other.
Height_of_boys = [
    5.7, 5.8, 6.0, 5.9, 6.2,
    5.6, 6.0, 5.9, 5.8, 6.1,
]
Height_of_girls = [
    5.5, 5.3, 5.6, 5.4, 5.2,
    5.4, 5.7, 5.5, 5.3, 5.4,
]

In [7]:
# Paired-samples t-test on the two height lists.
# NOTE(review): ttest_rel treats element i of each list as a matched pair;
# confirm these boys' and girls' heights are genuinely paired observations,
# otherwise an independent-samples test would be the appropriate choice.
stats.ttest_rel(Height_of_boys, Height_of_girls)

Ttest_relResult(statistic=6.17732215617036, pvalue=0.00016325566893217534)

###### Since the p-value (0.00016325566893217534) is less than 0.05, we have sufficient evidence to reject the null hypothesis that the mean height of the boys is the same as that of the girls (i.e., that the mean of the paired differences is zero).

# z-test

### 1. One-Sample z-test

In [8]:
# Same ten height measurements, redefined so the z-test section is self-contained.
Height_of_boys = [
    5.7, 5.8, 6.0, 5.9, 6.2,
    5.6, 6.0, 5.9, 5.8, 6.1,
]

In [9]:
# One-sample z-test of the heights against a hypothesised mean of 6.0;
# the output is a (statistic, p-value) tuple.
ztest(x1=Height_of_boys, value=6.0)

(-1.7320508075688705, 0.08326451666355161)

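###### Since the p-value (0.08326451666355161) is greater than 0.05, we do not have sufficient evidence to reject the null hypothesis that the mean height of the population from which these 10 boys were sampled is 6.0. Note that the test statistic is identical to that of the one-sample t-test; the p-value is smaller only because the z-test uses the normal distribution instead of the t distribution, an approximation that is reliable only for large samples.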

### 2. Two-Sample z-test

In [10]:
# The two groups of heights, redefined so the two-sample z-test cell is self-contained.
Height_of_boys1 = [
    5.7, 5.8, 6.0, 5.9, 6.2,
    5.6, 6.0, 5.9, 5.8, 6.1,
]
Height_of_boys2 = [
    6.0, 5.9, 6.2, 5.8, 6.1,
    5.6, 6.0, 5.9, 5.8, 6.1,
]

In [11]:
# Two-sample z-test; value=0 means the hypothesised difference in means is zero.
ztest(x1=Height_of_boys1, x2=Height_of_boys2, value=0)

(-0.49656353316141005, 0.6194968743386287)

###### Since the p-value (0.6194968743386287) is greater than 0.05, we do not have sufficient evidence to reject the null hypothesis that the mean height of the boys is the same in the two groups.

# IRIS Dataset

In [12]:
# Load the iris dataset from scikit-learn and wrap its feature matrix in a
# DataFrame. No column names are supplied here, so pandas uses default labels.
iris = datasets.load_iris()
iris_df = pd.DataFrame(data=iris.data)

In [13]:
# Display the feature frame; at this point the columns still carry the
# default integer labels (0-3) because no names have been assigned yet.
iris_df

Unnamed: 0,0,1,2,3
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [14]:
# Append the target labels as a fifth column first, then assign readable names
# to all five columns positionally. Keeping this order lets the cell be re-run
# safely: the frame always has exactly five columns when the names are set.
iris_df['class'] = iris.target
iris_df.columns = [
    'sepal_len',
    'sepal_wid',
    'petal_len',
    'petal_wid',
    'class',
]

In [15]:
# Pull out the petal-width column as a Series for the one-sample tests.
df1 = iris_df['petal_wid']

In [16]:
# Display the petal-width Series selected in the previous cell.
df1

0      0.2
1      0.2
2      0.2
3      0.2
4      0.2
      ... 
145    2.3
146    1.9
147    2.0
148    2.3
149    1.8
Name: petal_wid, Length: 150, dtype: float64

### Here, our null hypothesis (H₀) is that the mean petal width is 1.1, while the alternative hypothesis (Hₐ) is that the mean is not equal to 1.1.

In [17]:
# One-sample t-test of the petal widths against the hypothesised mean of 1.1.
stats.ttest_1samp(df1, popmean=1.1)

Ttest_1sampResult(statistic=1.5960637411710012, pvalue=0.11259335851740065)

###### Since our p-values (0.11259335851740065 for the t-test above and 0.11047456294053219 for the z-test in the next cell) are greater than 0.05, we do not have sufficient evidence to reject the null hypothesis that the mean petal width is 1.1.

In [18]:
# One-sample z-test of the petal widths against the hypothesised mean of 1.1.
ztest(x1=df1, value=1.1)

(1.5960637411710012, 0.11047456294053219)

In [19]:
# Select the two width columns that the two-sample tests below compare.
df1 = iris_df['petal_wid']
df2 = iris_df['sepal_wid']

In [20]:
# Independent two-sample t-test comparing mean petal width with mean sepal width.
# NOTE(review): both Series are columns of the same iris_df, so each row supplies
# one value to each sample; the independence assumption of ttest_ind may not
# hold - consider whether a paired test (stats.ttest_rel) is more appropriate.
stats.ttest_ind(df1, df2)

Ttest_indResult(statistic=-25.916012615382527, pvalue=2.493672595696879e-78)

##### Since the p-value is less than 0.05 for both the t-test above and the z-test in the next cell, we have sufficient evidence to reject the null hypothesis, and we conclude that the mean petal width and the mean sepal width are not identical.

In [21]:
# Two-sample z-test on the same two columns; value=0 is the hypothesised
# difference between the two means.
ztest(x1=df1, x2=df2, value=0)

(-25.916012615382527, 4.3955956589549194e-148)