In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats

# One-Sample T-test with python

![image.png](attachment:image.png)

 ![image-2.png](attachment:image-2.png)

In [3]:
ages=[10,20,35,50,28,40,55,18,16,55,30,25,43,18,30,28,14,24,16,17,32,35,26,27,65,18,43,23,21,20,19,70]

In [5]:
len(ages)

32

In [9]:
ages_mean = np.mean(ages)
ages_mean

30.34375

## Sample Size 

In [15]:
sample_size = 10
age_sample_size = np.random.choice(ages,sample_size)
age_sample_size

array([65, 70, 35, 35, 20, 25, 16, 65, 17, 30])

## Import Library for T-test

In [16]:
from scipy.stats import ttest_1samp

In [19]:
ttest,p_value = ttest_1samp(age_sample_size,30.34)

In [20]:
ttest

1.1215826134397515

In [21]:
p_value

0.2910717809025103

In [22]:
if p_value < 0.05:
    print("We are rejected Null Hypothesis")
else:
    print("We are accepted Null Hypothesis")

We are accepted Null Hypothesis


# Some more Example

In [23]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import math

In [24]:
np.random.seed(6)
school_ages=stats.poisson.rvs(loc=18,mu=35,size=1500)
classA_ages=stats.poisson.rvs(loc=18,mu=30,size=60)

In [25]:
school_ages

array([62, 59, 44, ..., 45, 52, 50])

In [26]:
classA_ages

array([52, 46, 40, 40, 47, 50, 51, 45, 44, 52, 46, 53, 43, 44, 51, 50, 54,
       42, 54, 45, 61, 53, 49, 46, 47, 41, 45, 51, 43, 45, 48, 50, 40, 52,
       44, 55, 54, 40, 45, 46, 54, 42, 46, 35, 51, 51, 46, 48, 47, 35, 52,
       52, 39, 44, 48, 40, 42, 46, 47, 45])

In [27]:
classA_ages.mean()

46.9

In [28]:
_,p_value = stats.ttest_1samp(a=classA_ages,popmean=school_ages.mean())

In [29]:
p_value

1.139027071016194e-13

In [30]:
if p_value < 0.05:
    print("We are rejected Null Hypothesis")
else:
    print("We are accepted Null Hypothesis")   

We are rejected Null Hypothesis


## Two-sample T-test With Python

The Independent Samples t Test or 2-sample t-test compares the means of two independent groups in order to determine whether there is statistical evidence that the associated population means are significantly different. The Independent Samples t Test is a parametric test. This test is also known as: Independent t Test.

![image.png](attachment:image.png)

In [31]:
np.random.seed(12)
school_ages = stats.poisson.rvs(loc=18,mu=35,size=1500)
classA_ages = stats.poisson.rvs(loc=18,mu=30,size=60)
classB_ages = stats.poisson.rvs(loc=18,mu=33,size=60)

In [32]:
school_ages

array([46, 49, 62, ..., 58, 57, 46])

In [33]:
classA_ages

array([55, 47, 46, 49, 42, 51, 58, 45, 38, 47, 59, 40, 46, 56, 38, 50, 48,
       50, 44, 38, 46, 53, 50, 39, 36, 39, 40, 37, 43, 44, 57, 39, 57, 54,
       41, 48, 50, 49, 51, 45, 49, 51, 40, 44, 44, 39, 38, 54, 41, 42, 50,
       40, 51, 55, 46, 49, 54, 48, 48, 57])

In [34]:
classB_ages

array([62, 50, 48, 43, 46, 48, 50, 44, 45, 51, 47, 44, 48, 45, 54, 53, 50,
       52, 49, 48, 50, 61, 51, 43, 55, 67, 53, 58, 44, 54, 44, 54, 55, 47,
       48, 51, 41, 42, 42, 61, 48, 55, 46, 56, 51, 51, 50, 52, 53, 60, 51,
       46, 51, 42, 55, 51, 52, 38, 44, 55])

In [35]:
classA_ages.mean()

46.75

In [37]:
classB_ages.mean()

50.083333333333336

In [39]:
_,p_value = stats.ttest_ind(a = classA_ages,b = classB_ages,equal_var = False)

In [40]:
p_value

0.002728967211474832

In [42]:
if p_value < 0.05:
    print("We are Rejected Null Hypothesis")
else:
    print("We are Accepted Null Hypothesis")
    

We are Rejected Null Hypothesis


## Paired T-test With Python
When you want to check how different samples from the same group are, you can go for a paired T-test

In [43]:
weight1=[25,30,28,35,28,34,26,29,30,26,28,32,31,30,45]

In [44]:
weight1

[25, 30, 28, 35, 28, 34, 26, 29, 30, 26, 28, 32, 31, 30, 45]

In [45]:
weight2 = weight1+stats.norm.rvs(scale=5,loc=-1.25,size=15)

In [46]:
weight2

array([27.75056052, 29.41747658, 20.97615599, 42.60520626, 27.38015579,
       40.16095336, 20.1307789 , 26.45001838, 36.96450305, 16.48080248,
       26.18373459, 34.00204331, 37.07369327, 27.94262245, 51.54442977])

## Convert into DataFrame

In [47]:
weight_df = pd.DataFrame({"weight_10":np.array(weight1),
                         "weight_20":np.array(weight2),
                       "weight_change":np.array(weight2)-np.array(weight1)})

In [48]:
weight_df

Unnamed: 0,weight_10,weight_20,weight_change
0,25,27.750561,2.750561
1,30,29.417477,-0.582523
2,28,20.976156,-7.023844
3,35,42.605206,7.605206
4,28,27.380156,-0.619844
5,34,40.160953,6.160953
6,26,20.130779,-5.869221
7,29,26.450018,-2.549982
8,30,36.964503,6.964503
9,26,16.480802,-9.519198


In [49]:
_,p_value = stats.ttest_rel(a = weight1,b = weight2)

In [50]:
p_value

0.7097115910094005

In [51]:
if p_value < 0.05:
    print("We are Rejected Null Hypothesis")
else:
    print("We are Accepted Null Hypothesis")
    

We are Accepted Null Hypothesis
