In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import scipy
import scipy.stats  # explicit: `import scipy` alone does not guarantee the `stats` submodule is loaded
from numpy.random import randint as rd

### Computing probability for a given t-statistic

In [2]:
# Cumulative probability P(T <= -3.87) under a Student's t distribution with 14 degrees of freedom.
scipy.stats.t.cdf(-3.87, df=14)

0.0008496154651404193

### Computing t-score for a given probability

In [41]:
# Inverse CDF: the t-score below which 5% of a Student's t distribution with 14 degrees of freedom lies.
scipy.stats.t.ppf(0.05, df=14)

-1.7613101357748564

### t-test : Single Sample

In [25]:
# Seeded generator so the random sample (and the mean / t-test computed from it
# in the following cells) is reproducible on Restart & Run All.
rng = np.random.default_rng(seed=42)
sample = rng.integers(-30, 30, 25)  # 25 integers drawn uniformly from [-30, 30)
# Horizontal box plot of the sample values (300 px tall).
px.box(
    x=sample,
    height=300,
)

In [26]:
# Arithmetic mean of the sample; this is the value the one-sample t-test compares against 0.
sample.mean()

3.92

In [15]:
# scipy.stats.ttest_1samp(sample,0)

In [27]:
# One-sample t-test of the null hypothesis that the population mean behind `sample` is 0.
t_statistic, p_value = scipy.stats.ttest_1samp(
    sample,
    popmean=0,
)
# Report both numbers rounded to 5 decimal places.
print("t statistic is :", round(t_statistic, 5))
print("p value is :", round(p_value, 5))

t statistic is : 0.93913
p value is : 0.35702


### t-test : Two Samples Independent

In [30]:
n = 25  # observations per group

# Seeded generator so the simulated scores (and the box plot / t-test built on
# them in the following cells) are reproducible on Restart & Run All.
rng = np.random.default_rng(seed=42)

# Two groups of integer scores drawn uniformly from different half-open ranges:
# Grp_A from [10, 90) and Grp_B from [35, 150).
t_test_data_A = pd.DataFrame({'Group': ['Grp_A'] * n, 'Scores': rng.integers(10, 90, n)})
t_test_data_B = pd.DataFrame({'Group': ['Grp_B'] * n, 'Scores': rng.integers(35, 150, n)})

# Long-format frame holding both groups. ignore_index=True renumbers the rows
# 0..2n-1; without it the labels 0..n-1 would appear twice in the index.
t_test_data = pd.concat([t_test_data_A, t_test_data_B], ignore_index=True)
t_test_data.head()

Unnamed: 0,Group,Scores
0,Grp_A,78
1,Grp_A,40
2,Grp_A,58
3,Grp_A,88
4,Grp_A,21


In [33]:
# Side-by-side horizontal box plots of the scores, one box per group.
px.box(
    data_frame=t_test_data,
    x='Scores',
    y='Group',
    height=300,
)

In [32]:
# Independent two-sample t-test comparing the Grp_A scores with the Grp_B scores.
t_statistic, p_value = scipy.stats.ttest_ind(
    a=t_test_data_A['Scores'],
    b=t_test_data_B['Scores'],
)
# Report both numbers rounded to 5 decimal places.
print("t statistic is :", round(t_statistic, 5))
print("p value is :", round(p_value, 5))

t statistic is : -4.36128
p value is : 7e-05


### t-test : Two Samples Dependent (paired t-test)

In [34]:
# Paired observations: element k of `scores_pre` and element k of `scores_post`
# are treated as one pair by the paired t-test (20 pairs in total).
scores_pre = [
    18, 21, 16, 22, 19, 24, 17, 21, 23, 18,
    14, 16, 16, 19, 18, 20, 12, 22, 15, 17,
]
scores_post = [
    22, 25, 17, 24, 16, 29, 20, 23, 19, 20,
    15, 15, 18, 26, 18, 24, 18, 25, 19, 16,
]

In [35]:
# Paired (dependent-samples) t-test on the pre/post scores.
t_statistic, p_value = scipy.stats.ttest_rel(
    a=scores_pre,
    b=scores_post,
)
# Report both numbers rounded to 5 decimal places.
print("t statistic is :", round(t_statistic, 5))
print("p value is :", round(p_value, 5))

t statistic is : -3.23125
p value is : 0.00439


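#### Conducting a single-sample t-test on the paired differences gives the same p-value and the same |t|; the sign of t flips here because the differences are computed as post − pre, while `ttest_rel(scores_pre, scores_post)` tests pre − post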

In [36]:
# Per-pair change, post minus pre (positive = the post score is higher).
diffs = [post - pre for pre, post in zip(scores_pre, scores_post)]
diffs

[4, 4, 1, 2, -3, 5, 3, 2, -4, 2, 1, -1, 2, 7, 0, 4, 6, 3, 4, -1]

In [37]:
# One-sample t-test of the null hypothesis that the mean pairwise difference is 0.
t_statistic, p_value = scipy.stats.ttest_1samp(
    a=diffs,
    popmean=0,
)
# Report both numbers rounded to 5 decimal places.
print("t statistic is :", round(t_statistic, 5))
print("p value is :", round(p_value, 5))

t statistic is : 3.23125
p value is : 0.00439


## ANOVA - Analysis of Variance

In [38]:
# Load the one-way ANOVA data set from the CSV file in the working directory
# and display the resulting frame.
anova_data = pd.read_csv(
    filepath_or_buffer='one_way_anova.csv',
)
anova_data

Unnamed: 0,A,AC,AR,P
0,0.32,0.05,0.67,0.32
1,0.1,0.17,0.24,0.31
2,0.19,0.09,0.56,0.66
3,-0.13,0.28,0.67,0.42
4,0.31,0.38,0.49,0.41
5,0.02,0.39,0.31,0.21
6,0.06,0.17,0.5,0.2
7,-0.37,0.37,0.29,0.39
8,0.1,0.44,0.49,0.66
9,0.02,0.05,0.1,0.57


In [40]:
# One-way ANOVA treating each of the four columns (A, AC, AR, P) as one group.
f_statistic, p_value = scipy.stats.f_oneway(
    *(anova_data[column] for column in ('A', 'AC', 'AR', 'P'))
)
# Report the F statistic to 4 decimal places and the p-value to 5.
print("F statistic :", round(f_statistic, 4))
print("p-value :", round(p_value, 5))

F statistic : 10.184
p-value : 4e-05
