## Chi-Squared Test

In [1]:
import seaborn as sns
from scipy.stats import chi2_contingency
import pandas as pd
# Load seaborn's "tips" example dataset into a DataFrame and preview the first five rows.
data = sns.load_dataset(name='tips')
data.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [2]:
# Contingency table of observation counts: one row per `smoker` value,
# one column per `time` value.
crosstab = pd.crosstab(index=data['smoker'], columns=data['time'])
print(crosstab)

time    Lunch  Dinner
smoker               
Yes        23      70
No         45     106


In [3]:
# Chi-square test of independence on the observed contingency table.
# H0: the row variable and the column variable are independent.
# NOTE(review): SciPy applies Yates' continuity correction by default when the
# table has a single degree of freedom -- confirm that this is intended here.
alpha = 0.05  # significance level
stat, p, dof, expected = chi2_contingency(crosstab)

print("p value is", p)

# Reject H0 only when the p-value does not exceed the significance level.
# NOTE(review): failing to reject H0 means "no evidence of a relation", which is
# weaker than the wording of the message printed below.
verdict = (
    '(Reject H0) There is relation between the variables'
    if p <= alpha
    else '(Failed to reject H0) There is no relation between the variables'
)
print(verdict)


p value is 0.4771485672079724
(Failed to reject H0) There is no relation between the variables


## T-Test

In [4]:
from numpy.random import randn, seed
from scipy.stats import ttest_ind
# Fix the global NumPy seed so the two samples, and therefore the printed
# statistic, are identical on every Restart & Run All.
seed(42)
# generate two independent samples of 100 standard-normal draws each;
# the first one is shifted by +50
data1 = randn(100) + 50
data2 = randn(100)
# Calculate the T-test for the means of two independent samples of scores.
# H0: both samples have the same mean.
stat, p = ttest_ind(data1, data2)
print('Statistics=%.3f, p=%.3f' % (stat, p))
# interpret: the test compares sample means, so "distributions" in the messages
# below should be read as "means"
alpha = 0.05  # significance level
if p > alpha:
    print('Same distributions (fail to reject H0)')
else:
    print('Different distributions (reject H0)')

Statistics=344.763, p=0.000
Different distributions (reject H0)


## ANOVA Test

In [5]:
from numpy.random import randn, seed
from scipy.stats import f_oneway
# Fix the global NumPy seed so the four samples, and therefore the printed
# statistic, are identical on every Restart & Run All. `randn` is imported
# here so the cell does not depend on an earlier cell having imported it.
seed(42)
# four independent samples of 100 standard-normal draws; only the last is shifted (+51)
data1 = randn(100)
data2 = randn(100)
data3 = randn(100)
data4 = randn(100) + 51
# compare samples with a one-way ANOVA; H0: all groups have the same mean
stat, p = f_oneway(data1, data2, data3, data4)
print('Statistics=%.3f, p=%.3f' % (stat, p))
# interpret: the test compares group means, so "distributions" in the messages
# below should be read as "means"
alpha = 0.05  # significance level
if p > alpha:
    print('Same distributions (fail to reject H0)')
else:
    print('Different distributions (reject H0)')

Statistics=69912.820, p=0.000
Different distributions (reject H0)
