|
| 1 | +## t-tests and one-way ANOVA on the Titanic training data |
| 2 | +# Demonstrates one-sample, two-sample, and paired t-tests plus a one-way |
| 3 | +# ANOVA, each of which compares group means, using scipy.stats |
| 4 | +import numpy as np |
| 5 | +from scipy import stats |
| 6 | +import pandas as pd |
| 7 | +import os |
| 8 | + |
# Load the Titanic training set.
# The directory variable is named data_dir (not `dir`) so the builtin dir()
# is not shadowed for the rest of the script.
data_dir = 'F:/'
titanic_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
# DataFrame.info() writes its summary to stdout itself and returns None, so
# it must not be wrapped in print() (that would emit an extra "None" line).
titanic_train.info()
| 12 | + |
# One-sample t-test
# Checks whether a sample mean differs from a given population mean. Here a
# random 60% sample of fares is compared against the mean fare of the whole
# training set.
# 'Fare' is selected as a Series (single brackets) rather than a one-column
# DataFrame, so the test returns a scalar statistic and p-value instead of
# length-1 arrays.
fare_sample = titanic_train['Fare'].sample(frac=0.6)
one_sample_result = stats.ttest_1samp(a=fare_sample,
                                      popmean=titanic_train['Fare'].mean())
# A bare expression is silently discarded when run as a script; print it.
print(one_sample_result)
| 17 | + |
# Two-sample t-test
# Investigates whether the means of two independent samples differ from one
# another. Here: fares of passengers with Survived == 0 versus Survived == 1.
fare_by_non_survived = titanic_train.Fare[titanic_train.Survived == 0]
fare_by_survived = titanic_train.Fare[titanic_train.Survived == 1]
# equal_var=False requests Welch's t-test, which does not assume that the
# two groups share the same variance.
two_sample_result = stats.ttest_ind(a=fare_by_non_survived,
                                    b=fare_by_survived,
                                    equal_var=False)
# A bare expression is silently discarded when run as a script; print it.
print(two_sample_result)
| 26 | + |
# Paired t-test
# Tests for a difference between two measurements taken on the same group at
# different points in time. Example: a hospital checking whether a
# weight-loss drug works by weighing the same patients before and after
# treatment.
# Simulated data: 100 "before" weights drawn from N(250, 30), and "after"
# weights equal to "before" plus a change drawn from N(-1.25, 5).
before = stats.norm.rvs(scale=30, loc=250, size=100)
after = before + stats.norm.rvs(scale=5, loc=-1.25, size=100)
weight_df = pd.DataFrame({"weight_before": before,
                          "weight_after": after,
                          "weight_change": after - before})
# Bare expressions are silently discarded when run as a script, so the
# summary table and the test result are printed explicitly.
print(weight_df.describe())
paired_result = stats.ttest_rel(a=before, b=after)
print(paired_result)
| 39 | + |
# One-way ANOVA
# Tests whether the mean of a numeric variable differs across the levels of
# one categorical variable (do any of the group means differ from one
# another?). Here: Fare grouped by Pclass values 1, 2 and 3.
fare_by_class1 = titanic_train.Fare[titanic_train.Pclass == 1]
fare_by_class2 = titanic_train.Fare[titanic_train.Pclass == 2]
fare_by_class3 = titanic_train.Fare[titanic_train.Pclass == 3]

anova_result = stats.f_oneway(fare_by_class1, fare_by_class2, fare_by_class3)
# A bare expression is silently discarded when run as a script; print it.
print(anova_result)
0 commit comments