Skip to content

Commit 170253c

Browse files
author
Algorithmica
authored
Add files via upload
1 parent 4311a1b commit 170253c

File tree

3 files changed

+102
-0
lines changed

3 files changed

+102
-0
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
##one-way ANOVA test
2+
#it tests whether the mean of a numeric variable differs across the
3+
#levels of a categorical variable
4+
from scipy import stats
5+
import pandas as pd
6+
import os
7+
8+
# Folder holding the Titanic training data. Named data_dir so that the
# builtin dir() is not shadowed.
data_dir = 'F:/'
titanic_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
# DataFrame.info() prints its summary itself and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None".
titanic_train.info()

#anova test
#The one-way ANOVA tests whether the mean of some numeric variable differs
#across the levels of one categorical variable (do any of the group means differ from one another?)
fare_by_class1 = titanic_train.Fare[titanic_train.Pclass == 1]
fare_by_class2 = titanic_train.Fare[titanic_train.Pclass == 2]
fare_by_class3 = titanic_train.Fare[titanic_train.Pclass == 3]

# Print the test result: a bare expression statement shows nothing when the
# file is run as a script.
print(stats.f_oneway(fare_by_class1, fare_by_class2, fare_by_class3))
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
##chi-square goodness of fit test
2+
#it tests whether the distribution of sample categorical data matches an
3+
#expected distribution
4+
import numpy as np
5+
from scipy import stats
6+
import pandas as pd
7+
import os
8+
9+
n_trials = 120
n_outcomes = 6
# Simulate n_trials rolls of a die with faces 1..n_outcomes.
result = np.random.randint(1, n_outcomes + 1, n_trials)

# Count every face explicitly so that a face which never came up is reported
# as 0 and the observed counts always line up with the expected ones.
outcomes = np.arange(1, n_outcomes + 1)
observed = np.bincount(result, minlength=n_outcomes + 1)[1:]
for outcome, count in zip(outcomes, observed):
    print(outcome, count)

# Each face is expected n_trials / n_outcomes times. The values are kept as
# floats: truncating them to integers would make the expected total differ
# from the observed total whenever n_trials is not a multiple of n_outcomes.
expected = np.full(n_outcomes, n_trials / n_outcomes)
print(stats.chisquare(f_obs=observed, f_exp=expected))

# Hand-picked count vectors (each sums to 120) tested against the same
# expected frequencies. Looping over them runs the test for every vector
# instead of overwriting `observed` several times and testing none of them.
alternative_counts = [
    [15, 29, 18, 19, 20, 19],
    [20, 20, 20, 20, 20, 20],
    [30, 10, 20, 20, 20, 20],
    [10, 30, 30, 10, 10, 30],
]
for observed in alternative_counts:
    print(observed, stats.chisquare(f_obs=observed, f_exp=expected))
24+
25+
#chi-square independence test
26+
#The chi-squared test of independence tests whether two categorical variables
27+
#are independent
28+
# Folder holding the Titanic training data. Named data_dir so that the
# builtin dir() is not shadowed.
data_dir = 'F:/'
titanic_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
# DataFrame.info() prints its summary itself and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None".
titanic_train.info()

# Contingency table of Sex against Survived, then the independence test.
# The result is printed because a bare expression shows nothing in a script.
observed = pd.crosstab(titanic_train.Sex, titanic_train.Survived)
print(stats.chi2_contingency(observed=observed))

# Same test for Pclass against Survived.
observed = pd.crosstab(titanic_train.Pclass, titanic_train.Survived)
print(stats.chi2_contingency(observed=observed))
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
##t-tests and one-way ANOVA
2+
#they test whether a sample mean differs from a reference mean or whether
3+
#the means of two or more groups differ from one another
4+
import numpy as np
5+
from scipy import stats
6+
import pandas as pd
7+
import os
8+
9+
# Folder holding the Titanic training data. Named data_dir so that the
# builtin dir() is not shadowed.
data_dir = 'F:/'
titanic_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
# DataFrame.info() prints its summary itself and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None".
titanic_train.info()
12+
13+
#one sample t-test
#A one-sample t-test checks whether a sample mean differs from the population mean.
# Fare is taken as a 1-D Series (not a one-column DataFrame) and its mean as
# a scalar, so the test yields a single statistic and p-value instead of
# array-valued results.
fare_sample = titanic_train.Fare.sample(frac=0.6)
# Printed because a bare expression shows nothing when run as a script.
print(stats.ttest_1samp(a=fare_sample, popmean=titanic_train.Fare.mean()))
17+
18+
#two sample t-test
#A two-sample t-test investigates whether the means of two independent data samples
#differ from one another.
fare_by_non_survived = titanic_train.Fare[titanic_train.Survived == 0]
fare_by_survived = titanic_train.Fare[titanic_train.Survived == 1]
# equal_var=False requests Welch's t-test, which does not assume that the
# two groups share the same variance.
welch_result = stats.ttest_ind(a=fare_by_non_survived,
                               b=fare_by_survived,
                               equal_var=False)
# Printed because a bare expression shows nothing when run as a script.
print(welch_result)
26+
27+
#paired t-test
#testing differences between samples of the same group at different points in time.
#a hospital might want to test whether a weight-loss drug works
#by checking the weights of the same group patients before and after treatment.
#A paired t-test lets you check whether the means of samples from the same group differ.
# One sample size for both draws keeps the before/after arrays aligned.
n_patients = 100
# Simulated weights before treatment: normal with mean 250 and std 30.
before = stats.norm.rvs(scale=30, loc=250, size=n_patients)
# Weights after treatment: each patient's change is normal with mean -1.25
# and std 5, added to that patient's own "before" value.
after = before + stats.norm.rvs(scale=5, loc=-1.25, size=n_patients)
weight_df = pd.DataFrame({"weight_before": before,
                          "weight_after": after,
                          "weight_change": after - before})
# Both results are printed: as bare expressions they were computed and then
# thrown away when the file was run as a script.
print(weight_df.describe())
print(stats.ttest_rel(a=before, b=after))
39+
40+
#anova test
#The one-way ANOVA tests whether the mean of some numeric variable differs
#across the levels of one categorical variable (do any of the group means differ from one another?)
fare_by_class1 = titanic_train.Fare[titanic_train.Pclass == 1]
fare_by_class2 = titanic_train.Fare[titanic_train.Pclass == 2]
fare_by_class3 = titanic_train.Fare[titanic_train.Pclass == 3]

# Print the test result: a bare expression statement shows nothing when the
# file is run as a script.
print(stats.f_oneway(fare_by_class1, fare_by_class2, fare_by_class3))

0 commit comments

Comments
 (0)