Skip to content

Commit

Permalink
Unit testing for two-group proportion differences
Browse files: browse the repository at this point in the history
  • Loading branch information
chen0040 committed Jun 5, 2017
1 parent fe8c001 commit 4ceb7da
Show file tree
Hide file tree
Showing 3 changed files with 173 additions and 13 deletions.
4 changes: 2 additions & 2 deletions pysie/dsl/one_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ def __init__(self, sampling_distribution, p_null, significance_level=None):
else:
simulated_proportions = self.simulate()

self.p_value_one_tail = sum(x for x in simulated_proportions if x > sampling_distribution.point_estimate) / 1000.0
self.p_value_two_tail = self.p_value_one_tail * 2
self.p_value_one_tail = sum(1.0 for x in simulated_proportions if x > sampling_distribution.point_estimate) / 1000.0
self.p_value_two_tail = self.p_value_one_tail

if significance_level is not None:
self.reject_mean_null = (self.p_value_one_tail < significance_level,
Expand Down
44 changes: 33 additions & 11 deletions pysie/dsl/two_groups.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import random

from pysie.stats.distributions import DistributionFamily
from scipy.stats import norm, t
import math
Expand Down Expand Up @@ -37,12 +39,16 @@ def __init__(self, sampling_distribution, significance_level=None):
self.reject_mean_same = (self.p_value_one_tail < significance_level,
self.p_value_two_tail < significance_level)

def will_reject(self, significance_level):
    """Report whether each stored p-value falls below a significance level.

    :param significance_level: threshold the p-values are compared against
    :return: tuple ``(one_tail, two_tail)``; each entry is True when the
        corresponding p-value is strictly less than ``significance_level``
    """
    one_tail_rejected = self.p_value_one_tail < significance_level
    two_tail_rejected = self.p_value_two_tail < significance_level
    return one_tail_rejected, two_tail_rejected


class ProportionDiffTesting(object):
sampling_distribution = None
p_value_one_tail = None
p_value_two_tail = None
mean_null = None
p_null = None
test_statistic = None
significance_level = None
reject_proportion_same = None
Expand All @@ -55,7 +61,7 @@ def __init__(self, sampling_distribution, significance_level=None):
self.significance_level = significance_level

if self.sampling_distribution.distribution_family == DistributionFamily.normal:
standard_error_null = math.sqrt(p_null * (1 - p_null) / sampling_distribution.sample_size)
standard_error_null = math.sqrt(p_null * (1 - p_null) / sampling_distribution.grp1_sample_size + p_null * (1-p_null) / sampling_distribution.grp2_sample_size)
Z = sampling_distribution.point_estimate / standard_error_null
self.test_statistic = Z
pf = norm.cdf(Z)
Expand All @@ -64,15 +70,31 @@ def __init__(self, sampling_distribution, significance_level=None):
self.p_value_one_tail = 1 - pf
self.p_value_two_tail = self.p_value_one_tail * 2
else:
standard_error_null = math.sqrt(p_null * (1 - p_null) / sampling_distribution.sample_size)
td_df = sampling_distribution.point_estimate / standard_error_null
self.test_statistic = td_df
pf = t.cdf(td_df, sampling_distribution.df)
if td_df < 0:
pf = 1 - pf
self.p_value_one_tail = 1 - pf
self.p_value_two_tail = self.p_value_one_tail * 2
simulated_proportions = self.simulate()
diff = sampling_distribution.grp1_point_estimate - sampling_distribution.grp2_point_estimate
pf = sum(1.0 for x in simulated_proportions if x > diff) / 1000.0
self.p_value_one_tail = pf
self.p_value_two_tail = sum(1.0 for x in simulated_proportions if x > diff or x < -diff) / 1000.0

if significance_level is not None:
self.reject_proportion_same = (self.p_value_one_tail < significance_level,
self.p_value_two_tail < significance_level)
self.p_value_two_tail < significance_level)

def simulate(self):
    """Simulate differences between two group proportions under ``p_null``.

    Runs 1000 rounds. In each round one uniform random number is drawn per
    member of group 1, then one per member of group 2; a draw counts as a
    success when it is ``<= self.p_null``. The round's value is the
    group 1 success proportion minus the group 2 success proportion.

    :return: list of the 1000 simulated differences, sorted ascending
    """
    differences = []
    for _round in range(1000):
        # Group 1 is drawn before group 2 so the random stream is consumed
        # in the same order on every round.
        group1_size = self.sampling_distribution.grp1_sample_size
        group1_hits = sum(1 for _trial in range(group1_size)
                          if random.random() <= self.p_null)

        group2_size = self.sampling_distribution.grp2_sample_size
        group2_hits = sum(1 for _trial in range(group2_size)
                          if random.random() <= self.p_null)

        # float() keeps the division fractional on Python 2 as well.
        proportion1 = float(group1_hits) / self.sampling_distribution.grp1_sample_size
        proportion2 = float(group2_hits) / self.sampling_distribution.grp2_sample_size
        differences.append(proportion1 - proportion2)

    differences.sort()
    return differences

def will_reject(self, significance_level):
    """Compare the stored one-tail and two-tail p-values with a threshold.

    :param significance_level: threshold the p-values are compared against
    :return: tuple ``(one_tail, two_tail)`` of booleans, each True when the
        matching p-value is strictly below ``significance_level``
    """
    verdicts = tuple(p_value < significance_level
                     for p_value in (self.p_value_one_tail, self.p_value_two_tail))
    return verdicts
138 changes: 138 additions & 0 deletions tests/dsl/two_group_unit_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import unittest
from random import random

from numpy.random.mtrand import normal

from pysie.dsl.two_groups import MeanDiffTesting, ProportionDiffTesting
from pysie.stats.distributions import MeanDiffSamplingDistribution, DistributionFamily, \
ProportionDiffSamplingDistribution
from pysie.stats.samples import Sample, SampleDistribution


class MeanDiffTestingUnitTest(unittest.TestCase):
    """Checks MeanDiffTesting on two randomly drawn numeric samples.

    Both tests draw group 1 from normal(0.0, 1.0) and group 2 from
    normal(0.09, 2.0), and assert that neither the one-tail nor the two-tail
    test rejects at the 0.01 level. Only the sample sizes and the expected
    distribution family differ between them.
    """

    def _assert_means_not_rejected(self, first_size, second_size, expected_family):
        """Build both samples, run the test and assert nothing is rejected.

        :param first_size: number of values drawn for group 1
        :param second_size: number of values drawn for group 2
        :param expected_family: DistributionFamily member the sampling
            distribution is expected to report
        """
        # NOTE(review): the draws are unseeded, so the printed p-values vary
        # from run to run.
        first_group, second_group = Sample(), Sample()
        plan = (
            (first_group, 0.0, 1.0, first_size),
            (second_group, 0.09, 2.0, second_size),
        )
        for group, mu, sigma, size in plan:
            for _ in range(size):
                group.add_numeric(normal(mu, sigma))

        distribution = MeanDiffSamplingDistribution(
            grp1_sample_distribution=SampleDistribution(first_group),
            grp2_sample_distribution=SampleDistribution(second_group))
        self.assertEqual(distribution.distribution_family, expected_family)

        outcome = MeanDiffTesting(sampling_distribution=distribution)
        print('one tail p-value: ' + str(outcome.p_value_one_tail))
        print('two tail p-value: ' + str(outcome.p_value_two_tail))

        one_tail, two_tail = outcome.will_reject(0.01)
        print('will reject mean_1 == mean_2 (one-tail) ? ' + str(one_tail))
        print('will reject mean_1 == mean_2 (two-tail) ? ' + str(two_tail))
        self.assertFalse(one_tail)
        self.assertFalse(two_tail)

    def test_normal(self):
        """Sizes 31 and 36 are expected to report the normal family."""
        self._assert_means_not_rejected(31, 36, DistributionFamily.normal)

    def test_student(self):
        """Sizes 29 and 28 are expected to report the Student's t family."""
        self._assert_means_not_rejected(29, 28, DistributionFamily.student_t)


class ProportionDiffTestingUnitTest(unittest.TestCase):
    """Checks ProportionDiffTesting on two randomly drawn categorical samples.

    Each group member is labelled "OK" when a uniform draw is <= 0.6
    (group 1) or <= 0.61 (group 2), otherwise "CANCEL". Both tests assert
    that neither the one-tail nor the two-tail test rejects at the 0.01
    level. Only the group size and expected distribution family differ.
    """

    def _assert_proportions_not_rejected(self, group_size, expected_family):
        """Build both samples, run the test and assert nothing is rejected.

        :param group_size: number of members drawn for each group
        :param expected_family: DistributionFamily member the sampling
            distribution is expected to report
        """
        # NOTE(review): the draws are unseeded, so the printed p-values vary
        # from run to run.
        first_group, second_group = Sample(), Sample()
        for group, ok_threshold in ((first_group, 0.6), (second_group, 0.61)):
            for _ in range(group_size):
                label = "OK" if random() <= ok_threshold else "CANCEL"
                group.add_category(label)

        distribution = ProportionDiffSamplingDistribution(
            grp1_sample_distribution=SampleDistribution(
                first_group, categorical_value="OK"),
            grp2_sample_distribution=SampleDistribution(
                second_group, categorical_value="OK"))
        self.assertEqual(distribution.distribution_family, expected_family)

        outcome = ProportionDiffTesting(sampling_distribution=distribution)
        print('one tail p-value: ' + str(outcome.p_value_one_tail))
        print('two tail p-value: ' + str(outcome.p_value_two_tail))

        one_tail, two_tail = outcome.will_reject(0.01)
        print('will reject p_1 == p_2 (one-tail) ? ' + str(one_tail))
        print('will reject p_1 == p_2 (two-tail) ? ' + str(two_tail))
        self.assertFalse(one_tail)
        self.assertFalse(two_tail)

    def test_normal(self):
        """Groups of 100 are expected to report the normal family."""
        self._assert_proportions_not_rejected(100, DistributionFamily.normal)

    def test_student(self):
        """Groups of 20 are expected to report the simulation family."""
        self._assert_proportions_not_rejected(20, DistributionFamily.simulation)

if __name__ == '__main__':
unittest.main()

0 comments on commit 4ceb7da

Please sign in to comment.