/
test_multiset.rb
158 lines (147 loc) · 5.64 KB
/
test_multiset.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
# Tests for Statsample::Multiset (a keyed collection of datasets sharing the
# same fields) and for Statsample::StratifiedSample built on top of it.
class StatsampleMultisetTestCase < MiniTest::Unit::TestCase
  # Builds an eight-case dataset and splits it on 'x'; test_each expects the
  # resulting @ms to hold one dataset keyed 'a' and one keyed 'b'.
  def setup
    @x = %w{a a a a b b b b}.to_vector
    @y = [1, 2, 3, 4, 5, 6, 7, 8].to_scale
    @z = [10, 11, 12, 13, 14, 15, 16, 17].to_scale
    @ds = { 'x' => @x, 'y' => @y, 'z' => @z }.to_dataset
    @ms = @ds.to_multiset_by_split('x')
  end

  # Datasets added under a key can be read back by that key, and a dataset
  # whose fields differ from the multiset's fields is rejected.
  def test_creation
    v1a = [1, 2, 3, 4, 5].to_vector
    v2a = [11, 21, 31, 41, 51].to_vector
    v3a = [21, 23, 34, 45, 56].to_vector
    ds1 = { 'v1' => v1a, 'v2' => v2a, 'v3' => v3a }.to_dataset

    v1b = [15, 25, 35, 45, 55].to_vector
    v2b = [11, 21, 31, 41, 51].to_vector
    v3b = [21, 23, 34, 45, 56].to_vector
    ds2 = { 'v1' => v1b, 'v2' => v2b, 'v3' => v3b }.to_dataset

    ms = Statsample::Multiset.new(['v1', 'v2', 'v3'])
    ms.add_dataset('ds1', ds1)
    ms.add_dataset('ds2', ds2)

    assert_equal(ds1, ms['ds1'])
    assert_equal(ds2, ms['ds2'])
    assert_equal(v1a, ms['ds1']['v1'])
    assert_not_equal(v1b, ms['ds1']['v1'])

    # ds3 lacks 'v3'. The key must be passed along with the dataset: with a
    # single argument the ArgumentError would come from the arity mismatch
    # alone and the rejection of mismatched fields would never be exercised.
    ds3 = { 'v1' => v1b, 'v2' => v2b }.to_dataset
    assert_raise ArgumentError do
      ms.add_dataset('ds3', ds3)
    end
  end

  # Multiset.new_empty_vectors must match a multiset assembled by hand from
  # datasets of empty vectors, both in fields and in each keyed dataset.
  def test_creation_empty
    fields = %w{id age name}
    ms = Statsample::Multiset.new_empty_vectors(%w{id age name}, %w{male female})

    ds_male = { 'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector }.to_dataset(fields)
    ds_female = { 'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector }.to_dataset(fields)

    ms2 = Statsample::Multiset.new(fields)
    ms2.add_dataset('male', ds_male)
    ms2.add_dataset('female', ds_female)

    assert_equal(ms2.fields, ms.fields)
    assert_equal(ms2['male'], ms['male'])
    assert_equal(ms2['female'], ms['female'])
  end

  # Splitting on a single field yields one dataset per distinct value, with
  # the cases kept in their original order.
  def test_to_multiset_by_split_one
    sex = %w{m m m m m f f f f m}.to_vector(:nominal)
    city = %w{London Paris NY London Paris NY London Paris NY Tome}.to_vector(:nominal)
    age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:scale)
    ds = { 'sex' => sex, 'city' => city, 'age' => age }.to_dataset

    ms = ds.to_multiset_by_split('sex')

    assert_equal(2, ms.n_datasets)
    assert_equal(%w{f m}, ms.datasets.keys.sort)
    assert_equal(6, ms['m'].cases)
    assert_equal(4, ms['f'].cases)
    assert_equal(%w{London Paris NY London Paris Tome}, ms['m']['city'].to_a)
    assert_equal([34, 33, 35, 36], ms['f']['age'].to_a)
  end

  # Splitting on several fields keys each dataset by the array of values
  # taken by those fields (2 sexes x 2 cities x 2 hair colours = 8 datasets).
  def test_to_multiset_by_split_multiple
    sex = %w{m m m m m m m m m m f f f f f f f f f f}.to_vector(:nominal)
    city = %w{London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris}.to_vector(:nominal)
    hair = %w{blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black}.to_vector(:nominal)
    age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:scale)
    ds = { 'sex' => sex, 'city' => city, 'hair' => hair, 'age' => age }.to_dataset(%w{sex city hair age})

    ms = ds.to_multiset_by_split('sex', 'city', 'hair')

    assert_equal(8, ms.n_datasets)
    assert_equal(3, ms[%w{m London blonde}].cases)
    # Second stratum checked on the other sex rather than repeating the
    # assertion above: rows 13, 17 and 18 (1-based) are f/London/blonde.
    assert_equal(3, ms[%w{f London blonde}].cases)
    assert_equal(1, ms[%w{m Paris black}].cases)
  end

  # Proportion estimated by a stratified sample built from two strata with
  # the sizes 50 and 100 given for 'd1' and 'd2'.
  def test_stratum_proportion
    ds1 = { 'q1' => [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0].to_vector }.to_dataset
    ds2 = { 'q1' => [1, 1, 1, 1, 1, 1, 1, 0, 0].to_vector }.to_dataset
    assert_equal(5.0 / 12, ds1['q1'].proportion)
    assert_equal(7.0 / 9, ds2['q1'].proportion)

    ms = Statsample::Multiset.new(['q1'])
    ms.add_dataset('d1', ds1)
    ms.add_dataset('d2', ds2)

    ss = Statsample::StratifiedSample.new(ms, { 'd1' => 50, 'd2' => 100 })
    assert_in_delta(0.655, ss.proportion('q1'), 0.01)
    assert_in_delta(0.345, ss.proportion('q1', 0), 0.01)
  end

  # Sizes, mean and standard error (without replacement) of a stratified
  # sample made of two equally sized strata of 18 sampled cases each.
  def test_stratum_scale
    boys = { 'test' => [50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale) }.to_dataset
    girls = { 'test' => [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale) }.to_dataset

    ms = Statsample::Multiset.new(['test'])
    ms.add_dataset('boys', boys)
    ms.add_dataset('girls', girls)

    ss = Statsample::StratifiedSample.new(ms, { 'boys' => 10000, 'girls' => 10000 })

    assert_equal(2, ss.strata_number)
    assert_equal(20000, ss.population_size)
    assert_equal(10000, ss.stratum_size('boys'))
    assert_equal(10000, ss.stratum_size('girls'))
    assert_equal(36, ss.sample_size)
    assert_equal(75, ss.mean('test'))
    assert_in_delta(1.45, ss.standard_error_wor('test'), 0.01)
    # Both standard-error implementations are expected to agree.
    assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'), 0.00001)
  end

  # Multiset#each yields every key together with its dataset; the vectors
  # collected per key must match the two halves of the setup data.
  def test_each
    xpe = {
      'a' => %w{a a a a}.to_vector,
      'b' => %w{b b b b}.to_vector
    }
    ype = {
      'a' => [1, 2, 3, 4].to_scale,
      'b' => [5, 6, 7, 8].to_scale
    }
    zpe = {
      'a' => [10, 11, 12, 13].to_scale,
      'b' => [14, 15, 16, 17].to_scale
    }

    xp = {}
    yp = {}
    zp = {}
    @ms.each { |k, ds|
      xp[k] = ds['x']
      yp[k] = ds['y']
      zp[k] = ds['z']
    }

    assert_equal(xpe, xp)
    assert_equal(ype, yp)
    assert_equal(zpe, zp)
  end

  # Multiset#union with a block: each dataset is recoded inside the block
  # (stratum 'a' scaled by r1, the other by r2) and the union must contain
  # the recoded values.
  def test_multiset_union_with_block
    r1 = rand()
    r2 = rand()
    ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].to_scale
    ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].to_scale

    ds2 = @ms.union { |k, ds|
      factor = k == 'a' ? r1 : r2
      ds['y'].recode! { |v| v * factor }
      ds['z'].recode! { |v| v * factor }
    }

    assert_equal(ye, ds2['y'])
    assert_equal(ze, ds2['z'])
  end

  # Multiset#union without a block: the datasets are recoded beforehand via
  # #each and the union must contain the recoded values.
  def test_multiset_union
    r1 = rand()
    r2 = rand()
    ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].to_scale
    ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].to_scale

    @ms.each { |k, ds|
      factor = k == 'a' ? r1 : r2
      ds['y'].recode! { |v| v * factor }
      ds['z'].recode! { |v| v * factor }
    }
    ds2 = @ms.union

    assert_equal(ye, ds2['y'])
    assert_equal(ze, ds2['z'])
  end
end