forked from sassoftware/saspy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_sastabulate.py
202 lines (168 loc) · 10.1 KB
/
test_sastabulate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import unittest
from contextlib import redirect_stdout
from io import StringIO
from re import match
import pandas as pd
import saspy
from saspy.sastabulate import Tabulate, Class, Var, Statistic, Grouping
class TestSASTabulate(unittest.TestCase):
    """Tests for the ``tabulate`` accessor of a SAS data object, using ``sashelp.cars``.

    Every test opens its own SAS session in ``setUp`` and closes it in
    ``tearDown``, so a working saspy configuration is required to run them.
    """

    def setUp(self):
        """Open a SAS session and bind ``sashelp.cars`` to ``self.cars``."""
        # No cfgname is passed, so choosing the configuration is left to saspy.
        self.sas = saspy.SASsession()
        self.assertIsInstance(self.sas, saspy.SASsession, msg="sas = saspy.SASsession(...) failed")
        try:
            # load a sas-help dataset
            self.cars = self.sas.sasdata('cars', libref='sashelp', results='text')
        except Exception:
            # unittest does not call tearDown() when setUp() raises, so the
            # session has to be closed here to avoid leaking it.
            self.sas._endsas()
            raise

    def tearDown(self):
        """Close the SAS session opened by ``setUp``."""
        if self.sas:
            self.sas._endsas()

    def test_tabulate(self):
        """The data object exposes a ``Tabulate`` instance as ``.tabulate``."""
        # check for tabulate being available on data set
        self.assertIsInstance(self.cars.tabulate, Tabulate, msg="tabulate should be available on data sets")

    def test_classes(self):
        """Classes accept options, clone via ``.with_()``, serialize, and come in tuples."""
        # extract a class with options
        by_drivetrain = self.cars.tabulate.as_class('drivetrain', label="Drive", all="Total")
        self.assertIsInstance(by_drivetrain, Class, msg=".as_class() method failed")
        self.assertEqual(by_drivetrain.label, "Drive", msg=".as_class() 'label' keyword not applied")
        self.assertEqual(by_drivetrain.all, "Total", msg=".as_class() 'all' keyword not applied")

        # test apply option functionally using .with_()
        with_adjusted_label = by_drivetrain.with_(label="Train")
        self.assertEqual(with_adjusted_label.label, "Train", msg=".with_() method did not apply keyword")
        # should not mutate original; intended for composition
        self.assertEqual(by_drivetrain.label, "Drive", msg=".with_() should clone, not mutate")

        # test basic serialization
        self.assertEqual(str(by_drivetrain), "(drivetrain='Drive' ALL='Total')",
                         msg="error with serialization of tabulation class with arguments")

        # test get multiple classes as tuple
        by_origin, by_type = self.cars.tabulate.classes('origin', 'type')
        self.assertIsInstance(by_origin, Class, msg=".classes() method failed")
        self.assertIsInstance(by_type, Class, msg=".classes() method failed")

    def test_vars(self):
        """Vars accept options, clone via ``.with_()``, serialize, and come in tuples."""
        # extract a variable with options
        horsepower = self.cars.tabulate.as_var('horsepower', label="Horse")
        self.assertIsInstance(horsepower, Var, msg=".as_var() method failed")
        self.assertEqual(horsepower.label, "Horse", msg=".as_var() 'label' keyword not applied")

        # test apply option functionally using .with_()
        with_adjusted_label = horsepower.with_(label="Power")
        self.assertEqual(with_adjusted_label.label, "Power", msg=".with_() method did not apply keyword")
        # should not mutate original; intended for composition
        self.assertEqual(horsepower.label, "Horse", msg=".with_() should clone, not mutate")

        # test basic serialization
        self.assertEqual(str(horsepower), "horsepower='Horse'",
                         msg="error with serialization of tabulation var with arguments")

        # test get multiple vars as tuple
        enginesize, cylinders = self.cars.tabulate.vars('enginesize', 'cylinders')
        self.assertIsInstance(enginesize, Var, msg=".vars() method failed")
        self.assertIsInstance(cylinders, Var, msg=".vars() method failed")

    def test_stats(self):
        """Statistics accept options, clone via ``.with_()``, serialize, and come in tuples."""
        # create a statistic with options
        stdev = self.cars.tabulate.stat('std', label="StDev", format='5.2')
        self.assertIsInstance(stdev, Statistic, msg=".stat() method failed")
        self.assertEqual(stdev.label, "StDev", msg=".stat() 'label' keyword not applied")
        self.assertEqual(stdev.format, "5.2", msg=".stat() 'format' keyword not applied")

        # test apply option functionally using .with_()
        with_adjusted_format = stdev.with_(format="6.2")
        self.assertEqual(with_adjusted_format.format, "6.2", msg=".with_() method did not apply keyword")
        # should not mutate original; intended for composition
        self.assertEqual(stdev.format, "5.2", msg=".with_() should clone, not mutate")

        # test basic serialization
        self.assertEqual(str(stdev), "std='StDev'*f=5.2",
                         msg="error with serialization of tabulation statistic with arguments")

        # test get multiple stats as tuple
        mean, n = self.cars.tabulate.stats('mean', 'n')
        self.assertIsInstance(mean, Statistic, msg=".stats() method failed")
        self.assertIsInstance(n, Statistic, msg=".stats() method failed")

    def test_hierarchy(self):
        """``|`` concatenates elements, ``*`` nests them, and invalid nestings raise."""
        by_origin, by_type = self.cars.tabulate.classes('origin', 'type')
        enginesize, cylinders = self.cars.tabulate.vars('enginesize', 'cylinders')
        mean, n = self.cars.tabulate.stats('mean', 'n')

        # test valid same-level concatenations
        concat_classes = by_origin | by_type
        self.assertIsInstance(concat_classes, Grouping, msg="concatenation of classes failed")
        concat_vars = enginesize | cylinders
        self.assertIsInstance(concat_vars, Grouping, msg="concatenation of vars failed")
        concat_stats = mean | n
        self.assertIsInstance(concat_stats, Grouping, msg="concatenation of stats failed")

        # test valid nestings; applies right side as child of left side
        nest_classes = by_origin * by_type
        self.assertIsInstance(nest_classes.child, Class, msg="nesting of classes failed")
        nest_class_var = by_origin * enginesize
        self.assertIsInstance(nest_class_var.child, Var, msg="nesting of var under class failed")
        nest_var_stat = enginesize * mean
        self.assertIsInstance(nest_var_stat.child, Statistic, msg="nesting of statistic under var failed")

        # nesting of concatenations should work
        nest_concats = (by_origin | by_type) * (mean | n)
        self.assertIsInstance(nest_concats, Grouping, msg="nesting of concatenated elements failed")
        self.assertIsInstance(nest_concats.child, Grouping, msg="nesting of concatenated elements failed")

        # test invalid nestings for appropriate rejection
        self.assertRaises(SyntaxError, lambda: enginesize * by_origin)  # class under var
        self.assertRaises(SyntaxError, lambda: mean * enginesize)       # var under stat
        self.assertRaises(SyntaxError, lambda: n * mean)                # stat under stat
        self.assertRaises(SyntaxError, lambda: mean * by_origin)        # class under stat

    def test_composition_serialization(self):
        """A fragment combining concatenation, nesting and options serializes as expected."""
        by_origin, by_type, by_drivetrain = self.cars.tabulate.classes('origin', 'type', 'drivetrain')
        # only enginesize takes part in the composition below
        enginesize, _ = self.cars.tabulate.vars('enginesize', 'cylinders')
        mean, n = self.cars.tabulate.stats('mean', 'n')

        # compose a larger fragment using all options, check its serialization
        my_tabulation = (
            (by_origin | by_type) * by_drivetrain.with_(all="Total") * enginesize
            * (mean.with_(label="Average") | n)
        )
        self.assertEqual(
            str(my_tabulation),
            "((origin type) * (drivetrain ALL='Total') * enginesize * (mean='Average' n))",
            msg="serialized table composition did not match expectation"
        )

    def test_procedure(self):
        """Check, statement by statement, the SAS code that ``.table()`` generates."""
        by_origin, by_type, by_drivetrain = self.cars.tabulate.classes('origin', 'type', 'drivetrain')
        enginesize, cylinders = self.cars.tabulate.vars('enginesize', 'cylinders')
        mean, n = self.cars.tabulate.stats('mean', 'n')

        # check the full generated syntax of a command
        def get_generated_code(method) -> dict:
            """Run ``method`` with teach-me mode on and index what it printed.

            ``method`` is a zero-argument callable. Everything written to
            stdout while it runs is captured, and each captured line that
            begins with a word followed by whitespace is stored under that
            word. If the same leading word occurs more than once, the last
            line wins.
            """
            captured = StringIO()
            with redirect_stdout(captured):
                self.sas.teach_me_SAS(True)
                try:
                    method()
                finally:
                    # always switch teach-me mode back off, even if the call raised
                    self.sas.teach_me_SAS(False)

            # break submitted code into statements for assertions
            match_keyword = r'^\s*(\w+?)\s'
            statements_by_keyword = {}
            for line in captured.getvalue().split('\n'):
                found = match(match_keyword, line)
                if found:
                    statements_by_keyword[found.group(1)] = line
            return statements_by_keyword

        def invocation():
            return self.cars.tabulate.table(
                where="cylinders > 0",
                left=by_drivetrain.with_(all="Total") * by_type,
                top=by_origin * (enginesize | cylinders) * (mean | n),
            )

        statements = get_generated_code(invocation)
        self.assertIn("proc tabulate data=sashelp.cars", statements['proc'])

        # gathered all classes used?
        expected_classes = {"drivetrain", "origin", "type"}
        # split() without an argument tolerates repeated blanks between names
        classes_sent = statements['class'].replace(';', '').split()
        self.assertTrue(expected_classes.issubset(set(classes_sent)), msg="classes were not gathered")

        # gathered all vars used?
        expected_vars = {"cylinders", "enginesize"}
        vars_sent = statements['var'].replace(';', '').split()
        self.assertTrue(expected_vars.issubset(set(vars_sent)), msg="vars were not gathered")

        # passed the additional valid "where" option?
        self.assertIn('where cylinders > 0', statements['where'], msg="additional options (where) failed")

        # check table statement
        self.assertIn(
            "table (drivetrain ALL='Total') * type, origin * ((enginesize cylinders) * (mean n))",
            statements['table'],
            msg="generated table syntax did not match expectation"
        )

    def test_to_dataframe(self):
        """``.to_dataframe()`` returns a DataFrame with the expected index and column names."""
        by_origin, by_type, by_drivetrain = self.cars.tabulate.classes('origin', 'type', 'drivetrain')
        enginesize, cylinders = self.cars.tabulate.vars('enginesize', 'cylinders')
        mean, n = self.cars.tabulate.stats('mean', 'n')

        # generate a MultiIndex DataFrame instead of printing results
        frame = self.cars.tabulate.to_dataframe(
            left=by_drivetrain.with_(all="Total") * by_type *
                 by_origin * (enginesize | cylinders) * (mean | n),
        )

        # verify that the frame was generated correctly
        self.assertIsInstance(frame, pd.DataFrame, msg=".to_dataframe() method failed")
        self.assertEqual(set(frame.index.names), {'Type', 'Origin', 'DriveTrain'})
        self.assertEqual(set(frame.columns), {'Cylinders_N', 'Cylinders_Mean', 'EngineSize_Mean', 'EngineSize_N'})