Skip to content

Commit

Permalink
Merge pull request #46 from cmmorrow/BUGFIX-Categorical_numeric_group_name_fix
Browse files Browse the repository at this point in the history

Bugfix categorical numeric group name fix
  • Loading branch information
cmmorrow committed Apr 3, 2019
2 parents 23d8f81 + 47117cf commit c33f397
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 2 deletions.
14 changes: 12 additions & 2 deletions sci_analysis/data/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,21 @@ def __init__(self, sequence=None, name=None, order=None, dropna=False):
self._summary = sequence.summary
else:
self._name = name
self._values = pd.Series(sequence)
try:
self._values = pd.Series(sequence).astype('category')
self._values.astype('category')
except TypeError:
self._values = pd.Series(flatten(sequence)).astype('category')
self._values = pd.Series(flatten(sequence))
except ValueError:
self._values = pd.Series([])
# Try to preserve the original dtype of the categories.
try:
if not any(self._values % 1):
self._values = self._values.astype(int)
except TypeError:
pass
self._values = self._values.astype('category')

if order is not None:
if not is_iterable(order):
order = [order]
Expand All @@ -91,6 +100,7 @@ def __init__(self, sequence=None, name=None, order=None, dropna=False):
'percents': (counts / counts.sum() * 100) if not all(counts == 0) else 0.0
})
self._summary['categories'] = self._summary.index.to_series()

if order is not None:
self._summary.sort_index(level=self._order, inplace=True, axis=0, na_position='last')
else:
Expand Down
69 changes: 69 additions & 0 deletions sci_analysis/test/test_cat_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,75 @@ def test_106_single_category(self):
self.assertEqual(str(test), output)
self.assertListEqual(test.results, [{'Frequency': 2, 'Category': 'c', 'Rank': 1, 'Percent': 100.0}])

def test_107_numeric_group_name(self):
    # Whole-number floats go in; the expected report lists the categories
    # without a fractional part (1, 3, 2, 4).
    expected = """
Overall Statistics
------------------
Total = 6
Number of Groups = 4
Statistics
----------
Rank Frequency Percent Category
--------------------------------------------------------
1 2 33.3333 1
1 2 33.3333 3
2 1 16.6667 2
2 1 16.6667 4 """
    values = [1., 2., 1., 3., 3., 4.]
    stats = CategoricalStatistics(values, display=False)
    self.assertEqual(str(stats), expected)

def test_108_year_group_name(self):
    # Integer year labels are expected to appear unchanged in the
    # Category column, each with a frequency of one.
    expected = """
Overall Statistics
------------------
Total = 5
Number of Groups = 5
Statistics
----------
Rank Frequency Percent Category
--------------------------------------------------------
1 1 20.0000 2015
1 1 20.0000 2016
1 1 20.0000 2017
1 1 20.0000 2018
1 1 20.0000 2019 """
    years = [2015, 2016, 2017, 2018, 2019]
    stats = CategoricalStatistics(years, display=False)
    self.assertEqual(str(stats), expected)

def test_109_float_group_name(self):
    # Genuinely fractional values must keep their float formatting in the
    # Category column (0.1230, 0.4560, 0.7890).
    input_array = [.123, .456, .789]
    # display=False for consistency with tests 107 and 108: only the
    # string form is asserted below, so the display flag is not needed
    # (display=True presumably echoes the report to stdout -- confirm
    # against CategoricalStatistics).
    exp = CategoricalStatistics(input_array, display=False)
    output = """
Overall Statistics
------------------
Total = 3
Number of Groups = 3
Statistics
----------
Rank Frequency Percent Category
--------------------------------------------------------
1 1 33.3333 0.1230
1 1 33.3333 0.4560
1 1 33.3333 0.7890 """
    self.assertEqual(str(exp), output)


# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit c33f397

Please sign in to comment.