Skip to content

Commit

Permalink
Merge 5a825b1 into 86db076
Browse files Browse the repository at this point in the history
  • Loading branch information
cmmorrow committed Apr 1, 2019
2 parents 86db076 + 5a825b1 commit f69fd53
Show file tree
Hide file tree
Showing 6 changed files with 317 additions and 187 deletions.
11 changes: 8 additions & 3 deletions sci_analysis/analysis/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,6 @@ def __init__(self, xdata, ydata=None, groups=None, alpha=None, display=True):
vector = Vector(xdata, other=ydata, groups=groups)
if vector.is_empty():
raise NoDataError("Cannot perform test because there is no data")
for grp, seq in vector.groups.items():
if len(seq) <= self._min_size:
raise MinimumSizeError("length of {} is less than the minimum size {}".format(grp, self._min_size))
super(GroupComparison, self).__init__(vector, display=display)
self._alpha = alpha or self._default_alpha
self.logic()
Expand All @@ -251,6 +248,9 @@ def __init__(self, xdata, ydata=None, groups=None, alpha=None, display=True):

def run(self):
out = []
# Exclude from the analysis any group whose size is less than or equal to the minimum size.
small_grps = [grp for grp, seq in self.data.groups.items() if len(seq) <= self._min_size]
self.data.drop_groups(small_grps)
if NormTest(*self.data.flatten(), display=False, alpha=self._alpha).p_value > self._alpha:
r = "pearson"
func = pearsonr
Expand Down Expand Up @@ -308,6 +308,9 @@ class GroupLinearRegression(GroupComparison):

def run(self):
out = []
# Exclude from the analysis any group whose size is less than or equal to the minimum size.
small_grps = [grp for grp, seq in self.data.groups.items() if len(seq) <= self._min_size]
self.data.drop_groups(small_grps)
for grp, pairs in self.data.paired_groups.items():
slope, intercept, r, p_value, std_err = linregress(*pairs)
count = len(pairs[0])
Expand All @@ -321,6 +324,8 @@ def run(self):
self._p_value: p_value,
self._group_name: str(grp)
})
if not out:
raise NoDataError
self._results = DataFrame(out).sort_values(self._group_name).to_dict(orient='records')

def __str__(self):
Expand Down
27 changes: 24 additions & 3 deletions sci_analysis/data/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

# Import from local
from .data import Data, is_data
from .data_operations import flatten
from .data_operations import flatten, is_iterable


class EmptyVectorError(Exception):
Expand Down Expand Up @@ -143,7 +143,7 @@ def drop_nan(self):
Returns
-------
arr : pandas.DataFrame
arr : pandas.Series
A copy of the Numeric object's internal Series with all NaN values removed.
"""
return self._values.dropna(how='any', subset=[self._ind])
Expand All @@ -156,10 +156,31 @@ def drop_nan_intersect(self):
Returns
-------
arr : pandas.DataFrame
A tuple of numpy Arrays corresponding to the internal Vector and seq with all nan values removed.
A copy of the Numeric object's internal DataFrame with all nan values removed.
"""
return self._values.dropna(how='any', subset=[self._ind, self._dep])

def drop_groups(self, grps):
    """Drop one or more groups from the Numeric object.

    The internal DataFrame is replaced by the filtered copy, so the groups
    are removed from this object as well as from the returned frame.

    Parameters
    ----------
    grps : str|int|list[str]|list[int]
        The name of the group, or a collection of group names, to remove.
        Names that do not match an existing category are ignored.

    Returns
    -------
    arr : pandas.DataFrame
        A copy of the Numeric object's internal DataFrame with all records
        belonging to the specified groups removed.
    """
    if not is_iterable(grps):
        grps = [grps]
    # Materialize once: the names are used both for filtering the rows and
    # for pruning the categories, so a one-shot iterable must not be consumed.
    # NOTE(review): relies on is_iterable() returning False for a single
    # string name, as the original wrapping logic already did -- confirm.
    grps = list(grps)
    # Filter with a boolean mask rather than a string-built query, so the
    # result does not depend on the column name being a valid identifier or
    # on repr(grps) being a literal that DataFrame.query can parse.
    labels = self._values[self._grp]
    dropped = self._values[~labels.isin(grps)].copy()
    # remove_categories() raises ValueError for names that are not
    # categories, so only prune the ones that actually exist.
    known = dropped[self._grp].cat.categories
    stale = [grp for grp in grps if grp in known]
    dropped[self._grp] = dropped[self._grp].cat.remove_categories(stale)
    self._values = dropped
    return dropped

@property
def data_type(self):
return self._type
Expand Down
16 changes: 16 additions & 0 deletions sci_analysis/test/test_analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,22 @@ def test_140_scatter_highlight_labels(self):
), ['Bivariate']
)

def test_141_scatter_groups_one_below_min_size(self):
    """Grouped bivariate analysis still reports when one group has one member.

    One hundred random (x, y) pairs are assigned at random to groups
    "A", "B" and "C"; a single row is then relabelled "D" so that group
    holds exactly one observation.
    """
    np.random.seed(self._seed)
    frame = pd.DataFrame(np.random.randn(100, 2), columns=list('xy'))
    frame['groups'] = np.random.choice(list('ABC'), len(frame)).tolist()
    # Relabel one row so that group "D" contains a single record.
    frame.at[24, 'groups'] = "D"
    output_path = '{}test_analyze_141'.format(self.save_path)
    result = analyze(
        frame['x'],
        frame['y'],
        frame['groups'],
        debug=True,
        save_to=output_path
    )
    self.assertEqual(result, ['Group Bivariate'])


if __name__ == '__main__':
unittest.main()

0 comments on commit f69fd53

Please sign in to comment.