
Commit

sensitivity power analysis
krajcsi committed Mar 20, 2020
1 parent 70337e8 commit 161dad9
Showing 2 changed files with 53 additions and 6 deletions.
4 changes: 3 additions & 1 deletion changelog.md
@@ -6,9 +6,11 @@ Upcoming release

## New features
- Behavioral data diffusion analysis
- Find text in results
- Sensitivity power analysis for one-sample t-test, two-sample t-test, paired samples t-test, Chi-square test, one-way ANOVA
- Variation ratio for nominal variables
- Various output refinements
- GUI
- Find text in results

## Fixes
- Various output fixes
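To unpack the new changelog entry: a sensitivity power analysis holds the sample size, the alpha level, and the target power fixed and solves for the smallest effect size the test could still detect. The cogstat_stat.py changes below do this with statsmodels' power calculators by passing effect_size=None to solve_power. A minimal standalone sketch of the same idea, with an assumed sample size of 30 (the n is illustrative, not a value from the commit):

    # Smallest detectable Cohen's d for a one-sample t-test,
    # given n = 30, alpha = .05, and a target power of 95%
    from statsmodels.stats.power import TTestPower

    n = 30  # hypothetical sample size
    min_d = TTestPower().solve_power(effect_size=None, nobs=n, alpha=0.05,
                                     power=0.95, alternative='two-sided')
    print("Minimal detectable Cohen's d: %0.2f" % min_d)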
55 changes: 50 additions & 5 deletions cogstat/cogstat_stat.py
@@ -408,7 +408,17 @@ def one_t_test(pdf, data_measlevs, var_name, test_value=0):
    ci = (cih-cil)/2
    prec = cs_util.precision(data)+1
    ci_text = '[%0.*f, %0.*f]' %(prec, cil, prec, cih)
    text_result += _('One sample t-test against %g')%float(test_value)+': <i>t</i>(%d) = %0.3g, %s\n' %(df, t, cs_util.print_p(p))

    # Sensitivity power analysis
    from statsmodels.stats.power import TTestPower
    power_analysis = TTestPower()
    text_result = _(
        'Sensitivity power analysis. Minimal effect size to reach 95%% power (effect size is in %s):') % _(
        'd') + ' %0.2f\n' % power_analysis.solve_power(effect_size=None, nobs=len(data), alpha=0.05, power=0.95,
                                                       alternative='two-sided')

    text_result += _('One sample t-test against %g') % \
        float(test_value)+': <i>t</i>(%d) = %0.3g, %s\n' %(df, t, cs_util.print_p(p))

    # Graph
    image = cs_chart.create_variable_population_chart(data, var_name, ci)
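In the hunk above, effect_size=None marks the effect size as the unknown that solve_power should return, while n, alpha, power, and sidedness are pinned. A quick round-trip check of that reading, with a hypothetical n of 25 rather than the function's len(data):

    # The d returned for 95% power should give ~0.95 back when plugged into power()
    from statsmodels.stats.power import TTestPower

    analysis = TTestPower()
    n = 25  # hypothetical sample size
    d = analysis.solve_power(effect_size=None, nobs=n, alpha=0.05,
                             power=0.95, alternative='two-sided')
    print(analysis.power(effect_size=d, nobs=n, alpha=0.05,
                         alternative='two-sided'))  # ~0.95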
@@ -584,11 +594,19 @@ def paired_t_test(pdf, var_names):
    # Not available in statsmodels
    if len(var_names) != 2:
        return _('Paired t-test requires two variables.')

    variables = pdf[var_names].dropna()

    # Sensitivity power analysis
    from statsmodels.stats.power import TTestPower
    power_analysis = TTestPower()
    text_result = _('Sensitivity power analysis. Minimal effect size to reach 95%% power (effect size is in %s):') % _(
        'd') + ' %0.2f\n' % power_analysis.solve_power(effect_size=None, nobs=len(variables), alpha=0.05, power=0.95,
                                                       alternative='two-sided')

    df = len(variables)-1
    t, p = stats.ttest_rel(variables.iloc[:, 0], variables.iloc[:, 1])
    text_result = _('Result of paired samples t-test')+': <i>t</i>(%d) = %0.3g, %s\n' %(df, t, cs_util.print_p(p))
    text_result += _('Result of paired samples t-test')+': <i>t</i>(%d) = %0.3g, %s\n' %(df, t, cs_util.print_p(p))

    return text_result
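The paired test reuses TTestPower: nobs is the number of complete pairs left after dropna(), and the resulting d can be read as an effect size on the within-pair difference scores. A small sketch with made-up data (the column names and values are purely illustrative):

    # Paired case: n is the number of complete pairs
    import pandas as pd
    from statsmodels.stats.power import TTestPower

    pdf = pd.DataFrame({'pre':  [3, 4, 5, 6, 4, 5, 7, 5, 6, 4, 5, 6],
                        'post': [4, 5, 5, 7, 6, 6, 8, 5, 7, 5, 6, 7]})
    pairs = pdf[['pre', 'post']].dropna()
    min_d = TTestPower().solve_power(effect_size=None, nobs=len(pairs), alpha=0.05,
                                     power=0.95, alternative='two-sided')
    print('Minimal detectable d for %d pairs: %0.2f' % (len(pairs), min_d))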

@@ -801,6 +819,15 @@ def independent_t_test(pdf, var_name, grouping_name):
    lci = mean_diff - t_cl*s_m1m2
    hci = mean_diff + t_cl*s_m1m2
    prec = cs_util.precision(var1.append(var2))+1

    # Sensitivity power analysis
    from statsmodels.stats.power import TTestIndPower
    power_analysis = TTestIndPower()
    text_result += _(
        'Sensitivity power analysis. Minimal effect size to reach 95%% power (effect size is in %s):') % _(
        'd') + ' %0.2f\n' % power_analysis.solve_power(effect_size=None, nobs1=len(var1), alpha=0.05, power=0.95,
                                                       ratio=len(var2) / len(var1), alternative='two-sided')

    text_result += _('Difference between the two groups:') +' %0.*f, ' % (prec, mean_diff) + \
        _('95%% confidence interval [%0.*f, %0.*f]') % (prec, lci, prec, hci)+'\n'
    text_result += _('Result of independent samples t-test:')+' <i>t</i>(%0.3g) = %0.3g, %s\n' % \
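For the independent-samples case the commit switches to TTestIndPower, which takes the first group's size as nobs1 and the second group only through ratio = n2 / n1, so unequal group sizes are covered. A standalone sketch with hypothetical group sizes of 40 and 30:

    # Two independent groups of unequal size: nobs1 is group 1, ratio = n2 / n1
    from statsmodels.stats.power import TTestIndPower

    n1, n2 = 40, 30  # hypothetical group sizes
    min_d = TTestIndPower().solve_power(effect_size=None, nobs1=n1, alpha=0.05,
                                        power=0.95, ratio=n2 / n1,
                                        alternative='two-sided')
    print("Minimal detectable Cohen's d: %0.2f" % min_d)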
@@ -910,6 +937,15 @@ def one_way_anova(pdf, var_name, grouping_name):
    from statsmodels.formula.api import ols
    from statsmodels.stats.anova import anova_lm
    data = pdf.dropna(subset=[var_name, grouping_name])

    # Sensitivity power analysis
    from statsmodels.stats.power import FTestAnovaPower
    power_analysis = FTestAnovaPower()
    text_result += _(
        'Sensitivity power analysis. Minimal effect size to reach 95%% power (effect size is in %s):') % _('f') + \
        ' %0.2f\n' % power_analysis.solve_power(effect_size=None, nobs=len(data), alpha=0.05, power=0.95,
                                                k_groups=len(set(data[grouping_name])))

    # from IPython import embed; embed()
    # FIXME If there is a variable called 'C', then patsy is confused whether C is the variable or the categorical variable
    # http://gotoanswer.stanford.edu/?q=Statsmodels+Categorical+Data+from+Formula+%28using+pandas%
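The ANOVA version reports the effect size as Cohen's f via FTestAnovaPower; nobs is the total sample size across all groups and k_groups is the number of levels of the grouping variable. A sketch with hypothetical numbers (90 cases spread over 3 groups):

    # One-way ANOVA: total N across groups plus the number of group levels
    from statsmodels.stats.power import FTestAnovaPower

    total_n, groups = 90, 3  # hypothetical totals
    min_f = FTestAnovaPower().solve_power(effect_size=None, nobs=total_n,
                                          alpha=0.05, power=0.95, k_groups=groups)
    print("Minimal detectable Cohen's f: %0.2f" % min_f)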
@@ -1054,6 +1090,15 @@ def chi_square_test(pdf, var_name, grouping_name):
        cramer_result = _('Cramér\'s V measure of association: ')+'&phi;<i><sub>c</sub></i> = %.3f\n' % cramersv
    except ZeroDivisionError: # TODO could this be avoided?
        cramer_result = _('Cramér\'s V measure of association cannot be computed (division by zero).')
    chi_result = _("Result of the Pearson's Chi-square test: ")+'</i>&chi;<sup>2</sup></i>(%g, <i>N</i> = %d) = %.3f, %s' % \
                 (dof, cont_table_data.values.sum(), chi2, cs_util.print_p(p))

    # Sensitivity power analysis
    from statsmodels.stats.power import GofChisquarePower
    power_analysis = GofChisquarePower()
    chi_result = _('Sensitivity power analysis. Minimal effect size to reach 95%% power (effect size is in %s):') % _(
        'w') + ' %0.2f\n' % power_analysis.solve_power(effect_size=None, nobs=cont_table_data.values.sum(), alpha=0.05,
                                                       power=0.95, n_bins=cont_table_data.size)

    chi_result += _("Result of the Pearson's Chi-square test: ") + \
        '</i>&chi;<sup>2</sup></i>(%g, <i>N</i> = %d) = %.3f, %s' % \
        (dof, cont_table_data.values.sum(), chi2, cs_util.print_p(p))
    return cramer_result, chi_result
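For the Chi-square test the effect size is Cohen's w from GofChisquarePower; nobs is the total count in the contingency table and n_bins, set here to the number of cells, fixes the degrees of freedom at n_bins - 1. A sketch assuming a 2x3 table with 120 observations:

    # Chi-square: total table count and number of cells (df = n_bins - 1)
    from statsmodels.stats.power import GofChisquarePower

    total_n, n_cells = 120, 6  # hypothetical 2x3 table
    min_w = GofChisquarePower().solve_power(effect_size=None, nobs=total_n,
                                            alpha=0.05, power=0.95, n_bins=n_cells)
    print("Minimal detectable Cohen's w: %0.2f" % min_w)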

1 comment on commit 161dad9

@krajcsi (Contributor Author)
