Skip to content
Permalink
Browse files

Making local commits before surrendering laptop to Apple T.T.

  • Loading branch information...
emjun committed Apr 14, 2019
1 parent 95a9f91 commit 3cf07f1fbd0b8d5fa2f5f4a9f5dbc43a1076323a
Showing with 321 additions and 3 deletions.
  1. +316 −0 Tea for UIST 2019.ipynb
  2. +2 −1 tea/evaluate.py
  3. +3 −2 tea/evaluate_helper_methods.py
@@ -0,0 +1,316 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import tea"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Load data\n",
"data = \"./datasets/UScrime.csv\"\n",
"tea.data(data)\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Declare and annotate the variables of interest\n",
"variables = [\n",
" {\n",
" 'name' : 'So',\n",
" 'data type' : 'nominal',\n",
" 'categories' : ['0', '1']\n",
" },\n",
" {\n",
" 'name' : 'Prob',\n",
" 'data type' : 'ratio',\n",
" 'range' : [0,1]\n",
" }\n",
"]\n",
"\n",
"tea.define_variables(variables)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"assumptions = {\n",
" 'groups normally distributed': [['So', 'Prob']],\n",
" 'Type I (False Positive) Error Rate': 0.05,\n",
"}\n",
"\n",
"tea.assume(assumptions)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"experimental_design = {\n",
" 'study type': 'observational study',\n",
" 'contributor variables': 'So',\n",
" 'outcome variables': 'Prob',\n",
" }\n",
"tea.define_study_design(experimental_design)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"students_t\n",
"welchs_t\n",
"mannwhitney_u\n",
"f_test\n",
"kruskall_wallis\n",
"factorial_ANOVA\n",
"\n",
"\n",
"Results:\n",
"--------------\n",
"Test: students_t\n",
"***Test assumptions:\n",
"Exactly two variables involved in analysis: So, Prob\n",
"Exactly one explanatory variable: So, Prob\n",
"Exactly one explained variable: So, Prob\n",
"Independent (not paired) observations: So, Prob\n",
"Variable is categorical: So\n",
"Variable has two categories: So\n",
"Continuous (not categorical) data: Prob\n",
"Equal variance: So, Prob\n",
"Groups are normally distributed: So, Prob: Assumed true.\n",
"\n",
"***Test results:\n",
"statistic: 4.202130736875173\n",
"p: 0.00012364897266532775\n",
"adjusted_p: 6.182448633266387e-05\n",
"\n",
"Test: welchs_t\n",
"***Test assumptions:\n",
"Exactly two variables involved in analysis: So, Prob\n",
"Exactly one explanatory variable: So, Prob\n",
"Exactly one explained variable: So, Prob\n",
"Independent (not paired) observations: So, Prob\n",
"Variable is categorical: So\n",
"Variable has two categories: So\n",
"Continuous (not categorical) data: Prob\n",
"Groups are normally distributed: So, Prob: Assumed true.\n",
"\n",
"***Test results:\n",
"statistic: -3.8953717090736655\n",
"pvalue: 0.0006505783178002014\n",
"\n",
"Test: mannwhitney_u\n",
"***Test assumptions:\n",
"Exactly one explanatory variable: So, Prob\n",
"Exactly one explained variable: So, Prob\n",
"Independent (not paired) observations: So, Prob\n",
"Variable is categorical: So\n",
"Variable has two categories: So\n",
"Continuous OR ORDINAL (not nominal) data: Prob\n",
"\n",
"***Test results:\n",
"statistic: 81.0\n",
"pvalue: 0.00018546387565891538\n",
"\n",
"Test: f_test\n",
"***Test assumptions:\n",
"Independent (not paired) observations: So, Prob\n",
"Exactly one explanatory variable: So, Prob\n",
"Exactly one explained variable: So, Prob\n",
"Continuous (not categorical) data: Prob\n",
"Variable is categorical: So\n",
"Variable has two or more categories: So\n",
"Groups are normally distributed: So, Prob: Assumed true.\n",
"Equal variance: So, Prob\n",
"\n",
"***Test results:\n",
" df sum_sq mean_sq F PR(>F)\n",
"C(So) 1.0 0.006702 0.006702 17.657903 0.000124\n",
"Residual 45.0 0.017079 0.000380 NaN NaN\n",
"\n",
"Test: kruskall_wallis\n",
"***Test assumptions:\n",
"Independent (not paired) observations: So, Prob\n",
"Exactly one explanatory variable: So, Prob\n",
"Exactly one explained variable: So, Prob\n",
"Continuous (not categorical) data: Prob\n",
"Variable is categorical: So\n",
"Variable has two or more categories: So\n",
"\n",
"***Test results:\n",
"statistic: 14.056955645161281\n",
"pvalue: 0.00017735665596242664\n",
"\n",
"Test: factorial_ANOVA\n",
"***Test assumptions:\n",
"Exactly one explained variable: So, Prob\n",
"Continuous (not categorical) data: Prob\n",
"Variable is categorical: So\n",
"Variable has two or more categories: So\n",
"Groups are normally distributed: So, Prob: Assumed true.\n",
"Equal variance: So, Prob\n",
"\n",
"***Test results:\n",
" df sum_sq mean_sq F PR(>F)\n",
"C(So) 1.0 0.006702 0.006702 17.657903 0.000124\n",
"Residual 45.0 0.017079 0.000380 NaN NaN\n",
"\n"
]
},
{
"data": {
"text/plain": [
"\n",
"Results:\n",
"--------------\n",
"Test: students_t\n",
"***Test assumptions:\n",
"Exactly two variables involved in analysis: So, Prob\n",
"Exactly one explanatory variable: So, Prob\n",
"Exactly one explained variable: So, Prob\n",
"Independent (not paired) observations: So, Prob\n",
"Variable is categorical: So\n",
"Variable has two categories: So\n",
"Continuous (not categorical) data: Prob\n",
"Equal variance: So, Prob\n",
"Groups are normally distributed: So, Prob: Assumed true.\n",
"\n",
"***Test results:\n",
"statistic: 4.202130736875173\n",
"p: 0.00012364897266532775\n",
"adjusted_p: 6.182448633266387e-05\n",
"\n",
"Test: welchs_t\n",
"***Test assumptions:\n",
"Exactly two variables involved in analysis: So, Prob\n",
"Exactly one explanatory variable: So, Prob\n",
"Exactly one explained variable: So, Prob\n",
"Independent (not paired) observations: So, Prob\n",
"Variable is categorical: So\n",
"Variable has two categories: So\n",
"Continuous (not categorical) data: Prob\n",
"Groups are normally distributed: So, Prob: Assumed true.\n",
"\n",
"***Test results:\n",
"statistic: -3.8953717090736655\n",
"pvalue: 0.0006505783178002014\n",
"\n",
"Test: mannwhitney_u\n",
"***Test assumptions:\n",
"Exactly one explanatory variable: So, Prob\n",
"Exactly one explained variable: So, Prob\n",
"Independent (not paired) observations: So, Prob\n",
"Variable is categorical: So\n",
"Variable has two categories: So\n",
"Continuous OR ORDINAL (not nominal) data: Prob\n",
"\n",
"***Test results:\n",
"statistic: 81.0\n",
"pvalue: 0.00018546387565891538\n",
"\n",
"Test: f_test\n",
"***Test assumptions:\n",
"Independent (not paired) observations: So, Prob\n",
"Exactly one explanatory variable: So, Prob\n",
"Exactly one explained variable: So, Prob\n",
"Continuous (not categorical) data: Prob\n",
"Variable is categorical: So\n",
"Variable has two or more categories: So\n",
"Groups are normally distributed: So, Prob: Assumed true.\n",
"Equal variance: So, Prob\n",
"\n",
"***Test results:\n",
" df sum_sq mean_sq F PR(>F)\n",
"C(So) 1.0 0.006702 0.006702 17.657903 0.000124\n",
"Residual 45.0 0.017079 0.000380 NaN NaN\n",
"\n",
"Test: kruskall_wallis\n",
"***Test assumptions:\n",
"Independent (not paired) observations: So, Prob\n",
"Exactly one explanatory variable: So, Prob\n",
"Exactly one explained variable: So, Prob\n",
"Continuous (not categorical) data: Prob\n",
"Variable is categorical: So\n",
"Variable has two or more categories: So\n",
"\n",
"***Test results:\n",
"statistic: 14.056955645161281\n",
"pvalue: 0.00017735665596242664\n",
"\n",
"Test: factorial_ANOVA\n",
"***Test assumptions:\n",
"Exactly one explained variable: So, Prob\n",
"Continuous (not categorical) data: Prob\n",
"Variable is categorical: So\n",
"Variable has two or more categories: So\n",
"Groups are normally distributed: So, Prob: Assumed true.\n",
"Equal variance: So, Prob\n",
"\n",
"***Test results:\n",
" df sum_sq mean_sq F PR(>F)\n",
"C(So) 1.0 0.006702 0.006702 17.657903 0.000124\n",
"Residual 45.0 0.017079 0.000380 NaN NaN"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tea.hypothesize(['So', 'Prob'], ['So:1 > 0'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -391,6 +391,7 @@ def evaluate(dataset: Dataset, expr: Node, assumptions: Dict[str, str], design:
results = {}
if len(tests) == 0:
tests.append('bootstrap') # Default to bootstrap


for test in tests:
test_result = execute_test(dataset, design, expr.predictions, combined_data, test)
@@ -415,7 +416,7 @@ def evaluate(dataset: Dataset, expr: Node, assumptions: Dict[str, str], design:

# TODO: For f_test/..., run post hoc comparisons depending on predictions
pass
import pdb; pdb.set_trace()
# import pdb; pdb.set_trace()
return ResultData(results)

elif isinstance(expr, Mean):
@@ -292,7 +292,7 @@ def is_dependent_samples(var_name: str, design: Dict[str, str]):

# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html
# Possible parameters: a, b : arrays | axis (if None, computed over the entire arrays) | equal_var (default is True) | nan_policy (optional)
StudentsTResult = namedtuple('StudentsTResult', ('statistic', 'p', 'adjusted_p', 'interpretation'))
StudentsTResult = namedtuple('StudentsTResult', ('statistic', 'p', 'adjusted_p')) #, 'interpretation'))
def students_t(dataset, predictions, combined_data: BivariateData):

# predictions = [[GreaterThans]]
@@ -383,7 +383,8 @@ class TTestResult(Enum):
else:
assert False, "ttest_result case without an associated interpretation."

return StudentsTResult(t_stat, p_val, adjusted_p, interpretation)
# return StudentsTResult(t_stat, p_val, adjusted_p, interpretation)
return StudentsTResult(t_stat, p_val, adjusted_p)



0 comments on commit 3cf07f1

Please sign in to comment.
You can’t perform that action at this time.