Skip to content

Commit

Permalink
Reorganize code. Remove circular dependency in ResultData
Browse files Browse the repository at this point in the history
  • Loading branch information
emjun committed Apr 28, 2019
1 parent b6d185c commit 59b3d1c
Show file tree
Hide file tree
Showing 17 changed files with 81 additions and 141 deletions.
15 changes: 6 additions & 9 deletions tea/api.py
@@ -1,19 +1,16 @@
from tea.build import (load_data, load_data_from_url, const,
from .build import (load_data, load_data_from_url, const,
ordinal, isordinal,
nominal, isnominal,
ratio, isratio,
interval, isinterval, isnumeric,
select, compare, relate, predict,
get_var_from_list
# , nominal, interval, ratio, load_data, model,
# mean, median, standard_deviation, variance, kurtosis, skew, normality, frequency,
# between_experiment, within_experiment, mixed_experiment, model, equation,
# load_data_arrs, hypothesis, experiment_design
)
from tea.evaluate import evaluate
from tea.evaluate_helper_methods import determine_study_type, assign_roles
from tea.evaluate_data_structures import BivariateData, MultivariateData
from .solver import which_props
from .evaluate import evaluate
import tea.helpers
import tea.runtimeDataStructures
import tea.z3_solver

from typing import Dict
from .global_vals import *

Expand Down
2 changes: 0 additions & 2 deletions tea/ast.py
Expand Up @@ -4,8 +4,6 @@

from enum import Enum
from typing import Dict, Union
# from collections import OrderedDict
# from pandas.api.types import CategoricalDtype

class Node(object):
def relate(self, other):
Expand Down
8 changes: 5 additions & 3 deletions tea/build.py
@@ -1,9 +1,11 @@
from typing import Dict, Union
from collections import OrderedDict
from tea.runtimeDataStructures.dataset import Dataset
from tea.ast import ( Variable, DataType, Literal,
Relate, Relationship
)
from tea.dataset import Dataset


from typing import Dict, Union
from collections import OrderedDict

iv_identifier = 'independent variable'
dv_identifier = 'dependent variable'
Expand Down
25 changes: 15 additions & 10 deletions tea/evaluate.py
@@ -1,21 +1,27 @@
from tea.ast import *
from tea.dataset import Dataset
from tea.evaluate_data_structures import VarData, BivariateData, MultivariateData, ResultData # runtime data structures
# from tea.evaluate_result_data_structures import ResultData
from tea.evaluate_helper_methods import determine_study_type, assign_roles, add_paired_property, compute_data_properties, compute_combined_data_properties, execute_test, correct_multiple_comparison
from .solver import synthesize_tests
# from tea.solver import find_applicable_bivariate_tests
from tea.ast import ( Node, Variable, Literal,
Equal, NotEqual, LessThan,
LessThanEqual, GreaterThan, GreaterThanEqual,
Relate
)
from tea.runtimeDataStructures.dataset import Dataset
from tea.runtimeDataStructures.varData import VarData
from tea.runtimeDataStructures.bivariateData import BivariateData
from tea.runtimeDataStructures.multivariateData import MultivariateData
from tea.runtimeDataStructures.resultData import ResultData
from tea.helpers.evaluateHelperMethods import determine_study_type, assign_roles, add_paired_property, execute_test, correct_multiple_comparison
from tea.z3_solver.solver import synthesize_tests

import attr
from typing import Any
from types import SimpleNamespace # allows for dot notation access for dictionaries
from typing import Dict

from scipy import stats # Stats library used
import statsmodels.api as sm
import statsmodels.formula.api as smf
import numpy as np # Use some stats from numpy instead
import pandas as pd
# import bootstrapped as bs


# TODO: Pass participant_id as part of experimental design, not load_data
def evaluate(dataset: Dataset, expr: Node, assumptions: Dict[str, str], design: Dict[str, str]=None):
Expand Down Expand Up @@ -385,7 +391,6 @@ def evaluate(dataset: Dataset, expr: Node, assumptions: Dict[str, str], design:
if len(tests) == 0:
tests.append('bootstrap') # Default to bootstrap


for test in tests:
test_result = execute_test(dataset, design, expr.predictions, combined_data, test)
results[test] = test_result
Expand All @@ -400,7 +405,7 @@ def evaluate(dataset: Dataset, expr: Node, assumptions: Dict[str, str], design:
# There are multiple comparisons
# if len(preds > 1):
# FOR DEBUGGING:
if len(preds >= 1):
if len(preds) >= 1:
correct_multiple_comparison(res_data)

return res_data
Expand Down
Empty file added tea/helpers/__init__.py
Empty file.
@@ -1,28 +1,32 @@
# Tea
from tea.global_vals import *
from tea.ast import *
from tea.dataset import Dataset
from tea.evaluate_data_structures import VarData, CombinedData, BivariateData, MultivariateData, ResultData
# from tea.evaluate_result_data_structures import ResultData
# from tea.solver import Tests, Assumptions

import attr
from typing import Any, Dict, List
from types import SimpleNamespace # allows for dot notation access for dictionaries
from collections import namedtuple
from enum import Enum
import copy

from tea.ast import DataType, GreaterThan
from tea.runtimeDataStructures.dataset import Dataset
from tea.runtimeDataStructures.varData import VarData
from tea.runtimeDataStructures.combinedData import CombinedData
from tea.runtimeDataStructures.bivariateData import BivariateData
from tea.runtimeDataStructures.multivariateData import MultivariateData
from tea.runtimeDataStructures.resultData import ResultData

# Stats
from scipy import stats # Stats library used
from sklearn import preprocessing # for creating interaction effects
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.formula.api import ols
# import numpy as np # Use some stats from numpy instead

import pandas as pd
from statsmodels.stats.anova import AnovaRM
import bootstrapped.bootstrap as bs
import bootstrapped.stats_functions as bs_stats

# Other
import attr
from typing import Any, Dict, List
from types import SimpleNamespace # allows for dot notation access for dictionaries
from collections import namedtuple
from enum import Enum
import copy

def determine_study_type(vars_data: list, design: Dict[str, str]):
if design:
Expand Down Expand Up @@ -837,77 +841,4 @@ def execute_test(dataset, design, predictions, combined_data: CombinedData, test
# Correct for multiple comparisons
# Placeholder hook: takes the ResultData object (res_data) and currently does
# nothing with it -- the body is a bare `pass`, so the function returns None
# and leaves res_data untouched.
def correct_multiple_comparison(res_data: ResultData):
# TODO: refactor ResultData first.
# No correction is implemented yet; see the TODO above.
pass

"""
def explanatory_strings_for_assumptions(assumptions: Assumptions) -> List[str]:
explanation = []
if assumptions & Assumptions.INDEPENDENT_OBSERVATIONS:
explanation.append("Assumes independent observations.")
assumptions &= ~Assumptions.INDEPENDENT_OBSERVATIONS
if assumptions & Assumptions.NORMALLY_DISTRIBUTED_VARIABLES:
explanation.append("Assumes samples are normally distributed.")
assumptions &= ~Assumptions.NORMALLY_DISTRIBUTED_VARIABLES
if assumptions & Assumptions.NORMALLY_DISTRIBUTED_DIFFERENCE_BETWEEN_VARIABLES:
explanation.append("Assumes difference between paired values is normally distributed.")
assumptions &= ~Assumptions.NORMALLY_DISTRIBUTED_DIFFERENCE_BETWEEN_VARIABLES
if assumptions & Assumptions.SYMMETRICALLY_DISTRIBUTED_DIFFERENCE_BETWEEN_VARIABLES:
explanation.append("Assumes difference between paired values is symmetrically distributed.")
assumptions &= ~Assumptions.SYMMETRICALLY_DISTRIBUTED_DIFFERENCE_BETWEEN_VARIABLES
if assumptions & Assumptions.SIMILAR_VARIANCES:
explanation.append("Assumes samples have similar variances.")
assumptions &= ~Assumptions.SIMILAR_VARIANCES
if assumptions & Assumptions.LARGE_SAMPLE_SIZE:
explanation.append("Assumes a large enough sample size.")
assumptions &= ~Assumptions.LARGE_SAMPLE_SIZE
if assumptions & Assumptions.VALUES_ARE_FREQUENCIES:
explanation.append("Assumes values are frequencies (and not, e.g., percentages).")
assumptions &= ~Assumptions.VALUES_ARE_FREQUENCIES
if assumptions & Assumptions.PAIRED_OBSERVATIONS:
explanation.append("Assumes observations are paired (e.g. within subjects).")
assumptions &= ~Assumptions.PAIRED_OBSERVATIONS
if assumptions & Assumptions.NO_OUTLIERS:
explanation.append("Assumes there are no outliers in the data.")
assumptions &= ~Assumptions.NO_OUTLIERS
if assumptions & Assumptions.NO_OUTLIERS_IN_DIFFERENCE_BETWEEN_VARIABLES:
explanation.append("Assumes there are no outliers in the difference between paired values.")
assumptions &= ~Assumptions.NO_OUTLIERS_IN_DIFFERENCE_BETWEEN_VARIABLES
if assumptions & Assumptions.LINEAR_RELATIONSHIP:
explanation.append("Assumes there is a linear relationship between the variables.")
assumptions &= ~Assumptions.LINEAR_RELATIONSHIP
if assumptions & Assumptions.BIVARIATE_NORMAL_VARIABLES:
explanation.append("Assumes the two variables have a bivariate normal distribution.")
assumptions &= ~Assumptions.BIVARIATE_NORMAL_VARIABLES
if assumptions & Assumptions.RELATED_SAMPLES:
explanation.append("Assumes the samples come from related sources (e.g. within subjects).")
assumptions &= ~Assumptions.RELATED_SAMPLES
if assumptions & Assumptions.MONOTONIC_RELATIONSHIP:
explanation.append("Assumes there is a monotonic relationship between the variables.")
assumptions &= ~Assumptions.MONOTONIC_RELATIONSHIP
if assumptions & Assumptions.ALL_VARIABLES_CONTINUOUS_OR_ORDINAL:
explanation.append("Assumes all variables are continuous or ordinal.")
assumptions &= ~Assumptions.ALL_VARIABLES_CONTINUOUS_OR_ORDINAL
if assumptions & Assumptions.DEPENDENT_VARIABLE_CONTINUOUS_OR_ORDINAL:
explanation.append("Assumes the dependent variable is continuous or ordinal.")
assumptions &= ~Assumptions.DEPENDENT_VARIABLE_CONTINUOUS_OR_ORDINAL
assert assumptions == Assumptions.NONE, \
"Not all assumptions have a corresponding explanatory string: %s" % assumptions
return explanation
"""
pass
Empty file.
@@ -1,4 +1,5 @@
import attr
from .combinedData import CombinedData

@attr.s(init=True, auto_attribs=True)
class BivariateData(CombinedData):
Expand Down
@@ -1,5 +1,7 @@
from tea.global_vals import *
from tea.ast import *
import attr
import value
from .value import Value

# CombinedData is the runtime data structure used to unify experimental design and variable declarations
@attr.s(init=True)
Expand Down
File renamed without changes.
@@ -1,5 +1,6 @@
import attr
import value
from .value import Value
from .combinedData import CombinedData

@attr.s(init=True, auto_attribs=True)
class MultivariateData(CombinedData):
Expand Down
@@ -1,5 +1,7 @@
# from tea.z3_solver.solver import __ALL_TESTS__
from tea.runtimeDataStructures.value import Value

import attr
from .solver import __ALL_TESTS__

@attr.s(init=False, repr=False, str=False)
class ResultData(Value):
Expand All @@ -9,22 +11,22 @@ class ResultData(Value):
def __init__(self, test_to_results):
self.test_to_results = test_to_results
self.test_to_assumptions = {}
for test in __ALL_TESTS__:
if test.name in test_to_results:
print(test.name)
test_assumptions = []
# TODO: The names get stale if hypothesize() is called multiple times in a row.
for applied_prop in test._properties:
assumption = f"{applied_prop.property.description}: "
for stat_var in applied_prop.vars:
assumption += f"{stat_var.name}, "

assumption = assumption[:-2]
if applied_prop.property_test_results is not None:
assumption += f": {applied_prop.property_test_results}"
test_assumptions.append(assumption)

self.test_to_assumptions[test.name] = test_assumptions
# for test in __ALL_TESTS__:
# if test.name in test_to_results:
# print(test.name)
# test_assumptions = []
# # TODO: The names get stale if hypothesize() is called multiple times in a row.
# for applied_prop in test._properties:
# assumption = f"{applied_prop.property.description}: "
# for stat_var in applied_prop.vars:
# assumption += f"{stat_var.name}, "

# assumption = assumption[:-2]
# if applied_prop.property_test_results is not None:
# assumption += f": {applied_prop.property_test_results}"
# test_assumptions.append(assumption)

# self.test_to_assumptions[test.name] = test_assumptions

def _pretty_print(self):
output = "\nResults:\n--------------"
Expand Down
File renamed without changes.
@@ -1,5 +1,8 @@
from tea.global_vals import *
from .value import Value
from tea.ast import DataType

import attr
import value

@attr.s(init=True)
class VarData(Value):
Expand Down
2 changes: 0 additions & 2 deletions tea/runtime_data_structures/__init__.py

This file was deleted.

Empty file added tea/z3_solver/__init__.py
Empty file.
14 changes: 7 additions & 7 deletions tea/z3_solver/solver.py
@@ -1,12 +1,12 @@
from tea.global_vals import *
from tea.runtimeDataStructures.dataset import Dataset
from tea.runtimeDataStructures.varData import VarData
from tea.runtimeDataStructures.combinedData import CombinedData
from tea.runtimeDataStructures.bivariateData import BivariateData
from tea.helpers.evaluateHelperMethods import get_data, compute_normal_distribution, compute_eq_variance

import attr
# from z3 import *
import z3
# from z3 import BoolSort
# from z3 import z3.is_true
from tea.dataset import Dataset
from tea.evaluate_data_structures import VarData, CombinedData, BivariateData, MultivariateData
from tea.evaluate_helper_methods import get_data, compute_normal_distribution, compute_eq_variance
from tea.global_vals import *
from typing import Dict, List

# Prog -> List[StatisticalTest] -> Query
Expand Down

0 comments on commit 59b3d1c

Please sign in to comment.