Skip to content

Commit

Permalink
Debug concepts not updating in ConceptGraph. Add sample program test case. WIP.
Browse files Browse the repository at this point in the history
  • Loading branch information
emjun committed Jan 13, 2021
1 parent 1a4535e commit fa86a7a
Show file tree
Hide file tree
Showing 7 changed files with 142 additions and 109 deletions.
147 changes: 95 additions & 52 deletions tests/test_end_to_end.py
Expand Up @@ -6,85 +6,128 @@

class EndToEndTests(unittest.TestCase):

def test_generate_effects_sets(self):
analysis = ts.Tisane(task="explanation") # analysis has one task
# def test_generate_effects_sets(self):
# analysis = ts.Tisane(task="explanation") # analysis has one task

test_score = ts.Concept("Test Score")
intelligence = ts.Concept("Intelligence")
tutoring = ts.Concept("Tutoring")
concepts = [test_score, intelligence, tutoring]
# test_score = ts.Concept("Test Score")
# intelligence = ts.Concept("Intelligence")
# tutoring = ts.Concept("Tutoring")
# concepts = [test_score, intelligence, tutoring]

analysis.addRelationship(intelligence, test_score, "cause")
analysis.addRelationship(tutoring, test_score, "cause")
analysis.addRelationship(intelligence, tutoring, "correlate")
# analysis.addRelationship(intelligence, test_score, "cause")
# analysis.addRelationship(tutoring, test_score, "cause")
# analysis.addRelationship(intelligence, tutoring, "correlate")

effects = analysis.generate_effects_sets(ivs=[intelligence, tutoring], dv=test_score)
# effects = analysis.generate_effects_sets(ivs=[intelligence, tutoring], dv=test_score)

# check total number of effect sets
self.assertEqual(len(effects), 7)
# check each effect set is valid
for es in effects:
es_dict = es.to_dict()
self.assertTrue(es_dict in DataForTests.expected_effects_set)
# # check total number of effect sets
# self.assertEqual(len(effects), 7)
# # check each effect set is valid
# for es in effects:
# es_dict = es.to_dict()
# self.assertTrue(es_dict in DataForTests.expected_effects_set)


def test_effects_ASP(self):
# def test_effects_ASP(self):
# analysis = ts.Tisane(task="explanation")

# # Add concepts
# test_score = ts.Concept("Test Score")
# intelligence = ts.Concept("Intelligence")
# tutoring = ts.Concept("Tutoring")
# concepts = [test_score, intelligence, tutoring]

# # Add relationships
# analysis.addRelationship(intelligence, test_score, "cause")
# analysis.addRelationship(tutoring, test_score, "cause")
# analysis.addRelationship(intelligence, tutoring, "correlate")

# # Specify data (schema)
# test_score.specifyData(dtype="numeric")
# intelligence.specifyData(dtype="numeric")
# tutoring.specifyData(dtype="nominal", categories=["After school", "Before school"])
# # tutoring.specifyData(dtype="nominal", categories=["After school", "Before school", "None"])

# # Get valid statistical models
# # Assert statistical properties needed for linear regression
# # Generate effects sets
# effects = analysis.generate_effects_sets(ivs=[intelligence, tutoring], dv=test_score)

# # Add individual variable assertions
# # Some assertions are inferred from data schema, see above
# self.assertTrue(test_score.getVariable().has_property_value(prop="dtype", val="numeric"))
# self.assertTrue(intelligence.getVariable().has_property_value(prop="dtype", val="numeric"))
# self.assertTrue(tutoring.getVariable().has_property_value(prop="dtype", val="nominal"))

# self.assertFalse(test_score.getVariable().has_property(prop="cardinality"))
# self.assertFalse(intelligence.getVariable().has_property(prop="cardinality"))
# self.assertTrue(tutoring.getVariable().has_property_value(prop="cardinality", val="binary"))

# # Add assertions that pertain to effects sets
# linear_reg_es = None
# for es in effects:
# if es.to_dict() == DataForTests.expected_effects_set[2]:
# linear_reg_es = es
# break
# linear_reg_es.assert_property(prop="tolerate_correlation", val=True)

# # Convert linear regression model EffectSet to statistical model
# linear_reg_sm = StatisticalModel.create(model_type='linear_regression', effect_set=linear_reg_es)

# # Add assertions that pertain to the models
# # Note: Make assertions that will involve interaction (interactions compile to these statements)
# # Note: These assertions should be made on the *MODEL* not the *Effect Set*
# linear_reg_sm.get_residuals().assert_property(prop="distribution", val="normal")
# linear_reg_sm.get_residuals().assert_property(prop="homoscedastic", val=True)

# # TODO: Compile all these assertions into constraints
# # Should be in KnowledgeBase class or main Tisane class?

# # Query KB for statistical models
# analysis.start_model(linear_reg_es)

def test_sample_tisane_program(self):
analysis = ts.Tisane(task="explanation")

### PHASE 0: CONCEPTUAL RELATIONSHIPS
# Add concepts
test_score = ts.Concept("Test Score")
intelligence = ts.Concept("Intelligence")
tutoring = ts.Concept("Tutoring")
concepts = [test_score, intelligence, tutoring]


# Add relationships
analysis.addRelationship(intelligence, test_score, "cause")
analysis.addRelationship(tutoring, test_score, "cause")
analysis.addRelationship(intelligence, tutoring, "correlate")


# Specify data (schema)
test_score.specifyData(dtype="numeric")
intelligence.specifyData(dtype="numeric")
tutoring.specifyData(dtype="nominal", categories=["After school", "Before school"])
# tutoring.specifyData(dtype="nominal", categories=["After school", "Before school", "None"])

# Get valid statistical models
# Assert statistical properties needed for linear regression
### PHASE 1: IV, DV SPECIFICATION
### PHASE 2: EFFECTS SETS GENERATION
# Generate effects sets
effects = analysis.generate_effects_sets(ivs=[intelligence, tutoring], dv=test_score)

# Add individual variable assertions
# Some assertions are inferred from data schema, see above
self.assertTrue(test_score.getVariable().has_property_value(prop="dtype", val="numeric"))
self.assertTrue(intelligence.getVariable().has_property_value(prop="dtype", val="numeric"))
self.assertTrue(tutoring.getVariable().has_property_value(prop="dtype", val="nominal"))

self.assertFalse(test_score.getVariable().has_property(prop="cardinality"))
self.assertFalse(intelligence.getVariable().has_property(prop="cardinality"))
self.assertTrue(tutoring.getVariable().has_property_value(prop="cardinality", val="binary"))
### PHASE 3A: ASSERTIONS ABOUT VARIABLES AND SETS OF VARIABLES
# Specify data (schema) --> generates assertions about variables/data
# If have data: automatically detect and verify these based on uploaded data
test_score.specifyData(dtype="numeric")
intelligence.specifyData(dtype="numeric")
tutoring.specifyData(dtype="nominal", categories=["After school", "Before school"])

# Add assertions that pertain to effects sets
# THIS MIMICS END-USERS SELECTING A SET OF EFFECTS TO MODEL
linear_reg_es = None
for es in effects:
if es.to_dict() == DataForTests.expected_effects_set[2]:
linear_reg_es = es
break
# Add assertions that pertain to effects sets
linear_reg_es.assert_property(prop="tolerate_correlation", val=True)

# Convert linear regression model EffectSet to statistical model
linear_reg_sm = StatisticalModel.create(model_type='linear_regression', effect_set=linear_reg_es)

# Add assertions that pertain to the models
# Note: Make assertions that will involve interaction (interactions compile to these statements)
# Note: These assertions should be made on the *MODEL* not the *Effect Set*
linear_reg_sm.get_residuals().assert_property(prop="distribution", val="normal")
linear_reg_sm.get_residuals().assert_property(prop="homoscedastic", val=True)

# TODO: Compile all these assertions into constraints
# Should be in KnowledgeBase class or main Tisane class?

# Query KB for statistical models
analysis.start_model(linear_reg_es)
linear_reg_es.assert_property(prop="normal_residuals", val=True)
linear_reg_es.assert_property(prop="homoscedastic_residuals", val=True)

# PHASE 3B: QUERYING KNOWLEDGE BASE
# TODO: TEST INCREMENTAL SOLVING ASPECT
valid_models = analysis.start_model(effect_set=linear_reg_es)
print(valid_models) # "Linear Regression"

class DataForTests:
test_score = ts.Concept("Test Score")
Expand Down
7 changes: 4 additions & 3 deletions tisane/asp/generic_constraints.lp
@@ -1,4 +1,5 @@
normal_residuals(X, Y) :- variable(X), variable(Y), numeric_or_categorical(X), numeric_or_categorical(Y).
normal_residuals(X, Y) :- variable(X), variable(Y).
%, numeric_or_categorical(X), numeric_or_categorical(Y).
numeric(V) :- variable(V), not categorical(V).
two_categories(V) :- variable(V), categorical(V).
transformed(V) :- variable(V).
Expand All @@ -10,7 +11,7 @@ linear_regression(X, Y) :- variable(X),
homoscedastic_residuals(X, Y),
not multicollinear(X, Y),
numeric(Y),
numeric_or_categorical(X),
% numeric_or_categorical(X),
transformed(X),
transformed(Y).

Expand All @@ -21,7 +22,7 @@ logistic_regression(X, Y) :- variable(X),
homoscedastic_residuals(X, Y),
not multicollinear(X, Y),
two_categories(Y),
numeric_or_categorical(X),
% numeric_or_categorical(X),
transformed(X),
transformed(Y).

Expand Down
8 changes: 4 additions & 4 deletions tisane/asp/knowledge_base.py
Expand Up @@ -20,7 +20,7 @@ def absolute_path(p: str) -> str:

def format_concept_variable_constraint(concept: Concept, key: str, val: str):
c_name = concept.getVariableName()

## Variable constraints
if key.upper() == 'DTYPE':
if val == 'numeric':
Expand Down Expand Up @@ -113,7 +113,6 @@ def generate_constraints(self, name: str, ivs: List[Concept], dv: List[Concept])
# Are there any digits in the line indicating arity?
if re.search(r'[1-9]\D', line):
clauses = line.split("/")
import pdb; pdb.set_trace()
new_line = clauses[0] + "/" + str(len(ivs) + len(dv)) + ".\n"
else:
new_line = line
Expand Down Expand Up @@ -235,7 +234,7 @@ def get_concept_constraints(self, concept: Concept):
# add constraints that ground the variables
c_name = concept.name.lower().replace(' ', '_')
assertions.append(f'variable({c_name}).')

# add constraints that pertain to properties of the variables
if concept.has_assertions():
assert_dict = concept.get_assertions()
Expand Down Expand Up @@ -267,7 +266,8 @@ def query(self, file_name: str, assertions: list):

# Read file in as a string
constraints = None
with open(file_name, 'r') as f:
file_abs_path = absolute_path(file_name)
with open(file_abs_path, 'r') as f:
constraints = f.read()

# Add assertions to read-in file
Expand Down
19 changes: 19 additions & 0 deletions tisane/asp/specific_constraints_test0.lp
@@ -0,0 +1,19 @@
% Specific constraints for the end-to-end sample program, specialized to
% arity 3: two independent variables (X0, X1) and one dependent variable Y.
% Mirrors the generic rules in generic_constraints.lp.

% Residual assumptions currently hold for any declared variables; the
% numeric_or_categorical guards are commented out (next line and inline below).
normal_residuals(X0, X1, Y) :- variable(X0), variable(X1), variable(Y).
%, numeric_or_categorical(X), numeric_or_categorical(Y).
% A variable is numeric unless it is explicitly declared categorical.
numeric(V) :- variable(V), not categorical(V).
% NOTE(review): name suggests "exactly two categories", but the rule only
% checks categorical(V) — confirm whether a cardinality check is intended.
two_categories(V) :- variable(V), categorical(V).
% Every variable is treated as (identity-)transformed by default.
transformed(V) :- variable(V).
% The IV pair is multicollinear unless correlation is explicitly tolerated.
multicollinear(X0, X1, Y) :- variable(X0), variable(X1), variable(Y), not tolerate_correlation(X0, X1).

% Linear regression is valid when the residual assumptions hold, the IVs are
% not multicollinear, and the DV is numeric. The trailing % comments out the
% rest of that physical line; the rule continues on the following line.
linear_regression(X0, X1, Y) :- variable(X0), variable(X1), variable(Y), normal_residuals(X0, X1, Y), homoscedastic_residuals(X0, X1, Y), not multicollinear(X0, X1, Y), numeric(Y), %numeric_or_categorical(X),
transformed(X0), transformed(X1), transformed(Y).


% Logistic regression is the same except the DV must be two-category.
logistic_regression(X0, X1, Y) :- variable(X0), variable(X1), variable(Y), normal_residuals(X0, X1, Y), homoscedastic_residuals(X0, X1, Y), not multicollinear(X0, X1, Y), two_categories(Y), %numeric_or_categorical(X),
transformed(X0), transformed(X1), transformed(Y).

%%%%%% CONTROL OUTPUT %%%%%%
% Suppress all atoms by default; only show the model predicates below.
#show 0.

#show linear_regression/3.
#show logistic_regression/3.
40 changes: 4 additions & 36 deletions tisane/asp/test_constraints.lp
@@ -1,4 +1,5 @@
normal_residuals(X, Y) :- variable(X), variable(Y), numeric_or_categorical(X), numeric_or_categorical(Y).
normal_residuals(X, Y) :- variable(X), variable(Y).
%, numeric_or_categorical(X), numeric_or_categorical(Y).
numeric(V) :- variable(V), not categorical(V).
two_categories(V) :- variable(V), categorical(V).
transformed(V) :- variable(V).
Expand All @@ -10,7 +11,7 @@ linear_regression(X, Y) :- variable(X),
homoscedastic_residuals(X, Y),
not multicollinear(X, Y),
numeric(Y),
numeric_or_categorical(X),
%numeric_or_categorical(X),
transformed(X),
transformed(Y).

Expand All @@ -21,42 +22,9 @@ logistic_regression(X, Y) :- variable(X),
homoscedastic_residuals(X, Y),
not multicollinear(X, Y),
two_categories(Y),
numeric_or_categorical(X),
%numeric_or_categorical(X),
transformed(X),
transformed(Y).
variable(intelligence). % unsafe !!!
variable(score).
normal_residuals(intelligence, score).
homoscedastic_residuals(intelligence, score).
tolerate_correlation(intelligence).
%numeric(score).
categorical(score).
two_categories(score).
numeric_or_categorical(intelligence).
transformed(intelligence).
transformed(score).

%* FOR TESTING AND DEVELOPMENT
variable(intelligence).
variable(tutoring).
variable(score).
normal_residuals(intelligence, tutoring, score).
homoscedastic_residuals(intelligence, tutoring, score).
tolerate_correlation(intelligence, tutoring).
%numeric(score).
numeric(intelligence).

numeric_or_categorical(intelligence).
numeric_or_categorical(tutoring).
numeric_or_categorical(score).

categorical(tutoring).
categorical(score).
two_categories(score).
%numeric_or_categorical(intelligence).
transformed(intelligence).
transformed(score).
*%

%%%%%% CONTROL OUTPUT %%%%%%
#show 0.
Expand Down
7 changes: 6 additions & 1 deletion tisane/concept_graph.py
Expand Up @@ -30,11 +30,12 @@ class ConceptGraph(object):
# dict of concepts in the _graph.
# We use this rather than store concepts directly in the graph because Python passes-by-object-reference.
# This means that the Concepts in ConceptGraph will reflect changes made to the Concept objects externally
# _concepts : Dict[Concept]
# _concepts : Dict[str, Concept]


def __init__(self):
    """Create an empty ConceptGraph backed by a networkx MultiDiGraph."""
    self._graph = nx.MultiDiGraph()
    # Disabled name->Concept cache (see class comment); nodes currently
    # store Concept objects directly as the 'concept' node attribute.
    # self._concepts = dict()

def __repr__(self):
    """Debug representation: the underlying graph object's attribute dict."""
    graph_state = self._graph.__dict__
    return str(graph_state)
Expand All @@ -52,6 +53,7 @@ def addNode(self, con: Concept): # concepts are indexed by their names. Concept
if not self._graph:
self._graph = nx.MultiDiGraph()
self._graph.add_node(con.name, concept=con)
# self._concepts[con.name] = con

def addEdge(self, start_con: Concept, end_con: Concept, edge_type: str):
start_node = None
Expand Down Expand Up @@ -85,9 +87,12 @@ def getConceptNode(self, con: Concept):

# @returns Concept with concept_name in this conceptual graph
def getConcept(self, concept_name: str) -> Concept:
    """Return the Concept stored under ``concept_name``, or None if absent.

    Nodes are keyed by concept name and carry the Concept object in their
    ``concept`` attribute (set in addNode via ``add_node(con.name,
    concept=con)``), so a linear scan over the node/attribute pairs suffices.
    """
    for name, concept in self._graph.nodes('concept'):
        if name == concept_name:
            # addNode always stores a Concept; guard against corruption.
            assert isinstance(concept, Concept)
            return concept
    return None
Expand Down

0 comments on commit fa86a7a

Please sign in to comment.