Skip to content

Commit

Permalink
Debug concepts not updating in ConceptGraph. Add sample program test case. WIP.
Browse files Browse the repository at this point in the history
  • Loading branch information
emjun committed Jan 13, 2021
1 parent 1a4535e commit fa86a7a
Show file tree
Hide file tree
Showing 7 changed files with 142 additions and 109 deletions.
147 changes: 95 additions & 52 deletions tests/test_end_to_end.py
Expand Up @@ -6,85 +6,128 @@

class EndToEndTests(unittest.TestCase):

def test_generate_effects_sets(self):
analysis = ts.Tisane(task="explanation") # analysis has one task
# def test_generate_effects_sets(self):
# analysis = ts.Tisane(task="explanation") # analysis has one task

test_score = ts.Concept("Test Score")
intelligence = ts.Concept("Intelligence")
tutoring = ts.Concept("Tutoring")
concepts = [test_score, intelligence, tutoring]
# test_score = ts.Concept("Test Score")
# intelligence = ts.Concept("Intelligence")
# tutoring = ts.Concept("Tutoring")
# concepts = [test_score, intelligence, tutoring]

analysis.addRelationship(intelligence, test_score, "cause")
analysis.addRelationship(tutoring, test_score, "cause")
analysis.addRelationship(intelligence, tutoring, "correlate")
# analysis.addRelationship(intelligence, test_score, "cause")
# analysis.addRelationship(tutoring, test_score, "cause")
# analysis.addRelationship(intelligence, tutoring, "correlate")

effects = analysis.generate_effects_sets(ivs=[intelligence, tutoring], dv=test_score)
# effects = analysis.generate_effects_sets(ivs=[intelligence, tutoring], dv=test_score)

# check total number of effect sets
self.assertEqual(len(effects), 7)
# check each effect set is valid
for es in effects:
es_dict = es.to_dict()
self.assertTrue(es_dict in DataForTests.expected_effects_set)
# # check total number of effect sets
# self.assertEqual(len(effects), 7)
# # check each effect set is valid
# for es in effects:
# es_dict = es.to_dict()
# self.assertTrue(es_dict in DataForTests.expected_effects_set)


def test_effects_ASP(self):
# def test_effects_ASP(self):
# analysis = ts.Tisane(task="explanation")

# # Add concepts
# test_score = ts.Concept("Test Score")
# intelligence = ts.Concept("Intelligence")
# tutoring = ts.Concept("Tutoring")
# concepts = [test_score, intelligence, tutoring]

# # Add relationships
# analysis.addRelationship(intelligence, test_score, "cause")
# analysis.addRelationship(tutoring, test_score, "cause")
# analysis.addRelationship(intelligence, tutoring, "correlate")

# # Specify data (schema)
# test_score.specifyData(dtype="numeric")
# intelligence.specifyData(dtype="numeric")
# tutoring.specifyData(dtype="nominal", categories=["After school", "Before school"])
# # tutoring.specifyData(dtype="nominal", categories=["After school", "Before school", "None"])

# # Get valid statistical models
# # Assert statistical properties needed for linear regression
# # Generate effects sets
# effects = analysis.generate_effects_sets(ivs=[intelligence, tutoring], dv=test_score)

# # Add individual variable assertions
# # Some assertions are inferred from data schema, see above
# self.assertTrue(test_score.getVariable().has_property_value(prop="dtype", val="numeric"))
# self.assertTrue(intelligence.getVariable().has_property_value(prop="dtype", val="numeric"))
# self.assertTrue(tutoring.getVariable().has_property_value(prop="dtype", val="nominal"))

# self.assertFalse(test_score.getVariable().has_property(prop="cardinality"))
# self.assertFalse(intelligence.getVariable().has_property(prop="cardinality"))
# self.assertTrue(tutoring.getVariable().has_property_value(prop="cardinality", val="binary"))

# # Add assertions that pertain to effects sets
# linear_reg_es = None
# for es in effects:
# if es.to_dict() == DataForTests.expected_effects_set[2]:
# linear_reg_es = es
# break
# linear_reg_es.assert_property(prop="tolerate_correlation", val=True)

# # Convert linear regression model EffectSet to statistical model
# linear_reg_sm = StatisticalModel.create(model_type='linear_regression', effect_set=linear_reg_es)

# # Add assertions that pertain to the models
# # Note: Make assertions that will involve interaction (interactions compile to these statements)
# # Note: These assertions should be made on the *MODEL* not the *Effect Set*
# linear_reg_sm.get_residuals().assert_property(prop="distribution", val="normal")
# linear_reg_sm.get_residuals().assert_property(prop="homoscedastic", val=True)

# # TODO: Compile all these assertions into constraints
# # Should be in KnowledgeBase class or main Tisane class?

# # Query KB for statistical models
# analysis.start_model(linear_reg_es)

def test_sample_tisane_program(self):
analysis = ts.Tisane(task="explanation")

### PHASE 0: CONCEPTUAL RELATIONSHIPS
# Add concepts
test_score = ts.Concept("Test Score")
intelligence = ts.Concept("Intelligence")
tutoring = ts.Concept("Tutoring")
concepts = [test_score, intelligence, tutoring]


# Add relationships
analysis.addRelationship(intelligence, test_score, "cause")
analysis.addRelationship(tutoring, test_score, "cause")
analysis.addRelationship(intelligence, tutoring, "correlate")


# Specify data (schema)
test_score.specifyData(dtype="numeric")
intelligence.specifyData(dtype="numeric")
tutoring.specifyData(dtype="nominal", categories=["After school", "Before school"])
# tutoring.specifyData(dtype="nominal", categories=["After school", "Before school", "None"])

# Get valid statistical models
# Assert statistical properties needed for linear regression
### PHASE 1: IV, DV SPECIFICATION
### PHASE 2: EFFECTS SETS GENERATION
# Generate effects sets
effects = analysis.generate_effects_sets(ivs=[intelligence, tutoring], dv=test_score)

# Add individual variable assertions
# Some assertions are inferred from data schema, see above
self.assertTrue(test_score.getVariable().has_property_value(prop="dtype", val="numeric"))
self.assertTrue(intelligence.getVariable().has_property_value(prop="dtype", val="numeric"))
self.assertTrue(tutoring.getVariable().has_property_value(prop="dtype", val="nominal"))

self.assertFalse(test_score.getVariable().has_property(prop="cardinality"))
self.assertFalse(intelligence.getVariable().has_property(prop="cardinality"))
self.assertTrue(tutoring.getVariable().has_property_value(prop="cardinality", val="binary"))
### PHASE 3A: ASSERTIONS ABOUT VARIABLES AND SETS OF VARIABLES
# Specify data (schema) --> generates assertions about variables/data
# If have data: automatically detect and verify these based on uploaded data
test_score.specifyData(dtype="numeric")
intelligence.specifyData(dtype="numeric")
tutoring.specifyData(dtype="nominal", categories=["After school", "Before school"])

# Add assertions that pertain to effects sets
# THIS MIMICS END-USERS SELECTING A SET OF EFFECTS TO MODEL
linear_reg_es = None
for es in effects:
if es.to_dict() == DataForTests.expected_effects_set[2]:
linear_reg_es = es
break
# Add assertions that pertain to effects sets
linear_reg_es.assert_property(prop="tolerate_correlation", val=True)

# Convert linear regression model EffectSet to statistical model
linear_reg_sm = StatisticalModel.create(model_type='linear_regression', effect_set=linear_reg_es)

# Add assertions that pertain to the models
# Note: Make assertions that will involve interaction (interactions compile to these statements)
# Note: These assertions should be made on the *MODEL* not the *Effect Set*
linear_reg_sm.get_residuals().assert_property(prop="distribution", val="normal")
linear_reg_sm.get_residuals().assert_property(prop="homoscedastic", val=True)

# TODO: Compile all these assertions into constraints
# Should be in KnowledgeBase class or main Tisane class?

# Query KB for statistical models
analysis.start_model(linear_reg_es)
linear_reg_es.assert_property(prop="normal_residuals", val=True)
linear_reg_es.assert_property(prop="homoscedastic_residuals", val=True)

# PHASE 3B: QUERYING KNOWLEDGE BASE
# TODO: TEST INCREMENTAL SOLVING ASPECT
valid_models = analysis.start_model(effect_set=linear_reg_es)
print(valid_models) # "Linear Regression"

class DataForTests:
test_score = ts.Concept("Test Score")
Expand Down
7 changes: 4 additions & 3 deletions tisane/asp/generic_constraints.lp
@@ -1,4 +1,5 @@
normal_residuals(X, Y) :- variable(X), variable(Y), numeric_or_categorical(X), numeric_or_categorical(Y).
normal_residuals(X, Y) :- variable(X), variable(Y).
%, numeric_or_categorical(X), numeric_or_categorical(Y).
numeric(V) :- variable(V), not categorical(V).
two_categories(V) :- variable(V), categorical(V).
transformed(V) :- variable(V).
Expand All @@ -10,7 +11,7 @@ linear_regression(X, Y) :- variable(X),
homoscedastic_residuals(X, Y),
not multicollinear(X, Y),
numeric(Y),
numeric_or_categorical(X),
% numeric_or_categorical(X),
transformed(X),
transformed(Y).

Expand All @@ -21,7 +22,7 @@ logistic_regression(X, Y) :- variable(X),
homoscedastic_residuals(X, Y),
not multicollinear(X, Y),
two_categories(Y),
numeric_or_categorical(X),
% numeric_or_categorical(X),
transformed(X),
transformed(Y).

Expand Down
8 changes: 4 additions & 4 deletions tisane/asp/knowledge_base.py
Expand Up @@ -20,7 +20,7 @@ def absolute_path(p: str) -> str:

def format_concept_variable_constraint(concept: Concept, key: str, val: str):
c_name = concept.getVariableName()

## Variable constraints
if key.upper() == 'DTYPE':
if val == 'numeric':
Expand Down Expand Up @@ -113,7 +113,6 @@ def generate_constraints(self, name: str, ivs: List[Concept], dv: List[Concept])
# Are there any digits in the line indicating arity?
if re.search(r'[1-9]\D', line):
clauses = line.split("/")
import pdb; pdb.set_trace()
new_line = clauses[0] + "/" + str(len(ivs) + len(dv)) + ".\n"
else:
new_line = line
Expand Down Expand Up @@ -235,7 +234,7 @@ def get_concept_constraints(self, concept: Concept):
# add constraints that ground the variables
c_name = concept.name.lower().replace(' ', '_')
assertions.append(f'variable({c_name}).')

# add constraints that pertain to properties of the variables
if concept.has_assertions():
assert_dict = concept.get_assertions()
Expand Down Expand Up @@ -267,7 +266,8 @@ def query(self, file_name: str, assertions: list):

# Read file in as a string
constraints = None
with open(file_name, 'r') as f:
file_abs_path = absolute_path(file_name)
with open(file_abs_path, 'r') as f:
constraints = f.read()

# Add assertions to read-in file
Expand Down
19 changes: 19 additions & 0 deletions tisane/asp/specific_constraints_test0.lp
@@ -0,0 +1,19 @@
% Specific constraints for the end-to-end sample program, specialized to
% arity 3: two independent variables (X0, X1) and one dependent variable Y.
% Mirrors the generic rules in generic_constraints.lp.

% Residual assumptions currently hold for any declared variables; the
% numeric_or_categorical guards are commented out (next line and inline below).
normal_residuals(X0, X1, Y) :- variable(X0), variable(X1), variable(Y).
%, numeric_or_categorical(X), numeric_or_categorical(Y).
% A variable is numeric unless it is explicitly declared categorical.
numeric(V) :- variable(V), not categorical(V).
% NOTE(review): name suggests "exactly two categories", but the rule only
% checks categorical(V) — confirm whether a cardinality check is intended.
two_categories(V) :- variable(V), categorical(V).
% Every variable is treated as (identity-)transformed by default.
transformed(V) :- variable(V).
% The IV pair is multicollinear unless correlation is explicitly tolerated.
multicollinear(X0, X1, Y) :- variable(X0), variable(X1), variable(Y), not tolerate_correlation(X0, X1).

% Linear regression is valid when the residual assumptions hold, the IVs are
% not multicollinear, and the DV is numeric. The trailing % comments out the
% rest of that physical line; the rule continues on the following line.
linear_regression(X0, X1, Y) :- variable(X0), variable(X1), variable(Y), normal_residuals(X0, X1, Y), homoscedastic_residuals(X0, X1, Y), not multicollinear(X0, X1, Y), numeric(Y), %numeric_or_categorical(X),
transformed(X0), transformed(X1), transformed(Y).


% Logistic regression is the same except the DV must be two-category.
logistic_regression(X0, X1, Y) :- variable(X0), variable(X1), variable(Y), normal_residuals(X0, X1, Y), homoscedastic_residuals(X0, X1, Y), not multicollinear(X0, X1, Y), two_categories(Y), %numeric_or_categorical(X),
transformed(X0), transformed(X1), transformed(Y).

%%%%%% CONTROL OUTPUT %%%%%%
% Suppress all atoms by default; only show the model predicates below.
#show 0.

#show linear_regression/3.
#show logistic_regression/3.
40 changes: 4 additions & 36 deletions tisane/asp/test_constraints.lp
@@ -1,4 +1,5 @@
normal_residuals(X, Y) :- variable(X), variable(Y), numeric_or_categorical(X), numeric_or_categorical(Y).
normal_residuals(X, Y) :- variable(X), variable(Y).
%, numeric_or_categorical(X), numeric_or_categorical(Y).
numeric(V) :- variable(V), not categorical(V).
two_categories(V) :- variable(V), categorical(V).
transformed(V) :- variable(V).
Expand All @@ -10,7 +11,7 @@ linear_regression(X, Y) :- variable(X),
homoscedastic_residuals(X, Y),
not multicollinear(X, Y),
numeric(Y),
numeric_or_categorical(X),
%numeric_or_categorical(X),
transformed(X),
transformed(Y).

Expand All @@ -21,42 +22,9 @@ logistic_regression(X, Y) :- variable(X),
homoscedastic_residuals(X, Y),
not multicollinear(X, Y),
two_categories(Y),
numeric_or_categorical(X),
%numeric_or_categorical(X),
transformed(X),
transformed(Y).
variable(intelligence). % unsafe !!!
variable(score).
normal_residuals(intelligence, score).
homoscedastic_residuals(intelligence, score).
tolerate_correlation(intelligence).
%numeric(score).
categorical(score).
two_categories(score).
numeric_or_categorical(intelligence).
transformed(intelligence).
transformed(score).

%* FOR TESTING AND DEVELOPMENT
variable(intelligence).
variable(tutoring).
variable(score).
normal_residuals(intelligence, tutoring, score).
homoscedastic_residuals(intelligence, tutoring, score).
tolerate_correlation(intelligence, tutoring).
%numeric(score).
numeric(intelligence).

numeric_or_categorical(intelligence).
numeric_or_categorical(tutoring).
numeric_or_categorical(score).

categorical(tutoring).
categorical(score).
two_categories(score).
%numeric_or_categorical(intelligence).
transformed(intelligence).
transformed(score).
*%

%%%%%% CONTROL OUTPUT %%%%%%
#show 0.
Expand Down
7 changes: 6 additions & 1 deletion tisane/concept_graph.py
Expand Up @@ -30,11 +30,12 @@ class ConceptGraph(object):
# dict of concepts in the _graph.
# We use this rather than store concepts directly in the graph because Python passes-by-object-reference.
# This means that the Concepts in ConceptGraph will reflect changes made to the Concept objects externally
# _concepts : Dict[Concept]
# _concepts : Dict[str, Concept]


def __init__(self):
    """Create an empty ConceptGraph backed by a networkx MultiDiGraph."""
    self._graph = nx.MultiDiGraph()
    # Disabled name->Concept cache (see class comment); nodes currently
    # store Concept objects directly as the 'concept' node attribute.
    # self._concepts = dict()

def __repr__(self):
    """Debug representation: the underlying graph object's attribute dict."""
    graph_state = self._graph.__dict__
    return str(graph_state)
Expand All @@ -52,6 +53,7 @@ def addNode(self, con: Concept): # concepts are indexed by their names. Concept
if not self._graph:
self._graph = nx.MultiDiGraph()
self._graph.add_node(con.name, concept=con)
# self._concepts[con.name] = con

def addEdge(self, start_con: Concept, end_con: Concept, edge_type: str):
start_node = None
Expand Down Expand Up @@ -85,9 +87,12 @@ def getConceptNode(self, con: Concept):

# @returns Concept with concept_name in this conceptual graph
def getConcept(self, concept_name: str) -> Concept:
    """Return the Concept stored under ``concept_name``, or None if absent.

    Nodes are keyed by concept name and carry the Concept object in their
    ``concept`` attribute (set in addNode via ``add_node(con.name,
    concept=con)``), so a linear scan over the node/attribute pairs suffices.
    """
    for name, concept in self._graph.nodes('concept'):
        if name == concept_name:
            # addNode always stores a Concept; guard against corruption.
            assert isinstance(concept, Concept)
            return concept
    return None
Expand Down

0 comments on commit fa86a7a

Please sign in to comment.