CPD function argument names should match the case of the parent variable names

causalincentives · Dec 21, 2021 · e50ee06
1 parent b48c74d
commit e50ee06
Show file tree

Hide file tree

Showing 19 changed files with 4,282 additions and 4,162 deletions.
diff --git a/README.md b/README.md
@@ -38,7 +38,7 @@ cid = pycid.CID([
 # specify the causal relationships with CPDs using keyword arguments
 cid.add_cpds(S = pycid.discrete_uniform([-1, 1]), # S is -1 or 1 with equal probability
              D=[-1, 1], # the permitted action choices for D are -1 and 1
-             U=lambda s, d: s * d) # U is the product of S and D (arguments lowercase the variable names)
+             U=lambda S, D: S * D) # U is the product of S and D (argument names match parent names)
 
 # Draw the result
 cid.draw()

diff --git a/notebooks/CID_Basics_Tutorial.ipynb b/notebooks/CID_Basics_Tutorial.ipynb
diff --git a/notebooks/CID_Incentives_Tutorial.ipynb b/notebooks/CID_Incentives_Tutorial.ipynb
diff --git a/notebooks/MACID_Basics_Tutorial.ipynb b/notebooks/MACID_Basics_Tutorial.ipynb
diff --git a/notebooks/PyCID basic example.ipynb b/notebooks/PyCID basic example.ipynb
diff --git a/notebooks/Why_fair_labels_may_yield_unfair_models_AAAI_22.ipynb b/notebooks/Why_fair_labels_may_yield_unfair_models_AAAI_22.ipynb
@@ -68,7 +68,7 @@
         "import networkx as nx\n",
         "import random"
       ],
-      "execution_count": 1,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -109,7 +109,7 @@
         "  \"\"\"ITV is possible under P-admissible loss\"\"\"\n",
         "  return theorem9_gc(cid) and not 'A' in cid.get_parents(list(cid.decisions)[0]) and cid.is_active_trail('A', 'U', [list(cid.decisions)[0]] + cid.get_parents(list(cid.decisions)[0]))"
       ],
-      "execution_count": 2,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -132,7 +132,7 @@
       "metadata": {
         "id": "0RCzeQhGlaPz"
       },
-      "execution_count": 3,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -168,9 +168,9 @@
         "\n",
         "hiring.add_cpds(\n",
         "    A = pycid.bernoulli(0.5),\n",
-        "    D = lambda a: pycid.noisy_copy(a, 0.8),\n",
-        "    Y = lambda d: pycid.bernoulli(0.49 + 0.02*d),\n",
-        "    U = lambda y, yh: int(y==yh),\n",
+        "    D = lambda A: pycid.noisy_copy(A, 0.8),\n",
+        "    Y = lambda D: pycid.bernoulli(0.49 + 0.02*D),\n",
+        "    U = lambda Y, Yh: int(Y==Yh),\n",
         "    Yh = [0, 1],\n",
         ")\n",
         "print(hiring.check_model())\n",
@@ -185,7 +185,7 @@
         "id": "0g2o4MFqli5q",
         "outputId": "2fbe277c-148e-4aa7-d626-6000ebf0ecb7"
       },
-      "execution_count": 4,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -251,18 +251,18 @@
         "music_cid.add_cpds(\n",
         "      A = pycid.bernoulli(0.5),\n",
         "      M = pycid.bernoulli(0.5),\n",
-        "      T = lambda a, m: {0: None,  # this becomes 1-P(T=1)\n",
-        "                        1: (0.05 if m==0 else \n",
-        "                            0.9  if m==1 and a==1 else\n",
+        "      T = lambda A, M: {0: None,  # this becomes 1-P(T=1)\n",
+        "                        1: (0.05 if M==0 else \n",
+        "                            0.9  if M==1 and A==1 else\n",
         "                            1)},\n",
-        "      Y = lambda m: pycid.noisy_copy(m, 0.95),\n",
-        "      U = lambda y, yh: -(yh-y)**2,\n",
+        "      Y = lambda M: pycid.noisy_copy(M, 0.95),\n",
+        "      U = lambda Y, Yh: -(Yh-Y)**2,\n",
         "      Yh = [0, 0.25, 0.5, 0.75, 1]\n",
         "  )\n",
         "music_cid.draw()\n",
         "assess_unfairness(music_cid)"
       ],
-      "execution_count": 5,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "display_data",
@@ -313,12 +313,12 @@
         "music_a_feature.add_cpds(\n",
         "      A = pycid.bernoulli(0.5),\n",
         "      M = pycid.bernoulli(0.5),\n",
-        "      T = lambda a, m: {0: None,  # this becomes 1-P(T=1)\n",
-        "                        1: (0.05 if m==0 else \n",
-        "                            0.9  if m==1 and a==1 else\n",
+        "      T = lambda A, M: {0: None,  # this becomes 1-P(T=1)\n",
+        "                        1: (0.05 if M==0 else \n",
+        "                            0.9  if M==1 and A==1 else\n",
         "                            1)},\n",
-        "      Y = lambda m: pycid.noisy_copy(m, 0.95),\n",
-        "      U = lambda y, yh: int(y == yh),\n",
+        "      Y = lambda M: pycid.noisy_copy(M, 0.95),\n",
+        "      U = lambda Y, Yh: int(Y == Yh),\n",
         "      Yh = [0, 1]\n",
         "  )\n",
         "music_a_feature.draw()\n",
@@ -332,7 +332,7 @@
         "id": "muy3bNXdQ8z7",
         "outputId": "08a2632b-bd34-44de-fac7-a93acf592d1e"
       },
-      "execution_count": 6,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "display_data",
@@ -385,7 +385,7 @@
         "id": "9l0xrtCCWD_J",
         "outputId": "de8588f5-05f3-477c-fec5-a5a99e01f1ed"
       },
-      "execution_count": 7,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -454,7 +454,7 @@
         "    # add random cpds and a 0-1 loss function\n",
         "    cid.add_cpds(\n",
         "        Yh = [0, 1],\n",
-        "        U = lambda yh, y: int(yh==y),  # 0-1 loss function\n",
+        "        U = lambda Yh, Y: int(Yh==Y),  # 0-1 loss function\n",
         "        A = pycid.random_cpd.RandomCPD(),\n",
         "        Y = pycid.random_cpd.RandomCPD(),\n",
         "      )\n",
@@ -463,7 +463,7 @@
         "\n",
         "    return cid"
       ],
-      "execution_count": 8,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -492,7 +492,7 @@
         "cid.draw()\n",
         "assess_unfairness(cid)"
       ],
-      "execution_count": 9,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "display_data",
@@ -554,7 +554,7 @@
         "    else:\n",
         "      continue  # if the CID doesn't satisfy the graphical condition, we resample"
       ],
-      "execution_count": 10,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -612,7 +612,7 @@
         "    else:\n",
         "      continue  # if the CID doesn't satisfy the graphical condition, we resample"
       ],
-      "execution_count": 11,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -650,8 +650,8 @@
       "source": [
         ""
       ],
-      "execution_count": 11,
+      "execution_count": null,
       "outputs": []
     }
   ]
-}
+}
diff --git a/notebooks/fairness.ipynb b/notebooks/fairness.ipynb
diff --git a/pycid/core/causal_bayesian_network.py b/pycid/core/causal_bayesian_network.py
@@ -90,15 +90,6 @@ def __init__(self, edges: Iterable[Tuple[str, str]]):
         self.model = self.Model(self)
         super().__init__(ebunch=edges)
 
-        self._lowercase_to_variable: Dict[str, str] = {}
-        for node in self.nodes:
-            if node.lower() in self._lowercase_to_variable:
-                raise ValueError(
-                    f'Name conflict: Both "{node}" and "{self._lowercase_to_variable[node.lower()]}" '
-                    f'have the same lowercase "{node.lower()}".'
-                )
-            self._lowercase_to_variable[node.lower()] = node
-
     def remove_edge(self, u: str, v: str) -> None:
         """removes an edge u to v that exists from the CBN"""
         super().remove_edge(u, v)
@@ -116,7 +107,7 @@ def add_cpds(self, *cpds: TabularCPD, **relationships: Relationship) -> None:
         Add the given CPDs and initialize StochasticFunctionCPDs
         """
         for cpd in cpds:
-            self.model.__setitem__(cpd.variable, cpd)  # type: ignore
+            self.model[cpd.variable] = cpd  # type: ignore
         self.model.update(relationships)
 
     def remove_cpds(self, *cpds: Union[str, TabularCPD]) -> None:
@@ -137,15 +128,6 @@ def is_structural_causal_model(self) -> bool:
                         return False
         return True
 
-    def _fix_lowercase_variables(self, outcome_dict: Dict[str, Outcome]) -> None:
-        """
-        Outcomes are sometimes specified in terms of lowercase versions of variable names.
-        They need to be converted, before passed to factor.query
-        """
-        for var in set(outcome_dict).intersection(self._lowercase_to_variable):
-            outcome_dict[self._lowercase_to_variable[var]] = outcome_dict[var]
-            del outcome_dict[var]
-
     def query(
         self, query: Iterable[str], context: Dict[str, Outcome], intervention: Dict[str, Outcome] = None
     ) -> BeliefPropagation:
@@ -162,8 +144,6 @@ def query(
 
         intervention: Interventions to apply. A dictionary mapping node => outcome.
         """
-        self._fix_lowercase_variables(context)
-
         for variable, outcome in context.items():
             if outcome not in self.model.domain[variable]:
                 raise ValueError(f"The outcome {outcome} is not in the domain of {variable}")
@@ -202,7 +182,6 @@ def intervene(self, intervention: Dict[str, Outcome]) -> None:
         ----------
         intervention: Interventions to apply. A dictionary mapping node => value.
         """
-        self._fix_lowercase_variables(intervention)
         for variable in intervention:
             for p in self.get_parents(variable):  # remove ingoing edges
                 self.remove_edge(p, variable)

diff --git a/pycid/core/cpd.py b/pycid/core/cpd.py
@@ -60,8 +60,8 @@ def __init__(
         stochastic_function: A stochastic function that maps parent outcomes to a distribution
         over outcomes for this variable (see doc-string for class).
         The different parents are identified by name: the arguments to the function must
-        be lowercase versions of the names of the parent variables. For example, if X has
-        parents Y, S1, and Obs, the arguments to function must be y, s1, and obs.
+        match the names of the parent variables. For example, if X has
+        parents Y, S1, and Obs, the arguments to function must be Y, S1, and Obs.
 
         domain: An optional specification of the variable's domain.
             Must include all values this variable can take as a result of its function.
@@ -149,11 +149,10 @@ def check_function_arguments_match_parent_names(self) -> None:
         sig = inspect.signature(self.stochastic_function).parameters
         arg_kinds = [arg_kind.kind.name for arg_kind in sig.values()]
         args = set(sig)
-        lower_case_parents = {p.lower() for p in self.cbn.get_parents(self.variable)}
-        if "VAR_KEYWORD" not in arg_kinds and args != lower_case_parents:
+        if "VAR_KEYWORD" not in arg_kinds and args != set(self.cbn.get_parents(self.variable)):
             raise ValueError(
                 f"function for {self.variable} mismatch parents on"
-                f" {args.symmetric_difference(lower_case_parents)}, "
+                f" {args.symmetric_difference(set(self.cbn.get_parents(self.variable)))}, "
             )
 
     def parent_values(self) -> Iterator[Dict[str, Outcome]]:
@@ -165,7 +164,7 @@ def parent_values(self) -> Iterator[Dict[str, Outcome]]:
         except KeyError:
             raise ParentsNotReadyException(f"Parent {p} of {self.variable} not yet instantiated")
         for parent_values in itertools.product(*parent_values_list):
-            yield {p.lower(): parent_values[i] for i, p in enumerate(self.cbn.get_parents(self.variable))}
+            yield {p: parent_values[i] for i, p in enumerate(self.cbn.get_parents(self.variable))}
 
     def possible_values(self) -> List[Outcome]:
         """The possible values this variable can take, given the values the parents can take"""

diff --git a/pycid/core/macid_base.py b/pycid/core/macid_base.py
@@ -151,8 +151,6 @@ def query(
 
         intervention: Interventions to apply. A dictionary mapping node => outcome.
         """
-
-        self._fix_lowercase_variables(context)
         for variable, outcome in context.items():
             if outcome not in self.get_cpds(variable).domain:
                 raise ValueError(f"The outcome {outcome} is not in the domain of {variable}")
@@ -290,7 +288,7 @@ def arg2idx(pv: Dict[str, Outcome]) -> int:
             idx = 0
             for i, parent in enumerate(parents):
                 name_to_no: Dict[Outcome, int] = self.get_cpds(parent).name_to_no[parent]
-                idx += name_to_no[pv[parent.lower()]] * int(np.product(parent_cardinalities[:i]))
+                idx += name_to_no[pv[parent]] * int(np.product(parent_cardinalities[:i]))
             assert 0 <= idx <= number_of_decision_contexts
             return idx
 
@@ -400,8 +398,8 @@ def impute_conditional_expectation_decision(self, decision: str, y: str) -> None
 
         @lru_cache(maxsize=1000)
         def cond_exp_policy(**pv: Outcome) -> float:
-            if y.lower() in pv:
-                return pv[y.lower()]  # type: ignore
+            if y in pv:
+                return pv[y]  # type: ignore
             else:
                 return copy.expected_value([y], pv)[0]
 

diff --git a/pycid/examples/simple_cbns.py b/pycid/examples/simple_cbns.py
@@ -4,23 +4,23 @@
 
 def get_3node_cbn() -> CausalBayesianNetwork:
     cbn = CausalBayesianNetwork([("S", "D"), ("S", "U"), ("D", "U")])
-    cbn.add_cpds(S=discrete_uniform([-1, 1]), D=lambda s: s + 1, U=lambda s, d: s * d)
+    cbn.add_cpds(S=discrete_uniform([-1, 1]), D=lambda S: S + 1, U=lambda S, D: S * D)
     return cbn
 
 
 def get_3node_uniform_cbn() -> CausalBayesianNetwork:
     cbn = CausalBayesianNetwork([("A", "C"), ("A", "B"), ("B", "C")])
-    cbn.add_cpds(A=bernoulli(0.5), B=bernoulli(0.5), C=lambda a, b: a * b)
+    cbn.add_cpds(A=bernoulli(0.5), B=bernoulli(0.5), C=lambda A, B: A * B)
     return cbn
 
 
 def get_minimal_cbn() -> CausalBayesianNetwork:
     cbn = CausalBayesianNetwork([("A", "B")])
-    cbn.add_cpds(A=discrete_uniform([0, 1]), B=lambda a: a)
+    cbn.add_cpds(A=discrete_uniform([0, 1]), B=lambda A: A)
     return cbn
 
 
 def get_fork_cbn() -> CausalBayesianNetwork:
     cbn = CausalBayesianNetwork([("A", "C"), ("B", "C")])
-    cbn.add_cpds(A=discrete_uniform([1, 2]), B=discrete_uniform([3, 4]), C=lambda a, b: a * b)
+    cbn.add_cpds(A=discrete_uniform([1, 2]), B=discrete_uniform([3, 4]), C=lambda A, B: A * B)
     return cbn