prevent FutureWarnings (#231)
    Category: bugfix
    JIRA issue: MIC-3488

pandas will show a FutureWarning when passing a list of length 1 to groupby. There is also a FutureWarning about the way we were using df.loc[] to update the population.
Testing

Ran a small simulation all the way through and checked that the groupby objects produced the same results with the new and old methods. Checked that the change to the population view column update produced the same behavior as the old code.
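
A rough sketch of that kind of groupby equivalence check (not the project's actual test code; the column names here are invented):

import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b"], "value": [1, 2, 3]})

# Old style: grouping by a list of length 1, which is what triggers the
# FutureWarning on the pandas 1.5 line once the groups are iterated.
old = [group["value"].tolist() for _, group in df.groupby(["key"])]

# New style: pass the single column name directly; no warning is raised.
new = [group["value"].tolist() for _, group in df.groupby("key")]

assert old == new  # same groups, in the same order, with the same contents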
hussain-jafari committed Oct 12, 2022
1 parent 2dd154b commit 20feee2
Showing 3 changed files with 18 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/vivarium/framework/population/population_view.py
@@ -224,7 +224,7 @@ def update(self, population_update: Union[pd.DataFrame, pd.Series]) -> None:
                 state_table[column],
                 self._manager.adding_simulants,
             )
-            self._manager._population.loc[:, column] = column_update
+            self._manager._population[column] = column_update

     def __repr__(self):
         return (
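
The one-line change above swaps a `.loc[:, column]` assignment for plain column assignment. A minimal standalone illustration, with a toy frame standing in for the manager's population table (the names here are invented for the example):

import pandas as pd

population = pd.DataFrame({"age": [30.0, 40.0], "alive": [True, True]})
column_update = pd.Series([30.5, 40.5], index=population.index)

# Old form: the .loc slice assignment that produced the FutureWarning
# mentioned in the commit message.
# population.loc[:, "age"] = column_update

# New form: plain column assignment, which does not warn. When the update
# covers the full index and keeps the column's dtype, the resulting frame
# is the same either way, which is what the testing note above describes.
population["age"] = column_update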
3 changes: 2 additions & 1 deletion src/vivarium/framework/state_machine.py
@@ -83,7 +83,8 @@ def _groupby_new_state(
     """
     output_map = {o: i for i, o in enumerate(outputs)}
-    groups = pd.Series(index).groupby([output_map[d] for d in decisions])
+    # Pass grouper as Series to avoid FutureWarning about passing list of length 1
+    groups = pd.Series(index).groupby(pd.Series([output_map[d] for d in decisions]))
     results = [(outputs[i], pd.Index(sub_group.values)) for i, sub_group in groups]
     selected_outputs = [o for o, _ in results]
     for output in outputs:
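
A hedged sketch of the new grouper, with toy index values and decisions rather than the simulation's real states; the aside about length-1 lists is a reading of the comment in the diff, not something stated in the commit message:

import pandas as pd

index = pd.Index([10, 11, 12, 13])           # toy simulant ids
decisions = ["stay", "move", "stay", "move"]
outputs = ["stay", "move"]
output_map = {o: i for i, o in enumerate(outputs)}

# A bare list grouper that happens to have length 1 (e.g. a single simulant)
# is ambiguous to pandas and draws the FutureWarning. A Series grouper is
# unambiguous at any length and produces the same groups; both Series carry
# the default RangeIndex 0..3 here, so they line up row-for-row.
grouper = pd.Series([output_map[d] for d in decisions])
groups = pd.Series(index).groupby(grouper)

results = [(outputs[i], pd.Index(sub_group.values)) for i, sub_group in groups]
# -> [('stay', Index([10, 12])), ('move', Index([11, 13]))]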
18 changes: 15 additions & 3 deletions src/vivarium/interpolation.py
@@ -68,7 +68,11 @@ def __init__(
         if self.key_columns:
             # Since there are key_columns we need to group the table by those
             # columns to get the sub-tables to fit
-            sub_tables = self.data.groupby(list(self.key_columns))
+            if len(self.key_columns) == 1:
+                # Pass element instead of list of length 1 to avoid FutureWarning
+                sub_tables = self.data.groupby(self.key_columns[0])
+            else:
+                sub_tables = self.data.groupby(list(self.key_columns))
         else:
             # There are no key columns so we will fit the whole table
             sub_tables = {None: self.data}.items()
@@ -107,7 +111,11 @@ def __call__(self, interpolants: pd.DataFrame) -> pd.DataFrame:
         validate_call_data(interpolants, self.key_columns, self.parameter_columns)

         if self.key_columns:
-            sub_tables = interpolants.groupby(list(self.key_columns))
+            if len(self.key_columns) == 1:
+                # Pass element instead of list of length 1 to avoid FutureWarning
+                sub_tables = interpolants.groupby(self.key_columns[0])
+            else:
+                sub_tables = interpolants.groupby(list(self.key_columns))
         else:
             sub_tables = [(None, interpolants)]
         # specify some numeric type for columns so they won't be objects but will updated with whatever
@@ -212,7 +220,11 @@ def check_data_complete(data, parameter_columns):
     for p in param_edges:
         other_params = [p_ed[0] for p_ed in param_edges if p_ed != p]
         if other_params:
-            sub_tables = data.groupby(list(other_params))
+            if len(other_params) == 1:
+                # Pass element instead of list of length 1 to avoid FutureWarning
+                sub_tables = data.groupby(other_params[0])
+            else:
+                sub_tables = data.groupby(list(other_params))
         else:
             sub_tables = {None: data}.items()
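
The three interpolation.py hunks repeat the same guard, so it may help to read it once in isolation. A hypothetical helper (not part of the commit; names invented here) that captures the pattern and shows what the group keys look like in each branch:

import pandas as pd

def group_by_keys(df, key_columns):
    # Unwrap a single key column so pandas is never handed a list of length 1.
    if len(key_columns) == 1:
        return df.groupby(key_columns[0])
    return df.groupby(list(key_columns))

data = pd.DataFrame(
    {"sex": ["f", "f", "m"], "year": [2000, 2001, 2000], "value": [0.1, 0.2, 0.3]}
)

for key, sub_table in group_by_keys(data, ("sex",)):
    print(key, len(sub_table))    # keys are scalars: "f" 2, "m" 1

for key, sub_table in group_by_keys(data, ("sex", "year")):
    print(key, len(sub_table))    # keys are tuples: ("f", 2000) 1, ...

Keeping scalar keys in the single-column case preserves how the surrounding code unpacks (key, sub_table) pairs, which is presumably why the warning was silenced this way rather than by accepting tuple keys.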
