Provide warning if diversity or ubiquity is 0 (#22)

cid-harvard · Jan 26, 2023 · 967796b · 967796b
1 parent 2eada51
commit 967796b
Show file tree

Hide file tree

Showing 2 changed files with 20 additions and 9 deletions.
diff --git a/ecomplexity/ComplexityData.py b/ecomplexity/ComplexityData.py
@@ -57,25 +57,30 @@ def clean_data(self, val_errors_flag_input):
     def create_full_df(self, t):
         """Rectangularize, but remove rows with diversity or ubiquity zero
 
-        Rows with zero diversity / ubiquity lead to dividebyzero errors and
-        incorrect values during normzalization
+        Rows with zero diversity / ubiquity lead to ZeroDivision errors and
+        incorrect values during normalization
         """
         self.t = t
         self.data_t = self.data.loc[t].copy()
-        diversity_check = (
+        # Check for zero diversity and ubiquity
+        val_diversity_check = (
             self.data_t.reset_index().groupby(["loc"])["val"].sum().reset_index()
         )
-        ubiquity_check = (
+        val_ubiquity_check = (
             self.data_t.reset_index().groupby(["prod"])["val"].sum().reset_index()
         )
-        diversity_check = diversity_check[diversity_check.val != 0]
-        ubiquity_check = ubiquity_check[ubiquity_check.val != 0]
+        val_diversity_check = val_diversity_check[val_diversity_check.val != 0]
+        val_ubiquity_check = val_ubiquity_check[val_ubiquity_check.val != 0]
+        # Remove locations and products with zero diversity and ubiquity respectively
         self.data_t = self.data_t.reset_index()
-        self.data_t = self.data_t.merge(diversity_check[["loc"]], on="loc", how="right")
         self.data_t = self.data_t.merge(
-            ubiquity_check[["prod"]], on="prod", how="right"
+            val_diversity_check[["loc"]], on="loc", how="right"
+        )
+        self.data_t = self.data_t.merge(
+            val_ubiquity_check[["prod"]], on="prod", how="right"
         )
         self.data_t.set_index(["loc", "prod"], inplace=True)
+        # Create full dataframe with all combinations of locations and products
         data_index = pd.MultiIndex.from_product(
             self.data_t.index.levels, names=self.data_t.index.names
         )

diff --git a/ecomplexity/ecomplexity.py b/ecomplexity/ecomplexity.py
@@ -146,7 +146,7 @@ def ecomplexity(
         rpop_mcp_threshold: numeric indicating RPOP threshold beyond which mcp is 1.
             *default* 1. Only used if presence_test is not "rca".
         pop: pandas df, with time, location and corresponding population, in that order.
-            Not required if presence_test is "rca" (default).
+            Not required if presence_test is "rca", which is the default.
         continuous: Used to calculate product proximities, indicates whether
             to consider correlation of every product pair (True) or product
             co-occurrence (False). *default* False.
@@ -193,6 +193,12 @@ def ecomplexity(
         cdata.diversity_t = np.nansum(cdata.mcp_t, axis=1)
         cdata.ubiquity_t = np.nansum(cdata.mcp_t, axis=0)
 
+        # If any diversity or ubiquity value is 0, warn that ECI and PCI will be NaN
+        if np.any(cdata.diversity_t == 0) or np.any(cdata.ubiquity_t == 0):
+            warnings.warn(
+                f"Year {t}: Diversity or ubiquity is 0, so ECI and PCI will be nan"
+            )
+
         # Calculate ECI and PCI
         cdata.eci_t, cdata.pci_t = calc_eci_pci(cdata)