Skip to content

Commit

Permalink
Provide warning if diversity or ubiquity are 0 for #22
Browse files Browse the repository at this point in the history
  • Loading branch information
shreyasgm committed Jan 26, 2023
1 parent 2eada51 commit 967796b
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 9 deletions.
21 changes: 13 additions & 8 deletions ecomplexity/ComplexityData.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,25 +57,30 @@ def clean_data(self, val_errors_flag_input):
def create_full_df(self, t):
"""Rectangularize, but remove rows with diversity or ubiquity zero
Rows with zero diversity / ubiquity lead to dividebyzero errors and
incorrect values during normzalization
Rows with zero diversity / ubiquity lead to ZeroDivision errors and
incorrect values during normalization
"""
self.t = t
self.data_t = self.data.loc[t].copy()
diversity_check = (
# Check for zero diversity and ubiquity
val_diversity_check = (
self.data_t.reset_index().groupby(["loc"])["val"].sum().reset_index()
)
ubiquity_check = (
val_ubiquity_check = (
self.data_t.reset_index().groupby(["prod"])["val"].sum().reset_index()
)
diversity_check = diversity_check[diversity_check.val != 0]
ubiquity_check = ubiquity_check[ubiquity_check.val != 0]
val_diversity_check = val_diversity_check[val_diversity_check.val != 0]
val_ubiquity_check = val_ubiquity_check[val_ubiquity_check.val != 0]
# Remove locations with zero diversity and products with zero ubiquity, respectively
self.data_t = self.data_t.reset_index()
self.data_t = self.data_t.merge(diversity_check[["loc"]], on="loc", how="right")
self.data_t = self.data_t.merge(
ubiquity_check[["prod"]], on="prod", how="right"
val_diversity_check[["loc"]], on="loc", how="right"
)
self.data_t = self.data_t.merge(
val_ubiquity_check[["prod"]], on="prod", how="right"
)
self.data_t.set_index(["loc", "prod"], inplace=True)
# Create full dataframe with all combinations of locations and products
data_index = pd.MultiIndex.from_product(
self.data_t.index.levels, names=self.data_t.index.names
)
Expand Down
8 changes: 7 additions & 1 deletion ecomplexity/ecomplexity.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def ecomplexity(
rpop_mcp_threshold: numeric indicating RPOP threshold beyond which mcp is 1.
*default* 1. Only used if presence_test is not "rca".
pop: pandas df, with time, location and corresponding population, in that order.
Not required if presence_test is "rca" (default).
Not required if presence_test is "rca", which is the default.
continuous: Used to calculate product proximities, indicates whether
to consider correlation of every product pair (True) or product
co-occurrence (False). *default* False.
Expand Down Expand Up @@ -193,6 +193,12 @@ def ecomplexity(
cdata.diversity_t = np.nansum(cdata.mcp_t, axis=1)
cdata.ubiquity_t = np.nansum(cdata.mcp_t, axis=0)

# If any diversity or ubiquity value is 0, warn that ECI and PCI will be NaN
if np.any(cdata.diversity_t == 0) or np.any(cdata.ubiquity_t == 0):
warnings.warn(
f"Year {t}: Diversity or ubiquity is 0, so ECI and PCI will be nan"
)

# Calculate ECI and PCI
cdata.eci_t, cdata.pci_t = calc_eci_pci(cdata)

Expand Down

0 comments on commit 967796b

Please sign in to comment.