Skip to content

Commit

Permalink
Added support for knn density #22
Browse files Browse the repository at this point in the history
  • Loading branch information
shreyasgm committed Jan 26, 2023
1 parent 967796b commit 7420533
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 7 deletions.
36 changes: 32 additions & 4 deletions ecomplexity/calc_density.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,49 @@

import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors


def calc_density(rca_or_mcp, proximity_mat, knn=None):
    """Calculate density, as defined by Hidalgo et al. (2007)

    Density of a product for a location is the proximity-weighted average of
    the location's RCA / Mcp values over related products. By default every
    product contributes; with `knn` set, only the `knn` most proximate
    products of each target product contribute.

    Args:
        rca_or_mcp: numpy array of RCA (if continuous product proximities are
            used), else Mcp. Indexed as [location, product].
        proximity_mat: product proximity matrix (square, product x product)
        knn: number of nearest neighbors to consider for density calculation
            (optional). If None, the entire proximity matrix is used.

    Returns:
        numpy array of the same shape as rca_or_mcp, holding the density of
        each product for each location
    """
    if knn is None:
        # Normalise by the total proximity of each target product
        den = np.nansum(proximity_mat, axis=1)[np.newaxis, :]
        return rca_or_mcp @ (proximity_mat.T / den)

    # Nearest-neighbor search works on distances, so convert proximity
    # (higher = closer) into a distance (lower = closer)
    distance_mat = 1 - proximity_mat
    nbrs = NearestNeighbors(n_neighbors=knn, metric="precomputed").fit(distance_mat)
    # Querying without arguments returns the neighbors of each fitted product;
    # a product is not reported as its own neighbor in that mode
    distance_knn, indices_knn = nbrs.kneighbors()
    # Convert distances back to proximities
    proximity_knn = 1 - distance_knn
    # Denominator: total proximity of each product to its k nearest neighbors
    den = np.nansum(proximity_knn, axis=1)

    density = []
    for i, neighbor_idx in enumerate(indices_knn):
        # RCA / Mcp of every location for the neighbors of product i
        rca_knn_p = rca_or_mcp[:, neighbor_idx]
        # Proximity weights of those neighbors, normalised to sum to 1
        weights = proximity_knn[i] / den[i]
        # Weight each neighbor's value, then sum across neighbors. The weighted
        # values (not the raw rca_knn_p) must be summed, otherwise the
        # proximities never enter the result.
        num_p = rca_knn_p * weights
        density.append(np.nansum(num_p, axis=1))
    # One entry per product was collected; transpose to [location, product]
    return np.array(density).T
16 changes: 13 additions & 3 deletions ecomplexity/ecomplexity.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def ecomplexity(
pop=None,
continuous=False,
asymmetric=False,
knn=None,
verbose=True,
):
"""Complexity calculations through the ComplexityData class
Expand Down Expand Up @@ -153,6 +154,9 @@ def ecomplexity(
asymmetric: Used to calculate product proximities, indicates whether
to generate asymmetric proximity matrix (True) or symmetric (False).
*default* False.
knn: Number of nearest neighbors from proximity matrix to use to calculate
density. Will use entire proximity matrix if None.
*default* None.
verbose: Print year being processed
Returns:
Expand Down Expand Up @@ -207,13 +211,19 @@ def ecomplexity(
prox_mat = calc_discrete_proximity(
cdata.mcp_t, cdata.ubiquity_t, asymmetric
)
cdata.density_t = calc_density(cdata.mcp_t, prox_mat)
cdata.density_t = calc_density(
rca_or_mcp=cdata.mcp_t, proximity_mat=prox_mat, knn=knn
)
elif continuous == True and presence_test == "rpop":
prox_mat = calc_continuous_proximity(cdata.rpop_t, cdata.ubiquity_t)
cdata.density_t = calc_density(cdata.rpop_t, prox_mat)
cdata.density_t = calc_density(
rca_or_mcp=cdata.rpop_t, proximity_mat=prox_mat, knn=knn
)
elif continuous == True and presence_test != "rpop":
prox_mat = calc_continuous_proximity(cdata.rca_t, cdata.ubiquity_t)
cdata.density_t = calc_density(cdata.rca_t, prox_mat)
cdata.density_t = calc_density(
rca_or_mcp=cdata.rca_t, proximity_mat=prox_mat, knn=knn
)

# Calculate COI and COG
cdata.coi_t, cdata.cog_t = calc_coi_cog(cdata, prox_mat)
Expand Down

0 comments on commit 7420533

Please sign in to comment.