In [None]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import scipy.io as sio
from scipy.stats import entropy
from constants import SAVE_FOLDER

## Load region-product export data

In [None]:
# Read the cleaned export table from SAVE_FOLDER and preview the first rows.
# Later cells read the columns year, region, prod, export, rca, binrca,
# regionsum, prodsum, pci, pci_*/pivec_* from this frame.
trade = pd.read_parquet(f"{SAVE_FOLDER}/cleaned.parquet")
trade.head()

In [None]:
# Scale constant of the log-RCA transform used below:
#   llrca = log(1 + rca / R0) / log(1 + 1 / R0)
# so rca == 0 maps to 0 and rca == 1 maps to 1.
# NOTE(review): the origin of the value 0.115 is not shown in this notebook -- confirm.
R0 = 0.115

In [None]:
# Transform RCA with R0 and generate different projections.
# llrca rescales RCA so that rca == 0 maps to 0 and rca == 1 maps to 1.
df = trade.assign(llrca=np.log(1 + trade.rca / R0) / np.log(1 + 1 / R0))
df["bin"] = df["export"] > 0  # True where the recorded export value is positive

# Weight the transformed RCA by each pivec_* column ...
for suffix in ("p", "m", "c"):
    df[f"avgrca_part_{suffix}"] = df.llrca * df[f"pivec_{suffix}"]

# ... and total the weighted values within every (year, region) group.
# The aggregation is named with the string "sum" instead of the Python
# builtin: pandas deprecates passing the builtin callable (it emits a
# FutureWarning and will stop mapping it to the NaN-skipping groupby sum),
# whereas the string keeps today's behaviour.
for suffix in ("p", "m", "c"):
    df[f"avgrca_{suffix}"] = df.groupby(["year", "region"])[
        f"avgrca_part_{suffix}"
    ].transform("sum")

# Diversity: per (year, region) total of the binrca column.
df["diversity"] = df.groupby(["year", "region"])["binrca"].transform("sum")
df.avgrca_p.describe()

In [None]:
# Ratio of each row's transformed RCA to its region-year average; rows whose
# average is not positive get 0 instead of a division result.
for suffix in ("p", "m", "c"):
    avg_rca = df[f"avgrca_{suffix}"]
    df[f"rct_{suffix}"] = np.where(avg_rca > 0, df.llrca / avg_rca, 0)

# Same ratio, centred on the mean of its product (over all rows of that product).
for suffix in ("p", "m", "c"):
    rct_col = f"rct_{suffix}"
    df[f"rct_demean_{suffix}"] = df[rct_col] - df.groupby("prod")[rct_col].transform(
        "mean"
    )

# Per-row contribution to the pci_*-weighted projection; summed per region later.
for suffix in ("p", "m", "c"):
    avg_rca = df[f"avgrca_{suffix}"]
    weighted = df[f"pci_{suffix}"] * df.llrca * df[f"pivec_{suffix}"] / avg_rca
    df[f"proj_{suffix}"] = np.where(avg_rca > 0, weighted, 0)

# Per-row contribution to the diversity-normalised pci total of binrca products.
has_products = df.diversity > 0
df["eci_part"] = np.where(has_products, df.pci * df.binrca / df.diversity, 0)
df.head()

In [None]:
# Sum the per-row parts within each (year, region) and give the totals their
# final names.  "bin" becomes the count of rows with a positive export.
summed_cols = [
    *(f"avgrca_part_{suffix}" for suffix in "pmc"),
    *(f"proj_{suffix}" for suffix in "pmc"),
    "eci_part",
    "bin",
]
final_names = {f"avgrca_part_{suffix}": f"avgrca_{suffix}" for suffix in "pmc"}
final_names["eci_part"] = "eci"

region_year_totals = df.groupby(["year", "region"])[summed_cols].sum()
cntryagg = region_year_totals.reset_index().rename(columns=final_names)
cntryagg.head()

## Calculate metrics for 2016

### Prepare the region-product matrix (mcp) for 2016

In [None]:
# Wide region x product table of binrca for 2016; region-product pairs that
# are absent from the data are filled with 0.
is_2016 = trade["year"] == 2016
binrca_wide = trade.loc[is_2016].pivot(index="region", columns="prod", values="binrca")
mcpdf = binrca_wide.fillna(0)
mcpdf.shape

In [None]:
# Raw matrix: rows are regions, columns are products (see the pivot above).
mcp = mcpdf.values
ubiquity = mcp.sum(axis=0)  # per-product column totals
diversity = mcp.sum(axis=1)  # per-region row totals; reused later to index resdf

# Drop empty rows AND empty columns.  The metrics below divide by the row and
# column totals (1 / kc, 1 / kp, 1 / kp_1), so an all-zero product column would
# turn them into inf/NaN just like an all-zero region row would.  Removing an
# all-zero line never changes the totals of the remaining lines, so a single
# pass is enough and the result is unchanged when no such column exists.
# (The former bare `ubiquity.min(), diversity.min()` line was a no-op: only
# the last expression of a cell is displayed.)
mcp = mcp[np.ix_(diversity > 0, ubiquity > 0)]
mcp.shape

In [None]:
# Column totals (kp, one per product) and row totals (kc, one per region)
# of the filtered matrix.
kp, kc = (mcp.sum(axis=axis) for axis in (0, 1))

### Country Fitness

In [None]:
# Start the iteration from all-ones vectors: fc has one entry per row of mcp,
# qp one entry per column.
fc, qp = (np.ones(length) for length in mcp.shape)

In [None]:
# Twenty rounds of the coupled update.  Both raw updates are computed from the
# previous round's fc and qp before either is overwritten, then each vector is
# rescaled to mean 1.
for i in range(20):
    fc_t, qp_t = mcp @ qp, 1 / (mcp.T @ (1 / fc))
    fc, qp = fc_t / fc_t.mean(), qp_t / qp_t.mean()

### ECI

In [None]:
# Region-by-region matrix: mcp scaled by 1/kc on its rows and 1/kp on its
# columns, then multiplied by mcp transposed.
row_scale = np.diag(1 / kc)
col_scale = np.diag(1 / kp)
mr = row_scale @ mcp @ col_scale @ mcp.T
mr.shape

In [None]:
# Eigendecomposition of mr.  np.linalg.eig makes no promise about the order of
# the eigenvalues it returns, but the next cell reads column 1 as "the
# eigenvector of the second-largest eigenvalue".  Sort the pairs by descending
# real part so that positional indexing is well defined.  The stable sort
# leaves an already-descending result untouched.
eigvals2, eigvecs2 = np.linalg.eig(mr)
eig_order = np.argsort(-np.real(eigvals2), kind="stable")
eigvals2 = eigvals2[eig_order]
# Keep only the real part of the eigenvectors, as before.
eigvecs2 = np.real(eigvecs2[:, eig_order])

In [None]:
# ECI: column 1 of the eigenvector matrix, with its sign flipped if necessary
# so that it correlates positively with kc.
second_vec = eigvecs2[:, 1]
orientation = np.sign(np.corrcoef(kc, second_vec)[0, 1])
eci = orientation * second_vec

### GENEPY

In [None]:
# Row-normalise mcp by kc, take the column totals of that (kp_1), and divide
# the columns by kp_1 to get the weighted matrix wcp.
row_normalised = np.diag(1 / kc) @ mcp
kp_1 = row_normalised.sum(axis=0)
wcp = row_normalised @ np.diag(1 / kp_1)

# Region-by-region similarity matrix with the self-similarity diagonal zeroed.
ncc = wcp @ wcp.T
np.fill_diagonal(ncc, 0)

In [None]:
# Eigendecomposition of ncc.  np.linalg.eig returns eigenvalues in no
# guaranteed order, while the formula below needs the two largest ones at
# positions 0 and 1.  Sort the pairs by descending real part first.  The stable
# sort leaves an already-descending result untouched.
eigvals, eigvecs = np.linalg.eig(ncc)
eig_order = np.argsort(-np.real(eigvals), kind="stable")
eigvals, eigvecs = eigvals[eig_order], eigvecs[:, eig_order]

xc1 = np.absolute(eigvecs[:, 0])  # leading eigenvector, made non-negative
xc2 = eigvecs[:, 1]  # second eigenvector (sign is whatever eig returned)
lambda1 = eigvals[0]
lambda2 = eigvals[1]

# genepy = (l1*x1^2 + l2*x2^2)^2 + 2*(l1^2*x1^2 + l2^2*x2^2)
genepy = np.square(lambda1 * np.square(xc1) + lambda2 * np.square(xc2)) + 2 * (
    lambda1**2 * np.square(xc1) + lambda2**2 * np.square(xc2)
)

### Production Ability

In [None]:
from girth import twopl_mml

# Fit girth's twopl_mml to the transposed matrix, i.e. with products on the
# rows and regions on the columns.  The "Ability" entry of the result is used
# later as one value per retained region.
# NOTE(review): presumably a two-parameter-logistic IRT fit that treats rows
# as items and columns as respondents -- confirm against the girth docs.
estimates = twopl_mml(mcp.T)

In [None]:
# List the entries returned by twopl_mml (the "Ability" entry is used below).
estimates.keys()

### Fixed effects

In [None]:
# 2016 rows with a positive export, restricted to the columns needed for the
# fixed-effects regression and the entropy measure.  Copied so that new
# columns can be added without touching `trade`.
positive_2016 = (trade.year == 2016) & (trade["export"] > 0)
fe_columns = ["region", "prod", "export", "rca", "regionsum", "prodsum"]
fedf = trade.loc[positive_2016, fe_columns].copy()
fedf.head()

In [None]:
# Dependent variable: -log(-log(rca / (rca + 1))).
rca_fraction = fedf.rca / (fedf.rca + 1)
fedf["ycp"] = -np.log(-np.log(rca_fraction))

# Share of the row's export in its region total and in its product total.
for share_name, total_name in (("regionshare", "regionsum"), ("prodshare", "prodsum")):
    fedf[share_name] = fedf.export / fedf[total_name]
fedf.head()

In [None]:
# OLS of ycp on the region and prod terms (treated as fixed effects by the cells below).
fe_model = smf.ols(formula="ycp ~ region+prod", data=fedf)
res = fe_model.fit()

In [None]:
# One row per estimated coefficient, skipping the first parameter (the
# intercept).  After reset_index(), "index" holds the parameter name.
fecoefdf = pd.DataFrame({"fe": res.params.iloc[1:]}).reset_index()

# Parameter names end in "[T.<level>]".  Pull out the whole <level> label
# instead of slicing a fixed three characters, so codes of any length survive
# intact.  For three-character codes the result is the same as the old
# `.str[-4:-1]` slice.  Names without that suffix yield NaN.
fecoefdf["var"] = fecoefdf["index"].str.extract(r"\[T\.(.*)\]$", expand=False)
fecoefdf.head()

In [None]:
# Attach the estimated effect to every region present in fedf.  Regions with
# no matching coefficient row get fe = 0 via the left join plus fillna(0).
coef_by_region = fecoefdf[["var", "fe"]].rename(columns={"var": "region"})
regions_in_sample = fedf[["region"]].drop_duplicates()
gamma_c = regions_in_sample.merge(coef_by_region, how="left").fillna(0)
gamma_c.head()

### Entropic measure

In [None]:
# Working copy holding only the keys and the export value; the entropy
# iteration below adds its columns here.
tmpdf = fedf.loc[:, ["region", "prod", "export"]].copy()
tmpdf.head()

In [None]:
# Number of distinct regions and of distinct products in the working table.
tmpdf.region.nunique(), tmpdf["prod"].nunique()

In [None]:
# First pass: entropies are taken over the raw export values.
# scipy.stats.entropy normalises its input to sum to 1 before computing the
# Shannon entropy, so each group is treated as a share distribution.
tmpdf["hc"] = tmpdf.groupby("region")["export"].transform(entropy)
tmpdf["hp"] = tmpdf.groupby("prod")["export"].transform(entropy)

# Weight every export by how far the partner entropy is below a log ceiling.
# NOTE(review): 233 and 235 look like the region and product counts shown by
# the nunique() cell above (log of the count being the maximum entropy) --
# confirm, and consider deriving them from the data.
tmpdf["xcp"] = tmpdf.export * (np.log(233) - tmpdf.hp)
tmpdf["ycp"] = tmpdf.export * (np.log(235) - tmpdf.hc)

# Normalise the weights to shares within region (xcpr) and within product
# (ycpr).  "sum" is passed as a string: pandas deprecates passing the Python
# builtin, and the string keeps the current NaN-skipping groupby behaviour.
tmpdf["xcpr"] = tmpdf.xcp / tmpdf.groupby("region")["xcp"].transform("sum")
tmpdf["ycpr"] = tmpdf.ycp / tmpdf.groupby("prod")["ycp"].transform("sum")
tmpdf.head()

In [None]:
# Twenty-five refinement rounds: recompute the entropies from the current
# weighted shares, rebuild the weights from the new entropies, renormalise.
for _round in range(25):
    tmpdf["hc"] = tmpdf.groupby("region")["xcpr"].transform(entropy)
    tmpdf["hp"] = tmpdf.groupby("prod")["ycpr"].transform(entropy)
    # NOTE(review): 233 / 235 presumably are the region / product counts -- confirm.
    tmpdf["xcp"] = tmpdf.export * (np.log(233) - tmpdf.hp)
    tmpdf["ycp"] = tmpdf.export * (np.log(235) - tmpdf.hc)
    # "sum" as a string avoids pandas' deprecated builtin-callable path while
    # keeping the same NaN-skipping group totals.
    tmpdf["xcpr"] = tmpdf.xcp / tmpdf.groupby("region")["xcp"].transform("sum")
    tmpdf["ycpr"] = tmpdf.ycp / tmpdf.groupby("prod")["ycp"].transform("sum")

In [None]:
# Preview the table after the final iteration round.
tmpdf.head()

In [None]:
# hc is constant within a region, so de-duplicating leaves one row per
# region; sorted by region code.
region_entropy = tmpdf.loc[:, ["region", "hc"]].drop_duplicates()
regiondf = region_entropy.sort_values(by="region")
regiondf.head()

## Pool metrics in one dataframe

In [None]:
# Collect the matrix-based metrics, one value per retained region.  The index
# is the region labels of mcpdf whose row total (diversity) is positive, and
# it is then moved into an ordinary column.
matrix_metrics = {
    "fitness2016": fc,
    "eci2016": eci,
    "kc": kc,
    "xc1": xc1,
    "xc2": xc2,
    "genepy": genepy,
    "ability": estimates["Ability"],
}
retained_regions = mcpdf.index[diversity > 0]
resdf = pd.DataFrame(matrix_metrics, index=retained_regions).reset_index()
resdf.shape

In [None]:
# 2016 region aggregates for regions with at least one positive export row.
is_active_2016 = (cntryagg.year == 2016) & (cntryagg.bin > 0)
cntry2016 = cntryagg[is_active_2016].drop(columns=["year"])

# Inner-join the fixed effects and the entropy measure, then left-join the
# aggregates; every merge keys on the columns the two frames share.
resdf = resdf.merge(gamma_c).merge(regiondf).merge(cntry2016, how="left")
resdf.head()

In [None]:
# Pairwise correlations between all pooled metrics, in this display order.
corr_columns = (
    ["kc", "fitness2016", "hc", "ability", "xc1"]
    + ["avgrca_p", "avgrca_m", "avgrca_c", "bin"]
    + ["eci2016", "eci", "xc2"]
    + ["proj_p", "proj_m", "proj_c", "genepy", "fe"]
)
resdf.loc[:, corr_columns].corr()

In [None]:
# Show the column names that are about to be exported.
resdf.columns

In [None]:
# Export the pooled metrics to a MATLAB .mat file in SAVE_FOLDER: resdf is
# converted to a {column name: list of values} dict and stored under the
# top-level variable "struct1".
sio.savemat(f"{SAVE_FOLDER}/metric2016.mat", {"struct1": resdf.to_dict("list")})