In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
from concentrationMetrics import Index
from constants import DATA_FOLDER, SAVE_FOLDER, OUT_FOLDER

In [None]:
# GDP per capita table, reshaped from one column per year to long format
# with columns (region, year, gdppc).
gdp_csv = f"{DATA_FOLDER}/API_NY.GDP.PCAP.KD_DS2_en_csv_v2_2252313.csv"
gdp_metadata_columns = [
    "Country Name",
    "Indicator Name",
    "Indicator Code",
    "Unnamed: 65",
]
gdppc = (
    pd.read_csv(gdp_csv, skiprows=4)  # the first four lines of the file are skipped
    .drop(columns=gdp_metadata_columns)
    .melt(id_vars="Country Code", var_name="year", value_name="gdppc")
    .dropna()  # drop country-years without a GDP value
    .rename(columns={"Country Code": "region"})
    .astype({"year": int})  # year labels come from the CSV header, hence strings
)
gdppc.head()

## Load region–product export data

In [None]:
# Cleaned region-product export table. Later cells read the columns
# year, region, prod, export, regionsum, rca, binrca, pci, pci_{p,m,c}
# and pivec_{p,m,c} from it.
trade_path = f"{SAVE_FOLDER}/cleaned.parquet"
trade = pd.read_parquet(trade_path)
trade.head()

## Transform and calculate metrics

In [None]:
# Scale constant of the log transform applied to RCA in the next cell:
# llrca = log(1 + rca / R0) / log(1 + 1 / R0).
R0 = 0.115

In [None]:
# Transform RCA with R0 and project it onto the three `pivec_*` weight vectors.
# llrca = log(1 + rca / R0) / log(1 + 1 / R0) maps rca = 0 to 0 and rca = 1 to 1.
df = trade.assign(llrca=np.log(1 + trade.rca / R0) / np.log(1 + 1 / R0))
df["bin"] = df.export > 0  # True where the region exports the product at all

region_year = ["year", "region"]
suffixes = ("p", "m", "c")

# Per-row contribution of each product to the weighted average of llrca.
for suffix in suffixes:
    df[f"avgrca_part_{suffix}"] = df.llrca * df[f"pivec_{suffix}"]

# Region-year totals, broadcast back onto every row of the group.
# The aggregation is named by string ("sum"), like the "mean" transform used
# further down; passing the builtin `sum` is deprecated in recent pandas.
for suffix in suffixes:
    df[f"avgrca_{suffix}"] = df.groupby(region_year)[
        f"avgrca_part_{suffix}"
    ].transform("sum")

# Region-year total of `binrca`.
df["diversity"] = df.groupby(region_year)["binrca"].transform("sum")
df.avgrca_p.describe()

In [None]:
# Columns are created in three passes so that their order in `df` is
# rct_*, then rct_demean_*, then proj_*.
weightings = ("p", "m", "c")

# llrca relative to the region-year average; 0 where that average is not positive.
for w in weightings:
    region_avg = df[f"avgrca_{w}"]
    df[f"rct_{w}"] = np.where(region_avg > 0, df.llrca / region_avg, 0)

# Deviation of rct from its mean over all rows of the same product.
for w in weightings:
    product_mean = df.groupby("prod")[f"rct_{w}"].transform("mean")
    df[f"rct_demean_{w}"] = df[f"rct_{w}"] - product_mean

# Per-row term whose region-year sum is later exported as "ecistar".
for w in weightings:
    region_avg = df[f"avgrca_{w}"]
    weighted_pci = df[f"pci_{w}"] * df.llrca * df[f"pivec_{w}"] / region_avg
    df[f"proj_{w}"] = np.where(region_avg > 0, weighted_pci, 0)

# Per-row term of ECI: pci averaged over the products counted in `diversity`.
has_products = df.diversity > 0
df["eci_part"] = np.where(has_products, df.pci * df.binrca / df.diversity, 0)
df.head()

In [None]:
# Collapse the product dimension: one row per (year, region) holding the sums
# of the per-row parts. Summing the boolean "bin" counts exported products.
summed_columns = [
    "avgrca_part_p",
    "avgrca_part_m",
    "avgrca_part_c",
    "proj_p",
    "proj_m",
    "proj_c",
    "eci_part",
    "bin",
]
aggregate_names = {
    "avgrca_part_p": "avgrca_p",
    "avgrca_part_m": "avgrca_m",
    "avgrca_part_c": "avgrca_c",
    "eci_part": "eci",
}
region_year_sums = df.groupby(["year", "region"])[summed_columns].sum()
cntryagg = (
    region_year_sums.reset_index()
    .rename(columns=aggregate_names)
    # "year" and "region" are the only columns shared with gdppc.
    .merge(gdppc, how="left", on=["year", "region"])
)
cntryagg.head()

In [None]:
# Number of distinct years and distinct regions in the aggregate.
cntryagg.year.nunique(), cntryagg.region.nunique()

## ECI vs. ECI* and PCI vs. PCI* across proximity matrices

In [None]:
# Figure 4: ECI against ECI* (the `proj_p` aggregate), one point per region-year.
fig, ax = plt.subplots(figsize=(4, 4))

# Dashed grey guides through the origin, drawn beneath the points.
ax.axvline(x=0, color="grey", linestyle="--", zorder=0)
ax.axhline(y=0, color="grey", linestyle="--", zorder=0)
ax.scatter(cntryagg.eci, cntryagg.proj_p, alpha=0.05, zorder=5)

for axis_name in ("y", "x"):
    ax.tick_params(axis=axis_name, direction="in")

# Axis limits are left automatic; only the aspect ratio is fixed.
ax.set_aspect(0.065)
ax.set_xlabel("ECI")
ax.set_ylabel("ECI*")
fig.savefig(f"{OUT_FOLDER}/fig4.pdf", bbox_inches="tight", pad_inches=0.05)

In [None]:
# One row per product: the median of each PCI variant over all of that
# product's rows in `trade`.
pci_columns = ["pci", "pci_p", "pci_m", "pci_c"]
product_medians = trade.groupby(["prod"])[pci_columns].median()
pcidf = product_medians.reset_index()
pcidf.head()

In [None]:
from matplotlib.ticker import FormatStrFormatter
from scipy.stats import spearmanr


def _scatter_with_correlations(
    ax, x, y, *, alpha, xlabel, ylabel, title, text_y, y_format=None
):
    """Draw one scatter panel annotated with Pearson and Spearman correlations.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    x, y : equal-length numeric series for the horizontal and vertical axes.
        The annotated correlations are computed between these same two series,
        so the text always describes what the panel shows.
    alpha : marker opacity passed to ``scatter``.
    xlabel, ylabel, title : axis labels and panel title.
    text_y : vertical data coordinate of the annotation (its x is fixed at -0.1).
    y_format : optional printf-style format for the y tick labels.
    """
    # Dashed grey guides through the origin, drawn beneath the points.
    ax.axvline(x=0, color="grey", linestyle="--", zorder=0)
    ax.axhline(y=0, color="grey", linestyle="--", zorder=0)
    ax.tick_params(axis="y", direction="in")
    ax.tick_params(axis="x", direction="in")
    ax.scatter(x, y, alpha=alpha, zorder=5)
    if y_format is not None:
        ax.yaxis.set_major_formatter(FormatStrFormatter(y_format))
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.text(
        -0.1,
        text_y,
        f"Pearson:{np.corrcoef(x, y)[0, 1]:.3f}\nSpearman:{spearmanr(x, y)[0]:.3f}",
    )
    ax.set_title(title)


fig, ax = plt.subplots(3, 2, figsize=(10, 15))

# One figure row per proximity matrix:
# (column suffix, PCI panel title, ECI panel title,
#  annotation height in the PCI panel, annotation height in the ECI panel,
#  y tick format or None for the default).
panel_rows = [
    ("p", r"(a) $\Phi^P$", r"(b) $\Phi^P$", 1.4, 1.4, None),
    ("m", r"(c) $\Phi^M$", r"(d) $\Phi^M$", 0.8, 0.8, "%.1f"),
    ("c", r"(e) $\Phi^C$", r"(f) $\Phi^C$", 1.5, 1.25, None),
]

for row, (suffix, pci_title, eci_title, pci_text_y, eci_text_y, y_format) in enumerate(
    panel_rows
):
    # Left column: PCI vs. PCI*. The annotation for panel (a) used to correlate
    # `pci` with itself (always 1.000); it now uses `pci_p`, matching (c) and (e).
    _scatter_with_correlations(
        ax[row, 0],
        pcidf.pci,
        pcidf[f"pci_{suffix}"],
        alpha=0.2,
        xlabel="PCI",
        ylabel="PCI*",
        title=pci_title,
        text_y=pci_text_y,
        y_format=y_format,
    )
    # Right column: ECI vs. ECI* for the same proximity matrix.
    _scatter_with_correlations(
        ax[row, 1],
        cntryagg.eci,
        cntryagg[f"proj_{suffix}"],
        alpha=0.05,
        xlabel="ECI",
        ylabel="ECI*",
        title=eci_title,
        text_y=eci_text_y,
        y_format=y_format,
    )

plt.savefig(f"{OUT_FOLDER}/figs1.pdf", bbox_inches="tight", pad_inches=0.05)

## Export datasets for analysis in MATLAB

In [None]:
# Keep region-years with at least one exported product, tag each with a
# "<region><year>" key and order the rows by that key.
exports_something = cntryagg.bin > 0
cntryagg2 = (
    cntryagg.loc[exports_something]
    .assign(ry=lambda d: d.region + d.year.astype(str))
    .sort_values("ry")
    .reset_index(drop=True)
)
cntryagg2.shape

In [None]:
# Row-level counterpart of the region-year filter: tag each row with its
# "<region><year>" key and keep only country-years exporting at least one product.
df2 = df.assign(ry=df.region + df.year.astype(str))
# Aggregation named by string; passing the builtin `sum` is deprecated in
# recent pandas and is inconsistent with the "mean" transform used earlier.
df2["bincounts"] = df2.groupby("ry")["bin"].transform("sum")
df2 = df2[df2.bincounts > 0]
df2.columns

In [None]:
# Product x country-year matrix of transformed RCA (llrca); (prod, ry) pairs
# absent from df2 are filled with 0.
Rcpt = df2.pivot(index="prod", columns="ry", values="llrca").fillna(0)  # kept as a DataFrame: its columns are saved in the next cell
Rcpt.shape

In [None]:
# Save the country-year keys in the column order of Rcpt, under the name "cntryyear".
sio.savemat(f"{SAVE_FOLDER}/rcptcolumns.mat", {"cntryyear": Rcpt.columns.values})

In [None]:
# Average transformed RCA per country-year, in sorted `ry` order (the same
# order as the pivot columns used for Rcpt).
# `avgrca_p` is a region-year total broadcast onto every row of the group, so
# any row of a country-year carries its value. Reading it per group avoids the
# previous "first product row of a zero-filled pivot" shortcut, which returned 0
# for every country-year that had no row for the first product.
zct = df2.groupby("ry")["avgrca_p"].first().to_numpy()
zct.shape

In [None]:
# ECI* per country-year: column sums of the product x country-year matrix of
# `proj_p`. Missing (prod, ry) pairs are filled with 0 first, as in the other
# pivots of this section; without it a single absent pair turns the whole
# column sum into NaN.
ecistar = (
    df2.pivot(index="prod", columns="ry", values="proj_p")
    .fillna(0)
    .values.sum(axis=0)
)
ecistar.shape

In [None]:
# PCI* per product, read from the first country-year column of the pivot.
# NOTE(review): this assumes pci_p is the same in every country-year for a given
# product and that the first country-year has a row for every product (missing
# ones come out as 0) — TODO confirm.
pci_by_product = df2.pivot(index="prod", columns="ry", values="pci_p").fillna(0)
pcistar = pci_by_product.to_numpy()[:, 0]
pcistar.shape

In [None]:
# Write the inputs for the MATLAB analysis. Rcpt is converted to a plain
# ndarray explicitly so the file always holds a numeric product x country-year
# matrix, instead of relying on how savemat coerces a pandas DataFrame.
# The column labels are stored separately in rcptcolumns.mat.
sio.savemat(
    f"{SAVE_FOLDER}/fig5b.mat",
    {
        "Rcpt": Rcpt.to_numpy(),
        "zct": zct,
        "ecistar": ecistar,
        "pcistar": pcistar,
    },
)

In [None]:
# Share of each product in the region's total (`regionsum`); 0 where that
# total is not positive.
has_region_total = df.regionsum > 0
df["share"] = np.where(has_region_total, df.export / df.regionsum, 0)

In [None]:
# Country-year x product matrix of export shares; absent pairs count as 0.
share_rows = df.assign(ry=lambda d: d.region + d.year.astype(str))
sharemat = share_rows.pivot(index="ry", columns="prod", values="share").fillna(0)
sharemat.shape

In [None]:
# Gini, Shannon and HHI computed from the share matrix. 1e-12 is added to
# every entry before each call, and each call gets its own freshly built array.
rygini, ryshannon, ryhhi = [
    Index(data=sharemat.values + 1e-12, index=metric).results
    for metric in ("gini", "shannon", "hhi")
]
len(rygini), len(ryshannon), len(ryhhi)

In [None]:
# Labels table; every column is read as a pandas string.
cntrylabel = pd.read_csv(f"{DATA_FOLDER}/cntrylabel2.tsv", sep="\t", dtype="string")

# Rebuild the filtered, key-sorted region-year table from `cntryagg` so this
# cell can be re-run without stacking merges on top of an earlier result.
exporting = cntryagg.bin > 0
cntryagg2 = (
    cntryagg.loc[exporting]
    .assign(ry=lambda d: d.region + d.year.astype(str))
    .sort_values("ry")
    .reset_index(drop=True)
)

# Concentration indices keyed by the row labels of the share matrix.
concentration = pd.DataFrame.from_dict(
    {"gini": rygini, "shannon": ryshannon, "hhi": ryhhi, "ry": sharemat.index}
)

cntryagg2 = (
    cntryagg2.merge(concentration, how="left", on="ry")  # "ry" is the only shared column
    .merge(cntrylabel, how="left")  # joins on whatever columns the two frames share
)
cntryagg2.head()

In [None]:
# List the merged columns before selecting the ones to export.
cntryagg2.columns

In [None]:
# Columns written to the .mat file, in output order.
export_columns = [
    "year",
    "region",
    "ry",
    "avgrca_p",
    "avgrca_m",
    "avgrca_c",
    "proj_p",
    "proj_m",
    "proj_c",
    "eci",
    "bin",
    "gdppc",
    "gini",
    "shannon",
    "hhi",
    "label",
]
# Names used on the MATLAB side for the renamed columns.
matlab_names = {
    "ry": "cntryyear",
    "avgrca_p": "zct_p",
    "proj_p": "ecistar_p",
    "avgrca_m": "zct_m",
    "proj_m": "ecistar_m",
    "avgrca_c": "zct_c",
    "proj_c": "ecistar_c",
    "bin": "bindiversity",
}
# A dict of column-name -> list of values, saved under the single key "struct1".
struct1 = cntryagg2[export_columns].rename(columns=matlab_names).to_dict("list")
sio.savemat(f"{SAVE_FOLDER}/cntryyear_pmc.mat", {"struct1": struct1})