In [None]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
from girth import twopl_mml
from scipy.stats import entropy
from constants import SAVE_FOLDER

## Load region-product data

In [None]:
# Read only the columns that the metrics below use from the cleaned parquet file.
trade_columns = [
    "year",
    "region",
    "prod",
    "export",
    "regionsum",
    "prodsum",
    "yearsum",
    "rca",
    "binrca",
]
trade = pd.read_parquet(f"{SAVE_FOLDER}/cleaned.parquet", columns=trade_columns)
trade.head()

In [None]:
# Log-transform RCA with scale parameter R0.  Dividing by log(1 + 1 / R0)
# normalizes the transform so that rca == 1 maps to llrca == 1 (and rca == 0 to 0).
R0 = 0.115
llrca_unit = np.log(1 + 1 / R0)
trade["llrca"] = np.log(1 + trade["rca"] / R0) / llrca_unit
trade.columns, trade.shape, trade[["year", "region", "prod"]].nunique()

In [None]:
## calculate 1962-based metrics of ECI/ECI-star/fitness
# Region x product matrices for 1962: binary RCA (mcp) and raw RCA (rcamat).
# Region-product pairs that are absent from the data are treated as 0.
mcpdf = (
    trade[trade.year == 1962]
    .pivot(index="region", columns="prod", values="binrca")
    .fillna(0)
)
rcadf = (
    trade[trade.year == 1962]
    .pivot(index="region", columns="prod", values="rca")
    .fillna(0)
)
mcp = mcpdf.values
rcamat = rcadf.values
# Drop regions without any binary-RCA product from both matrices; they would
# otherwise produce a division by zero in 1 / diversity below.
diversity = mcp.sum(axis=1)
mcp = mcp[diversity > 0, :]
rcamat = rcamat[diversity > 0, :]
ubiquity = mcp.sum(axis=0)  # per product: number of regions with binrca == 1
diversity = mcp.sum(axis=1)  # per region: number of products with binrca == 1


## calculate normalized proximity phi-M (A1), phi-P (A2), phi-C (A3) for 1962
# A[p, q] = co-occurrence count / ubiquity[q]; the element-wise minimum with the
# transpose therefore divides each count by the larger of the two ubiquities.
A = mcp.T @ mcp
A = A / ubiquity[np.newaxis, :]
A = np.minimum(A, A.T)
np.fill_diagonal(A, 0)
# A is symmetric at this point, so the column sums equal the row sums and
# A1 has rows summing to 1.
den = A.sum(axis=0)
A1 = np.diag(1 / den) @ A
# A2: product -> region -> product transition matrix (rows sum to 1).
A2 = np.diag(1 / ubiquity) @ mcp.T @ np.diag(1 / diversity) @ mcp
# A3: correlation between product RCA columns, shifted from [-1, 1] to [0, 1],
# then normalized the same way as A1.
A3 = (1 + np.corrcoef(rcamat.T)) / 2
den = A3.sum(axis=0)
A3 = np.diag(1 / den) @ A3
# Right eigenvectors (of A) and left eigenvectors (of A.T) for each matrix.
# np.linalg.eig does not sort its output, so each assert checks that the two
# largest eigenvalues really are at positions 0 and 1 before columns 0/1 are used.
eigvals1, eigvecs1 = np.linalg.eig(A1)
eigvecs1 = np.real(eigvecs1)
assert np.array_equal(np.argsort(-eigvals1)[:2], np.array([0, 1]))
eigvals1_l, eigvecs1_l = np.linalg.eig(A1.T)
eigvecs1_l = np.real(eigvecs1_l)
assert np.array_equal(np.argsort(-eigvals1_l)[:2], np.array([0, 1]))
eigvals2, eigvecs2 = np.linalg.eig(A2)
eigvecs2 = np.real(eigvecs2)
assert np.array_equal(np.argsort(-eigvals2)[:2], np.array([0, 1]))
eigvals2_l, eigvecs2_l = np.linalg.eig(A2.T)
eigvecs2_l = np.real(eigvecs2_l)
assert np.array_equal(np.argsort(-eigvals2_l)[:2], np.array([0, 1]))
eigvals3, eigvecs3 = np.linalg.eig(A3)
eigvecs3 = np.real(eigvecs3)
assert np.array_equal(np.argsort(-eigvals3)[:2], np.array([0, 1]))
eigvals3_l, eigvecs3_l = np.linalg.eig(A3.T)
eigvecs3_l = np.real(eigvecs3_l)
assert np.array_equal(np.argsort(-eigvals3_l)[:2], np.array([0, 1]))

# Leading left eigenvector of each matrix, rescaled to sum to 1
# (used below as per-product weights).
pivec1 = eigvecs1_l[:, 0]
pivec2 = eigvecs2_l[:, 0]
pivec3 = eigvecs3_l[:, 0]
pivec1_1962 = pivec1 / pivec1.sum()
pivec2_1962 = pivec2 / pivec2.sum()
pivec3_1962 = pivec3 / pivec3.sum()

diversity = mcp.sum(axis=1)
# Eigenvector signs returned by np.linalg.eig are arbitrary.  pci_p is oriented
# so that its region-level aggregate correlates positively with diversity.
pci_p = (
    np.sign(np.real(np.corrcoef(diversity, mcp @ eigvecs2[:, 1])[0, 1]))
    * eigvecs2[:, 1]
)
# pci_m / pci_c are oriented to correlate positively with the sign-fixed pci_p.
# (Orienting them against the raw eigvecs2[:, 1] would leave them pointing the
# opposite way from pci_p whenever pci_p had to be flipped.)
pci_m = np.sign(np.real(np.corrcoef(eigvecs1[:, 1], pci_p)[0, 1])) * eigvecs1[:, 1]
pci_c = np.sign(np.real(np.corrcoef(eigvecs3[:, 1], pci_p)[0, 1])) * eigvecs3[:, 1]
# Second left eigenvectors, oriented to have a positive dot product with the
# matching right-eigenvector index.
pcil_p = np.sign(eigvecs2_l[:, 1].dot(pci_p)) * eigvecs2_l[:, 1]
pcil_m = np.sign(eigvecs1_l[:, 1].dot(pci_m)) * eigvecs1_l[:, 1]
pcil_c = np.sign(eigvecs3_l[:, 1].dot(pci_c)) * eigvecs3_l[:, 1]

# pci_1962 keeps the unscaled (unit-norm, sign-fixed) vector.  The *_1962 right
# vectors are scaled so that sum(pivec * pci**2) == 1, the left vectors so that
# sum(pcil**2 / pivec) == 1.
pci_1962 = pci_p.copy()
pci_m_1962 = pci_m * 1 / np.sqrt(pci_m.T @ np.diag(pivec1_1962) @ pci_m)
pci_p_1962 = pci_p * 1 / np.sqrt(pci_p.T @ np.diag(pivec2_1962) @ pci_p)
pci_c_1962 = pci_c * 1 / np.sqrt(pci_c.T @ np.diag(pivec3_1962) @ pci_c)
pcil_m_1962 = pcil_m * 1 / np.sqrt(pcil_m.T @ np.diag(1 / pivec1_1962) @ pcil_m)
pcil_p_1962 = pcil_p * 1 / np.sqrt(pcil_p.T @ np.diag(1 / pivec2_1962) @ pcil_p)
pcil_c_1962 = pcil_c * 1 / np.sqrt(pcil_c.T @ np.diag(1 / pivec3_1962) @ pcil_c)

##fitness_1962
# Iterative fitness (fc, per region) / product-quality (qp, per product) map;
# both vectors are rescaled to mean 1 after every step.
# NOTE(review): 19 iterations here vs. 20 in processyear -- presumably because
# processyear applies one more region-side step when it sums qp_1962 * binrca
# into fitness_1962; confirm.
qp = np.ones(mcp.shape[1])
fc = np.ones(mcp.shape[0])
for i in range(19):
    fc_t = mcp @ qp
    qp_t = 1 / (mcp.T @ (1 / fc))
    fc = fc_t / fc_t.mean()
    qp = qp_t / qp_t.mean()

qp_1962 = qp.copy()

In [None]:
def _right_left_eigvecs(mat):
    """Return the real parts of the right and left eigenvectors of ``mat``.

    ``np.linalg.eig`` does not sort its output, so this asserts that the two
    largest eigenvalues are at positions 0 and 1 for both ``mat`` and ``mat.T``;
    callers rely on columns 0 and 1 being the leading eigenvectors.

    Raises:
        AssertionError: if the two largest eigenvalues are not at positions 0 and 1.
    """
    eigvals_r, eigvecs_r = np.linalg.eig(mat)
    assert np.array_equal(np.argsort(-eigvals_r)[:2], np.array([0, 1]))
    eigvals_l, eigvecs_l = np.linalg.eig(mat.T)
    assert np.array_equal(np.argsort(-eigvals_l)[:2], np.array([0, 1]))
    return np.real(eigvecs_r), np.real(eigvecs_l)


def processyear(year):
    """Compute region-level complexity metrics for a single year.

    Reads the module-level ``trade`` frame and the 1962 baseline vectors
    (``pci_1962``, ``pivec{1,2,3}_1962``, ``pci_{p,m,c}_1962``,
    ``pcil_{p,m,c}_1962``, ``qp_1962``) computed in the preceding cell.

    NOTE(review): the 1962 vectors are attached to this year's rows using this
    year's product labels (``mcpdf.columns``).  That is only correct if every
    year has the same products in the same order as 1962 -- a different product
    count raises ``ValueError``; the same count with different products would
    misalign silently.  Confirm against the cleaning step.

    Args:
        year: value matched against ``trade.year``.

    Returns:
        DataFrame with one row per region that has at least one binary-RCA
        product and at least one positive export, with columns ``region``,
        ``fitness``, ``diversity``, ``xc1``, ``xc2``, ``genepy``, ``ability``,
        ``fe``, ``hc``, ``year``, ``avgrca_{p,m,c}``, ``proj_{p,m,c}``, ``eci``,
        their ``*_1962`` counterparts and ``fitness_1962``.

    Raises:
        AssertionError: if the leading eigenvalues of a proximity matrix are not
            returned first by ``np.linalg.eig``.
    """
    ### calculate yearly country complexity metric
    df = trade[trade.year == year].copy()
    df["diversity"] = df.groupby("region")["binrca"].transform("sum")
    # ECI/ECI_Star for 1962 and yearly rolling version, see annotation in other notebooks
    mcpdf = df.pivot(index="region", columns="prod", values="binrca").fillna(0)
    rcadf = df.pivot(index="region", columns="prod", values="rca").fillna(0)
    mcp = mcpdf.values
    rcamat = rcadf.values
    # Sanity messages only: a product with zero ubiquity is reported, not removed.
    ubiquity = mcp.sum(axis=0)
    diversity = mcp.sum(axis=1)
    if ubiquity.min() == 0:
        print(f"error {year}! mcp mat ubi=0")
    mcp = mcp[diversity > 0, :]
    # NOTE(review): rcamat rows are filtered by their own RCA row sums here,
    # whereas the 1962 cell filters rcamat by the binary-RCA diversity.
    ubiquity = rcamat.sum(axis=0)
    diversity = rcamat.sum(axis=1)
    if ubiquity.min() == 0:
        print(f"error {year}! rca mat ubi=0")
    rcamat = rcamat[diversity > 0, :]
    ubiquity = mcp.sum(axis=0)
    diversity = mcp.sum(axis=1)

    # Product-product matrices, each normalized so that its rows sum to 1:
    # A1 -- co-occurrence divided by the larger ubiquity of the pair, no diagonal;
    # A2 -- product -> region -> product transition matrix;
    # A3 -- correlation of product RCA columns shifted from [-1, 1] to [0, 1].
    A = mcp.T @ mcp
    A = A / ubiquity[np.newaxis, :]
    A = np.minimum(A, A.T)
    np.fill_diagonal(A, 0)
    den = A.sum(axis=0)
    A1 = np.diag(1 / den) @ A
    A2 = np.diag(1 / ubiquity) @ mcp.T @ np.diag(1 / diversity) @ mcp
    A3 = (1 + np.corrcoef(rcamat.T)) / 2
    den = A3.sum(axis=0)
    A3 = np.diag(1 / den) @ A3

    eigvecs1, eigvecs1_l = _right_left_eigvecs(A1)
    eigvecs2, eigvecs2_l = _right_left_eigvecs(A2)
    eigvecs3, eigvecs3_l = _right_left_eigvecs(A3)

    # Leading left eigenvectors rescaled to sum to 1 (per-product weights).
    pivec1 = eigvecs1_l[:, 0]
    pivec2 = eigvecs2_l[:, 0]
    pivec3 = eigvecs3_l[:, 0]
    pivec1 = pivec1 / pivec1.sum()
    pivec2 = pivec2 / pivec2.sum()
    pivec3 = pivec3 / pivec3.sum()

    # Eigenvector signs from np.linalg.eig are arbitrary.  pci_p is oriented so
    # that its region-level aggregate correlates positively with diversity.
    diversity = mcp.sum(axis=1)
    pci_p = (
        np.sign(np.real(np.corrcoef(diversity, mcp @ eigvecs2[:, 1])[0, 1]))
        * eigvecs2[:, 1]
    )
    # Unscaled, sign-fixed vector; stored as "pci" and used for the classic ECI.
    pci_raw = pci_p

    # pci_m / pci_c are oriented to correlate positively with the sign-fixed
    # pci_p.  (Orienting them against the raw eigvecs2[:, 1] would leave them
    # pointing the opposite way from pci_p whenever pci_p had to be flipped.)
    pci_m = (
        np.sign(np.real(np.corrcoef(eigvecs1[:, 1], pci_p)[0, 1])) * eigvecs1[:, 1]
    )
    pci_c = (
        np.sign(np.real(np.corrcoef(eigvecs3[:, 1], pci_p)[0, 1])) * eigvecs3[:, 1]
    )
    # Second left eigenvectors, oriented to have a positive dot product with the
    # matching right-eigenvector index.
    pcil_p = np.sign(eigvecs2_l[:, 1].dot(pci_p)) * eigvecs2_l[:, 1]
    pcil_m = np.sign(eigvecs1_l[:, 1].dot(pci_m)) * eigvecs1_l[:, 1]
    pcil_c = np.sign(eigvecs3_l[:, 1].dot(pci_c)) * eigvecs3_l[:, 1]

    # Scale right vectors so that sum(pivec * pci**2) == 1 and left vectors so
    # that sum(pcil**2 / pivec) == 1.
    pci_m = pci_m * 1 / np.sqrt(pci_m.T @ np.diag(pivec1) @ pci_m)
    pci_p = pci_p * 1 / np.sqrt(pci_p.T @ np.diag(pivec2) @ pci_p)
    pci_c = pci_c * 1 / np.sqrt(pci_c.T @ np.diag(pivec3) @ pci_c)
    pcil_m = pcil_m * 1 / np.sqrt(pcil_m.T @ np.diag(1 / pivec1) @ pcil_m)
    pcil_p = pcil_p * 1 / np.sqrt(pcil_p.T @ np.diag(1 / pivec2) @ pcil_p)
    pcil_c = pcil_c * 1 / np.sqrt(pcil_c.T @ np.diag(1 / pivec3) @ pcil_c)

    # Attach every product-level vector (this year's and the 1962 baseline) to
    # the region-product rows with a single merge on "prod".
    product_vectors = pd.DataFrame(
        {
            "pci": pci_raw,
            "pci_1962": pci_1962,
            "pivec_p": pivec2,
            "pivec_m": pivec1,
            "pivec_c": pivec3,
            "pci_p": pci_p,
            "pci_m": pci_m,
            "pci_c": pci_c,
            "pcil_p": pcil_p,
            "pcil_m": pcil_m,
            "pcil_c": pcil_c,
            "pivec_p_1962": pivec2_1962,
            "pivec_m_1962": pivec1_1962,
            "pivec_c_1962": pivec3_1962,
            "pci_p_1962": pci_p_1962,
            "pci_m_1962": pci_m_1962,
            "pci_c_1962": pci_c_1962,
            "pcil_p_1962": pcil_p_1962,
            "pcil_m_1962": pcil_m_1962,
            "pcil_c_1962": pcil_c_1962,
            "qp_1962": qp_1962,
        },
        index=mcpdf.columns,
    ).reset_index()
    df = df.merge(product_vectors)

    # Weighted average of transformed RCA per region (this year's weights), and
    # the per-row contribution to the classic ECI (mean pci over a region's
    # binary-RCA products).
    df["avgrca_part_p"] = df.llrca * df.pivec_p
    df["avgrca_part_m"] = df.llrca * df.pivec_m
    df["avgrca_part_c"] = df.llrca * df.pivec_c
    df["avgrca_p"] = df.groupby(["year", "region"])["avgrca_part_p"].transform("sum")
    df["avgrca_m"] = df.groupby(["year", "region"])["avgrca_part_m"].transform("sum")
    df["avgrca_c"] = df.groupby(["year", "region"])["avgrca_part_c"].transform("sum")
    df["eci_part"] = np.where(df.diversity > 0, df.pci * df.binrca / df.diversity, 0)

    # Same quantities with the fixed 1962 weights / 1962 pci.
    df["avgrca_part_p_1962"] = df.llrca * df.pivec_p_1962
    df["avgrca_part_m_1962"] = df.llrca * df.pivec_m_1962
    df["avgrca_part_c_1962"] = df.llrca * df.pivec_c_1962
    df["avgrca_p_1962"] = df.groupby(["year", "region"])[
        "avgrca_part_p_1962"
    ].transform("sum")
    df["avgrca_m_1962"] = df.groupby(["year", "region"])[
        "avgrca_part_m_1962"
    ].transform("sum")
    df["avgrca_c_1962"] = df.groupby(["year", "region"])[
        "avgrca_part_c_1962"
    ].transform("sum")
    df["eci_part_1962"] = np.where(
        df.diversity > 0, df.pci_1962 * df.binrca / df.diversity, 0
    )

    # rct_*: transformed RCA relative to the region's weighted average;
    # proj_*: per-row contribution to the projection of rct onto pci (weights pivec).
    df["rct_p"] = np.where(df.avgrca_p > 0, df.llrca / df.avgrca_p, 0)
    df["rct_m"] = np.where(df.avgrca_m > 0, df.llrca / df.avgrca_m, 0)
    df["rct_c"] = np.where(df.avgrca_c > 0, df.llrca / df.avgrca_c, 0)
    df["rct_demean_p"] = df.rct_p - df.groupby("prod").rct_p.transform("mean")
    df["rct_demean_m"] = df.rct_m - df.groupby("prod").rct_m.transform("mean")
    df["rct_demean_c"] = df.rct_c - df.groupby("prod").rct_c.transform("mean")
    df["proj_p"] = np.where(
        df.avgrca_p > 0, df.pci_p * df.llrca * df.pivec_p / df.avgrca_p, 0
    )
    df["proj_m"] = np.where(
        df.avgrca_m > 0, df.pci_m * df.llrca * df.pivec_m / df.avgrca_m, 0
    )
    df["proj_c"] = np.where(
        df.avgrca_c > 0, df.pci_c * df.llrca * df.pivec_c / df.avgrca_c, 0
    )

    df["rct_p_1962"] = np.where(df.avgrca_p_1962 > 0, df.llrca / df.avgrca_p_1962, 0)
    df["rct_m_1962"] = np.where(df.avgrca_m_1962 > 0, df.llrca / df.avgrca_m_1962, 0)
    df["rct_c_1962"] = np.where(df.avgrca_c_1962 > 0, df.llrca / df.avgrca_c_1962, 0)
    df["rct_demean_p_1962"] = df.rct_p_1962 - df.groupby("prod").rct_p_1962.transform(
        "mean"
    )
    df["rct_demean_m_1962"] = df.rct_m_1962 - df.groupby("prod").rct_m_1962.transform(
        "mean"
    )
    df["rct_demean_c_1962"] = df.rct_c_1962 - df.groupby("prod").rct_c_1962.transform(
        "mean"
    )
    df["proj_p_1962"] = np.where(
        df.avgrca_p_1962 > 0,
        df.pci_p_1962 * df.llrca * df.pivec_p_1962 / df.avgrca_p_1962,
        0,
    )
    df["proj_m_1962"] = np.where(
        df.avgrca_m_1962 > 0,
        df.pci_m_1962 * df.llrca * df.pivec_m_1962 / df.avgrca_m_1962,
        0,
    )
    df["proj_c_1962"] = np.where(
        df.avgrca_c_1962 > 0,
        df.pci_c_1962 * df.llrca * df.pivec_c_1962 / df.avgrca_c_1962,
        0,
    )

    ## fitness 1962 version
    # Per-row contribution qp_1962 * binrca, rescaled so that the region totals
    # (summed in the final groupby) average 1 over the mcp.shape[0] kept regions.
    df["fitness_1962"] = np.where(df.diversity > 0, df.qp_1962 * df.binrca, 0)
    df["fitness_1962"] = df["fitness_1962"] / (df["fitness_1962"].sum() / mcp.shape[0])

    ## fitness-yearly version
    # Iterative fitness (fc, per region) / product-quality (qp, per product) map;
    # both vectors are rescaled to mean 1 after every step.
    qp = np.ones(mcp.shape[1])
    fc = np.ones(mcp.shape[0])
    for i in range(20):
        fc_t = mcp @ qp
        qp_t = 1 / (mcp.T @ (1 / fc))
        fc = fc_t / fc_t.mean()
        qp = qp_t / qp_t.mean()

    ## genepy
    # Region-region similarity from the doubly weighted mcp; np.linalg.eigh
    # returns eigenvalues in ascending order, so index -1 / -2 are the two largest.
    kc = mcp.sum(axis=1)
    kp_1 = (np.diag(1 / kc) @ mcp).sum(axis=0)
    wcp = np.diag(1 / kc) @ mcp @ np.diag(1 / kp_1)
    ncc = wcp @ wcp.T
    np.fill_diagonal(ncc, 0)
    eigvals, eigvecs = np.linalg.eigh(ncc)
    xc1 = np.absolute(eigvecs[:, -1])
    xc2 = eigvecs[:, -2]
    lambda1 = np.real(eigvals[-1])
    lambda2 = np.real(eigvals[-2])
    genepy = np.square(lambda1 * np.square(xc1) + lambda2 * np.square(xc2)) + 2 * (
        lambda1**2 * np.square(xc1) + lambda2**2 * np.square(xc2)
    )

    ## production ability
    # Two-parameter IRT fit on the product x region matrix (products as rows,
    # regions as columns); "Ability" is read per region below.
    estimates = twopl_mml(mcp.T)

    ## fixed effects
    # OLS of a transformed RCA on region and product terms, positive exports only.
    fedf = trade[(trade.year == year) & (trade.export > 0)][
        ["region", "prod", "export", "rca", "regionsum", "prodsum"]
    ].copy()
    fedf["ycp"] = -np.log(-np.log(fedf.rca / (fedf.rca + 1)))
    fedf["regionshare"] = fedf.export / fedf.regionsum
    fedf["prodshare"] = fedf.export / fedf.prodsum
    res = smf.ols(formula="ycp ~ region+prod", data=fedf).fit()
    # params[1:] skips the first parameter (the intercept).
    fecoefdf = pd.DataFrame({"fe": res.params[1:]}).reset_index()
    # NOTE(review): takes the 3 characters before the final character of each
    # parameter name -- presumably a 3-character region code in names such as
    # "region[T.ABC]"; confirm the code length.
    fecoefdf["var"] = fecoefdf["index"].str[-4:-1]
    # Regions without a matching coefficient (e.g. the reference level) get fe = 0.
    gamma_c = (
        fedf[["region"]]
        .drop_duplicates()
        .merge(fecoefdf[["var", "fe"]].rename(columns={"var": "region"}), how="left")
        .fillna(0)
    )

    ## entropy
    # Iteratively re-weighted Shannon entropies of exports: hc per region (over
    # its products) and hp per product (over its regions).
    # NOTE(review): 233 and 235 are hard-coded -- presumably the number of
    # regions and products (the maximum of hp / hc is the log of that count);
    # confirm and derive from the data if so.
    tmpdf2 = fedf[["region", "prod", "export"]].copy()
    tmpdf2["hc"] = tmpdf2.groupby("region")["export"].transform(entropy)
    tmpdf2["hp"] = tmpdf2.groupby("prod")["export"].transform(entropy)
    tmpdf2["xcp"] = tmpdf2.export * (np.log(233) - tmpdf2.hp)
    tmpdf2["ycp"] = tmpdf2.export * (np.log(235) - tmpdf2.hc)
    tmpdf2["xcpr"] = tmpdf2.xcp / tmpdf2.groupby("region")["xcp"].transform("sum")
    tmpdf2["ycpr"] = tmpdf2.ycp / tmpdf2.groupby("prod")["ycp"].transform("sum")
    for i in range(25):
        tmpdf2["hc"] = tmpdf2.groupby("region")["xcpr"].transform(entropy)
        tmpdf2["hp"] = tmpdf2.groupby("prod")["ycpr"].transform(entropy)
        tmpdf2["xcp"] = tmpdf2.export * (np.log(233) - tmpdf2.hp)
        tmpdf2["ycp"] = tmpdf2.export * (np.log(235) - tmpdf2.hc)
        tmpdf2["xcpr"] = tmpdf2.xcp / tmpdf2.groupby("region")["xcp"].transform("sum")
        tmpdf2["ycpr"] = tmpdf2.ycp / tmpdf2.groupby("prod")["ycp"].transform("sum")
    regiondf2 = tmpdf2[["region", "hc"]].drop_duplicates().sort_values("region")

    ## collect result
    # Matrix-based metrics are indexed by the regions kept in mcp; the inner
    # merges add fe, hc and the per-region sums of the row-level parts.
    resdf = (
        pd.DataFrame(
            {
                "fitness": fc,
                "diversity": kc,
                "xc1": xc1,
                "xc2": xc2,
                "genepy": genepy,
                "ability": estimates["Ability"],
            },
            index=mcpdf.index[mcpdf.sum(axis=1) > 0],
        )
        .reset_index()
        .merge(gamma_c)
        .merge(regiondf2)
        .merge(
            df.groupby(["year", "region"])[
                [
                    "avgrca_part_p",
                    "avgrca_part_m",
                    "avgrca_part_c",
                    "proj_p",
                    "proj_m",
                    "proj_c",
                    "eci_part",
                    "avgrca_part_p_1962",
                    "avgrca_part_m_1962",
                    "avgrca_part_c_1962",
                    "proj_p_1962",
                    "proj_m_1962",
                    "proj_c_1962",
                    "eci_part_1962",
                    "fitness_1962",
                ]
            ]
            .sum()
            .reset_index()
            .rename(
                columns={
                    "avgrca_part_p": "avgrca_p",
                    "avgrca_part_m": "avgrca_m",
                    "avgrca_part_c": "avgrca_c",
                    "eci_part": "eci",
                    "avgrca_part_p_1962": "avgrca_p_1962",
                    "avgrca_part_m_1962": "avgrca_m_1962",
                    "avgrca_part_c_1962": "avgrca_c_1962",
                    "eci_part_1962": "eci_1962",
                }
            )
        )
    )
    return resdf

In [None]:
# Compute the region-level metrics for every year.  This is best effort: a year
# that fails is reported and skipped so that one bad year does not abort the run.
resdict = dict()
failed_years = dict()  # year -> repr of the exception that made it fail
years = range(1962, 2019)
for year in years:
    print(f"Processing year {year}...")
    try:
        resdict[f"year{year}"] = processyear(year)
    except Exception as err:
        # `except Exception` instead of a bare `except` lets KeyboardInterrupt and
        # SystemExit propagate; the reason is kept so the failure can be diagnosed.
        failed_years[year] = repr(err)
        print(f"{year} has error! {failed_years[year]}")
print("Done")

In [None]:
## calculate and adjust signs of x2 and x2divsqrtd for comparison
# The processing loop skips years that raised an error, so only the years that
# are actually present in resdict are stacked (a direct lookup would raise KeyError).
missing_years = [year for year in years if f"year{year}" not in resdict]
if missing_years:
    print(f"Skipping years without results: {missing_years}")
region_metricdf = pd.concat(
    [
        resdict[f"year{year}"].assign(year=year)
        for year in years
        if f"year{year}" in resdict
    ]
)
region_metricdf["x1d"] = region_metricdf.xc1 * region_metricdf.diversity
region_metricdf["x2divsqrtd"] = region_metricdf.xc2 / np.sqrt(region_metricdf.diversity)
# Per-year sign of the Pearson correlation between x2divsqrtd and eci; multiplying
# by it orients xc2 / x2divsqrtd so that they correlate positively with eci.
signdf = np.sign(
    region_metricdf.groupby("year")[["x2divsqrtd", "eci"]].corr().unstack().iloc[:, 1]
).reset_index()
signdf.columns = ["year", "sign"]
region_metricdf = region_metricdf.merge(signdf, how="left")
region_metricdf["x2divsqrtd"] = region_metricdf["x2divsqrtd"] * region_metricdf["sign"]
region_metricdf["xc2"] = region_metricdf["xc2"] * region_metricdf["sign"]

In [None]:
# Persist the region-year metric table as a tab-separated file without the row index.
region_metricdf.to_csv(
    path_or_buf=f"{SAVE_FOLDER}/region_year_metric.tsv",
    sep="\t",
    index=False,
)

In [None]:
# Pearson correlation between every pair of region-level metrics within each year,
# reshaped to long format: one row per (year, metric1, metric2).
pearson_by_year = region_metricdf.drop(columns=["region"]).groupby("year").corr()
region_year_corr = pearson_by_year.stack().reset_index()
region_year_corr.columns = ["year", "metric1", "metric2", "corrcoef"]
region_year_corr.head()

In [None]:
# Spearman (rank) correlation between every pair of region-level metrics within
# each year, reshaped to long format: one row per (year, metric1, metric2).
spearman_by_year = (
    region_metricdf.drop(columns=["region"]).groupby("year").corr(method="spearman")
)
region_year_corr2 = spearman_by_year.stack().reset_index()
region_year_corr2.columns = ["year", "metric1", "metric2", "corrcoef"]
region_year_corr2.head()

In [None]:
# Write both correlation tables (Pearson, then Spearman) as tab-separated files
# without the row index.
tsv_options = {"sep": "\t", "index": False}
region_year_corr.to_csv(f"{SAVE_FOLDER}/region_year_corr.tsv", **tsv_options)
region_year_corr2.to_csv(f"{SAVE_FOLDER}/region_year_rankcorr.tsv", **tsv_options)