# Trustworthiness Index

In [None]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from kneed import KneeLocator
import sys;sys.path.append("../")
from utils import *
import itertools

## ITN Data

In [None]:
# Bilateral trade flows; later cells filter this table by `period` and `cmdCode`.
bilateral = pd.read_parquet("../data/1. UN Comtrade/bilateral.parquet")

# Reporter list; later used to enumerate every country that should get a score.
reporters = pd.read_csv("../data/1. UN Comtrade/reporters.csv")

# Country lookup; later used to attach `iso3_code` to each `country_id`.
countries = pd.read_csv("../data/2. Atlas/location_country.csv")

In [None]:
# Build one long table of trustworthiness scores, one row per
# (country, year, commodity code).
#
# Per-iteration results are collected in a list and concatenated once at the
# end: calling pd.concat inside the loop re-copies the whole accumulated table
# on every pass, and concatenating onto an initially empty DataFrame is
# deprecated behaviour in recent pandas releases.
INVALID_CMD_CODES = {"77", "98"}  # Not valid codes
yearly_frames = []

for cmd in (f"{x:02d}" for x in range(1, 100)):

    if cmd in INVALID_CMD_CODES:
        continue

    # Filter by commodity once per code rather than once per (code, year).
    cmd_data = bilateral[bilateral.cmdCode == cmd]

    for year in range(2012, 2023):
        # NOTE: `yearly_data` is deliberately kept as a notebook-level name;
        # later cells reuse the value left over from the final iteration.
        yearly_data = cmd_data[cmd_data.period == str(year)]

        # Pair every (reporter, partner) row with the row reported in the
        # opposite direction, so each pair carries both the reporter's
        # primaryValue and the counterpart's mirrorPrimaryValue.
        reported = yearly_data[["reporterCode", "partnerCode", "primaryValue"]]
        mirrored = yearly_data[["reporterCode", "partnerCode", "mirrorPrimaryValue"]]
        yearly_data = reported.merge(
            mirrored,
            left_on=["reporterCode", "partnerCode"],
            right_on=["partnerCode", "reporterCode"],
            suffixes=(None, "_x"),
        )
        yearly_data = yearly_data.drop(columns=["reporterCode_x", "partnerCode_x"])
        yearly_data.columns = ["country_id", "partner_country_id", "export_value", "import_value"]
        yearly_data = yearly_data.reset_index(drop=True)

        # compute_trustworthiness comes from `utils`; here it is unpacked as a
        # (per-row scores, average) pair and only the scores are kept.
        trustworthiness_scores_, avg_trust = compute_trustworthiness(yearly_data, n_iter=100)

        # Keep a single T_a value per country (first occurrence) and tag it
        # with the year and commodity it belongs to.
        trustworthiness_scores_ = (
            trustworthiness_scores_[["country_id_x", "T_a"]]
            .drop_duplicates(subset=["country_id_x"], keep="first")
            .rename(columns={"country_id_x": "country_id", "T_a": "trustworthiness"})
            .assign(year=year, cmd=cmd)
        )
        yearly_frames.append(trustworthiness_scores_)

# Single concatenation; fall back to an empty frame if nothing was produced.
trustworthiness_scores = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

For countries with no reporting, assign the worst observed trustworthiness: the minimum score among reporting countries for the same year and commodity code.

In [None]:
# Give the reporters table the same country key name as the score table.
reporters.rename(columns={"reporterCode": "country_id"}, inplace=True)

key_columns = ['country_id', 'year', 'cmd']

# Full grid: every reporter crossed with every year and commodity code that
# appears in the computed scores.
observed_years = trustworthiness_scores.year.unique()
observed_cmds = trustworthiness_scores.cmd.unique()
all_combinations = pd.DataFrame(
    itertools.product(reporters.country_id, observed_years, observed_cmds),
    columns=key_columns,
)

# Left join keeps every grid row; combinations without a score get NaN.
trustworthiness_scores_all = all_combinations.merge(
    trustworthiness_scores, on=key_columns, how="left"
)

# Lowest computed score within each (year, cmd) group.
min_values = (
    trustworthiness_scores
    .groupby(['year', 'cmd'])['trustworthiness']
    .min()
    .reset_index()
)

# Attach the group minimum as an auxiliary `trustworthiness_min` column.
trustworthiness_scores_all = trustworthiness_scores_all.merge(
    min_values, on=['year', 'cmd'], suffixes=('', '_min')
)

# Take the auxiliary column out of the frame and use it to fill the gaps.
group_minimum = trustworthiness_scores_all.pop('trustworthiness_min')
trustworthiness_scores_all['trustworthiness'] = (
    trustworthiness_scores_all['trustworthiness'].fillna(group_minimum)
)

In [None]:
# Histogram of the filled-in trustworthiness scores across all rows of the
# table (the trailing semicolon suppresses the notebook's text output).
trustworthiness_scores_all["trustworthiness"].hist();

In [None]:
# Persist the completed score table; index=False leaves the row index out of the file.
trustworthiness_scores_all.to_csv("../data/trustworthiness_scores.csv", index=False)

#### Show a few nodes

In [None]:
# Draw the same small sub-network after 0, 1, 5 and 100 iterations of
# compute_trustworthiness, one panel per iteration count. Nodes are labelled
# with their rounded T_a value and edges with their rounded T_ab value.
#
# Uses whatever `yearly_data` was left by the last iteration of the scoring loop.

# Fixed 3x3 layout for (at most) the first nine nodes of the graph.
GRID_3X3 = [(0, 2), (1, 2), (2, 2), (0, 1), (1, 1), (2, 1), (0, 0), (1, 0), (2, 0)]

fig, ax = plt.subplots(1, 4, figsize=(20, 5))

for ix, j in enumerate([0, 1, 5, 100]):
    # The scoring loop earlier in this notebook unpacks compute_trustworthiness
    # as a two-element result, so take the first element when a tuple comes
    # back; a bare DataFrame is still accepted.
    result = compute_trustworthiness(yearly_data, n_iter=j)
    edges = result[0] if isinstance(result, tuple) else result

    edges["T_ab"] = edges["T_ab"].round(2)
    nodes_attr = edges[["country_id_x", "T_a"]].groupby("country_id_x").max().reset_index()
    nodes_attr["T_a"] = nodes_attr["T_a"].round(2)

    G = nx.from_pandas_edgelist(edges, source="country_id_x", target="partner_country_id_x", edge_attr="T_ab")

    # zip stops at the shorter input, so graphs with fewer than nine nodes no
    # longer raise IndexError, and the node list is not rebuilt per lookup.
    pos = dict(zip(G.nodes, GRID_3X3))

    # Build the displayed subgraph once and reuse it for all three draw calls.
    shown = G.subgraph(pos)

    # Only label nodes that are actually placed on the grid.
    nod_labels = {
        node: score
        for node, score in nodes_attr.set_index("country_id_x")["T_a"].items()
        if node in pos
    }

    nx.draw(shown, pos=pos, with_labels=False, node_color="skyblue", font_size=8, ax=ax[ix])
    nx.draw_networkx_labels(shown, pos, labels=nod_labels, font_size=10, font_color='k', font_family='sans-serif', font_weight='normal', ax=ax[ix])
    nx.draw_networkx_edge_labels(shown, pos=pos, edge_labels=nx.get_edge_attributes(shown, 'T_ab'), label_pos=0.5, rotate=False, font_size=8, ax=ax[ix])

plt.show()

#### Trustworthiness index - 100 iters

In [None]:
# Scores after 100 iterations for the `yearly_data` left by the last iteration
# of the scoring loop.
# NOTE: this rebinds `trustworthiness_scores`, replacing the multi-year table
# built earlier in the notebook with a single-slice table keyed by `country_id_x`.
#
# The scoring loop unpacks compute_trustworthiness as a two-element result, so
# take the first element when a tuple comes back; a bare DataFrame is still accepted.
result = compute_trustworthiness(yearly_data, n_iter=100)
trustworthiness_scores = result[0] if isinstance(result, tuple) else result

# One T_a value per country (first occurrence).
trustworthiness_scores = trustworthiness_scores[["country_id_x", "T_a"]].drop_duplicates(subset=["country_id_x"], keep="first")
trustworthiness_scores.head()

#### Comparison with Discrepancy index

In [None]:
# Mean DI per `CountryA`, as a two-column frame (CountryA, DI).
discrepancy_index = (
    pd.read_csv("../data/DISCREPANCY_INDEX_H5_2017_csv.zip")
    .groupby("CountryA", as_index=False)["DI"]
    .mean()
)

# Attach the ISO3 code to each scored country, then look up its mean DI by
# that code. Both joins are left joins, so unmatched countries are kept with NaN.
# NOTE(review): the file name suggests 2017 data, while the scores come from
# whichever `yearly_data` slice was computed last; confirm the periods are comparable.
iso_lookup = countries[["country_id", "iso3_code"]]
trustworthiness_scores_countries = (
    trustworthiness_scores
    .merge(iso_lookup, left_on="country_id_x", right_on="country_id", how="left")
    .merge(discrepancy_index, left_on="iso3_code", right_on="CountryA", how="left")
)
trustworthiness_scores_countries.head()

In [None]:
# Keep only rows with no missing value in any column, then correlate the
# absolute DI with T_a (np.corrcoef returns the 2x2 correlation matrix).
test = trustworthiness_scores_countries.dropna(axis=0, how="any")
np.corrcoef(test["DI"].abs(), test["T_a"])

#### Data summary

In [None]:
# Summary statistics for the current `trustworthiness_scores` table (at this
# point the per-country table from the 100-iteration cell, if cells ran in order).
trustworthiness_scores.describe()