### Notebook 5 - Summary: correlations of node centralities' ranks in technology graphs and trade graphs

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from scipy.stats import pearsonr, spearmanr

# NOTE(review): `seed` is not referenced by any cell visible in this notebook — confirm whether it is still needed.
seed = 100

In [2]:
# Human-readable table labels, keyed by the centrality identifier
# (the CSV file name without its '.csv' extension).
centrality_names = dict(
    betweenness_centr_uw='Betweenness Centrality - Unweighted',
    betweenness_centr='Betweenness Centrality - Weighted',
    pagerank_centr_uw='PageRank Centrality - Unweighted',
    degree_centr='Degree Centrality - Weighted',
    pagerank_centr='PageRank Centrality - Weighted',
    eigenv_centr='Eigenvector Centrality - Weighted',
    eigenv_centr_uw='Eigenvector Centrality - Unweighted',
    closeness_centr='Closeness Centrality - Weighted',
)

In [3]:
def add_pvlaue_level(coef, pvalue):
    """Format a coefficient as a LaTeX math string with significance stars.

    Parameters
    ----------
    coef : value rendered with ``str()`` as the base of the expression.
    pvalue : number compared against the 0.01 / 0.05 / 0.1 thresholds.

    Returns
    -------
    str
        ``'$<coef>^{<stars>}$'`` where ``<stars>`` is ``'***'`` for
        ``pvalue <= 0.01``, ``'**'`` for ``pvalue <= 0.05``, ``'*'`` for
        ``pvalue <= 0.1`` and empty otherwise (including a NaN p-value,
        for which every comparison is false).
    """
    # Thresholds are checked from the strictest to the loosest; the first hit wins.
    for threshold, marker in ((0.01, '***'), (0.05, '**'), (0.1, '*')):
        if pvalue <= threshold:
            stars = marker
            break
    else:
        stars = ''
    return f'${coef!s}^{{{stars}}}$'

In [4]:
# Root folder that holds the computed centralities (one sub-folder per graph type).
# It can be overridden with the CENTRALITIES_DIR environment variable so the notebook
# runs on other machines; the default is the original local location.
centr_root = os.environ.get(
    'CENTRALITIES_DIR',
    '/Users/koshelev/Documents/lmu/thesis/2-centralities_computation/computed_centralities',
).rstrip('/') + '/'

# Each path keeps its trailing slash because file names are appended to it directly.
trade_centr_path = f'{centr_root}trade/'
tech1_centr_path = f'{centr_root}technology1/'
tech2_centr_path = f'{centr_root}technology2/'

In [5]:
# all computed centrality names
# Only '*.csv' entries are considered, so stray directory entries (for example a
# '.DS_Store' file) cannot end up in the list; the extension is stripped from the
# end of the name only. Names are sorted in reverse alphabetical order.
centr_names = sorted(
    (
        os.path.splitext(filename)[0]
        for filename in os.listdir(trade_centr_path)
        if filename.endswith('.csv')
    ),
    reverse=True,
)
print(centr_names)

['pagerank_centr_uw', 'pagerank_centr', 'eigenv_centr_uw', 'eigenv_centr', 'degree_centr', 'closeness_centr', 'betweenness_centr_uw', 'betweenness_centr']


In [6]:
# check how many countries we have in the different dimension pairs
# Only the column labels (countries) of one centrality file per graph type are used.
# NOTE(review): index 3 is 'eigenv_centr' in the listing printed by the previous cell;
# it depends on the directory contents.
reference_centr = centr_names[3]

### technology 1 & trade
# DataFrame.ffill() replaces the deprecated fillna(method='ffill').
trade_centr = pd.read_csv(f'{trade_centr_path}{reference_centr}.csv', index_col=0).ffill()
trade_set = set(trade_centr.columns)
tech1_centr = pd.read_csv(f'{tech1_centr_path}{reference_centr}.csv', index_col=0).ffill()
tech1_set = set(tech1_centr.columns)
tech1_overlap = trade_set & tech1_set
print(f'countries in trade centr. dataframe: {len(trade_set)}')
print(f'countries in tech.1 centr. dataframe: {len(tech1_set)}')
print(f'intersection in trade and tech.1: {len(tech1_overlap)}')
# share of the smaller country set that is also present in the other one
print(f'intersection rate: {len(tech1_overlap) / min(len(trade_set), len(tech1_set))}')
print(' ')

### technology 2 & trade
tech2_centr = pd.read_csv(f'{tech2_centr_path}{reference_centr}.csv', index_col=0).ffill()
tech2_set = set(tech2_centr.columns)
tech2_overlap = trade_set & tech2_set
print(f'countries in trade centr. dataframe: {len(trade_set)}')
print(f'countries in tech.2 centr. dataframe: {len(tech2_set)}')
print(f'intersection in trade and tech.2: {len(tech2_overlap)}')
print(f'intersection rate: {len(tech2_overlap) / min(len(trade_set), len(tech2_set))}')

countries in trade centr. dataframe: 253
countries in tech.1 centr. dataframe: 160
intersection in trade and tech.1: 155
intersection rate: 0.96875
 
countries in trade centr. dataframe: 253
countries in tech.2 centr. dataframe: 160
intersection in trade and tech.2: 155
intersection rate: 0.96875


In [7]:
# Countries present in both the trade graph and the respective technology graph.
# Sorted so that the column order is the same on every kernel restart
# (iteration order of a set of strings is not stable between interpreter runs).
tech1_trade_interc = sorted(trade_set.intersection(tech1_set))
tech2_trade_interc = sorted(trade_set.intersection(tech2_set))
# Trade years whose centrality ranks are compared with the technology ranks.
trade_years = [2000, 2010, 2020]

In [8]:
# technology 1 and trade
# For every centrality measure: rank the countries within each year, average the
# technology ranks over 1963-1990, and compute the Spearman correlation with the
# trade ranks of each year in `trade_years`.
# all_corr1[i][j] holds the formatted coefficient for trade_years[i] and centr_names[j].
all_corr1 = [[] for _ in trade_years]
for centr in centr_names:
    # The files and the rankings do not depend on the trade year, so they are
    # loaded and computed once per centrality instead of once per (year, centrality).
    tech_centr_df = pd.read_csv(f'{tech1_centr_path}{centr}.csv', index_col=0)
    trade_centr_df = pd.read_csv(f'{trade_centr_path}{centr}.csv', index_col=0)
    tech_array = tech_centr_df[tech1_trade_interc].rank(axis=1).loc[1963:1990].mean(axis=0)
    trade_ranks = trade_centr_df[tech1_trade_interc].rank(axis=1)
    for year_corr, trade_year in zip(all_corr1, trade_years):
        trade_array = trade_ranks.loc[trade_year]
        corr, pvalue = spearmanr(a=tech_array, b=trade_array, nan_policy='omit')
        # The significance level is chosen from the unrounded p-value so that
        # rounding cannot move a value across a threshold.
        year_corr.append(add_pvlaue_level(coef=np.round(corr, 3), pvalue=pvalue))

In [9]:
# Summary table: one row per centrality measure, one column per trade year.
# all_corr1 is organised per trade year, hence the transpose. The column labels are
# generated from `trade_years` so they always match the years that were computed.
tech1_trade = pd.DataFrame(
    all_corr1,
    index=[f'Technology (F-L, av.1963-1990) & Trade ({year})' for year in trade_years],
    columns=[centrality_names[name] for name in centr_names],
).T

In [10]:
# technology 2 and trade
# For every centrality measure: rank the countries within each year, average the
# technology ranks over 1975-1999, and compute the Spearman correlation with the
# trade ranks of each year in `trade_years`.
# all_corr2[i][j] holds the formatted coefficient for trade_years[i] and centr_names[j].
all_corr2 = [[] for _ in trade_years]
for centr in centr_names:
    # The files and the rankings do not depend on the trade year, so they are
    # loaded and computed once per centrality instead of once per (year, centrality).
    tech_centr_df = pd.read_csv(f'{tech2_centr_path}{centr}.csv', index_col=0)
    trade_centr_df = pd.read_csv(f'{trade_centr_path}{centr}.csv', index_col=0)
    tech_array = tech_centr_df[tech2_trade_interc].rank(axis=1).loc[1975:1999].mean(axis=0)
    trade_ranks = trade_centr_df[tech2_trade_interc].rank(axis=1)
    for year_corr, trade_year in zip(all_corr2, trade_years):
        trade_array = trade_ranks.loc[trade_year]
        corr, pvalue = spearmanr(a=tech_array, b=trade_array, nan_policy='omit')
        # The significance level is chosen from the unrounded p-value so that
        # rounding cannot move a value across a threshold.
        year_corr.append(add_pvlaue_level(coef=np.round(corr, 3), pvalue=pvalue))

In [11]:
# Summary table: one row per centrality measure, one column per trade year.
# all_corr2 is organised per trade year, hence the transpose. The column labels are
# generated from `trade_years` so they always match the years that were computed.
tech2_trade = pd.DataFrame(
    all_corr2,
    index=[f'Technology (B-L, av.1975-1999) & Trade ({year})' for year in trade_years],
    columns=[centrality_names[name] for name in centr_names],
).T

In [12]:
# Display the technology (F-L) vs. trade rank-correlation table.
tech1_trade

Unnamed: 0,"Technology (F-L, av.1963-1990) & Trade (2000)","Technology (F-L, av.1963-1990) & Trade (2010)","Technology (F-L, av.1963-1990) & Trade (2020)"
PageRank Centrality - Unweighted,$0.74^{***}$,$0.702^{***}$,$0.671^{***}$
PageRank Centrality - Weighted,$0.807^{***}$,$0.768^{***}$,$0.729^{***}$
Eigenvector Centrality - Unweighted,$0.731^{***}$,$0.68^{***}$,$0.67^{***}$
Eigenvector Centrality - Weighted,$0.85^{***}$,$0.792^{***}$,$0.753^{***}$
Degree Centrality - Weighted,$0.769^{***}$,$0.738^{***}$,$0.689^{***}$
Closeness Centrality - Weighted,$0.673^{***}$,$0.629^{***}$,$0.621^{***}$
Betweenness Centrality - Unweighted,$0.611^{***}$,$0.601^{***}$,$0.526^{***}$
Betweenness Centrality - Weighted,$-0.137^{*}$,$-0.146^{*}$,$-0.088^{}$


In [13]:
# Display the technology (B-L) vs. trade rank-correlation table.
tech2_trade

Unnamed: 0,"Technology (B-L, av.1975-1999) & Trade (2000)","Technology (B-L, av.1975-1999) & Trade (2010)","Technology (B-L, av.1975-1999) & Trade (2020)"
PageRank Centrality - Unweighted,$0.569^{***}$,$0.575^{***}$,$0.6^{***}$
PageRank Centrality - Weighted,$0.685^{***}$,$0.675^{***}$,$0.665^{***}$
Eigenvector Centrality - Unweighted,$0.59^{***}$,$0.584^{***}$,$0.613^{***}$
Eigenvector Centrality - Weighted,$0.71^{***}$,$0.702^{***}$,$0.67^{***}$
Degree Centrality - Weighted,$0.753^{***}$,$0.732^{***}$,$0.736^{***}$
Closeness Centrality - Weighted,$0.566^{***}$,$0.56^{***}$,$0.587^{***}$
Betweenness Centrality - Unweighted,$0.594^{***}$,$0.577^{***}$,$0.606^{***}$
Betweenness Centrality - Weighted,$-0.148^{*}$,$-0.17^{**}$,$-0.031^{}$


In [14]:
# Place both summary tables side by side (aligned on the centrality labels in the index).
tech_trade = pd.concat(
    [tech1_trade, tech2_trade],
    axis='columns',
)

In [15]:
# Display the combined table with both technology variants.
tech_trade

Unnamed: 0,"Technology (F-L, av.1963-1990) & Trade (2000)","Technology (F-L, av.1963-1990) & Trade (2010)","Technology (F-L, av.1963-1990) & Trade (2020)","Technology (B-L, av.1975-1999) & Trade (2000)","Technology (B-L, av.1975-1999) & Trade (2010)","Technology (B-L, av.1975-1999) & Trade (2020)"
PageRank Centrality - Unweighted,$0.74^{***}$,$0.702^{***}$,$0.671^{***}$,$0.569^{***}$,$0.575^{***}$,$0.6^{***}$
PageRank Centrality - Weighted,$0.807^{***}$,$0.768^{***}$,$0.729^{***}$,$0.685^{***}$,$0.675^{***}$,$0.665^{***}$
Eigenvector Centrality - Unweighted,$0.731^{***}$,$0.68^{***}$,$0.67^{***}$,$0.59^{***}$,$0.584^{***}$,$0.613^{***}$
Eigenvector Centrality - Weighted,$0.85^{***}$,$0.792^{***}$,$0.753^{***}$,$0.71^{***}$,$0.702^{***}$,$0.67^{***}$
Degree Centrality - Weighted,$0.769^{***}$,$0.738^{***}$,$0.689^{***}$,$0.753^{***}$,$0.732^{***}$,$0.736^{***}$
Closeness Centrality - Weighted,$0.673^{***}$,$0.629^{***}$,$0.621^{***}$,$0.566^{***}$,$0.56^{***}$,$0.587^{***}$
Betweenness Centrality - Unweighted,$0.611^{***}$,$0.601^{***}$,$0.526^{***}$,$0.594^{***}$,$0.577^{***}$,$0.606^{***}$
Betweenness Centrality - Weighted,$-0.137^{*}$,$-0.146^{*}$,$-0.088^{}$,$-0.148^{*}$,$-0.17^{**}$,$-0.031^{}$
