### Notebook 6 - Summary: correlations of node centralities' ranks in technology graphs and GDPpc growth

In [92]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from scipy.stats import pearsonr, spearmanr, ttest_ind, mannwhitneyu

seed = 100

In [2]:
centrality_names = {
    'betweenness_centr_uw' : 'Betweenness Centrality - Unweighted',
    'betweenness_centr' : 'Betweenness Centrality - Weighted',
    'pagerank_centr_uw' : 'PageRank Centrality - Unweighted',
    'degree_centr' : 'Degree Centrality - Weighted',
    'pagerank_centr' : 'PageRank Centrality - Weighted',
    'eigenv_centr' : 'Eigenvector Centrality - Weighted',
    'eigenv_centr_uw' : 'Eigenvector Centrality - Unweighted',
    'closeness_centr' : 'Closeness Centrality - Weighted' 
}

In [3]:
def add_pvlaue_level(coef, pvalue):
    if pvalue <= 0.1 and pvalue > 0.05:
        asterisks = '*'
    elif pvalue <= 0.05 and pvalue > 0.01:
        asterisks = '**'
    elif pvalue <= 0.01:
        asterisks = '***'
    else:
        asterisks = ''
    return ('$' + str(coef) + '^{' + asterisks + '}$')

In [5]:
tech1_centr_path = '/Users/koshelev/Documents/lmu/thesis/2-centralities_computation/computed_centralities/technology1/'
tech2_centr_path = '/Users/koshelev/Documents/lmu/thesis/2-centralities_computation/computed_centralities/technology2/'

In [6]:
# all computed centrality names
centr_names = []
for filename in os.listdir(tech1_centr_path):
    centr_names.append(filename.replace('.csv', ''))
centr_names = sorted(centr_names, reverse=True)
print(centr_names)

['pagerank_centr_uw', 'pagerank_centr', 'eigenv_centr_uw', 'eigenv_centr', 'degree_centr', 'closeness_centr', 'betweenness_centr_uw', 'betweenness_centr']


In [7]:
# GDP p.c. data from PWT10
pwt = pd.read_csv('data/pwt10.csv')[['countrycode', 'year', 'rgdpo', 'pop']]
pwt['gdppc'] = pwt['rgdpo'] / pwt['pop']
pwt.drop(columns=['rgdpo', 'pop'], inplace=True)
pwt = pwt.pivot(index='year', columns='countrycode', values='gdppc')
pwt.sort_values(by='year', inplace=True)

In [8]:
# check how many countries do we have in different dimension pairs

### technology 1 & GDPpc
gdp_set = set(pwt.columns)
tech1_centr = pd.read_csv(f'{tech1_centr_path}{centr_names[3]}.csv', index_col=0).fillna(method='ffill')
tech1_set = set(tech1_centr.columns)
print(f'countries in GDPpc dataframe: {len(gdp_set)}')
print(f'countries in tech.1 centr. dataframe: {len(tech1_set)}')
print(f'intersection in GDPpc and tech.1: {len(gdp_set.intersection(tech1_set))}')
print(f'intersection rate: {len(gdp_set.intersection(tech1_set)) / min(len(gdp_set), len(tech1_set))}')
print(' ')
### technology 2 & GDPpc
tech2_centr = pd.read_csv(f'{tech2_centr_path}{centr_names[3]}.csv', index_col=0).fillna(method='ffill')
tech2_set = set(tech2_centr.columns)
print(f'countries in GDPpc dataframe: {len(gdp_set)}')
print(f'countries in tech.2 centr. dataframe: {len(tech2_set)}')
print(f'intersection in GDPpc and tech.2: {len(gdp_set.intersection(tech2_set))}')
print(f'intersection rate: {len(gdp_set.intersection(tech2_set)) / min(len(gdp_set), len(tech2_set))}')

countries in GDPpc dataframe: 183
countries in tech.1 centr. dataframe: 160
intersection in GDPpc and tech.1: 137
intersection rate: 0.85625
 
countries in GDPpc dataframe: 183
countries in tech.2 centr. dataframe: 160
intersection in GDPpc and tech.2: 137
intersection rate: 0.85625


In [9]:
tech1_years = [1963, 1975, 1990]
tech2_years = [1975, 1990, 1999]

In [13]:
tech1_gdp_interc = list(gdp_set.intersection(tech1_set))
tech2_gdp_interc = list(gdp_set.intersection(tech2_set))

In [103]:
# technology 1 and GDPpc growth 
n = 20
all_corr1 = []
for tech1_year in tech1_years:
    year_corr = []
    for centr in centr_names:
        tech_centr_df = pd.read_csv(f'{tech1_centr_path}{centr}.csv', index_col=0)
        tech_array = tech_centr_df[tech1_trade_interc].rank(axis=1).loc[tech1_year]
        gdppc_array = pwt[tech1_trade_interc].loc[2019] - pwt[tech1_trade_interc].loc[tech1_year+1]
        common_df = pd.concat([tech_array, gdppc_array], axis=1)
        common_df.columns = ['centr_rank', 'gdppc_change']
        common_df = common_df.sort_values(by='centr_rank', ascending=False).dropna()
        top_n_gdppc_gr = common_df['gdppc_change'].iloc[:n].values
        bottom_n_gdppc_gr = common_df['gdppc_change'].iloc[-n:].values
        diff = round(np.mean(top_n_gdppc_gr) - np.mean(bottom_n_gdppc_gr), 3)
        stat, pvalue = mannwhitneyu(top_n_gdppc_gr, 
                                    bottom_n_gdppc_gr, 
                                    nan_policy='omit', 
                                    #random_state=seed, 
                                    alternative='two-sided')
        year_corr.append(add_pvlaue_level(coef=diff, pvalue=np.round(pvalue, 5)))
    all_corr1.append(year_corr)

In [104]:
tech1_gdppc = pd.DataFrame(all_corr1).transpose()
tech1_gdppc.index = list(map(lambda x: centrality_names[x], centr_names))
tech1_gdppc.columns = ['Technology (F-L, 1963) & GDP p.c. growth (1964-2019)', 
                        'Technology (F-L, 1975) & GDP p.c. growth (1976-2019)', 
                        'Technology (F-L, 1990) & GDP p.c. growth (1991-2019)']

In [105]:
tech1_gdppc

Unnamed: 0,"Technology (F-L, 1963) & GDP p.c. growth (1964-2019)","Technology (F-L, 1975) & GDP p.c. growth (1976-2019)","Technology (F-L, 1990) & GDP p.c. growth (1991-2019)"
PageRank Centrality - Unweighted,$26418.337^{***}$,$26376.291^{***}$,$28756.926^{***}$
PageRank Centrality - Weighted,$28784.11^{***}$,$19029.496^{***}$,$29590.126^{***}$
Eigenvector Centrality - Unweighted,$26231.071^{***}$,$26933.45^{***}$,$28308.583^{***}$
Eigenvector Centrality - Weighted,$28029.612^{***}$,$19870.373^{***}$,$28961.533^{***}$
Degree Centrality - Weighted,$25197.941^{***}$,$26738.932^{***}$,$14337.238^{***}$
Closeness Centrality - Weighted,$28022.435^{***}$,$22883.529^{***}$,$29037.289^{***}$
Betweenness Centrality - Unweighted,$13463.255^{***}$,$14206.485^{***}$,$11517.212^{***}$
Betweenness Centrality - Weighted,$18653.469^{***}$,$16859.562^{***}$,$12871.297^{***}$


In [97]:
# technology 2 and GDPpc growth 
n = 20
all_corr2 = []
for tech2_year in tech2_years:
    year_corr = []
    for centr in centr_names:
        tech_centr_df = pd.read_csv(f'{tech2_centr_path}{centr}.csv', index_col=0)
        tech_array = tech_centr_df[tech2_trade_interc].rank(axis=1).loc[tech2_year]
        gdppc_array = pwt[tech2_trade_interc].loc[2019] - pwt[tech2_trade_interc].loc[tech2_year+1]
        common_df = pd.concat([tech_array, gdppc_array], axis=1)
        common_df.columns = ['centr_rank', 'gdppc_change']
        common_df = common_df.sort_values(by='centr_rank', ascending=False).dropna()
        top_n_gdppc_gr = common_df['gdppc_change'].iloc[:n].values
        bottom_n_gdppc_gr = common_df['gdppc_change'].iloc[-n:].values
        diff = round(np.mean(top_n_gdppc_gr) - np.mean(bottom_n_gdppc_gr), 3)
        stat, pvalue = mannwhitneyu(top_n_gdppc_gr, 
                                  bottom_n_gdppc_gr, 
                                 nan_policy='omit', 
                                 #random_state=seed, 
                                 alternative='two-sided')
        year_corr.append(add_pvlaue_level(coef=diff, pvalue=np.round(pvalue, 5)))
    all_corr2.append(year_corr)

In [98]:
tech2_gdppc = pd.DataFrame(all_corr2).transpose()
tech2_gdppc.index = list(map(lambda x: centrality_names[x], centr_names))
tech2_gdppc.columns = ['Technology (F-L, 1975) & GDP p.c. growth (1976-2019)', 
                        'Technology (F-L, 1990) & GDP p.c. growth (1991-2019)', 
                        'Technology (F-L, 1999) & GDP p.c. growth (2000-2019)']

In [99]:
tech2_gdppc

Unnamed: 0,"Technology (F-L, 1975) & GDP p.c. growth (1976-2019)","Technology (F-L, 1990) & GDP p.c. growth (1991-2019)","Technology (F-L, 1999) & GDP p.c. growth (2000-2019)"
PageRank Centrality - Unweighted,$24910.399^{***}$,$17302.692^{***}$,$6073.263^{***}$
PageRank Centrality - Weighted,$25947.365^{***}$,$18373.289^{***}$,$6461.278^{***}$
Eigenvector Centrality - Unweighted,$24910.399^{***}$,$18088.59^{***}$,$6714.278^{***}$
Eigenvector Centrality - Weighted,$25263.334^{***}$,$18373.289^{***}$,$6699.229^{***}$
Degree Centrality - Weighted,$24233.485^{***}$,$18726.03^{***}$,$5530.187^{***}$
Closeness Centrality - Weighted,$26258.314^{***}$,$18316.811^{***}$,$7449.129^{***}$
Betweenness Centrality - Unweighted,$13656.467^{***}$,$13831.501^{***}$,$8167.12^{***}$
Betweenness Centrality - Weighted,$32286.636^{***}$,$8625.158^{***}$,$5746.071^{***}$
