In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import statsmodels.api as sm

# NOTE(review): no cell visible in this part of the notebook reads `seed` —
# confirm it is consumed further down, otherwise it can be dropped.
seed = 100

In [2]:
# Maps a centrality identifier (the CSV file stem) to the label shown in
# result tables.
centrality_names = dict([
    ('betweenness_centr_uw', 'Betweenness Centrality - Unweighted'),
    ('betweenness_centr', 'Betweenness Centrality - Weighted'),
    ('pagerank_centr_uw', 'PageRank Centrality - Unweighted'),
    ('degree_centr', 'Degree Centrality - Weighted'),
    ('pagerank_centr', 'PageRank Centrality - Weighted'),
    ('eigenv_centr', 'Eigenvector Centrality - Weighted'),
    ('eigenv_centr_uw', 'Eigenvector Centrality - Unweighted'),
    ('closeness_centr', 'Closeness Centrality - Weighted'),
])

In [3]:
# Root folder that holds one sub-folder of centrality CSVs per network.
# It can be redirected with the CENTRALITIES_DIR environment variable so the
# notebook also runs on other machines; the default is the original location.
centralities_dir = os.environ.get(
    'CENTRALITIES_DIR',
    '/Users/koshelev/Documents/lmu/thesis/2-centralities_computation/computed_centralities',
).rstrip('/')

# Each path keeps its trailing slash because later cells build file names as
# f'{path}{name}.csv'.
trade_centr_path = f'{centralities_dir}/trade/'
tech1_centr_path = f'{centralities_dir}/technology1/'
tech2_centr_path = f'{centralities_dir}/technology2/'

In [4]:
def add_pvlaue_level(coef, pvalue):
    """Render a coefficient as a LaTeX math string with significance asterisks.

    Parameters
    ----------
    coef
        Value to display; it is converted with ``str``.
    pvalue
        Number compared against the 0.01 / 0.05 / 0.1 cut-offs.

    Returns
    -------
    str
        ``'$<coef>^{<stars>}$'`` where ``<stars>`` is ``'***'`` for
        ``pvalue <= 0.01``, ``'**'`` for ``pvalue <= 0.05``, ``'*'`` for
        ``pvalue <= 0.1`` and empty otherwise (a NaN p-value also yields no
        asterisks because every comparison is false).
    """
    stars = ''
    # Cut-offs are checked from the strictest to the loosest; first match wins.
    for cutoff, marker in ((0.01, '***'), (0.05, '**'), (0.1, '*')):
        if pvalue <= cutoff:
            stars = marker
            break
    return '${}^{{{}}}$'.format(str(coef), stars)

In [5]:
# all computed centrality names
# Only '*.csv' entries are kept: os.listdir returns every directory entry, so
# a stray non-CSV file (e.g. a hidden system file) would otherwise become a
# bogus centrality name. splitext drops just the extension, whereas
# str.replace would also remove '.csv' from the middle of a name.
centr_names = sorted(
    (
        os.path.splitext(filename)[0]
        for filename in os.listdir(trade_centr_path)
        if filename.endswith('.csv')
    ),
    reverse=True,
)
print(centr_names)

['pagerank_centr_uw', 'pagerank_centr', 'eigenv_centr_uw', 'eigenv_centr', 'degree_centr', 'closeness_centr', 'betweenness_centr_uw', 'betweenness_centr']


In [6]:
# check how many countries do we have in different dimension pairs
# Country sets are taken from the column labels of one centrality file per
# dimension (the fourth name in centr_names).
# NOTE(review): presumably every centrality file of a dimension has the same
# columns, so any file would do — confirm.
reference_centr = centr_names[3]

### technology 1 & trade
# .ffill() replaces fillna(method='ffill'), whose `method` argument is
# deprecated in recent pandas releases.
trade_centr = pd.read_csv(f'{trade_centr_path}{reference_centr}.csv', index_col=0).ffill()
trade_set = set(trade_centr.columns)
tech1_centr = pd.read_csv(f'{tech1_centr_path}{reference_centr}.csv', index_col=0).ffill()
tech1_set = set(tech1_centr.columns)
tech1_overlap = trade_set.intersection(tech1_set)
print(f'countries in trade centr. dataframe: {len(trade_set)}')
print(f'countries in tech.1 centr. dataframe: {len(tech1_set)}')
print(f'intersection in trade and tech.1: {len(tech1_overlap)}')
# Share of the smaller country set that also appears in the other one.
print(f'intersection rate: {len(tech1_overlap) / min(len(trade_set), len(tech1_set))}')
print(' ')

### technology 2 & trade
tech2_centr = pd.read_csv(f'{tech2_centr_path}{reference_centr}.csv', index_col=0).ffill()
tech2_set = set(tech2_centr.columns)
tech2_overlap = trade_set.intersection(tech2_set)
print(f'countries in trade centr. dataframe: {len(trade_set)}')
print(f'countries in tech.2 centr. dataframe: {len(tech2_set)}')
print(f'intersection in trade and tech.2: {len(tech2_overlap)}')
print(f'intersection rate: {len(tech2_overlap) / min(len(trade_set), len(tech2_set))}')

countries in trade centr. dataframe: 253
countries in tech.1 centr. dataframe: 160
intersection in trade and tech.1: 155
intersection rate: 0.96875
 
countries in trade centr. dataframe: 253
countries in tech.2 centr. dataframe: 160
intersection in trade and tech.2: 155
intersection rate: 0.96875


In [7]:
# Countries present in both the trade and the technology centrality frames.
# Sorted so the column order is the same on every run: the iteration order of
# a set of strings can change between interpreter sessions.
tech1_trade_interc = sorted(trade_set.intersection(tech1_set))
tech2_trade_interc = sorted(trade_set.intersection(tech2_set))
# Index labels of the trade centrality frames used as regression outcomes.
trade_years = [2000, 2010, 2020]

In [8]:
# technology 1 and trade
# For every centrality measure, regress the trade-centrality rank at each
# entry of trade_years on the technology-centrality rank averaged over index
# labels 1963-1990 (OLS with an intercept). The slope is stored as a LaTeX
# string with significance asterisks.
# all_models1[i][j] belongs to trade_years[i] and centr_names[j].
all_models1 = [[] for _ in trade_years]
for centr in centr_names:
    # Neither the CSV contents nor the technology regressor depend on the
    # trade year, so they are loaded and ranked once per centrality measure.
    tech_centr_df = pd.read_csv(f'{tech1_centr_path}{centr}.csv', index_col=0)
    trade_centr_df = pd.read_csv(f'{trade_centr_path}{centr}.csv', index_col=0)
    # rank(axis=1) ranks the selected columns against each other within a row.
    tech_array = tech_centr_df[tech1_trade_interc].rank(axis=1).loc[1963:1990].mean(axis=0)
    trade_ranks = trade_centr_df[tech1_trade_interc].rank(axis=1)
    X = pd.DataFrame({'Tech. average centrality': tech_array})
    X = sm.add_constant(X)
    for year_coef, trade_year in zip(all_models1, trade_years):
        trade_array = trade_ranks.loc[trade_year]
        # missing='drop' discards observations with a NaN in either series.
        model = sm.OLS(endog=trade_array, exog=X, missing='drop')
        results = model.fit()
        coef = results.params['Tech. average centrality']
        pvalue = results.pvalues['Tech. average centrality']
        year_coef.append(add_pvlaue_level(coef=np.round(coef, 3), pvalue=np.round(pvalue, 5)))

In [9]:
# Flip the per-year lists so every centrality measure becomes a row and every
# trade year a column, then attach readable labels.
tech1_trade = pd.DataFrame(all_models1).T
tech1_trade.index = [centrality_names[name] for name in centr_names]
tech1_trade.columns = [
    'Technology (F-L, av.1963-1990) & Trade (2000)',
    'Technology (F-L, av.1963-1990) & Trade (2010)',
    'Technology (F-L, av.1963-1990) & Trade (2020)',
]

In [10]:
# technology 2 and trade
# For every centrality measure, regress the trade-centrality rank at each
# entry of trade_years on the technology-centrality rank averaged over index
# labels 1975-1999 (OLS with an intercept). The slope is stored as a LaTeX
# string with significance asterisks.
# all_models2[i][j] belongs to trade_years[i] and centr_names[j].
all_models2 = [[] for _ in trade_years]
for centr in centr_names:
    # Neither the CSV contents nor the technology regressor depend on the
    # trade year, so they are loaded and ranked once per centrality measure.
    tech_centr_df = pd.read_csv(f'{tech2_centr_path}{centr}.csv', index_col=0)
    trade_centr_df = pd.read_csv(f'{trade_centr_path}{centr}.csv', index_col=0)
    # rank(axis=1) ranks the selected columns against each other within a row.
    tech_array = tech_centr_df[tech2_trade_interc].rank(axis=1).loc[1975:1999].mean(axis=0)
    trade_ranks = trade_centr_df[tech2_trade_interc].rank(axis=1)
    X = pd.DataFrame({'Tech. average centrality': tech_array})
    X = sm.add_constant(X)
    for year_coef, trade_year in zip(all_models2, trade_years):
        trade_array = trade_ranks.loc[trade_year]
        # missing='drop' discards observations with a NaN in either series.
        model = sm.OLS(endog=trade_array, exog=X, missing='drop')
        results = model.fit()
        coef = results.params['Tech. average centrality']
        pvalue = results.pvalues['Tech. average centrality']
        year_coef.append(add_pvlaue_level(coef=np.round(coef, 3), pvalue=np.round(pvalue, 5)))

In [11]:
# Flip the per-year lists so every centrality measure becomes a row and every
# trade year a column, then attach readable labels.
tech2_trade = pd.DataFrame(all_models2).T
tech2_trade.index = [centrality_names[name] for name in centr_names]
tech2_trade.columns = [
    'Technology (B-L, av.1975-1999) & Trade (2000)',
    'Technology (B-L, av.1975-1999) & Trade (2010)',
    'Technology (B-L, av.1975-1999) & Trade (2020)',
]

In [12]:
# Put the two technology tables next to each other (rows align on the
# centrality label).
tech_trade = pd.concat((tech1_trade, tech2_trade), axis='columns')

In [13]:
# Show the table of slopes without controls. Cells are LaTeX strings from
# add_pvlaue_level: '*' p <= 0.1, '**' p <= 0.05, '***' p <= 0.01.
tech_trade

Unnamed: 0,"Technology (F-L, av.1963-1990) & Trade (2000)","Technology (F-L, av.1963-1990) & Trade (2010)","Technology (F-L, av.1963-1990) & Trade (2020)","Technology (B-L, av.1975-1999) & Trade (2000)","Technology (B-L, av.1975-1999) & Trade (2010)","Technology (B-L, av.1975-1999) & Trade (2020)"
PageRank Centrality - Unweighted,$1.122^{***}$,$1.074^{***}$,$1.035^{***}$,$1.082^{***}$,$1.085^{***}$,$1.115^{***}$
PageRank Centrality - Weighted,$1.168^{***}$,$1.123^{***}$,$1.079^{***}$,$1.236^{***}$,$1.214^{***}$,$1.193^{***}$
Eigenvector Centrality - Unweighted,$1.114^{***}$,$1.047^{***}$,$1.026^{***}$,$1.12^{***}$,$1.105^{***}$,$1.136^{***}$
Eigenvector Centrality - Weighted,$1.207^{***}$,$1.151^{***}$,$1.091^{***}$,$1.278^{***}$,$1.267^{***}$,$1.206^{***}$
Degree Centrality - Weighted,$1.199^{***}$,$1.162^{***}$,$1.092^{***}$,$1.25^{***}$,$1.219^{***}$,$1.205^{***}$
Closeness Centrality - Weighted,$1.112^{***}$,$1.048^{***}$,$1.025^{***}$,$1.076^{***}$,$1.062^{***}$,$1.091^{***}$
Betweenness Centrality - Unweighted,$1.393^{***}$,$1.381^{***}$,$1.29^{***}$,$1.48^{***}$,$1.467^{***}$,$1.41^{***}$
Betweenness Centrality - Weighted,$-0.254^{*}$,$-0.297^{**}$,$-0.184^{}$,$-0.308^{**}$,$-0.337^{**}$,$-0.156^{}$


In [14]:
# technology 1 and trade - with controls
# Same regression as the version without controls, plus one extra regressor:
# the country's own trade-centrality rank averaged over index labels
# 1963-1990. Only the slope on the technology regressor is reported.
# all_models1[i][j] belongs to trade_years[i] and centr_names[j].
all_models1 = [[] for _ in trade_years]
for centr in centr_names:
    # The CSV contents and both regressors are independent of the trade year,
    # so they are built once per centrality measure.
    tech_centr_df = pd.read_csv(f'{tech1_centr_path}{centr}.csv', index_col=0)
    trade_centr_df = pd.read_csv(f'{trade_centr_path}{centr}.csv', index_col=0)
    # rank(axis=1) ranks the selected columns against each other within a row.
    tech_array = tech_centr_df[tech1_trade_interc].rank(axis=1).loc[1963:1990].mean(axis=0)
    trade_ranks = trade_centr_df[tech1_trade_interc].rank(axis=1)
    trade_array1990 = trade_ranks.loc[1963:1990].mean(axis=0)
    X = pd.DataFrame({'Tech. average centrality': tech_array})
    X['Trade average centr by 1990'] = trade_array1990
    X = sm.add_constant(X)
    for year_coef, trade_year in zip(all_models1, trade_years):
        trade_array = trade_ranks.loc[trade_year]
        # missing='drop' discards observations with a NaN in any series.
        model = sm.OLS(endog=trade_array, exog=X, missing='drop')
        results = model.fit()
        coef = results.params['Tech. average centrality']
        pvalue = results.pvalues['Tech. average centrality']
        year_coef.append(add_pvlaue_level(coef=np.round(coef, 3), pvalue=np.round(pvalue, 5)))

In [15]:
# Flip the per-year lists so every centrality measure becomes a row and every
# trade year a column, then attach readable labels.
tech1_trade = pd.DataFrame(all_models1).T
tech1_trade.index = [centrality_names[name] for name in centr_names]
tech1_trade.columns = [
    'Technology (F-L, av.1963-1990) & Trade (2000)',
    'Technology (F-L, av.1963-1990) & Trade (2010)',
    'Technology (F-L, av.1963-1990) & Trade (2020)',
]

In [16]:
# technology 2 and trade - with Controls
# Same regression as the version without controls, plus one extra regressor:
# the country's own trade-centrality rank averaged over index labels
# 1975-1999. Only the slope on the technology regressor is reported.
# all_models2[i][j] belongs to trade_years[i] and centr_names[j].
all_models2 = [[] for _ in trade_years]
for centr in centr_names:
    # The CSV contents and both regressors are independent of the trade year,
    # so they are built once per centrality measure.
    tech_centr_df = pd.read_csv(f'{tech2_centr_path}{centr}.csv', index_col=0)
    trade_centr_df = pd.read_csv(f'{trade_centr_path}{centr}.csv', index_col=0)
    # rank(axis=1) ranks the selected columns against each other within a row.
    tech_array = tech_centr_df[tech2_trade_interc].rank(axis=1).loc[1975:1999].mean(axis=0)
    trade_ranks = trade_centr_df[tech2_trade_interc].rank(axis=1)
    # The control must use the tech-2 country set as well: ranking over the
    # tech-1 intersection would compare against a different group of countries
    # than the outcome and the technology regressor.
    trade_array1999 = trade_ranks.loc[1975:1999].mean(axis=0)
    X = pd.DataFrame({'Tech. average centrality': tech_array})
    X['Trade average centr by 1999'] = trade_array1999
    X = sm.add_constant(X)
    for year_coef, trade_year in zip(all_models2, trade_years):
        trade_array = trade_ranks.loc[trade_year]
        # missing='drop' discards observations with a NaN in any series.
        model = sm.OLS(endog=trade_array, exog=X, missing='drop')
        results = model.fit()
        coef = results.params['Tech. average centrality']
        pvalue = results.pvalues['Tech. average centrality']
        year_coef.append(add_pvlaue_level(coef=np.round(coef, 3), pvalue=np.round(pvalue, 5)))

In [17]:
# Flip the per-year lists so every centrality measure becomes a row and every
# trade year a column, then attach readable labels.
tech2_trade = pd.DataFrame(all_models2).T
tech2_trade.index = [centrality_names[name] for name in centr_names]
tech2_trade.columns = [
    'Technology (B-L, av.1975-1999) & Trade (2000)',
    'Technology (B-L, av.1975-1999) & Trade (2010)',
    'Technology (B-L, av.1975-1999) & Trade (2020)',
]

In [18]:
# Put the two with-controls tables next to each other (rows align on the
# centrality label).
tech_trade_controls = pd.concat((tech1_trade, tech2_trade), axis='columns')

In [19]:
# Show the table of slopes with controls. Cells are LaTeX strings from
# add_pvlaue_level: '*' p <= 0.1, '**' p <= 0.05, '***' p <= 0.01.
tech_trade_controls

Unnamed: 0,"Technology (F-L, av.1963-1990) & Trade (2000)","Technology (F-L, av.1963-1990) & Trade (2010)","Technology (F-L, av.1963-1990) & Trade (2020)","Technology (B-L, av.1975-1999) & Trade (2000)","Technology (B-L, av.1975-1999) & Trade (2010)","Technology (B-L, av.1975-1999) & Trade (2020)"
PageRank Centrality - Unweighted,$0.246^{***}$,$0.195^{*}$,$0.205^{*}$,$0.002^{}$,$0.106^{}$,$0.285^{***}$
PageRank Centrality - Weighted,$0.387^{***}$,$0.22^{**}$,$0.225^{**}$,$0.014^{}$,$-0.0^{}$,$0.049^{}$
Eigenvector Centrality - Unweighted,$0.212^{***}$,$0.142^{*}$,$0.171^{*}$,$0.046^{}$,$0.114^{*}$,$0.274^{***}$
Eigenvector Centrality - Weighted,$0.413^{***}$,$0.227^{***}$,$0.225^{**}$,$0.062^{}$,$0.112^{*}$,$0.106^{}$
Degree Centrality - Weighted,$0.607^{***}$,$0.54^{***}$,$0.532^{***}$,$0.185^{***}$,$0.156^{*}$,$0.314^{***}$
Closeness Centrality - Weighted,$0.229^{***}$,$0.158^{*}$,$0.178^{**}$,$0.062^{*}$,$0.114^{*}$,$0.255^{***}$
Betweenness Centrality - Unweighted,$0.985^{***}$,$1.038^{***}$,$0.922^{***}$,$0.041^{}$,$0.103^{}$,$0.224^{}$
Betweenness Centrality - Weighted,$-0.434^{**}$,$-0.379^{**}$,$-0.183^{}$,$-0.529^{***}$,$-0.471^{***}$,$-0.26^{*}$
