In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import statsmodels.api as sm

# Seed constant for reproducibility; none of the visible cells reads it
# (NOTE(review): confirm whether a later cell uses it, otherwise it is dead).
seed = 100

In [2]:
# Maps a centrality file stem (CSV file name without the extension) to the
# human-readable label used as row index in the result tables.
centrality_names = dict([
    ('betweenness_centr_uw', 'Betweenness Centrality - Unweighted'),
    ('betweenness_centr', 'Betweenness Centrality - Weighted'),
    ('pagerank_centr_uw', 'PageRank Centrality - Unweighted'),
    ('degree_centr', 'Degree Centrality - Weighted'),
    ('pagerank_centr', 'PageRank Centrality - Weighted'),
    ('eigenv_centr', 'Eigenvector Centrality - Weighted'),
    ('eigenv_centr_uw', 'Eigenvector Centrality - Unweighted'),
    ('closeness_centr', 'Closeness Centrality - Weighted'),
])

In [3]:
def add_pvlaue_level(coef, pvalue):
    """Format a coefficient as a LaTeX math string with significance stars.

    Parameters
    ----------
    coef : value rendered with ``str()`` as the base of the expression.
    pvalue : p-value used to pick the number of stars:
        ``pvalue <= 0.01`` -> ``***``, ``0.01 < pvalue <= 0.05`` -> ``**``,
        ``0.05 < pvalue <= 0.1`` -> ``*``, anything else (including NaN) -> none.

    Returns
    -------
    str
        ``'$<coef>^{<stars>}$'``; the superscript braces are emitted even
        when there are no stars.
    """
    # Checking the tightest threshold first makes the lower bounds implicit.
    if pvalue <= 0.01:
        stars = '***'
    elif pvalue <= 0.05:
        stars = '**'
    elif pvalue <= 0.1:
        stars = '*'
    else:
        stars = ''
    return ''.join(['$', str(coef), '^{', stars, '}$'])

In [4]:
# Directories with the computed centrality CSVs, one directory per technology
# network. The paths are absolute and machine-specific. The trailing slash is
# required: file names are appended with plain string formatting
# (f'{path}{name}.csv'), not os.path.join.
tech1_centr_path = '/Users/koshelev/Documents/lmu/thesis/2-centralities_computation/computed_centralities/technology1/'
tech2_centr_path = '/Users/koshelev/Documents/lmu/thesis/2-centralities_computation/computed_centralities/technology2/'

In [5]:
# all computed centrality names
# Each name is the stem of a CSV file in the technology-1 directory. Only
# entries ending in '.csv' are kept, so stray directory entries (e.g. a
# '.DS_Store' file) cannot end up in the list and break the later lookups in
# `centrality_names`. The extension is removed with splitext so that only the
# suffix is stripped, not every occurrence of '.csv' inside the name.
centr_names = sorted(
    (
        os.path.splitext(filename)[0]
        for filename in os.listdir(tech1_centr_path)
        if filename.endswith('.csv')
    ),
    reverse=True,
)
print(centr_names)

['pagerank_centr_uw', 'pagerank_centr', 'eigenv_centr_uw', 'eigenv_centr', 'degree_centr', 'closeness_centr', 'betweenness_centr_uw', 'betweenness_centr']


In [6]:
# GDP p.c. data from PWT10
# Keep only the columns needed below, derive GDP per capita (rgdpo / pop)
# and drop the two raw inputs afterwards.
cols = ['countrycode', 'year', 'rgdpo', 'pop', 'hc', 'ctfp', 'cn']
pwt = (
    pd.read_csv('/Users/koshelev/Documents/lmu/thesis/5-centrality_correlations/data/pwt10.csv')[cols]
    .assign(gdppc=lambda frame: frame['rgdpo'] / frame['pop'])
    .drop(columns=['rgdpo', 'pop'])
)

# One wide frame per variable: rows are years (ascending), columns are
# country codes.
#   gdppc_df - GDP p.c.
#   hc_df    - PWT human capital index
#   tfp_df   - TFP
#   cap_df   - Capital stock
gdppc_df, hc_df, tfp_df, cap_df = (
    pwt.pivot(index='year', columns='countrycode', values=value_col).sort_values(by='year')
    for value_col in ('gdppc', 'hc', 'ctfp', 'cn')
)

In [7]:
# check how many countries we have in different dimension pairs

def _print_country_overlap(tech_label, tech_countries, gdp_countries):
    """Print the country counts of both frames, their overlap and the overlap rate.

    tech_label     : short label inserted into the printed lines (e.g. 'tech.1').
    tech_countries : set of column labels of the technology centrality frame.
    gdp_countries  : set of column labels of the GDP p.c. frame.

    The rate is the overlap size divided by the size of the smaller set.
    """
    shared = gdp_countries.intersection(tech_countries)
    print(f'countries in GDPpc dataframe: {len(gdp_countries)}')
    print(f'countries in {tech_label} centr. dataframe: {len(tech_countries)}')
    print(f'intersection in GDPpc and {tech_label}: {len(shared)}')
    print(f'intersection rate: {len(shared) / min(len(gdp_countries), len(tech_countries))}')


### technology 1 & GDPpc
gdp_set = set(gdppc_df.columns)
# Only the column labels of this frame are used below; centr_names[3] picks
# one centrality file (presumably all files share the same country columns -
# TODO confirm). `.ffill()` replaces the deprecated `fillna(method='ffill')`.
tech1_centr = pd.read_csv(f'{tech1_centr_path}{centr_names[3]}.csv', index_col=0).ffill()
tech1_set = set(tech1_centr.columns)
_print_country_overlap('tech.1', tech1_set, gdp_set)
print(' ')
### technology 2 & GDPpc
tech2_centr = pd.read_csv(f'{tech2_centr_path}{centr_names[3]}.csv', index_col=0).ffill()
tech2_set = set(tech2_centr.columns)
_print_country_overlap('tech.2', tech2_set, gdp_set)

countries in GDPpc dataframe: 183
countries in tech.1 centr. dataframe: 160
intersection in GDPpc and tech.1: 137
intersection rate: 0.85625
 
countries in GDPpc dataframe: 183
countries in tech.2 centr. dataframe: 160
intersection in GDPpc and tech.2: 137
intersection rate: 0.85625


In [8]:
# Years at which the technology centralities are measured for each network.
tech1_years = [1963, 1975, 1990]
tech2_years = [1975, 1990, 1999]

# Countries present in both the GDP p.c. frame and the respective technology
# frame. Sorted so that the column order is the same in every kernel session;
# iterating a set of strings directly depends on hash randomisation.
tech1_gdp_interc = sorted(gdp_set.intersection(tech1_set))
tech2_gdp_interc = sorted(gdp_set.intersection(tech2_set))

In [9]:
# technology 1 and GDP p.c. - no controls
# For every technology year and centrality measure, regress the change in
# GDP p.c. (2019 level minus the level in the year after `tech_year`) on the
# cross-country rank of the centrality in `tech_year`, with a constant.
# Result: all_models1[i][j] is the formatted coefficient for tech1_years[i]
# and centr_names[j].

# Read and rank each centrality file once instead of once per year.
tech1_centr_ranks = {
    centr: pd.read_csv(f'{tech1_centr_path}{centr}.csv', index_col=0)[tech1_gdp_interc].rank(axis=1)
    for centr in centr_names
}

all_models1 = []
for tech_year in tech1_years:
    # The dependent variable does not depend on the centrality measure.
    gdppc_array = gdppc_df[tech1_gdp_interc].loc[2019] - gdppc_df[tech1_gdp_interc].loc[tech_year+1]
    year_coef = []
    for centr in centr_names:
        X = pd.DataFrame({'Tech. centrality': tech1_centr_ranks[centr].loc[tech_year]})
        X = sm.add_constant(X)
        # missing='drop' removes countries with a NaN in either side.
        model = sm.OLS(endog=gdppc_array, exog=X, missing='drop')
        results = model.fit()
        coef = results.params['Tech. centrality']
        pvalue = results.pvalues['Tech. centrality']
        # Stars are decided on the unrounded p-value: rounding first would
        # shift the 0.01 / 0.05 / 0.1 thresholds by up to 5e-6.
        year_coef.append(add_pvlaue_level(coef=np.round(coef, 3), pvalue=pvalue))
    all_models1.append(year_coef)

In [10]:
# Turn the per-year coefficient lists into a table: one row per centrality
# measure (display label), one column per technology year.
tech1_gdppc = pd.DataFrame(all_models1).T
tech1_gdppc.index = [centrality_names[name] for name in centr_names]
tech1_gdppc.columns = [
    'Technology (F-L, 1963) & GDP p.c. growth (1964-2019)',
    'Technology (F-L, 1975) & GDP p.c. growth (1976-2019)',
    'Technology (F-L, 1990) & GDP p.c. growth (1991-2019)',
]

In [11]:
# technology 2 and GDP p.c. - no controls
# For every technology year and centrality measure, regress the change in
# GDP p.c. (2019 level minus the level in the year after `tech_year`) on the
# cross-country rank of the centrality in `tech_year`, with a constant.
# Result: all_models2[i][j] is the formatted coefficient for tech2_years[i]
# and centr_names[j].

# Read and rank each centrality file once instead of once per year.
tech2_centr_ranks = {
    centr: pd.read_csv(f'{tech2_centr_path}{centr}.csv', index_col=0)[tech2_gdp_interc].rank(axis=1)
    for centr in centr_names
}

all_models2 = []
for tech_year in tech2_years:
    # The dependent variable does not depend on the centrality measure.
    gdppc_array = gdppc_df[tech2_gdp_interc].loc[2019] - gdppc_df[tech2_gdp_interc].loc[tech_year+1]
    year_coef = []
    for centr in centr_names:
        X = pd.DataFrame({'Tech. centrality': tech2_centr_ranks[centr].loc[tech_year]})
        X = sm.add_constant(X)
        # missing='drop' removes countries with a NaN in either side.
        model = sm.OLS(endog=gdppc_array, exog=X, missing='drop')
        results = model.fit()
        coef = results.params['Tech. centrality']
        pvalue = results.pvalues['Tech. centrality']
        # Stars are decided on the unrounded p-value: rounding first would
        # shift the 0.01 / 0.05 / 0.1 thresholds by up to 5e-6.
        year_coef.append(add_pvlaue_level(coef=np.round(coef, 3), pvalue=pvalue))
    all_models2.append(year_coef)

In [12]:
# Turn the per-year coefficient lists into a table: one row per centrality
# measure (display label), one column per technology year.
tech2_gdppc = pd.DataFrame(all_models2).T
tech2_gdppc.index = [centrality_names[name] for name in centr_names]
tech2_gdppc.columns = [
    'Technology (B-L, 1975) & GDP p.c. growth (1976-2019)',
    'Technology (B-L, 1990) & GDP p.c. growth (1991-2019)',
    'Technology (B-L, 1999) & GDP p.c. growth (2000-2019)',
]

In [13]:
# Place the technology-1 and technology-2 tables side by side (shared row index).
tech_gdppc_no_controls = pd.concat((tech1_gdppc, tech2_gdppc), axis='columns')

In [14]:
# Show the no-controls table. Each cell is the coefficient on the centrality
# rank; stars follow add_pvlaue_level (* p<=0.1, ** p<=0.05, *** p<=0.01).
tech_gdppc_no_controls

Unnamed: 0,"Technology (F-L, 1963) & GDP p.c. growth (1964-2019)","Technology (F-L, 1975) & GDP p.c. growth (1976-2019)","Technology (F-L, 1990) & GDP p.c. growth (1991-2019)","Technology (B-L, 1975) & GDP p.c. growth (1976-2019)","Technology (B-L, 1990) & GDP p.c. growth (1991-2019)","Technology (B-L, 1999) & GDP p.c. growth (2000-2019)"
PageRank Centrality - Unweighted,$416.675^{***}$,$333.399^{**}$,$360.089^{***}$,$431.242^{***}$,$279.217^{**}$,$95.25^{**}$
PageRank Centrality - Weighted,$421.233^{***}$,$325.112^{**}$,$355.299^{***}$,$442.177^{***}$,$301.976^{***}$,$73.769^{*}$
Eigenvector Centrality - Unweighted,$422.165^{***}$,$322.096^{**}$,$365.662^{***}$,$433.529^{***}$,$278.72^{**}$,$99.309^{**}$
Eigenvector Centrality - Weighted,$449.222^{***}$,$323.305^{**}$,$358.03^{***}$,$479.014^{***}$,$302.193^{***}$,$96.965^{**}$
Degree Centrality - Weighted,$394.994^{***}$,$350.709^{**}$,$262.49^{**}$,$544.954^{***}$,$259.639^{**}$,$87.621^{**}$
Closeness Centrality - Weighted,$392.699^{***}$,$309.924^{**}$,$368.681^{***}$,$371.988^{**}$,$262.273^{**}$,$103.894^{**}$
Betweenness Centrality - Unweighted,$276.609^{***}$,$439.863^{***}$,$306.466^{**}$,$407.374^{**}$,$366.043^{***}$,$116.277^{**}$
Betweenness Centrality - Weighted,$219.309^{**}$,$316.293^{**}$,$206.413^{*}$,$607.463^{***}$,$104.485^{}$,$55.835^{}$


In [15]:
# technology 1 and GDP p.c. - with controls
# Same regression as the no-controls version (change in GDP p.c. between the
# year after `tech_year` and 2019 on the centrality rank in `tech_year`), with
# three additional regressors measured in `tech_year`: the GDP p.c. level,
# the PWT human capital index and the capital stock.
# Result: all_models1[i][j] is the formatted centrality coefficient for
# tech1_years[i] and centr_names[j].

# Read and rank each centrality file once instead of once per year.
tech1_centr_ranks = {
    centr: pd.read_csv(f'{tech1_centr_path}{centr}.csv', index_col=0)[tech1_gdp_interc].rank(axis=1)
    for centr in centr_names
}

all_models1 = []
for tech_year in tech1_years:
    # Outcome and controls depend only on the year, not on the centrality.
    gdppc_array = gdppc_df[tech1_gdp_interc].loc[2019] - gdppc_df[tech1_gdp_interc].loc[tech_year+1]
    init_gdppc_array = gdppc_df[tech1_gdp_interc].loc[tech_year]
    hc_array = hc_df[tech1_gdp_interc].loc[tech_year]
    cap_array = cap_df[tech1_gdp_interc].loc[tech_year]
    year_coef = []
    for centr in centr_names:
        X = pd.DataFrame({
            'Tech. centrality': tech1_centr_ranks[centr].loc[tech_year],
            'Initial GDP p.c.': init_gdppc_array,
            'HC index': hc_array,
            'Capital Stock': cap_array,
        })
        X = sm.add_constant(X)
        # missing='drop' removes countries with a NaN in any variable.
        model = sm.OLS(endog=gdppc_array, exog=X, missing='drop')
        results = model.fit()
        coef = results.params['Tech. centrality']
        pvalue = results.pvalues['Tech. centrality']
        # Stars are decided on the unrounded p-value: rounding first would
        # shift the 0.01 / 0.05 / 0.1 thresholds by up to 5e-6.
        year_coef.append(add_pvlaue_level(coef=np.round(coef, 3), pvalue=pvalue))
    all_models1.append(year_coef)

In [16]:
# Turn the per-year coefficient lists into a table: one row per centrality
# measure (display label), one column per technology year.
tech1_gdppc = pd.DataFrame(all_models1).T
tech1_gdppc.index = [centrality_names[name] for name in centr_names]
tech1_gdppc.columns = [
    'Technology (F-L, 1963) & GDP p.c. growth (1964-2019)',
    'Technology (F-L, 1975) & GDP p.c. growth (1976-2019)',
    'Technology (F-L, 1990) & GDP p.c. growth (1991-2019)',
]

In [17]:
# technology 2 and GDP p.c. - with controls
# Same regression as the no-controls version (change in GDP p.c. between the
# year after `tech_year` and 2019 on the centrality rank in `tech_year`), with
# three additional regressors measured in `tech_year`: the GDP p.c. level,
# the PWT human capital index and the capital stock.
# Result: all_models2[i][j] is the formatted centrality coefficient for
# tech2_years[i] and centr_names[j].

# Read and rank each centrality file once instead of once per year.
tech2_centr_ranks = {
    centr: pd.read_csv(f'{tech2_centr_path}{centr}.csv', index_col=0)[tech2_gdp_interc].rank(axis=1)
    for centr in centr_names
}

all_models2 = []
for tech_year in tech2_years:
    # Outcome and controls depend only on the year, not on the centrality.
    gdppc_array = gdppc_df[tech2_gdp_interc].loc[2019] - gdppc_df[tech2_gdp_interc].loc[tech_year+1]
    init_gdppc_array = gdppc_df[tech2_gdp_interc].loc[tech_year]
    hc_array = hc_df[tech2_gdp_interc].loc[tech_year]
    cap_array = cap_df[tech2_gdp_interc].loc[tech_year]
    year_coef = []
    for centr in centr_names:
        X = pd.DataFrame({
            'Tech. centrality': tech2_centr_ranks[centr].loc[tech_year],
            'Initial GDP p.c.': init_gdppc_array,
            'HC index': hc_array,
            'Capital Stock': cap_array,
        })
        X = sm.add_constant(X)
        # missing='drop' removes countries with a NaN in any variable.
        model = sm.OLS(endog=gdppc_array, exog=X, missing='drop')
        results = model.fit()
        coef = results.params['Tech. centrality']
        pvalue = results.pvalues['Tech. centrality']
        # Stars are decided on the unrounded p-value: rounding first would
        # shift the 0.01 / 0.05 / 0.1 thresholds by up to 5e-6.
        year_coef.append(add_pvlaue_level(coef=np.round(coef, 3), pvalue=pvalue))
    all_models2.append(year_coef)

In [18]:
# Turn the per-year coefficient lists into a table: one row per centrality
# measure (display label), one column per technology year.
tech2_gdppc = pd.DataFrame(all_models2).T
tech2_gdppc.index = [centrality_names[name] for name in centr_names]
tech2_gdppc.columns = [
    'Technology (B-L, 1975) & GDP p.c. growth (1976-2019)',
    'Technology (B-L, 1990) & GDP p.c. growth (1991-2019)',
    'Technology (B-L, 1999) & GDP p.c. growth (2000-2019)',
]

In [19]:
# Place the technology-1 and technology-2 tables side by side (shared row index).
tech_gdppc_controls = pd.concat((tech1_gdppc, tech2_gdppc), axis='columns')

In [20]:
# Show the with-controls table. Each cell is the coefficient on the centrality
# rank; stars follow add_pvlaue_level (* p<=0.1, ** p<=0.05, *** p<=0.01).
tech_gdppc_controls

Unnamed: 0,"Technology (F-L, 1963) & GDP p.c. growth (1964-2019)","Technology (F-L, 1975) & GDP p.c. growth (1976-2019)","Technology (F-L, 1990) & GDP p.c. growth (1991-2019)","Technology (B-L, 1975) & GDP p.c. growth (1976-2019)","Technology (B-L, 1990) & GDP p.c. growth (1991-2019)","Technology (B-L, 1999) & GDP p.c. growth (2000-2019)"
PageRank Centrality - Unweighted,$191.833^{*}$,$156.157^{*}$,$154.037^{**}$,$154.344^{}$,$191.686^{**}$,$17.109^{}$
PageRank Centrality - Weighted,$209.009^{**}$,$123.603^{}$,$148.549^{**}$,$160.493^{*}$,$219.192^{**}$,$16.91^{}$
Eigenvector Centrality - Unweighted,$202.272^{*}$,$166.834^{**}$,$171.242^{**}$,$172.362^{*}$,$197.733^{**}$,$14.774^{}$
Eigenvector Centrality - Weighted,$262.198^{**}$,$135.564^{}$,$189.341^{***}$,$184.9^{*}$,$212.573^{**}$,$6.643^{}$
Degree Centrality - Weighted,$89.482^{}$,$122.961^{}$,$149.907^{*}$,$181.031^{*}$,$167.64^{*}$,$16.011^{}$
Closeness Centrality - Weighted,$191.613^{**}$,$171.945^{**}$,$169.488^{**}$,$174.597^{*}$,$200.275^{**}$,$26.413^{}$
Betweenness Centrality - Unweighted,$-256.045^{**}$,$24.273^{}$,$171.475^{**}$,$-146.833^{}$,$108.779^{}$,$52.573^{}$
Betweenness Centrality - Weighted,$-168.965^{*}$,$-10.445^{}$,$116.781^{*}$,$270.728^{***}$,$1.916^{}$,$16.526^{}$
