In [35]:
import networkx as nx
import pandas as pd
from variables import *
import numpy as np

def _read_country_table(file_name):
    """Read one CSV from the data folder and return it indexed by 'Country'.

    The column pandas labels 'Unnamed: 0' (presumably the unlabeled first
    CSV column holding country codes -- confirm against the data files) is
    renamed to 'Country' and then used as the index.
    """
    table = pd.read_csv('../' + DATA_FOLDER + file_name)
    table = table.rename(columns={'Unnamed: 0': 'Country'})
    return table.set_index('Country')


node_df = _read_country_table(NODE_VARS)
dyad_df = _read_country_table(DYAD_VARS)

# Countries kept for the analysis, stored in alphabetical order; n is their count.
COUNTRIES = sorted(['USA', 'JPN', 'CHN', 'ITA', 'NTH', 'DEN', 'FRN'])
n = len(COUNTRIES)

In [36]:
# Restrict the dyadic table to export flows among the selected countries.
#
# Selecting with .loc (instead of dropping the complement) pins BOTH axes to
# the order of COUNTRIES, so entry (i, j) always refers to the pair
# (COUNTRIES[i], COUNTRIES[j]) and self-pairs sit on the diagonal. Any later
# step that masks the diagonal by position depends on that alignment.
# A country missing from the index, or a missing "<country>.exports" column,
# raises KeyError here rather than silently producing a smaller matrix.
columns = [f"{c}.exports" for c in COUNTRIES]
ids = list(COUNTRIES)

# Rename columns to country names only ("USA.exports" -> "USA").
new_columns = [col.split('.')[0] for col in columns]

# Rows and columns are now both sorted by country name (COUNTRIES is sorted).
dyad_df = dyad_df.loc[ids, columns].rename(columns=dict(zip(columns, new_columns)))

In [37]:
# Build sociomatrix
# Step 1: move every entry to the log scale. A 0.01 offset is added before
# taking the log (presumably so that zero flows stay finite -- confirm).
# NOTE: dyad_df is overwritten in place, so re-running this cell applies the
# log transform a second time.
log_flows = np.log(dyad_df + 0.01)
dyad_df.iloc[:, :] = log_flows

# Plain NumPy view of the log-scale table, used for the matrix computations.
Y_log = dyad_df.to_numpy()

# Show the labelled table first, then the rounded raw matrix.
for shown in (dyad_df, Y_log.round(2)):
    print(shown)

              CHN       DEN       FRN       ITA       JPN       NTH       USA
Country                                                                      
CHN           NaN -0.127833  1.673351  1.363537  3.467609  1.311032  4.035832
DEN     -0.820981       NaN  0.819780  0.576613  0.488580  0.506818  0.788457
FRN      1.223775  0.765468       NaN  3.270329  1.805005  2.343727  2.976040
ITA      1.283708  0.518794  3.268047       NaN  1.646734  1.603420  2.878074
JPN      3.518091  0.039221  2.180417  1.510722       NaN  1.834180  4.771108
NTH      0.329304  1.007958  2.685805  2.452728  0.542324       NaN  1.937302
USA      2.964757  0.737164  3.082369  2.298577  4.178073  2.625393       NaN
[[  nan -0.13  1.67  1.36  3.47  1.31  4.04]
 [-0.82   nan  0.82  0.58  0.49  0.51  0.79]
 [ 1.22  0.77   nan  3.27  1.81  2.34  2.98]
 [ 1.28  0.52  3.27   nan  1.65  1.6   2.88]
 [ 3.52  0.04  2.18  1.51   nan  1.83  4.77]
 [ 0.33  1.01  2.69  2.45  0.54   nan  1.94]
 [ 2.96  0.74  3.08  2.3   4

# ANOVA Decomposition

In [None]:
# The diagonal (self-pairs) is excluded from every average below, so build a
# boolean selector that is True everywhere except on the diagonal.
mask = np.logical_not(np.eye(n, dtype=bool))

# Overall mean of the off-diagonal entries.
mu = Y_log[mask].mean()
print(f"overall mean: {mu:.2f}")


def _off_diagonal_means(matrix):
    """Return, for each row k of `matrix`, the mean of its entries with position k skipped."""
    return np.array([matrix[k, mask[k]].mean() for k in range(n)])


# Row effects: each row's off-diagonal mean, expressed relative to mu.
row_effects = _off_diagonal_means(Y_log) - mu
# Column effects: the same computation applied to the columns (rows of the transpose).
column_effects = _off_diagonal_means(Y_log.T) - mu

# Residuals: what is left after removing the additive fit mu + row_i + column_j.
additive_fit = mu + row_effects[:, np.newaxis] + column_effects[np.newaxis, :]
residuals = Y_log - additive_fit



overall mean: 1.82
[-0.40168628 -1.32800029  0.46683265  0.09395565  0.20325866 -0.11403386
  1.07967348]
