### HDR: Neural Network Initial Training

In [1]:
import numpy as np
import pandas as pd
import os
import time

from pprint import pprint

#### Settings

In [2]:
# --- standardization methods ---------------------------------------------
std_X = 'z_score'  # method applied to the input values (X)
std_t = 'max_min'  # method applied to the output values (t)

# --- network connectivity: one of ['full', 'sparse'] ---------------------
conn = 'full'

# --- period covered by the data (also used in file and folder names) -----
year_s, year_f = 2010, 2016
period = f'{year_s}_{year_f}'

# --- year to train on; meant to lie within [year_s, year_f] --------------
year = 2010

#### Set input/output folder

In [3]:
# Folders holding the standardized input (X) and target (t) values.
data_in_X = f'../data/HDR_2a_{std_X}_year'
data_in_t = f'../data/HDR_2a_{std_t}_year'

# Folder with the lists of mutual attributes and countries.
data_mt = '../data/HDR_mutual_attributes_and_countries'

# Output folder for this configuration (single '/' between path components).
data_out = (f'../data/HDR_3_nn_1a_init_training/{conn}/'
            f'{std_X}__{std_t}/{period}/{year}')

# exist_ok=True instead of an exists()/makedirs() pair: several runs may be
# started at the same time, and the separate check could race and raise
# FileExistsError.
os.makedirs(data_out, exist_ok=True)

print(f'{conn}/{std_X}__{std_t}/{period}/{year}', end='\n\n')

full/z_score__max_min/2010_2016/2010



#### Mutual attributes (2010-2016)

In [4]:
# Attribute names for the period: first column of a header-less CSV.
attr = list(
    pd.read_csv(f'{data_mt}/attributes_{period}.csv', header=None)[0]
)

print('Total attributes:', len(attr))

Total attributes: 98


#### Mutual countries (2010-2016)

In [5]:
# Country codes for the period: first column of a header-less CSV.
cntr = list(
    pd.read_csv(f'{data_mt}/countries_{period}.csv', header=None)[0]
)

print('Total countries:', len(cntr))

Total countries: 195


#### Read input data | Input values

In [6]:
X = pd.read_csv(f'{data_in_X}/{year}.csv', index_col='Country')

X = X[attr]      # mutual attributes
X = X.loc[cntr]  # mutual countries

# keep only the countries that have an HDI value
X = X[X['Human Development Index (HDI)'].notna()]
print(f'{len(cntr) - X.shape[0]} countries are missing HDI value.')

# Nudge genuine zeros away from 0 *before* missing values are filled with 0,
# so that an exact 0 afterwards always means "missing".
# DataFrame.replace returns a new frame, so the result has to be assigned.
X = X.replace(0, 1e-9)  # 1/(1,000,000,000)
X = X.fillna(0)         # 0 doesn't contribute to weights

X = X.drop('Human Development Index (HDI)', axis=1)  # input values
X[::10]

7 countries are missing HDI value.


Unnamed: 0_level_0,[Demography] Old age dependency ratio (old age (65 and older) per 100 people (ages 15-64)),"[Demography] Population, ages 15–64 (millions)","[Demography] Population, ages 65 and older (millions)","[Demography] Population, total (millions)","[Demography] Population, under age 5 (millions)","[Demography] Population, urban (%)",[Demography] Young age (0-14) dependency ratio (per 100 people ages 15-64),[Education] Education Index,[Education] Expected years of schooling (years),"[Education] Expected years of schooling, female (years)",...,"[Work, employment and vulnerability] Employment in agriculture (% of total employment)","[Work, employment and vulnerability] Employment in services (% of total employment)","[Work, employment and vulnerability] Employment to population ratio (% ages 15 and older)","[Work, employment and vulnerability] Labour force participation rate (% ages 15 and older)","[Work, employment and vulnerability] Labour force participation rate (% ages 15 and older), female","[Work, employment and vulnerability] Labour force participation rate (% ages 15 and older), male","[Work, employment and vulnerability] Unemployment, total (% of labour force)","[Work, employment and vulnerability] Unemployment, youth (% ages 15–24)","[Work, employment and vulnerability] Vulnerable employment (% of total employment)","[Work, employment and vulnerability] Youth not in school or employment (% ages 15-24)"
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AFG,-0.912488,-0.101016,-0.200837,-0.045205,0.142137,-1.365923,1.852404,-1.102317,-0.671824,-1.453208,...,1.30132,-0.975721,-0.851805,-1.025726,-2.227843,1.440743,-0.09737,-0.105321,1.042075,0.0
AZE,-0.435765,-0.187006,-0.219764,-0.194314,-0.238771,-0.10619,-0.801039,0.548634,-0.014424,-0.282629,...,0.292299,-0.071555,0.249787,0.139674,0.647006,-0.802462,-0.447502,-0.231252,0.627644,0.0
BLR,1.033056,-0.183782,-0.144057,-0.190548,-0.2557,0.797254,-1.262688,1.33683,1.121086,0.969618,...,-0.758923,0.242727,0.413304,0.044926,0.406923,-0.49657,-1.147767,-1.207214,-1.352003,0.0
CHE,1.69016,-0.19883,-0.144057,-0.203351,-0.264165,0.750597,-1.21278,1.592461,1.150968,0.806281,...,-1.038993,1.185574,0.533791,0.423918,0.554666,0.047237,-0.574823,-0.758586,-1.100385,-1.118506
CRI,-0.087887,-0.222477,-0.238691,-0.228202,-0.264165,0.670008,-0.626361,0.383539,0.493568,0.506831,...,-0.59395,0.760084,-0.206341,-0.191944,-0.411825,0.364458,0.077697,0.327565,-0.685954,0.294061
ECU,-0.255384,-0.15476,-0.18191,-0.149882,-0.162589,0.288271,-0.106485,0.287678,0.433804,0.0,...,-0.102868,0.194376,0.42191,0.243897,-0.060933,0.749655,-0.686229,-0.664138,0.065202,-0.12074
GAB,-0.397112,-0.24505,-0.257618,-0.250042,-0.281094,1.255339,0.521524,0.111931,0.224631,-0.037624,...,0.480292,-0.255289,-1.574725,-1.262595,-0.676532,-1.788113,1.907935,1.437329,-0.290024,0.0
GTM,-0.525956,-0.167659,-0.210301,-0.152142,-0.137196,-0.318266,0.775223,-0.644311,-0.373006,-0.609303,...,0.24626,-0.308476,0.249787,0.016502,-0.565724,1.32745,-0.749889,-0.955353,0.154009,0.854604
IRN,-0.616147,0.318188,0.083065,0.299705,0.243713,0.623352,-0.767767,0.447447,0.403922,0.125712,...,-0.436651,-0.04738,-1.738243,-1.831083,-2.147815,-0.439924,0.809792,0.831288,0.054101,1.975689
KGZ,-0.616147,-0.217103,-0.248154,-0.221425,-0.247236,-0.873906,-0.235414,0.633845,0.194749,0.044044,...,-0.026137,-0.110236,0.094875,0.149149,0.080655,0.307811,0.029951,-0.105321,-0.038406,0.193164


#### Read input data | Target values

In [7]:
t = pd.read_csv(f'{data_in_t}/{year}.csv', index_col='Country')

t = t.loc[cntr]  # mutual countries

# keep only the countries that have an HDI value, then take that column
t = t[t['Human Development Index (HDI)'].notna()]
t = t['Human Development Index (HDI)']  # target values

# the column being checked is the HDI, so the message names it
print(f'{len(cntr) - t.shape[0]} countries are missing HDI value.')

t[::10]

7 countries are missing AI value.


Country
AFG    0.350133
AZE    0.717507
BLR    0.786472
CHE    0.972149
CRI    0.736074
ECU    0.684350
GAB    0.618037
GTM    0.546419
IRN    0.737401
KGZ    0.579576
LCA    0.705570
MDV    0.625995
MUS    0.729443
NZL    0.928382
PRY    0.631300
SLB    0.408488
SWZ    0.449602
TTO    0.763926
VEN    0.742706
Name: Human Development Index (HDI), dtype: float64

#### Get list and size of all dimensions

In [9]:
# Dimensions are the sub-folders of ../data/HDR_0, in sorted order.
dim_list = [file for file in sorted(os.listdir('../data/HDR_0'))
            if os.path.isdir(f'../data/HDR_0/{file}')]
dim_len = dict()

# Count the input attributes whose column name contains the '[<dimension>]' tag.
# The pattern is a raw string: '\[' and '\]' are not valid escape sequences in
# a normal string literal and trigger a warning on recent Python versions.
for dim in dim_list:
    dim_len[dim] = X.filter(regex=r'\[{}\].*'.format(dim)).shape[1]
    print('-', dim, f'({dim_len[dim]})')

- Demography (7)
- Education (20)
- Environmental Sustainability (1)
- Gender (16)
- Health (12)
- Human Security (1)
- Income_composition of resources (6)
- Inequality (9)
- Mobility and communication (4)
- Socio-economic sustainability (6)
- Trade and financial flows (5)
- Work, employment and vulnerability (10)


#### Functions

In [10]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)) of ``x``.

    ``x`` may be a scalar or a NumPy array/matrix; the function is applied
    element-wise through ``np.exp``.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [11]:
def sigmoid_derivative(x):
    """Return the derivative of the logistic function evaluated at ``x``.

    Computed element-wise as ``s * (1 - s)`` with ``s = sigmoid(x)``.
    """
    activated = sigmoid(x)
    return np.multiply(activated, 1 - activated)

In [12]:
def clean_weights(W, A):
    """Return a copy of ``W`` in which every weight not flagged in ``A`` is 0.

    Parameters
    ----------
    W : array-like
        Weight matrix.
    A : array-like of bool
        Mask of the same shape as ``W``; ``True`` marks a weight that is in use
        and must be kept.

    Returns
    -------
    numpy.ndarray
        New array with ``W``'s values where ``A`` is ``True`` and ``0.0``
        elsewhere. ``W`` itself is not modified.

    Raises
    ------
    ValueError
        If ``W`` and ``A`` do not have the same shape.
    """
    W = np.asarray(W)
    A = np.asarray(A, dtype=bool)

    # Fail loudly: silently returning None here would only surface later as an
    # unrelated error in the caller.
    if W.shape != A.shape:
        raise ValueError(f'shape mismatch: W {W.shape} != A {A.shape}')

    # The mask already encodes which weights are in use, so no global
    # dimension bookkeeping is needed to rebuild the layout.
    return np.where(A, W, 0.0)

In [13]:
def get_attr_weights(W, dim_sizes=None):
    """Collect, for every attribute, its weight towards its own dimension.

    Attributes (rows of ``W``) are laid out in consecutive groups, one group
    per dimension (column of ``W``): the first ``dim_sizes[0]`` rows belong to
    column 0, the next ``dim_sizes[1]`` rows to column 1, and so on.

    Parameters
    ----------
    W : array-like, 2-D
        Weight matrix with one row per attribute and one column per dimension.
    dim_sizes : sequence of int, optional
        Number of attributes in each dimension, in column order. Defaults to
        the sizes stored in the notebook-level ``dim_len`` for the dimensions
        in ``dim_list``.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``W.shape[0]``; entry ``r`` is ``W[r, c]`` where
        ``c`` is the dimension that row ``r`` belongs to. Rows beyond the sum
        of ``dim_sizes`` stay 0.
    """
    W = np.asarray(W)

    if dim_sizes is None:
        dim_sizes = [dim_len[name] for name in dim_list]
    dim_sizes = np.asarray(dim_sizes, dtype=int)

    n_used = int(dim_sizes.sum())
    rows = np.arange(n_used)
    # column index of every row: 0 repeated dim_sizes[0] times, then 1, ...
    cols = np.repeat(np.arange(len(dim_sizes)), dim_sizes)

    W_arr = np.zeros(W.shape[0])
    W_arr[:n_used] = W[rows, cols]
    return W_arr

#### Neural network initial training (sparsely or fully connected)

##### Settings

In [14]:
# training hyper-parameters
n_start = 1       # how many random starts to run
n_iters = 100000  # iterations performed in each start
l_rate = 1        # learning rate  (0 < l_rate <= 1)

# layer sizes, taken from the prepared data
n_sampl, n_input = X.shape  # items/samples x attributes (input-layer neurons)
n_hidden = len(dim_list)    # dimensions (hidden-layer neurons)

# bounds of the uniform distribution used to draw the initial weights
low, high = -0.3, 0.3

#### Iterate

In [43]:
# set output folder
# (exist_ok=True: several runs may start at once, so a separate exists() check
# could race with another process creating the same folder)
nd_out = f'{data_out}/{n_iters}i_{n_hidden}h_{l_rate}l'
os.makedirs(nd_out, exist_ok=True)

# fail early on an unknown connectivity instead of hitting a NameError later
if conn not in ('sparse', 'full'):
    raise ValueError(f"conn must be 'sparse' or 'full', got {conn!r}")

# samples are addressed by position below, so X and t must list the same
# countries in the same order
if not X.index.equals(t.index):
    raise ValueError('X and t do not contain the same countries in the same order')

# plain NumPy copies of the data: positional access inside the training loop
# no longer goes through pandas (label-indexed Series / DataFrame.iloc)
X_arr = np.asarray(X, dtype=float)  # shape (n_sampl, n_input)
t_arr = np.asarray(t, dtype=float)  # shape (n_sampl,)

# per-iteration history of weights, biases and outputs
if conn == 'sparse':  # only the weights between attributes and their own dimension
    W_x_res = np.zeros((n_input, n_iters))
    B_x_res = np.zeros((n_hidden, n_iters))  # bias neuron for each dimension
else:  # 'full': all the weights between input and hidden layer
    W_x_res = np.zeros((n_input, n_hidden, n_iters))
    B_x_res = np.zeros(n_iters)  # only one bias neuron

W_h_res = np.zeros((n_hidden, n_iters))
B_h_res = np.zeros(n_iters)
y_res = np.zeros((n_sampl, n_iters))

tim = time.time()

for s in range(n_start):
    print(f'===== RANDOM START {s+1} =====')

    iter_tim = time.time()

    if conn == 'sparse':  # (sparsely-connected network)
        # biases of the input layer (one per hidden neuron)
        B_x = np.zeros((n_hidden, 1))

        # weights between the input and the hidden layer
        W_x = np.zeros((n_input, n_hidden))
        A_x = np.zeros(W_x.shape, dtype=bool)  # binary matrix of used weights

        # each dimension i owns the next dim_len[...] consecutive attribute rows
        c = 0
        for i in range(n_hidden):
            for j in range(dim_len[dim_list[i]]):
                W_x[j + c, i] = np.random.uniform(low=low, high=high)
                A_x[j + c, i] = True
            c += dim_len[dim_list[i]]

        # weights between the hidden and the output layer
        # (dimensions without attributes keep a zero weight)
        W_h = np.zeros((n_hidden, 1))
        for i in range(n_hidden):
            if dim_len[dim_list[i]] > 0:
                W_h[i, 0] = np.random.uniform(low=low, high=high)

    if conn == 'full':  # (fully-connected network)
        # bias of the input layer (a single shared value)
        B_x = np.zeros((1, 1))

        # weights between the input and the hidden layer
        W_x = np.random.uniform(low=low, high=high, size=(n_input, n_hidden))

        # weights between the hidden and the output layer
        W_h = np.random.uniform(low=low, high=high, size=(n_hidden, 1))

    # bias of the hidden layer
    B_h = np.zeros((1, 1))

    # start training...
    for i in range(n_iters):
        # visit the samples in a fresh random order every iteration
        perm_index = np.random.permutation(n_sampl)

        y = np.zeros(t_arr.shape)  # output-values vector of this iteration

        for j in perm_index:
            # feed-forward
            X_i = X_arr[j].reshape(-1, 1)   # input layer values, column vector
            z_h = np.dot(W_x.T, X_i) + B_x  # non-activated hidden-layer values
            X_h = sigmoid(z_h)              # activated hidden-layer values
            z_y = np.dot(W_h.T, X_h) + B_h  # non-activated output-layer value (1x1)
            y[j] = sigmoid(z_y).item()      # activated output-layer value

            # calculate derivatives (all of them use the weights *before* the update)
            # derivative of the bias of the hidden layer, shape (1, 1)
            der_B_h = (y[j] - t_arr[j]) * sigmoid_derivative(z_y)
            # derivative of the weights between the hidden and the output layer
            der_W_h = np.multiply(der_B_h, X_h)
            # derivative of the biases of the input layer, shape (n_hidden, 1)
            der_B_x = np.multiply(
                der_B_h, np.multiply(W_h, sigmoid_derivative(z_h)))
            # derivative of the weights between the input and the hidden layer
            # (outer product, shape (n_input, n_hidden))
            der_W_x = np.dot(X_i, der_B_x.T)

            # backpropagate
            B_h -= np.multiply(l_rate, der_B_h)
            W_h -= np.multiply(l_rate, der_W_h)
            W_x -= np.multiply(l_rate, der_W_x)

            if conn == 'sparse':
                # one bias per hidden neuron
                B_x -= np.multiply(l_rate, der_B_x)

                # keep only the values of interest: reset every weight that
                # does not link an attribute to its own dimension
                W_x = clean_weights(W_x, A_x)

            if conn == 'full':
                # single shared bias: accumulate the update over all hidden neurons
                B_x -= np.multiply(l_rate, der_B_x).sum()

        # calculate loss
        C = np.average(((y - t_arr) ** 2) / 2)

        # save the weights, biases & outputs of the s-th start in the i-th iteration
        if conn == 'sparse':  # (sparsely-connected network)
            W_x_res[:, i] = get_attr_weights(W_x)  # weights of the attributes
            B_x_res[:, i] = B_x.flatten()

        if conn == 'full':  # (fully-connected network)
            W_x_res[:, :, i] = W_x
            B_x_res[i] = B_x.item()

        W_h_res[:, i] = W_h.flatten()
        B_h_res[i] = B_h.item()
        y_res[:, i] = y

        if (i + 1) % 10 == 0 or i == 0:
            print(f'[{year} | {s+1}/{n_start}] Iteration {i+1:>7d} | Loss = {C:.15f}')

    # output np.ndarray(s) to npy format
    # NOTE(review): the prefix only has one-second resolution, so two runs that
    # finish within the same second would write to the same file names.
    now = ''.join([f'{item:02d}' for item in time.localtime()[:]])

    results = (
        ('W_x_res', W_x_res),
        ('B_x_res', B_x_res),
        ('W_h_res', W_h_res),
        ('B_h_res', B_h_res),
        ('y_res', y_res),
    )
    for res_name, res_arr in results:
        # report the array that is actually being written
        print(f'Saving {res_name} ...', end=' ')
        np.save(f'{nd_out}/{now}_{res_name}', res_arr)
        print('Done!')

    print('All files are successfully saved!')

    print(f'\nRANDOM START {s+1}: {n_iters} iterations done in '
          f'{int(time.time() - iter_tim)} seconds.', end='\n\n')

print(f'The whole process took {int(time.time() - tim)} seconds.', end='\n\n')

===== RANDOM START 1 =====
[2010 | 1/1] Iteration       1 | Loss = 0.014943396275699
[2010 | 1/1] Iteration      10 | Loss = 0.007340508459431
[2010 | 1/1] Iteration      20 | Loss = 0.004489135730529
[2010 | 1/1] Iteration      30 | Loss = 0.002690996032641
[2010 | 1/1] Iteration      40 | Loss = 0.001587411364524
[2010 | 1/1] Iteration      50 | Loss = 0.001038509338618
[2010 | 1/1] Iteration      60 | Loss = 0.000681299615456
[2010 | 1/1] Iteration      70 | Loss = 0.000489098563388
[2010 | 1/1] Iteration      80 | Loss = 0.000319347850395
[2010 | 1/1] Iteration      90 | Loss = 0.000240536209370
[2010 | 1/1] Iteration     100 | Loss = 0.000174832431867
Saving W_x_res ... Done!
Saving W_x_res ... Done!
Saving W_x_res ... Done!
Saving W_x_res ... Done!
Saving W_x_res ... Done!
All files are successfully saved!

RANDOM START 1: 100 iterations done in 6 seconds.

The whole process took 6 seconds.



#### Linux run

<code>for x in {1..5}; do (python HDR_4_nn_1a_init_training.py > run_$x.log) & done</code>

<code>for x in {1..5}; do (python HDR_4_nn_1a_init_training.py > run_$(date +%s%N).log) & done</code>