# ANN - PyTorch

## Imports

In [56]:
import pandas as pd
import numpy as np
import pickle
import networkx as nx
import matplotlib.pyplot as plt

import os
import math
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch_geometric.utils import convert

from random import shuffle

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

## ANN model definition

In [57]:
class MLP(nn.Module):
    def __init__(self, in_features, h_sizes, out_size):
        super(MLP, self).__init__()
        self.hidden_linear = nn.ModuleList()
        #self.hidden_dropout = nn.ModuleList()
        self.h_len = len(h_sizes)
        
        #Add initial layer
        self.hidden_linear.append(nn.Linear(in_features, h_sizes[0]))
        #self.hidden_dropout.append(nn.Dropout(p=0.5))
        
        #Add hidden layers        
        for k in range(len(h_sizes)-1):
            self.hidden_linear.append(nn.Linear(h_sizes[k], h_sizes[k+1]))
        #    self.hidden_dropout.append(nn.Dropout(p=0.5))
        
        #Add output layer
        self.hidden_linear.append(nn.Linear(h_sizes[-1], out_size))
        
        
    def forward(self, x):
        y = x
        for i in range(len(self.hidden_linear)-1):
            y = self.hidden_linear[i](y)
        #    y = self.hidden_dropout[i](y)
            y = torch.tanh(y) #F.relu(y)
        
        y = self.hidden_linear[-1](y)
        y = torch.tanh(y)
            
        return y

In [58]:
mlp = MLP(40, [50, 50, 50], 1)

In [60]:
#mlp(torch.rand(1,40))

In [65]:
networks = ['BLA', 'FOS', 'MOD']
for name_WDS in networks:
    database = load_database(name_WDS)
    create_X_and_Y = 

BLA
FOS
MOD


## Data processing

In [64]:
def load_database(name_WDS): 
    N_iter = 5000
    #name_WDS = 'PES'
    dir_path = os.getcwd()+'\{name}_Geom_DB_{num_iter}'.format(name = name_WDS, num_iter = N_iter)
    #Read the Network info
    WDS_info = pd.read_csv('./networks/Exeter/'+name_WDS+'_info.csv', dtype = {'NodeID': str} )
    # Import database
    #database = pickle.load(open(f'{dir_path}/{name_WDS}_dataset.p', 'rb'))
    print(name_WDS)
    return database

In [47]:
database['data'][0]

Data(ID=[71], base_demand=[71], base_head=[71], diameter=[196], edge_index=[2, 196], elevation=[71], length=[196], pos=[71, 2], res_PP=[1], res_Todini=[1], roughness=[196], type=[196], type_1H=[71])

In [48]:
diams = database['data'][0].diameter

In [49]:
a = convert.to_networkx(database['data'][0], edge_attrs=['diameter'], to_undirected=True)

In [50]:
a.edges.data()

EdgeDataView([(0, 1, {'diameter': 0.15}), (0, 2, {'diameter': 0.3}), (1, 3, {'diameter': 0.25}), (1, 58, {'diameter': 0.5}), (2, 3, {'diameter': 0.45}), (2, 4, {'diameter': 0.35000000000000003}), (3, 55, {'diameter': 0.8}), (4, 57, {'diameter': 0.5}), (4, 64, {'diameter': 0.2}), (5, 6, {'diameter': 0.45}), (5, 7, {'diameter': 0.5}), (5, 8, {'diameter': 0.2}), (5, 70, {'diameter': 0.45}), (7, 26, {'diameter': 0.25}), (7, 59, {'diameter': 0.5}), (8, 30, {'diameter': 0.6}), (8, 37, {'diameter': 0.6}), (9, 10, {'diameter': 0.6}), (10, 28, {'diameter': 0.6}), (10, 39, {'diameter': 0.5}), (11, 22, {'diameter': 0.8}), (11, 36, {'diameter': 0.7000000000000001}), (12, 14, {'diameter': 0.8}), (12, 15, {'diameter': 0.6}), (13, 21, {'diameter': 0.35000000000000003}), (13, 68, {'diameter': 0.45}), (14, 21, {'diameter': 0.7000000000000001}), (14, 23, {'diameter': 0.125}), (15, 16, {'diameter': 0.7000000000000001}), (15, 17, {'diameter': 0.45}), (16, 17, {'diameter': 0.3}), (16, 35, {'diameter': 0.3}

In [51]:
diams = []
for i in list(a.edges.data()):
    diams.append(i[2]['diameter'])

#### Data Normalization

In [52]:
avail_diams = list(WDS_info.D.dropna())
UNIT_TO_M = WDS_info['UNITS_TO_M'].dropna().item()

In [53]:
np.array(avail_diams)*UNIT_TO_M

array([0.1  , 0.125, 0.15 , 0.2  , 0.25 , 0.3  , 0.35 , 0.4  , 0.45 ,
       0.5  , 0.6  , 0.7  , 0.8  ])

In [54]:
def norm_diams(ls_diams, avail_diams):
    ##avail_diams = [6, 8, 10, 12, 14] #inches
    min_diam = min(avail_diams)
    max_diam = max(avail_diams)
    n_d = (ls_diams-min_diam)/(max_diam-min_diam)
    return n_d


In [55]:
database['data'][0]['diameter'].numpy()

array([0.15 , 0.3  , 0.15 , 0.25 , 0.5  , 0.3  , 0.45 , 0.35 , 0.25 ,
       0.45 , 0.8  , 0.35 , 0.2  , 0.5  , 0.45 , 0.5  , 0.2  , 0.45 ,
       0.45 , 0.5  , 0.25 , 0.5  , 0.2  , 0.6  , 0.6  , 0.6  , 0.6  ,
       0.6  , 0.5  , 0.7  , 0.8  , 0.8  , 0.6  , 0.35 , 0.45 , 0.8  ,
       0.7  , 0.125, 0.6  , 0.7  , 0.45 , 0.7  , 0.3  , 0.3  , 0.45 ,
       0.3  , 0.7  , 0.7  , 0.7  , 0.2  , 0.3  , 0.2  , 0.25 , 0.7  ,
       0.3  , 0.25 , 0.7  , 0.35 , 0.7  , 0.4  , 0.8  , 0.4  , 0.35 ,
       0.125, 0.3  , 0.15 , 0.7  , 0.7  , 0.3  , 0.15 , 0.6  , 0.25 ,
       0.15 , 0.6  , 0.6  , 0.25 , 0.8  , 0.6  , 0.6  , 0.125, 0.6  ,
       0.5  , 0.5  , 0.6  , 0.25 , 0.45 , 0.8  , 0.5  , 0.8  , 0.35 ,
       0.6  , 0.8  , 0.8  , 0.125, 0.6  , 0.3  , 0.6  , 0.7  , 0.8  ,
       0.6  , 0.6  , 0.6  , 0.8  , 0.45 , 0.5  , 0.8  , 0.5  , 0.2  ,
       0.4  , 0.2  , 0.3  , 0.7  , 0.6  , 0.45 , 0.4  , 0.5  , 0.6  ,
       0.25 , 0.3  , 0.2  , 0.3  , 0.7  , 0.25 , 0.5  , 0.7  , 0.7  ,
       0.25 , 0.7  ,

In [22]:
x_diameters = []
y_res_index = []
for i in range(len(database['data'])):
    x_diameters.append(norm_diams(database['data'][i]['diameter'].numpy(), avail_diams))
    y_res_index.append(database['data'][i].res_PP.numpy())

In [23]:
X_train, X_test, y_train, y_test = train_test_split(x_diameters, y_res_index, random_state=1, test_size=0.2 )
print(len(X_train), len(y_test))

4000 1000


In [24]:
X_train[0]

array([-0.142     , -0.14228571, -0.142     , -0.142     , -0.14228571,
       -0.14228571, -0.142     , -0.14185714, -0.142     , -0.142     ,
       -0.14185714, -0.14185714, -0.1425    , -0.142     , -0.14171429,
       -0.14228571, -0.14267857, -0.14171429, -0.14171429, -0.14228571,
       -0.142     , -0.14221429, -0.14267857, -0.14271429, -0.14221429,
       -0.14228571, -0.14228571, -0.14214286, -0.14228571, -0.142     ,
       -0.14171429, -0.14267857, -0.14228571, -0.14257143, -0.14185714,
       -0.14267857, -0.14221429, -0.14257143, -0.14228571, -0.14221429,
       -0.14171429, -0.14221429, -0.14264286, -0.14171429, -0.14171429,
       -0.14264286, -0.14171429, -0.14221429, -0.14221429, -0.14221429,
       -0.14228571, -0.14221429, -0.14214286, -0.14214286, -0.14228571,
       -0.14214286, -0.14171429, -0.14257143, -0.14221429, -0.142     ,
       -0.14171429, -0.142     , -0.14185714, -0.14257143, -0.14235714,
       -0.14228571, -0.14171429, -0.14171429, -0.14235714, -0.14

In [6]:
X_train, X_test, y_train, y_test = torch.tensor(X_train, dtype = torch.float32), torch.tensor(X_test, dtype = torch.float32), torch.tensor(y_train, dtype = torch.float32), torch.tensor(y_test, dtype = torch.float32)

In [10]:
# # Hyperparameters
batch_size = 50
lr = 0.001
weight_decay = 8e-4
num_epochs = 1500

In [11]:
data_loader_X_train = torch.utils.data.DataLoader(
    X_train,
    shuffle=False,
    batch_size = batch_size
)

data_loader_y_train = torch.utils.data.DataLoader(
    y_train,
    shuffle = False,
    batch_size = batch_size
)

In [12]:
optimizer = torch.optim.Adam(mlp.parameters(), lr=lr, weight_decay=weight_decay, amsgrad = True)

In [13]:
mlp = pickle.load(open('ANN_PyTorch_96.p', 'rb'))

In [14]:
mlp.train()
f_loss = nn.MSELoss() #(preds-y_train[i])**2
start = time.time()
for epoch in range(num_epochs):
    total_loss= 0
#     total_correct =0
    
    iter_y_train = iter(data_loader_y_train)
    iter_X_train = iter(data_loader_X_train)
    
    #Data
    for batch in iter_X_train:
        #Prediction
        preds = mlp(batch)

        #Calculate the loss
        loss = f_loss(preds.reshape(batch_size,1), next(iter_y_train).reshape(batch_size,1))
        
        total_loss += loss
    
    #Backpropagate
    optimizer.zero_grad() #To avoid adding up gradients
    total_loss.backward() #calculate gradients

    #Optimizer step
    optimizer.step() #Update weights
        
    if epoch%50 == 0:
        print("epoch ", epoch, " total loss  ", total_loss )
        print(time.time()-start)
    
    if epoch == num_epochs-1:
        print("epoch ", epoch, " total loss  ", total_loss )
        

epoch  0  total loss   tensor(0.7390, grad_fn=<AddBackward0>)
0.08523917198181152
epoch  50  total loss   tensor(0.4879, grad_fn=<AddBackward0>)
3.970893144607544
epoch  100  total loss   tensor(0.4410, grad_fn=<AddBackward0>)
9.453950643539429
epoch  150  total loss   tensor(0.4335, grad_fn=<AddBackward0>)
15.185543060302734
epoch  200  total loss   tensor(0.4286, grad_fn=<AddBackward0>)
20.716964960098267
epoch  250  total loss   tensor(0.4241, grad_fn=<AddBackward0>)
26.527191638946533
epoch  300  total loss   tensor(0.4193, grad_fn=<AddBackward0>)
32.22769212722778
epoch  350  total loss   tensor(0.4134, grad_fn=<AddBackward0>)
37.95274782180786
epoch  400  total loss   tensor(0.4056, grad_fn=<AddBackward0>)
43.48607516288757
epoch  450  total loss   tensor(0.3948, grad_fn=<AddBackward0>)
49.35438323020935
epoch  500  total loss   tensor(0.3788, grad_fn=<AddBackward0>)
55.406304121017456
epoch  550  total loss   tensor(0.3533, grad_fn=<AddBackward0>)
60.83125352859497
epoch  600  t

In [15]:
num_params = []
i = 0
        
for parameter in mlp.parameters():
    if i%2 == 0:
        num_params.append(parameter.reshape(-1,1).shape[0])
    else:
        num_params[-1] += parameter.reshape(-1,1).shape[0]
    i += 1

print(num_params)

[2050, 2550, 2550, 51]


In [16]:
mlp(X_test[1:10]).t()

tensor([[ 0.4248,  0.0850,  0.3197,  0.3827,  0.3282,  0.3749,  0.4242,  0.4537,
         -0.1273]], grad_fn=<TBackward>)

In [15]:
y_test[1:10]

tensor([ 0.4086,  0.0567,  0.3450,  0.4014,  0.3403,  0.3413,  0.4215,  0.4311,
        -0.1600])

In [17]:
#R squared 
mlp.eval()
r2_score( y_train.detach().numpy() , mlp(X_train).detach().numpy().flatten() )

0.9419716436291272

In [18]:
#R squared
mlp.eval()
r2_score( y_test.detach().numpy() , mlp(X_test).detach().numpy().flatten() )

0.9528152713324181

In [18]:
x_real_values_train = y_train.detach().numpy()
y_pred_values_train = mlp(X_train).detach().numpy().flatten()

x_real_values_test = y_test.detach().numpy()
y_pred_values_test = mlp(X_test).detach().numpy().flatten()

In [19]:
import plotly.graph_objects as go

In [20]:
fig = go.FigureWidget(data=go.Scatter(x=x_real_values_train, y=y_pred_values_train,  line={'dash': 'dash'}, name = 'Train'))

fig.add_trace(go.Scatter(x=x_real_values_test, y=y_pred_values_test,  line={'dash': 'dash'}, name='Test'))
#fig.add_trace(go.Scatter(x=x_axis, y=first_DB_pd['minPrPa'],  line={'dash': 'dash'}, name='Min. Res.'))


fig.update_traces(mode='markers',  marker_size=3)
fig.update_layout(title='Pred. vs Actual - Prasad & Park Resilience Index',
                    xaxis_title="Real Index",
                    yaxis_title="Pred. Index",
                    legend_title="Legend",
                    autosize=False,
                    width=800,
                    height=500,
                    font=dict(
                        #family="Courier New, monospace",
                        size=18,
    ))
#yaxis_zeroline=False, xaxis_zeroline=False)
fig.update_layout(shapes=[
dict(
  type= 'line',
  yref= 'y', y0= -0.30, y1= 1,   # adding a horizontal line at Y = 1
  xref= 'x', x0= -0.30, x1= 1
     ) 
])

fig.update_xaxes(range=[-0.3, 1])
fig.update_yaxes(range=[-0.3, 1])

fig

FigureWidget({
    'data': [{'line': {'dash': 'dash'},
              'marker': {'size': 3},
              'mod…

In [24]:
pickle.dump(mlp, open('ANN_PyTorch_96.p', 'wb'))