# Run this cell before starting: it prints the installed PyTorch version and
# the CUDA version reported by torch.version.cuda.
!python -c "import torch; print(torch.__version__)"
!python -c "import torch; print(torch.version.cuda)"


In [172]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from torch.nn import Parameter
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.utils import remove_self_loops, add_self_loops, softmax
import torch.optim as optim
from torch_geometric.nn.conv import feast_conv


In [173]:
import datetime

# Running counter that is appended to saved-model filenames further down.
versj = 1
# Date stamp in "<abbreviated month>-<day>" form; used as the filename prefix.
today = format(datetime.date.today(), "%b-%d")
print(today)

Apr-20


In [174]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("Using {}.".format(device_name))

Using cpu.


In [175]:
#data

import numpy as np
import torch
import random
from torch_geometric.data import Data, DataLoader

#Import data function
def get_data(filepath, num_nodes=20, seed=None):
    """Load a whitespace-separated text file and turn every row into a chain graph.

    Each row must hold at least ``3 * num_nodes`` numbers: the first
    ``num_nodes`` columns ("x"), the next ``num_nodes`` columns ("z") and the
    following ``num_nodes`` columns ("m", the per-node regression target).
    Extra columns are ignored.

    Args:
        filepath: Path passed to ``np.loadtxt``.
        num_nodes: Number of nodes per graph (defaults to the original 20).
        seed: Optional integer seed for the row shuffle. With ``None`` the
            global NumPy random state is used, as before.

    Returns:
        A list of ``Data`` objects in shuffled row order. Each has ``x`` of
        shape ``(num_nodes, 3)`` with columns (node index, x, z), ``y`` of
        shape ``(num_nodes,)`` and an ``edge_index`` of shape
        ``(2, 2 * (num_nodes - 1))`` linking consecutive nodes in both
        directions. All graphs share the same ``edge_index`` tensor.

    Raises:
        ValueError: If the file has fewer than ``3 * num_nodes`` columns.
    """
    # ndmin=2 keeps a one-row file two-dimensional; without it the column
    # slicing below fails on a 1-D array.
    data = np.loadtxt(filepath, ndmin=2)
    needed_cols = 3 * num_nodes
    if data.shape[1] < needed_cols:
        raise ValueError(
            "{} has {} columns per row, expected at least {}".format(
                filepath, data.shape[1], needed_cols))

    x = data[:, :num_nodes]
    z = data[:, num_nodes:2 * num_nodes]
    m = data[:, 2 * num_nodes:needed_cols]

    # Shuffle rows; a seed makes the later train/test split reproducible.
    rng = np.random if seed is None else np.random.RandomState(seed)
    order = rng.permutation(data.shape[0])

    node_ids = np.arange(num_nodes)
    # (rows, num_nodes, 3): per-node features are (node index, x, z).
    features = np.stack([np.broadcast_to(node_ids, x.shape), x, z], axis=-1)[order]
    targets = m[order]

    # Chain topology: i -> i+1 for every consecutive pair, then the reversed edges.
    forward = np.stack([node_ids[:-1], node_ids[1:]])
    edge_index = torch.from_numpy(
        np.concatenate([forward, forward[::-1]], axis=1)).long()

    dataset = [
        Data(x=torch.from_numpy(node_features).float(),
             edge_index=edge_index,
             y=torch.from_numpy(node_targets).float())
        for node_features, node_targets in zip(features, targets)
    ]

    return dataset


In [176]:


# Load every sample (get_data returns them in shuffled order).
dataset = get_data("data1.txt")
print(len(dataset))

# Hold out the last 10% of the samples; the first 90% are used for the run.
holdout_start = int(len(dataset)*0.90)
datasetRun, datasetTest = dataset[:holdout_start], dataset[holdout_start:]

# Split the run set 75/25, then cap the two parts at 2000 and 500 samples.
# Despite their names, train_loader and test_loader are plain lists of graphs.
eval_start = int(len(datasetRun)*0.75)
train_loader = datasetRun[:eval_start][:2000]
test_loader = datasetRun[eval_start:][:500]

# Sanity checks on the split sizes.
print('train_loader size: ', len(train_loader))
print('test_loader size: ', len(test_loader))

# Inspect the tensor shapes of one training graph.
graph = train_loader[0]
for field_name in ("x", "edge_index", "y"):
    print(getattr(graph, field_name).shape)

7678
train_loader size:  2000
test_loader size:  500
torch.Size([20, 3])
torch.Size([2, 38])
torch.Size([20])


In [177]:
class ArchNN(torch.nn.Module):
    """Per-node regression network: Linear -> 3 x FeaStConv -> Linear -> Linear.

    The channel widths are 16 -> 32 -> 64 -> 128 through the convolutions,
    then 128 -> 64 -> 1 through the two final linear layers. ReLU follows
    every layer except the last.

    Args:
        in_channels: Number of input features per node.
        num_classes: Unused; kept so existing constructor calls keep working.
        heads: ``heads`` argument forwarded to every ``FeaStConv`` layer.
        t_inv: ``t_inv`` argument forwarded to every ``FeaStConv`` layer.
    """

    def __init__(self, in_channels, num_classes, heads, t_inv = True):
        super().__init__()
        self.fc0 = nn.Linear(in_channels, 16)
        self.conv1 = feast_conv.FeaStConv(16, 32, heads=heads, t_inv=t_inv)
        self.conv2 = feast_conv.FeaStConv(32, 64, heads=heads, t_inv=t_inv)
        self.conv3 = feast_conv.FeaStConv(64, 128, heads=heads, t_inv=t_inv)
        self.fc1 = nn.Linear(128, 64)
        self.fc2 = nn.Linear(64, 1)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise every learnable layer.

        The linear layers are included so that calling this on a trained
        model really starts from scratch; previously only the convolutions
        were reset and the linear layers kept their trained weights.
        """
        for layer in (self.fc0, self.conv1, self.conv2, self.conv3,
                      self.fc1, self.fc2):
            layer.reset_parameters()

    def forward(self, data):
        """Return one prediction per node.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``.

        Returns:
            The output of the last linear layer with its size-1 channel
            dimension (dim 1) squeezed away.
        """
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.fc0(x))
        for conv in (self.conv1, self.conv2, self.conv3):
            x = F.relu(conv(x, edge_index))
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return torch.squeeze(x, dim=1)




In [178]:
import time
import torch
import torch.nn.functional as F


def print_info(info):
    """Print a one-line summary of an epoch.

    ``info`` must provide the keys 'current_epoch', 'epochs', 't_duration',
    'train_loss' and 'test_loss'.
    """
    template = ('Epoch: {}/{}, Duration: {:.3f}s,'
                'Train Loss: {:.4f}, Test Loss:{:.4f}')
    # Order in which the values are substituted into the template.
    field_order = ('current_epoch', 'epochs', 't_duration',
                   'train_loss', 'test_loss')
    print(template.format(*(info[key] for key in field_order)))


def run(model, train_loader, test_loader, num_nodes, epochs, optimizer, device):
    """Train for ``epochs`` epochs, evaluating and printing a summary after each.

    Only the ``train`` call is timed; the evaluation is not part of the
    reported duration.
    """
    for epoch in range(1, epochs + 1):
        started = time.time()
        train_loss = train(model, train_loader, optimizer, device)
        elapsed = time.time() - started
        test_loss = test(model, test_loader, num_nodes, device)

        print_info(dict(
            train_loss=train_loss,
            test_loss=test_loss,
            current_epoch=epoch,
            epochs=epochs,
            t_duration=elapsed,
        ))

# Disabled experiment: an alternative train() that back-propagates the mean of
# |output - y| / y. It is wrapped in a string literal, so it is never executed.
"""
def train(model, train_loader, optimizer, device):
    model.train()

    total_error_percentage = 0
    for idx, data in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data.to(device))
        errors = torch.abs(output - data.y.to(device))  # Compute absolute errors between output and label tensors
        #print('data.y: ', data.y)
        #print('output: ', output)
        #print('errors:', errors)
        error_percentage = (errors / data.y.to(device))   # Compute error percentage
        #print('error_percentage: ', error_percentage)
        mean_error_percentage = error_percentage.mean()  # Compute mean error percentage for backpropagation
        #print('mean error percentage: ', mean_error_percentage)
        mean_error_percentage.backward(retain_graph=True)
        optimizer.step()
        #print('mean error percentage item: ', mean_error_percentage.item())
        total_error_percentage += mean_error_percentage.item()
    return total_error_percentage / len(train_loader)
"""

def train(model, train_loader, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Each item is moved to ``device``, the mean absolute error (L1) between
    the model output and ``data.y`` is back-propagated, and the optimizer
    takes one step.

    Args:
        model: Module called as ``model(data)``.
        train_loader: Iterable of items exposing ``.to(device)`` and ``.y``.
        optimizer: Optimizer over the model's parameters.
        device: Device the items are moved to.

    Returns:
        The L1 loss averaged over all items, as a Python float.

    Raises:
        ValueError: If ``train_loader`` yields no items.
    """
    model.train()

    total_loss = 0.0
    num_batches = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)
        # Mean absolute error over all entries of this item.
        loss = F.l1_loss(output, data.y)
        # A fresh graph is built for every item and backward runs once, so
        # the graph does not need to be retained.
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        raise ValueError("train_loader is empty; cannot compute a mean training loss")
    return total_loss / num_batches

# Disabled experiment: a train() variant that uses F.nll_loss and prints the
# output/target dtypes. It is wrapped in a string literal, so it never runs.
"""
def train(model, train_loader, optimizer, device):
    model.train()

    total_loss = 0
    for idx, data in enumerate(train_loader):
        optimizer.zero_grad()
        print(model(data.to(device)).dtype)
        print(data.y.to(device).dtype)
        target = data.y.to(device)
        loss = F.nll_loss(model(data.to(device)), target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(train_loader)
"""
def test(model, test_loader, num_nodes, device):
    model.eval()
    correct = 0
    total_loss = 0
    n_graphs = 0
    with torch.no_grad():
        for idx, data in enumerate(test_loader):
            out = model(data.to(device))
            #total_loss += F.nll_loss(out, data.y.to(device)).item()
            total_loss += F.l1_loss(out, data.y.to(device)).item()
            #pred = out.max(1)[1]
            #correct += pred.eq(data.y).sum().item()
            #n_graphs += data.num_graphs
    return total_loss / len(test_loader)

In [179]:
# Runner: build the model, train it, then save it to disk.
# Graph size and feature count are read off the first training sample.
num_nodes, num_features = train_loader[0].x.shape[:2]

model = ArchNN(num_features, num_nodes, heads=8).to(device_name)

optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=0.001)

run(model, train_loader, test_loader, num_nodes, 100, optimizer, device_name)

# File name is <date>ver<counter>.pt; versj is defined at the top of the notebook.
modelPath = "".join((today, "ver", str(versj), ".pt"))
torch.save(model, modelPath)
versj += 1  # so the next save in this session gets a new file name
print("saved as: ", modelPath)

Epoch: 1/100, Duration: 15.716s,Train Loss: 1.1111, Test Loss:0.8901
Epoch: 2/100, Duration: 15.949s,Train Loss: 0.8760, Test Loss:0.8029
Epoch: 3/100, Duration: 18.347s,Train Loss: 0.8130, Test Loss:0.7881
Epoch: 4/100, Duration: 17.506s,Train Loss: 0.7616, Test Loss:0.7495
Epoch: 5/100, Duration: 19.176s,Train Loss: 0.7155, Test Loss:0.7145
Epoch: 6/100, Duration: 17.883s,Train Loss: 0.6616, Test Loss:0.5977
Epoch: 7/100, Duration: 17.748s,Train Loss: 0.6119, Test Loss:0.5602
Epoch: 8/100, Duration: 19.468s,Train Loss: 0.5692, Test Loss:0.5171
Epoch: 9/100, Duration: 19.325s,Train Loss: 0.5343, Test Loss:0.5159
Epoch: 10/100, Duration: 21.820s,Train Loss: 0.5162, Test Loss:0.5197
Epoch: 11/100, Duration: 17.960s,Train Loss: 0.4983, Test Loss:0.4750
Epoch: 12/100, Duration: 18.316s,Train Loss: 0.4793, Test Loss:0.4467
Epoch: 13/100, Duration: 17.065s,Train Loss: 0.4584, Test Loss:0.4147
Epoch: 14/100, Duration: 18.943s,Train Loss: 0.4408, Test Loss:0.4015
Epoch: 15/100, Duration: 18.4

In [186]:
# Compare the model's prediction with the target for one held-out sample.
model.eval()
# Inference only: skip autograd bookkeeping, and move the sample to the device
# the model was placed on so this also works when running on a GPU.
with torch.no_grad():
    print(model(datasetTest[200].to(device_name)))
print(datasetTest[200].y)

tensor([-1.7426,  1.3069,  1.1510,  0.7372,  0.3140, -0.1762, -0.5728, -0.9201,
        -1.2023, -1.3430, -1.3277, -1.2229, -0.9854, -0.6332, -0.1925,  0.2394,
         0.7380,  1.1224,  1.4310,  1.4865], grad_fn=<SqueezeBackward1>)
tensor([-1.4437,  1.3822,  1.1294,  0.7483,  0.2987, -0.1656, -0.5972, -0.9567,
        -1.2136, -1.3473, -1.3473, -1.2136, -0.9567, -0.5972, -0.1656,  0.2987,
         0.7483,  1.1294,  1.3822,  1.4437])


In [52]:
#versj defined at the top of the notebook
# Disabled: manual save of the current model. The statements are wrapped in a
# string literal, so nothing below is executed.
"""modelPath = today + "ver" + str(versj) +".pt"
torch.save(model, modelPath)
versj += 1"""

In [180]:
# Load previously saved models. Each file holds a whole pickled model object
# (written with torch.save(model, path)), so the ArchNN class must already be
# defined in this session.
# NOTE: torch.load unpickles arbitrary Python objects -- only load files you trust.
# NOTE(review): PyTorch versions whose torch.load defaults to weights_only=True
# need weights_only=False to load whole-model files like these -- confirm for
# the installed version.
# map_location places the weights on the active device, so a checkpoint saved
# on a GPU machine still loads on a CPU-only one.

model1 = torch.load("Apr-19ver3.pt", map_location=device_name)
model2 = torch.load("Apr-19ver4.pt", map_location=device_name)
model3 = torch.load("Apr-19ver6.pt", map_location=device_name)
model4 = torch.load("Apr-20ver1.pt", map_location=device_name)

In [196]:
#Test for new dataset
def testFinal(model, indata, device, option):
    model.eval()
    title = ''
    E_A = 0
    E_A_Ad = 0
    with torch.no_grad():
        for id, data in enumerate(indata):
          out = model(data.to(device))
          real = data.y.to(device)
          errorList = []
          error = 0
          for i in range(len(out)):
            e = ((out[i].item() - real[i].item() ) / out[i].item())*100
            errorList.append(e)

          errorListAbs = [abs(x) for x in errorList]

          if (option == 1):
            errorListAbs.sort()
            errorListAbsAd = errorListAbs[:-2]
            title = 'After removing the two worst errors'
          if (option == 2):
            errorListAbsAd = errorListAbs[8:11]
            title = 'Average adjusted total error for nodes 8, 9, 10'
          if (option == 3):
            errorListAbsAd = errorListAbs[7:13]
            title = 'Average adjusted total error for nodes 7, 8, 9, 10, 11, 12'


          E_A_el = sum(errorListAbs)/len(errorListAbs)
          E_A_Ad_el = sum(errorListAbsAd)/len(errorListAbsAd)

          E_A += E_A_el
          E_A_Ad += E_A_Ad_el
          
          print([round(x,2) for x in errorList])
          print('Avarage Error: '+ str(round(E_A_el,2))+'%, Adjusted Avarage Error: '+ str(round(E_A_Ad_el,2))+'%')
          print('Mean Square: '+ str(round(F.mse_loss(out, data.y.to(device)).item(),2))+'\n')

          #errorAvg += sum(errorListAbs)/len(errorListAbs)
    print(title)
    print('Avarage Tot. Error: '+ str(round(E_A/len(indata),2))+'%, Avarage Adjusted Tot. Error: '+ str(round(E_A_Ad/len(indata),2))+'%')

# Load a test data set here if datasetTest was not already created earlier in the notebook.
#datasetTest = get_data("/content/drive/MyDrive/datasets/data1test.txt")

# Run testFinal for model4 on the held-out set; option 1 reports the adjusted
# error with the two largest per-sample errors removed.
testFinal(model4, datasetTest,  device_name, 1)


[4.58, 1.25, 6.1, 9.65, 0.9, 14.76, 1.36, -0.02, 0.07, 3.3, 3.1, 3.34, 2.47, 5.61, 7.32, -10.66, -0.51, 3.4, 4.01, 4.56]
Avarage Error: 4.35%, Adjusted Avarage Error: 3.42%
Mean Square: 0.0

[-17.23, 2.66, 5.16, 2.06, 9.79, 7.18, -17.18, 0.54, 4.56, 2.81, 3.5, 0.14, 3.39, 12.27, -10.75, -2.55, -0.66, 5.56, 9.35, -17.57]
Avarage Error: 6.75%, Adjusted Avarage Error: 5.56%
Mean Square: 0.0

[2.3, 23.61, 6.66, 4.0, 3.58, 3.55, -0.92, -1.02, 0.04, 2.02, 1.53, 0.29, 0.51, 1.58, 0.34, -1.97, -0.76, 0.5, -12.59, 1.91]
Avarage Error: 3.48%, Adjusted Avarage Error: 1.86%
Mean Square: 0.0

[-2.47, 17.49, -2.62, 1.04, -0.41, -0.97, 6.31, 0.92, 1.18, -1.12, 0.76, 2.42, 5.05, -7.04, -1.88, 1.86, 1.82, 4.25, 0.5, -0.66]
Avarage Error: 3.04%, Adjusted Avarage Error: 2.01%
Mean Square: 0.0

[-0.89, 10.8, 4.8, 4.73, 5.14, 5.02, 13.84, 2.34, -0.37, -1.3, 1.31, 1.36, 1.78, -1.9, 1.61, 2.19, 1.63, 4.1, -10.46, 0.65]
Avarage Error: 3.81%, Adjusted Avarage Error: 2.87%
Mean Square: 0.0

[-1.27, 14.07, -3.26

In [195]:
# printing parameters

def count_parameters(model):
    """Return how many scalar weights of ``model`` have ``requires_grad`` set."""
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()
    return trainable


# Print the number of trainable parameters of model4.
print("Number of parameters in the model: {}".format(count_parameters(model4)))

Number of parameters in the model: 95545


In [187]:
#Test for new dataline

def testData(trainedModel, testset, line):
    trainedModel.eval()
    real = testset[line].y
    
    out = trainedModel(testset[line].to(device_name))
    Bool = ''
    print('{:<10}{:<10}{:<10}{:<10}'.format('real', 'out', '%error', 'Sign'))
    for i in range(len(out)):
        error = ((out[i].item() - real[i].item())/ out[i] )    *100
        if torch.sign(real[i]) == torch.sign(out[i]):
            Bool = 'True'
        else:
            Bool = 'False'
        real_formatted = '{:<10.3f}'.format(round(real[i].item(),5))
        out_formatted = '{:<10.3f}'.format(round(out[i].item(),5))
        error_formatted = '{:<10.2f}'.format(error)
        list_formatted = [real_formatted, out_formatted, error_formatted, Bool]
        print('{:<10}{:<10}{:<10}{:<10}'.format(*list_formatted))

# Show the per-entry comparison for sample 30 of the held-out set, using model4.
testData(model4, datasetTest, 30)


real      out       %error    Sign      
-3.450    -3.584    3.75      True      
2.534     2.529     -0.18     True      
1.654     1.702     2.78      True      
0.834     0.839     0.59      True      
0.093     0.080     -15.10    True      
-0.553    -0.572    3.37      True      
-1.088    -1.081    -0.60     True      
-1.499    -1.474    -1.71     True      
-1.779    -1.757    -1.26     True      
-1.920    -1.923    0.11      True      
-1.920    -1.948    1.39      True      
-1.779    -1.813    1.89      True      
-1.499    -1.516    1.12      True      
-1.088    -1.084    -0.30     True      
-0.553    -0.567    2.46      True      
0.093     0.007     -1241.44  True      
0.834     0.764     -9.15     True      
1.654     1.680     1.55      True      
2.534     2.602     2.62      True      
3.450     3.525     2.13      True      


In [25]:
model_tester_data = "testdata.txt"
# NOTE(review): this cell used to call create_dataset(), which is not defined
# anywhere in the visible notebook and raises NameError on a fresh kernel.
# get_data() is used on the assumption that it is the same loader -- confirm.
# get_data() shuffles the rows, so samples 0 and 1 are not necessarily the
# first two rows of the file.
model_tester_dataset = get_data(model_tester_data)

# Inference only: evaluation mode and no autograd graph.
model.eval()
with torch.no_grad():
    print(model_tester_dataset[0].y)
    tester = model(model_tester_dataset[0].to(device_name))

    print(tester)

    print(model_tester_dataset[1].y)
    tester2 = model(model_tester_dataset[1].to(device_name))
    print(tester2)

tensor([ 1.3550,  0.1630,  0.9600,  1.1790,  0.9760,  0.5080, -0.0740, -0.6390,
        -1.0780, -1.3180, -1.3180, -1.0780, -0.6390, -0.0740,  0.5080,  0.9760,
         1.1790,  0.9600,  0.1630, -1.3550])
tensor([ 1.4588,  0.0168,  0.9337,  1.0978,  0.8159,  0.3859, -0.1911, -0.7819,
        -1.2410, -1.4660, -1.4306, -1.0969, -0.5489,  0.1082,  0.5618,  0.9636,
         1.3206,  0.9643,  0.0294, -1.8978], grad_fn=<SqueezeBackward1>)
tensor([ 1.6200,  0.1130,  1.0340,  1.3030,  1.0950,  0.5870, -0.0530, -0.6770,
        -1.1640, -1.4290, -1.4290, -1.1640, -0.6770, -0.0530,  0.5870,  1.0950,
         1.3030,  1.0340,  0.1130, -1.6200])
tensor([ 1.7416, -0.0129,  1.0249,  1.2137,  0.9230,  0.4418, -0.1897, -0.8252,
        -1.3337, -1.5737, -1.4708, -1.0195, -0.3363,  0.4624,  0.9845,  1.3977,
         1.5277,  1.0012, -0.6278, -2.5273], grad_fn=<SqueezeBackward1>)
