In [None]:
from __future__ import print_function
import numpy as np
import torch
from torch.utils.data import Dataset
from scipy import sparse
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

class GraphDataSet(Dataset):
    """Dataset of (adjacency, node attributes, t, label) samples.

    For every structure index ``ind`` taken from ``graph_seq`` the files
    ``data/structure-<ind>/neighbor.txt``, ``feature.txt`` and
    ``property.txt`` are loaded. Each property row without a negative entry
    becomes one sample; all samples of a structure share that structure's
    normalized adjacency matrix and feature matrix (both stored sparse).
    """

    def __init__(self, num_data, graph_seq):
        """Load the first ``num_data`` structures listed in ``graph_seq``.

        Raises:
            ValueError: if ``num_data`` is 0, i.e. there is nothing to load.
        """
        max_node = 300
        num_features = 5

        # Collect per-structure pieces and join them once after the loop;
        # re-concatenating the growing arrays on every iteration copies
        # everything loaded so far each time.
        adjacency_list, node_attr_list = [], []
        t_blocks, label_blocks = [], []

        for i in range(num_data):
            ind = graph_seq[i]

            # load files
            neighbor = np.loadtxt('data/structure-{}/neighbor.txt'.format(ind))
            feature = np.loadtxt('data/structure-{}/feature.txt'.format(ind))
            # ndmin=2 keeps a one-row property file two-dimensional so the
            # row-wise filtering below still works.
            properties = np.loadtxt('data/structure-{}/property.txt'.format(ind), ndmin=2)

            # feature data manipulation
            feature = manipulate_feature(feature, max_node, num_features)

            # normalize the adjacency matrix
            neighbor = normalize_adj(neighbor, max_node)

            # delete data points with negative properties
            properties = properties[properties.min(axis=1) >= 0, :]
            num_properties = properties.shape[0]

            # independent variable t, the external field (column 1 removed)
            t = np.delete(properties, 1, axis=1)
            # label, the magnetostriction (column 0 removed)
            label = np.delete(properties, 0, axis=1)

            # one (shared) graph entry per remaining property row
            adjacency_list.extend([neighbor] * num_properties)
            node_attr_list.extend([feature] * num_properties)
            t_blocks.append(t)
            label_blocks.append(label)

        if not t_blocks:
            raise ValueError('GraphDataSet needs at least one structure (num_data=0)')

        # normalize the independent variable t matrix and the labels
        t_matrix, label_matrix = normalize_t_label(np.concatenate(t_blocks),
                                                   np.concatenate(label_blocks))

        self.adjacency_matrix = np.array(adjacency_list)
        self.node_attr_matrix = np.array(node_attr_list)
        self.t_matrix = np.array(t_matrix)
        self.label_matrix = np.array(label_matrix)

        print('--------------------')
        print('Training Data:')
        print('adjacency matrix:\t', self.adjacency_matrix.shape)
        print('node attribute matrix:\t', self.node_attr_matrix.shape)
        print('t matrix:\t\t', self.t_matrix.shape)
        print('label name:\t\t', self.label_matrix.shape)
        print('--------------------')

    def __len__(self):
        """Return the number of samples (one per kept property row)."""
        return len(self.adjacency_matrix)

    def __getitem__(self, idx):
        """Return sample ``idx`` as four tensors.

        The sparse adjacency and feature matrices are densified here;
        ``toarray`` yields a plain ``ndarray`` rather than ``np.matrix``.
        """
        adjacency_matrix = torch.from_numpy(self.adjacency_matrix[idx].toarray())
        node_attr_matrix = torch.from_numpy(self.node_attr_matrix[idx].toarray())
        t_matrix = torch.from_numpy(self.t_matrix[idx])
        label_matrix = torch.from_numpy(self.label_matrix[idx])
        return adjacency_matrix, node_attr_matrix, t_matrix, label_matrix

def normalize_adj(neighbor, max_node):
    """Return the symmetrically normalized, zero-padded adjacency matrix.

    Self-loops are added (diagonal set to 1), the matrix is scaled to
    ``D^-1/2 (A + I) D^-1/2`` with ``D`` the column sums, padded with zeros
    to ``(max_node, max_node)`` and returned as a ``scipy.sparse.csr_matrix``.

    The input array is not modified.

    Raises:
        ValueError: if ``neighbor`` is not a square 2-D matrix or has more
            than ``max_node`` rows.
    """
    # Work on a float copy so the caller's array keeps its original diagonal.
    adj = np.array(neighbor, dtype=float)
    if adj.ndim != 2 or adj.shape[0] != adj.shape[1]:
        raise ValueError('adjacency matrix must be square, got shape {}'.format(adj.shape))
    if adj.shape[0] > max_node:
        raise ValueError('graph has {} nodes but max_node is {}'.format(adj.shape[0], max_node))

    np.fill_diagonal(adj, 1)  # add the identity matrix
    degree = np.sum(adj, axis=0)  # diagonal elements of D
    inv_sqrt_degree = np.power(degree, -0.5)

    # Same result as D_inv @ A @ D_inv, without materializing the dense
    # diagonal matrix or running two matrix products.
    adj = inv_sqrt_degree[:, None] * (adj * inv_sqrt_degree[None, :])

    # match dimension to the max dimension for neighbors
    padded = np.zeros((max_node, max_node))
    padded[:adj.shape[0], :adj.shape[1]] = adj

    return sparse.csr_matrix(padded)

def manipulate_feature(feature, max_node, features):
    """Prepare a raw per-grain feature table for the network.

    The first column (Grain ID) is dropped, columns 3 and 4 of the remaining
    table (grain size and number of neighbors) are standardized to zero mean
    and unit variance, and the result is zero-padded to
    ``(max_node, features)`` and returned as a ``scipy.sparse.csr_matrix``.

    A column with zero variance is only centered (it becomes all zeros)
    instead of being divided by zero, which would fill it with NaN.

    Raises:
        ValueError: if the table does not fit into ``(max_node, features)``.
    """
    # atleast_2d: a one-row feature file is read by np.loadtxt as a 1-D array.
    table = np.atleast_2d(np.asarray(feature, dtype=float))
    table = np.delete(table, 0, axis=1)  # remove the first column (Grain ID)

    for col in (3, 4):  # 3: grain size, 4: number of neighbors
        column = table[:, col]
        centered = column - np.mean(column)
        spread = np.std(column)
        table[:, col] = centered / spread if spread > 0 else centered

    if table.shape[0] > max_node or table.shape[1] > features:
        raise ValueError('feature table of shape {} does not fit into ({}, {})'.format(
            table.shape, max_node, features))

    # match dimension to the max dimension for features
    padded = np.zeros((max_node, features))
    padded[:table.shape[0], :table.shape[1]] = table

    return sparse.csr_matrix(padded)

def normalize_t_label(t_matrix, label_matrix, t_scale=10000, norm_path='norm.npz'):
    """Scale ``t_matrix`` and standardize ``label_matrix``.

    Args:
        t_matrix: array of the independent variable; divided by ``t_scale``.
        label_matrix: array of labels; shifted and scaled to zero mean and
            unit standard deviation (computed over all entries).
        t_scale: divisor applied to ``t_matrix`` (default 10000).
        norm_path: file that receives ``[label_mean, label_std]`` under the
            key ``norm`` so predictions can be mapped back later.

    Returns:
        Tuple ``(t_matrix, label_matrix)`` of the normalized arrays.
    """
    t_matrix = t_matrix / t_scale
    label_mean = np.mean(label_matrix)
    label_std = np.std(label_matrix)
    if label_std == 0:
        # Constant labels: keep them finite (all zeros after centering) and
        # store 1.0 so "pred * std + mean" still inverts the transform.
        label_std = 1.0
    label_matrix = (label_matrix - label_mean) / label_std

    # save the mean and standard deviation of label
    norm = np.array([label_mean, label_std])
    np.savez_compressed(norm_path, norm=norm)

    return t_matrix, label_matrix


In [None]:
from __future__ import print_function

import argparse
import time
from collections import OrderedDict
import os
import numpy as np
import json
import torch
import torch.nn as nn
import torch.optim as optim
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)
from util import *
# from data import *
import sys
import snntorch as snn

class Message_Passing(nn.Module):
    """Aggregate node features over neighbors by a batched matrix product."""

    def forward(self, x, adjacency_matrix):
        """Return ``adjacency_matrix @ x`` for every item of the batch.

        Both arguments are 3-D tensors, as required by ``torch.bmm``.
        """
        neighbor_nodes = torch.bmm(adjacency_matrix, x)
        # The sizes are passed as %s arguments; without a placeholder the
        # logging module reports a formatting error once DEBUG is enabled.
        logging.debug('neighbor message\t%s', neighbor_nodes.size())
        logging.debug('x shape\t%s', x.size())
        return neighbor_nodes

class GraphModel(nn.Module):
    """Two message-passing layers followed by a three-layer dense head.

    Every hidden layer is followed by an ``snn.Leaky`` neuron with learnable
    ``beta`` and ``threshold`` whose spike output feeds the next layer.
    ``forward`` returns the prediction together with the mean spike output
    of the two dense-head neurons.
    """

    def __init__(self, max_node_num, atom_attr_dim, latent_dim1, latent_dim2):
        super(GraphModel, self).__init__()

        self.max_node_num = max_node_num
        self.atom_attr_dim = atom_attr_dim
        self.latent_dim1 = latent_dim1
        self.latent_dim2 = latent_dim2

        self.message_passing_0 = Message_Passing()
        self.dense_0 = nn.Linear(self.atom_attr_dim, self.latent_dim2)
        self.activation_0 = nn.Sigmoid()  # kept as an attribute; not applied in forward
        self.message_passing_1 = Message_Passing()
        self.dense_1 = nn.Linear(self.latent_dim2, self.latent_dim1)
        self.activation_1 = nn.Sigmoid()  # kept as an attribute; not applied in forward

        # "+ 1" is the t column concatenated to the flattened graph features.
        self.fc1 = nn.Linear(self.max_node_num * self.latent_dim1 + 1, 1024)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(1024, 128)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(128, 1)

        # defining the spiking neurons
        beta_1 = torch.rand(1024)
        thr_1 = torch.rand(1024)
        self.lif_1 = snn.Leaky(beta=beta_1, learn_beta=True, threshold=thr_1, learn_threshold=True, reset_mechanism='zero')

        beta_2 = torch.rand(128)
        thr_2 = torch.rand(128)
        self.lif_2 = snn.Leaky(beta=beta_2, learn_beta=True, threshold=thr_2, learn_threshold=True, reset_mechanism='zero')

        # Shapes follow the outputs of dense_0 / dense_1 instead of the
        # previously hard-coded (300, 40) and (300, 3), so the model also
        # works for other max_node_num / latent_dim settings.
        beta_3 = torch.rand(self.max_node_num, self.latent_dim2)
        thr_3 = torch.rand(self.max_node_num, self.latent_dim2)
        self.lif_3 = snn.Leaky(beta=beta_3, learn_beta=True, threshold=thr_3, learn_threshold=True, reset_mechanism='zero')

        beta_4 = torch.rand(self.max_node_num, self.latent_dim1)
        thr_4 = torch.rand(self.max_node_num, self.latent_dim1)
        self.lif_4 = snn.Leaky(beta=beta_4, learn_beta=True, threshold=thr_4, learn_threshold=True, reset_mechanism='zero')

    @staticmethod
    def _spike_rate(spikes):
        """Return the mean of ``spikes`` as a 1-element tensor on its device."""
        return (torch.sum(spikes) / spikes.numel()).reshape(1)

    def forward(self, node_attr_matrix, adjacency_matrix, t_matrix):
        """Run the network on one batch.

        Returns:
            Tuple ``(prediction, s1, s2)`` where ``s1`` / ``s2`` are the
            1-element mean spike outputs after ``fc1`` / ``fc2``.
        """
        node_attr_matrix = node_attr_matrix.float()
        adjacency_matrix = adjacency_matrix.float()
        # Cast like the other inputs so the concatenation below has one dtype.
        t_matrix = t_matrix.float()
        x = node_attr_matrix
        logging.debug('shape\t%s', x.size())

        # defining memory for spiking neurons
        mem_1 = self.lif_1.init_leaky()
        mem_2 = self.lif_2.init_leaky()
        mem_3 = self.lif_3.init_leaky()
        mem_4 = self.lif_4.init_leaky()

        # Message Passing Layer 1
        x = self.message_passing_0(x, adjacency_matrix)
        x = self.dense_0(x)
        x, mem_3 = self.lif_3(x, mem_3)

        # Message Passing Layer 2
        x = self.message_passing_1(x, adjacency_matrix)
        x = self.dense_1(x)
        x, mem_4 = self.lif_4(x, mem_4)

        # Before flatten, the size should be [Batch size, max_node_num, latent_dim]
        logging.debug('size of x after GNN\t%s', x.size())
        # After flatten is the graph representation
        x = x.view(x.size()[0], -1)
        logging.debug('size of x after GNN\t%s', x.size())

        # Concatenate [x, t]
        x = torch.cat((x, t_matrix), 1)

        x = self.fc1(x)
        x, mem_1 = self.lif_1(x, mem_1)
        # Computed on the activations' own device; the former
        # .to('cuda:0') made the forward pass fail on CPU-only machines.
        s1_sum = self._spike_rate(x)

        x = self.fc2(x)
        x, mem_2 = self.lif_2(x, mem_2)
        s2_sum = self._spike_rate(x)

        x = self.fc3(x)

        return x, s1_sum, s2_sum
    

def train(model, train_data_loader, validation_data_loader, epochs, checkpoint_dir, optimizer, criterion, validation_index, folder_name):
    """Train ``model`` and log per-epoch errors to a text file.

    After every epoch the model is evaluated with ``test`` on the training
    and validation loaders and one line
    ``epoch training_time training_MSE validation_MSE`` is appended to
    ``<folder_name>/learning_Output_<validation_index>.txt``.

    ``checkpoint_dir`` is accepted for interface compatibility but is not
    used inside this function.
    """
    print()
    print("*** Training started! ***")
    print()

    filename = '{}/learning_Output_{}.txt'.format(folder_name, validation_index)
    # The context manager closes the log even if training raises.
    with open(filename, "w") as output:
        print('Epoch Training_time Training_MSE Validation_MSE', file=output, flush=True)

        for epoch in range(epochs):
            model.train()
            train_start_time = time.time()

            for adjacency_matrix, node_attr_matrix, t_matrix, label_matrix in train_data_loader:
                adjacency_matrix = tensor_to_variable(adjacency_matrix)
                node_attr_matrix = tensor_to_variable(node_attr_matrix)
                t_matrix = tensor_to_variable(t_matrix)
                label_matrix = tensor_to_variable(label_matrix)

                optimizer.zero_grad()
                y_pred, _, _ = model(adjacency_matrix=adjacency_matrix, node_attr_matrix=node_attr_matrix, t_matrix=t_matrix)
                loss = criterion(y_pred, label_matrix)
                loss.backward()
                optimizer.step()

            train_end_time = time.time()
            _, training_loss_epoch, _, _ = test(model, train_data_loader, 'Training', False, criterion, validation_index, folder_name)
            # Use the loader passed in as a parameter; the previous code read
            # a differently spelled global name here.
            _, validation_loss_epoch, _, _ = test(model, validation_data_loader, 'Validation', False, criterion, validation_index, folder_name)
            # flush per epoch so progress can be followed while training runs
            print('%d %.3f %e %e' % (epoch, train_end_time - train_start_time, training_loss_epoch, validation_loss_epoch), file=output, flush=True)

def test(model, data_loader, test_val_tr, printcond, criterion, running_index, folder_name):
    """Evaluate ``model`` on ``data_loader`` in de-normalized label units.

    Predictions and labels are mapped back with the mean / standard
    deviation stored in ``norm.npz`` before the errors are computed.

    Args:
        test_val_tr: name of the data split, used in the output file name
            and header.
        printcond: when true, write all (true, predicted) pairs to
            ``<folder_name>/<test_val_tr>_Output_<running_index>.txt``.

    Returns:
        ``(macro_avg_err, mse, s1, s2)`` where ``s1`` / ``s2`` are the spike
        statistics returned by the model for the last batch, or four
        ``None`` values when ``data_loader`` is ``None``.

    Raises:
        ValueError: if ``data_loader`` yields no batches.
    """
    model.eval()
    if data_loader is None:
        # Every caller unpacks four values.
        return None, None, None, None

    y_label_list, y_pred_list = [], []
    s1_test = s2_test = None

    # Evaluation needs no autograd graph.
    with torch.no_grad():
        for adjacency_matrix, node_attr_matrix, t_matrix, label_matrix in data_loader:
            adjacency_matrix = tensor_to_variable(adjacency_matrix)
            node_attr_matrix = tensor_to_variable(node_attr_matrix)
            t_matrix = tensor_to_variable(t_matrix)
            label_matrix = tensor_to_variable(label_matrix)

            y_pred, s1_test, s2_test = model(adjacency_matrix=adjacency_matrix, node_attr_matrix=node_attr_matrix, t_matrix=t_matrix)

            y_label_list.extend(variable_to_numpy(label_matrix))
            y_pred_list.extend(variable_to_numpy(y_pred))

    if not y_label_list:
        raise ValueError('data_loader yielded no batches')

    # NOTE(review): allow_pickle=True is kept from the original; 'norm' is
    # written as a plain float array, so pickle support looks unnecessary.
    norm = np.load('norm.npz', allow_pickle=True)['norm']
    label_mean, label_std = norm[0], norm[1]

    y_label_list = np.array(y_label_list) * label_std + label_mean
    y_pred_list = np.array(y_pred_list) * label_std + label_mean

    total_loss = macro_avg_err(y_pred_list, y_label_list)
    total_mse = criterion(torch.from_numpy(y_pred_list), torch.from_numpy(y_label_list)).item()

    length, w = np.shape(y_label_list)
    if printcond:
        filename = '{}/{}_Output_{}.txt'.format(folder_name, test_val_tr, running_index)
        with open(filename, 'w') as output:
            print('{} Set Predictions: '.format(test_val_tr), file=output)
            print('True_value Predicted_value', file=output)
            for i in range(length):
                # Index the single column explicitly: formatting a 1-element
                # array with %f relies on a deprecated array->scalar conversion.
                print('%f, %f' % (y_label_list[i, 0], y_pred_list[i, 0]), file=output)

    return total_loss, total_mse, s1_test, s2_test

if __name__ == '__main__':
    # Script entry point: parse options, seed everything, build the model,
    # train it and report errors on the train / validation / test loaders.
    import random  # used by seed_worker; not imported at file level

    if 'ipykernel' in sys.modules:
        # Inside a notebook sys.argv belongs to the kernel, so use fixed options.
        args = ['--max_node_num', '300', '--atom_attr_dim', '5', '--num_graphs', '492',
                '--batch_size', '32', '--min_learning_rate', '0', '--seed', '123',
                '--checkpoint', 'checkpoints/', '--validation_index', '0',
                '--testing_index', '1', '--folds', '10',
                '--idx_path', 'data/indices_and_graphseq.npz',
                '--folder_name', 'output_test_spk1/', '--num_data', '492', '--hyper', '0']
    else:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument('--max_node_num', type=int, default=300)
    parser.add_argument('--atom_attr_dim', type=int, default=5)
    parser.add_argument('--num_graphs', type=int, default=492)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--min_learning_rate', type=float, default=0)
    parser.add_argument('--seed', type=int, default=123)
    parser.add_argument('--checkpoint', type=str, default='checkpoints/')
    parser.add_argument('--validation_index', type=int, default=0)
    parser.add_argument('--testing_index', type=int, default=1)
    parser.add_argument('--folds', type=int, default=10)
    parser.add_argument('--idx_path', type=str, default='data/indices_and_graphseq.npz')
    parser.add_argument('--folder_name', type=str, default='output_test_spk1/')
    parser.add_argument('--num_data', type=int, default=492)
    parser.add_argument('--hyper', type=int, default=0)

    given_args = parser.parse_args(args)
    max_node_num = given_args.max_node_num
    atom_attr_dim = given_args.atom_attr_dim
    num_graphs = given_args.num_graphs
    checkpoint_dir = given_args.checkpoint
    validation_index = given_args.validation_index
    testing_index = given_args.testing_index
    idx_path = given_args.idx_path
    folds = given_args.folds
    batch_size = given_args.batch_size
    min_learning_rate = given_args.min_learning_rate
    seed = given_args.seed
    folder_name = given_args.folder_name
    num_data = given_args.num_data
    hyper = given_args.hyper

    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    if not os.path.exists(folder_name):
        os.makedirs(folder_name)

    # The key was previously misspelled ('PYTHONHASHargs.seed') and had no
    # effect. PYTHONHASHSEED is read at interpreter start-up, so setting it
    # here only influences child processes.
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.use_deterministic_algorithms(True)

    # Hyper-parameters come from a JSON file selected by --hyper.
    filename = 'hyper1/hyper/' + str(hyper) + '.json'
    with open(filename, 'r') as h:
        hyperset = json.load(h)

    latent_dim1 = hyperset['latent_dim1']
    latent_dim2 = hyperset['latent_dim2']
    epochs = hyperset['epoch']
    learning_rate = hyperset['lr']
    in_optim = hyperset['optim']

    # Define the model
    model = GraphModel(max_node_num, atom_attr_dim, latent_dim1, latent_dim2)
    if torch.cuda.is_available():
        model.cuda()

    optimizer_classes = {'Adam': optim.Adam, 'RMSprop': optim.RMSprop, 'SGD': optim.SGD}
    if in_optim not in optimizer_classes:
        # Previously an unknown name left `optimizer` undefined and failed
        # later with a NameError.
        raise ValueError('unsupported optimizer {!r}; expected one of {}'.format(
            in_optim, sorted(optimizer_classes)))
    optimizer = optimizer_classes[in_optim](model.parameters(), lr=learning_rate)

    criterion = nn.MSELoss()

    def seed_worker(worker_id):
        '''Seeding for DataLoaders'''
        # Derive the NumPy / random seeds from the per-worker torch seed.
        worker_seed = torch.initial_seed() % 2**32
        np.random.seed(worker_seed)
        random.seed(worker_seed)

    # Load data
    # NOTE(review): weights_only=False unpickles arbitrary objects; only load
    # loader files from a trusted source.
    train_dataloader = torch.load("Example-2/train_loader.pt", weights_only=False)
    validation_dataloader = torch.load("Example-2/val_loader.pt", weights_only=False)
    test_dataloader = torch.load("Example-2/test_loader.pt", weights_only=False)

    # train the model
    train_start_time = time.time()
    train(model, train_dataloader, validation_dataloader, epochs, checkpoint_dir, optimizer, criterion, validation_index, folder_name)
    train_end_time = time.time()

    torch.save(model, '{}/checkpoint.pth'.format(checkpoint_dir))

    # predictions on the entire training and test datasets
    train_rel, train_mse, sa_t, sb_t = test(model, train_dataloader, 'Training', True, criterion, validation_index, folder_name)
    validation_rel, validation_mse, se_t, sf_t = test(model, validation_dataloader, 'Validation', True, criterion, validation_index, folder_name)
    test_start_time = time.time()
    test_rel, test_mse, s1_test, s2_test = test(model, test_dataloader, 'Test', True, criterion, testing_index, folder_name)
    test_end_time = time.time()

    print('--------------------')
    print("validation_index : {}".format(validation_index))
    print("testing_index : {}".format(testing_index))
    print("training_time : {}".format(train_end_time-train_start_time))
    print("testing_time : {}".format(test_end_time-test_start_time))
    print("Train Relative Error: {:.3f}%".format(100 * train_rel))
    print("Validation Relative Error: {:.3f}%".format(100 * validation_rel))
    print("Test Relative Error: {:.3f}%".format(100 * test_rel))
    print("Train MSE : {}".format(train_mse))
    print("Validation MSE : {}".format(validation_mse))
    print("Test MSE: {}".format(test_mse))
    print("Spking Activity for s1: ",s1_test)
    print("Spking Activity for s2: ",s2_test)





Plotting Results

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Load the true and predicted values from the output files
def load_predictions(output_file):
    """Read true and predicted values from a prediction output file.

    The file starts with two header lines followed by one
    ``true_value, predicted_value`` pair per line. Blank lines are ignored.

    Returns:
        Tuple ``(true_values, predicted_values)`` of two lists of floats.
    """
    true_values = []
    predicted_values = []
    with open(output_file, 'r') as f:
        # Skip the two header lines; the None default tolerates a short file.
        next(f, None)
        next(f, None)
        for line in f:
            line = line.strip()
            if not line:
                # e.g. a trailing empty line, which float('') cannot parse
                continue
            true_value, predicted_value = map(float, line.split(','))
            true_values.append(true_value)
            predicted_values.append(predicted_value)
    return true_values, predicted_values

# Load the output file for the test set
test_output_file = 'outputs/Test_Output_1.txt'  # Adjust the file path as needed

# Load true and predicted values for the test set
test_true, test_pred = load_predictions(test_output_file)

# Scale the predicted values by 10^6 to convert them to ppm
test_pred = np.array(test_pred) * 10**6
test_true = np.array(test_true) * 10**6  # Scale true values similarly if needed

# Plot true vs predicted values for the test dataset
plt.figure(figsize=(6, 5))

# Scatter plot with outlined circles in a subtle color
plt.scatter(test_true, test_pred, facecolors='none', edgecolors='blue', label='Test')

# Set axis titles with the required math symbols and formatting and larger font size
plt.xlabel(r'$\mathrm{True\ \lambda^{eff}_{xx}\ (ppm)}$', fontsize=16)  # Adjust fontsize here
plt.ylabel(r'$\mathrm{Predicted\ \lambda^{eff}_{xx}\ (ppm)}$', fontsize=16)  # Adjust fontsize here
# plt.title('VSNN-SLF(a)')

# Fit a least squares line
coeffs = np.polyfit(test_true, test_pred, 1)
least_squares_line = np.polyval(coeffs, test_true)

# Plot the least squares line. The points are drawn in ascending x order:
# with the file's unsorted order the line is retraced back and forth and the
# dash pattern smears.
order = np.argsort(test_true)
plt.plot(test_true[order], least_squares_line[order], color='black', linestyle='--', label='Least Squares Line')
# One legend call after both artists exist covers the scatter and the line.
plt.legend()

# Set x and y ticks to be 0, 800, 1600
tick_values = [0, 800, 1600]
plt.xticks(tick_values, labels=[f'{x} ppm' for x in tick_values])
plt.yticks(tick_values, labels=[f'{x} ppm' for x in tick_values])

plt.tight_layout()
plt.show()
