# LPBF Demo
This notebook is an example of training an STGNN on an LPBF dataset. It shows in detail how to build the model and how to load the data from the LPBF dataset's Parquet file.
Before running this notebook, make sure the package is installed in your system by running 
`pip install -e .` from the base directory of this repository.

In [1]:
import pyarrow.parquet as pq
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import copy
import random
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import Sequential, GCNConv
from torch_geometric.utils import to_undirected
from sklearn.model_selection import train_test_split

import seaborn as sns




Generated classes for the `STConvAE` model. The `STConvEncoder` defined here includes two `TemporalConv` layers and one normal spatial convolution layer (GCN) between them.

In [2]:
class TemporalConv(nn.Module):
    """Gated temporal convolution block.

    Three parallel 2-D convolutions slide along the time axis only (their
    kernel spans a single node), and are combined as
    ``relu(conv_1(X) * sigmoid(conv_2(X)) + conv_3(X))``.

    Args:
        in_channels (int): Number of input features.
        out_channels (int): Number of output features.
        kernel_size (int): Convolutional kernel size along the time axis.
        stride (int): Convolution stride along the time axis.
        padding (int): Zero padding added to both ends of the time axis.
    """

    def __init__(self, in_channels: int, out_channels: int, kernel_size, stride: int, padding: int):
        super().__init__()
        # All three branches share one geometry: size 1 on the node axis,
        # the requested kernel/stride/padding on the time axis.
        conv_geometry = dict(
            kernel_size=(1, kernel_size),
            stride=(1, stride),
            padding=(0, padding),
        )
        self.conv_1 = nn.Conv2d(in_channels, out_channels, **conv_geometry)
        self.conv_2 = nn.Conv2d(in_channels, out_channels, **conv_geometry)
        self.conv_3 = nn.Conv2d(in_channels, out_channels, **conv_geometry)

    def forward(self, X: torch.FloatTensor) -> torch.FloatTensor:
        """Forward pass through temporal convolution block.

        Arg types:
            * **X** (torch.FloatTensor) -  Input data of shape
                (batch_size, input_time_steps, num_nodes, in_channels).

        Return types:
            * **H** (torch.FloatTensor) - Output data of shape
                (batch_size, output_time_steps, num_nodes, out_channels).
        """
        # Conv2d wants channels in dim 1 and convolves the last axis, so swap
        # the time and channel axes: -> (batch, channels, nodes, time).
        channels_first = X.permute(0, 3, 2, 1)
        gated = self.conv_1(channels_first) * torch.sigmoid(self.conv_2(channels_first))
        activated = F.relu(gated + self.conv_3(channels_first))
        # Undo the axis swap: -> (batch, time, nodes, channels).
        return activated.permute(0, 3, 2, 1)

class STConvEncoder(nn.Module):
    """Spatio-temporal encoder: temporal conv -> graph conv -> temporal conv.

    The first :class:`TemporalConv` maps ``in_channels`` to ``hidden_channels``,
    a single :class:`GCNConv` mixes information between nodes independently for
    every (batch, time step) snapshot, and the second :class:`TemporalConv`
    maps ``hidden_channels`` to ``out_channels``.

    Args:
        num_nodes (int): Number of nodes in the graph (stored only).
        in_channels (int): Number of input features per node and time step.
        hidden_channels (int): Number of features after the first temporal
            convolution and after the graph convolution.
        out_channels (int): Number of output features per node.
        kernel_size (int): Temporal kernel size of both temporal convolutions.
        stride (int): Temporal stride of both temporal convolutions.
        padding (int): Temporal padding of both temporal convolutions.
        K (int): Stored for interface compatibility; not used by ``GCNConv``.
        normalization (str): Stored for interface compatibility; not used by
            ``GCNConv``.
        bias (bool): Whether the graph convolution learns a bias.
    """

    def __init__(
        self,
        num_nodes: int,
        in_channels: int,
        hidden_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int,
        padding: int,
        K: int,
        normalization: str = "sym",
        bias: bool = True,
    ):
        super(STConvEncoder, self).__init__()
        self.num_nodes = num_nodes
        self.in_channels = in_channels
        self.hidden_channels = hidden_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.K = K
        self.normalization = normalization
        self.bias = bias

        self._temporal_conv1 = TemporalConv(
            in_channels=in_channels,
            out_channels=hidden_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
        )

        self._graph_conv = GCNConv(
            in_channels=hidden_channels,
            out_channels=hidden_channels,
            bias=bias,
        )

        self._temporal_conv2 = TemporalConv(
            in_channels=hidden_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
        )

    def forward(self, X: torch.FloatTensor, edge_index: torch.LongTensor, edge_weight: torch.FloatTensor = None) -> torch.FloatTensor:
        r"""Forward pass. If edge weights are not present the forward pass
        defaults to an unweighted graph.

        Arg types:
            * **X** (PyTorch FloatTensor) - Sequence of node features of shape (Batch size X Input time steps X Num nodes X In channels).
            * **edge_index** (PyTorch LongTensor) - Graph edge indices.
            * **edge_weight** (PyTorch FloatTensor, optional) - Edge weight vector, one entry per column of ``edge_index``.

        Return types:
            * **T** (PyTorch FloatTensor) - Sequence of node features of shape (Batch size X Output time steps X Num nodes X Out channels).
        """
        T_0 = self._temporal_conv1(X)

        # The graph convolution keeps the channel count (hidden -> hidden), so
        # a buffer shaped like T_0 can hold its output. zeros_like already
        # allocates on T_0's device.
        T = torch.zeros_like(T_0)
        for b in range(T_0.size(0)):
            for t in range(T_0.size(1)):
                # One graph convolution per (batch, time step) snapshot; the
                # same graph (and optional weights) is shared by all snapshots.
                T[b, t] = self._graph_conv(T_0[b, t], edge_index, edge_weight)

        T = F.relu(T)

        return self._temporal_conv2(T)


Load the data from the Parquet file here.

In [4]:
# Parquet file to read; the path is machine-specific, so point it at your own copy
parquet_file_path_width = '/mnt/vstor/CSE_MSE_RXF131/cradle-members/mds3/kjh125/radiography-datatable/111417_width_measure_interval.parquet'

# Load the file as a PyArrow Table, then keep only the pandas rows that have a 'distance' value
table1 = pq.read_table(parquet_file_path_width)
df1 = table1.to_pandas().dropna(subset=['distance'])

# Give every distinct 'sanu' value a consecutive integer index (0, 1, 2, ...)
unique_sanu = df1['sanu'].unique()
sanu_index_1 = dict(zip(unique_sanu, range(len(unique_sanu))))

Extract node features and labels

In [6]:
node_features, node_labels_1, node_labels_2 = [], [], []

# One graph node per 'sanu' group. Each kept row is laid out as
# [sanu, 34 trimmed distances, sasp, sapw].
for sanu, group in df1.groupby('sanu'):
    distances = group['distance'].values
    # 'sasp', 'sapw' and both labels are read from the first row of the group
    sasp_value, sapw_value = group['sasp'].iloc[0], group['sapw'].iloc[0]
    labels, labels_2 = group['complete'].iloc[0], group['pore_obs'].iloc[0]

    # Only groups with exactly 40 distance measurements are used
    if len(distances) != 40:
        continue

    # Drop the first 3 and last 3 measurements, then append 'sasp' and 'sapw'
    node_features.append([sanu, *distances[3:-3], sasp_value, sapw_value])
    node_labels_1.append(labels)
    node_labels_2.append(labels_2)

Split the data into training, validation and test sets.

In [29]:
# split dataset
def split_array(data, train_fraction=0.8, seed=42):
    """Shuffle a copy of ``data`` deterministically and cut it in two.

    The input is deep-copied, so neither ``data`` nor its elements are shared
    with the returned lists. The shuffle uses a private generator seeded with
    ``seed``; the global ``random`` state is left untouched. Because the
    permutation depends only on ``seed`` and ``len(data)``, calling this on
    several equal-length sequences (e.g. features and their labels) keeps
    them aligned.

    Args:
        data: Sequence of samples to split.
        train_fraction (float): Share of samples placed in the first part;
            the cut index is ``int(len(data) * train_fraction)``.
        seed (int): Seed of the shuffle.

    Returns:
        tuple[list, list]: ``(first_part, second_part)``.
    """
    arr = list(copy.deepcopy(data))
    # A dedicated generator seeded with `seed` yields the same permutation as
    # seeding the module-level generator would, without the global side effect.
    random.Random(seed).shuffle(arr)

    cut = int(len(arr) * train_fraction)
    return arr[:cut], arr[cut:]

def _train_valid_test(samples):
    """Split ``samples`` 80/20 into (train+valid, test), then the first part 80/20 again."""
    train_and_valid, test_part = split_array(samples)
    train_part, valid_part = split_array(train_and_valid)
    return train_part, valid_part, test_part


# split_array shuffles with the same fixed seed on every call, so the features
# and both label lists (all the same length) receive the same permutation and
# stay aligned sample by sample.
train_features, valid_features, test_features = _train_valid_test(node_features)
train_labels_1, valid_label_1, test_labels_1 = _train_valid_test(node_labels_1)
train_labels_2, valid_label_2, test_labels_2 = _train_valid_test(node_labels_2)

Compute the energy density (`sapw / sasp`) of every node and compare all pairs of nodes: two nodes are connected by an edge if the absolute difference between their energy densities is less than `ratio_threshold`.

In [33]:
ratio_threshold = 0.3


def build_edge_index(features, threshold):
    """Connect every pair of nodes whose energy densities are close.

    Args:
        features: List of node rows whose last two entries are 'sasp' and
            'sapw' (in that order).
        threshold (float): Two nodes are linked when the absolute difference
            of their energy densities (``sapw / sasp``) is below this value.

    Returns:
        torch.LongTensor: Undirected edge index of shape (2, num_edges); an
        empty (2, 0) tensor when no pair qualifies.
    """
    # Energy density is a per-node quantity, so compute it once per node
    # instead of once per pair.
    densities = [row[-1] / row[-2] for row in features]

    pairs = [
        [i, j]
        for i in range(len(densities))
        for j in range(i + 1, len(densities))
        if abs(densities[i] - densities[j]) < threshold
    ]

    # torch.tensor([]) would be a 1-D float tensor, which is not a valid edge index.
    if not pairs:
        return torch.empty((2, 0), dtype=torch.long)

    # Only i < j pairs were collected; to_undirected adds the reverse direction.
    return to_undirected(torch.tensor(pairs, dtype=torch.long).t())


# Each split gets its own graph; node indices are positions within that split.
train_edge_index = build_edge_index(train_features, ratio_threshold)
valid_edge_index = build_edge_index(valid_features, ratio_threshold)
test_edge_index = build_edge_index(test_features, ratio_threshold)
        

Reshape each split so that it can be fed into the model, i.e. shape: (batch, time steps, number of nodes, number of features).

In [34]:
# Each raw row is [sanu, 34 distances, sasp, sapw]. Build, for every time step,
# the vector [distance_t, sasp, sapw, sapw / sasp, 0] WITHOUT editing the raw
# rows in place, so a failed or repeated run cannot silently drop an extra
# leading value from them.
temp = []
for data in test_features:
    values = data[1:]  # drop the leading 'sanu' identifier
    sasp, sapw = values[-2], values[-1]
    # Per-node constants repeated at every time step; the trailing 0 is a constant channel
    last_four = [sasp, sapw, sapw / sasp, 0]

    # One 5-element feature vector per each of the 34 distance measurements
    temp.append([[values[i], *last_four] for i in range(34)])

test_features = torch.tensor(temp, dtype=torch.float)  # (nodes, time steps, features)
test_features = test_features.permute(1, 0, 2)         # (time steps, nodes, features)
test_features = test_features.unsqueeze(0)             # add the batch dimension

test_features.shape


torch.Size([1, 34, 86, 5])

In [35]:
# Each raw row is [sanu, 34 distances, sasp, sapw]. Build, for every time step,
# the vector [distance_t, sasp, sapw, sapw / sasp, 0] WITHOUT editing the raw
# rows in place, so a failed or repeated run cannot silently drop an extra
# leading value from them.
temp = []
for data in valid_features:
    values = data[1:]  # drop the leading 'sanu' identifier
    sasp, sapw = values[-2], values[-1]
    # Per-node constants repeated at every time step; the trailing 0 is a constant channel
    last_four = [sasp, sapw, sapw / sasp, 0]

    # One 5-element feature vector per each of the 34 distance measurements
    temp.append([[values[i], *last_four] for i in range(34)])

valid_features = torch.tensor(temp, dtype=torch.float)  # (nodes, time steps, features)
valid_features = valid_features.permute(1, 0, 2)        # (time steps, nodes, features)
valid_features = valid_features.unsqueeze(0)            # add the batch dimension

valid_features.shape


torch.Size([1, 34, 69, 5])

In [36]:
# Each raw row is [sanu, 34 distances, sasp, sapw]. Build, for every time step,
# the vector [distance_t, sasp, sapw, sapw / sasp, 0] WITHOUT editing the raw
# rows in place, so a failed or repeated run cannot silently drop an extra
# leading value from them.
temp = []
for data in train_features:
    values = data[1:]  # drop the leading 'sanu' identifier
    sasp, sapw = values[-2], values[-1]
    # Per-node constants repeated at every time step; the trailing 0 is a constant channel
    last_four = [sasp, sapw, sapw / sasp, 0]

    # One 5-element feature vector per each of the 34 distance measurements
    temp.append([[values[i], *last_four] for i in range(34)])

train_features = torch.tensor(temp, dtype=torch.float)  # (nodes, time steps, features)
train_features = train_features.permute(1, 0, 2)        # (time steps, nodes, features)
train_features = train_features.unsqueeze(0)            # add the batch dimension

train_features.shape


torch.Size([1, 34, 273, 5])

Convert the labels into tensors and wrap the features, edges and labels of each split in a `Data` object.

In [37]:
# Prefer the GPU when one is present. NOTE: nothing in this cell moves tensors to `device`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Labels for 'complete', converted to integer class-index tensors
train_labels_1, valid_label_1, test_labels_1 = (
    torch.tensor(split_labels, dtype=torch.long)
    for split_labels in (train_labels_1, valid_label_1, test_labels_1)
)

# Labels for 'pore_obs', converted the same way
train_labels_2, valid_label_2, test_labels_2 = (
    torch.tensor(split_labels, dtype=torch.long)
    for split_labels in (train_labels_2, valid_label_2, test_labels_2)
)

# One graph per split; the 'complete' labels are used as the target `y`
train_data = Data(x=train_features, edge_index=train_edge_index, y=train_labels_1)
valid_data = Data(x=valid_features, edge_index=valid_edge_index, y=valid_label_1)
test_data = Data(x=test_features, edge_index=test_edge_index, y=test_labels_1)


Train the model. The learning rate, the number of epochs and the model dimensions are set here.

In [67]:
epoch = 200
learnR = 0.0001

# Read the dimensions from the prepared training tensor, which is laid out as
# (batch, time steps, nodes, features), instead of hard-coding them - the
# values then cannot drift from the data when the dataset or split changes.
_, num_time_steps, num_nodes, num_features = train_features.shape

# Seed PyTorch before the model is built so the weight initialisation, and
# therefore the whole run, is repeatable.
torch.manual_seed(42)

# Instantiate STConvEncoder; out_channels=2 gives one logit per class
model = STConvEncoder(
    num_nodes=num_nodes,
    in_channels=num_features,
    hidden_channels=16,
    out_channels=2,
    kernel_size=3,
    stride=7,
    padding=1,
    K=2
)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learnR)

# Training loop
def train(model, data, val_data, optimizer, criterion, epochs):
    """Train ``model`` full-batch and validate after every epoch.

    Each epoch performs one optimisation step on ``data`` followed by one
    gradient-free evaluation on ``val_data``. One line of metrics is printed
    per epoch. The model is left in eval mode when the function returns.

    Args:
        model: Module called as ``model(x, edge_index)``; its output, once
            squeezed, must be (num_nodes, num_classes).
        data: Training graph exposing ``x``, ``edge_index`` and ``y``.
        val_data: Validation graph exposing the same attributes.
        optimizer: Optimiser over ``model``'s parameters.
        criterion: Loss called as ``criterion(logits, targets)``.
        epochs (int): Number of epochs to run.

    Returns:
        dict: Lists with one entry per epoch under the keys
        ``'train_losses'``, ``'val_losses'``, ``'train_accuracies'`` and
        ``'val_accuracies'`` (accuracies are percentages).
    """
    history = {
        'train_losses': [],
        'val_losses': [],
        'train_accuracies': [],
        'val_accuracies': []
    }
    for epoch in range(epochs):
        # The validation pass below switches the model to eval mode, so
        # training mode has to be restored at the start of every epoch.
        model.train()

        optimizer.zero_grad()
        output = model(data.x, data.edge_index)
        logits = output.squeeze()

        # Training accuracy is measured on the pre-update predictions
        pred = torch.argmax(logits, dim=1)
        correct = (pred == data.y).sum().item()
        epoch_accuracy = 100 * correct / len(data.y)
        history['train_accuracies'].append(epoch_accuracy)

        loss = criterion(logits, data.y.squeeze())
        history['train_losses'].append(loss.item())

        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_logits = model(val_data.x, val_data.edge_index).squeeze()
            val_pred = torch.argmax(val_logits, dim=1)
            val_correct = (val_pred == val_data.y).sum().item()
            val_accuracy = 100 * val_correct / len(val_data.y)
            history['val_accuracies'].append(val_accuracy)

            val_loss = criterion(val_logits, val_data.y.squeeze())
            history['val_losses'].append(val_loss.item())

        print(f'Epoch {epoch+1}/{epochs}, Train Loss: {loss.item()}, Train Accuracy: {epoch_accuracy}%, Val Loss: {val_loss.item()}, Val Accuracy: {val_accuracy}%')

    return history

# Run the full training schedule and keep the per-epoch metrics it returns
his2 = train(
    model=model,
    data=train_data,
    val_data=valid_data,
    optimizer=optimizer,
    criterion=criterion,
    epochs=epoch,
)

Epoch 1/200, Train Loss: 8.583873748779297, Train Accuracy: 22.71062271062271%, Val Loss: 8.431390762329102, Val Accuracy: 18.840579710144926%
Epoch 2/200, Train Loss: 8.306279182434082, Train Accuracy: 22.71062271062271%, Val Loss: 8.13394546508789, Val Accuracy: 18.840579710144926%
Epoch 3/200, Train Loss: 8.030369758605957, Train Accuracy: 22.71062271062271%, Val Loss: 7.8359904289245605, Val Accuracy: 18.840579710144926%
Epoch 4/200, Train Loss: 7.755929946899414, Train Accuracy: 22.71062271062271%, Val Loss: 7.537489891052246, Val Accuracy: 18.840579710144926%
Epoch 5/200, Train Loss: 7.483226776123047, Train Accuracy: 22.71062271062271%, Val Loss: 7.239008903503418, Val Accuracy: 18.840579710144926%
Epoch 6/200, Train Loss: 7.212098121643066, Train Accuracy: 22.71062271062271%, Val Loss: 6.941434383392334, Val Accuracy: 18.840579710144926%
Epoch 7/200, Train Loss: 6.9424729347229, Train Accuracy: 22.71062271062271%, Val Loss: 6.645351409912109, Val Accuracy: 18.840579710144926%
E

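Store the training history in a long-format dictionary (epoch, loss, and whether the loss is a training or a validation loss).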

In [None]:
# `history` only exists inside train(); the dictionary it returned was bound
# to `his2` by the training call, so read it from there.
his1 = his2
epochs_range = range(1, len(his1['train_losses']) + 1)

# Long format: the training losses come first, then the validation losses,
# with 'Epoch' and 'Type' repeated to match.
data1 = {
    'Epoch': list(epochs_range) * 2,
    'Loss': his1['train_losses'] + his1['val_losses'],
    'Type': ['Train'] * len(his1['train_losses']) + ['Validation'] * len(his1['val_losses'])
}

Evaluate the model on the test set

In [81]:
# Switch to inference mode and score the held-out test split in one full-batch pass
model.eval()

with torch.no_grad():
    output = model(test_data.x, test_data.edge_index)
    logits = output.squeeze()

    # Predicted class = index of the larger logit for each node
    pred = logits.argmax(dim=1)
    correct = int((pred == test_data.y).sum())
    accuracy = 100 * correct / len(test_data.y)

    test_loss = criterion(logits, test_data.y.squeeze())

    print(f'test set loss: {test_loss:.4f}, accuary : {accuracy}' )

test set loss: 0.6522, accuary : 80.23255813953489


Saving and loading model

In [80]:
# To save the trained weights (replace 'path' with a real file name):
#torch.save(model.state_dict(), 'path')

# Rebuild the architecture with the same hyper-parameters used for training,
# then restore the saved weights into it.
model = STConvEncoder(
    num_nodes=num_nodes,
    in_channels=num_features,
    hidden_channels=16,
    out_channels=2,
    kernel_size=3,
    stride=7,
    padding=1,
    K=2
)
# load_state_dict expects the state dictionary itself, not a file name, so the
# checkpoint is read with torch.load first. map_location='cpu' lets a
# checkpoint written on a GPU machine load anywhere.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
model.load_state_dict(torch.load('path', map_location='cpu'))


<All keys matched successfully>