# Topology Estimation Model

This notebook contains all the steps of topology model developement. 

In this notebook, the model can be:
- trained and tested
- loaded and run for new data

In [1]:
import sys
import os

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

In [2]:
# This will resolve log_dir relative to main.py
version = 19
# StdoutToTensorBoard(log_dir=f"logs/trials/nri/version_{version}")

print("Starting run from main script...")

Starting run from main script...


## 1. Load dataset

In [3]:
from data.config import DataConfig
from data.load import load_spring_particle_data

data_config = DataConfig()
data_config.set_train_valid_dataset()

# get node and edge dataset path from which data will be loaded
node_ds_paths, edge_ds_paths = data_config.get_dataset_paths()

# load datalaoders
train_loader, valid_loader, test_loader = load_spring_particle_data(node_ds_paths, edge_ds_paths)

#### Set the number of timesteps and dimensions of the node data

In [4]:
dataiter = iter(train_loader)
data = next(dataiter)

n_nodes = data[0].shape[1]
n_timesteps = data[0].shape[2]
n_dims = data[0].shape[3]

print(f"Number of nodes: {n_nodes}")
print(f"Number of timesteps: {n_timesteps}")  
print(f"Number of dimensions: {n_dims}")

Number of nodes: 5
Number of timesteps: 49
Number of dimensions: 4


### Prepare the relation matrix for encoder input

##### Helper functions

In [5]:
import numpy as np
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def encode_onehot(labels):
    classes = set(labels)
    classes_dict = {c: np.identity(len(classes))[i, :] for i, c in
                    enumerate(classes)}
    labels_onehot = np.array(list(map(classes_dict.get, labels)),
                             dtype=np.int32)
    return labels_onehot

##### Generate off-diagonal fully connected graph

In [6]:

# Generate off-diagonal fully connected graph
off_diag = np.ones([5, 5]) - np.eye(5)

rec_rel = np.array(encode_onehot(np.where(off_diag)[0]), dtype=np.float32)
send_rel = np.array(encode_onehot(np.where(off_diag)[1]), dtype=np.float32)
rec_rel = torch.FloatTensor(rec_rel)
send_rel = torch.FloatTensor(send_rel)

rec_rel = rec_rel.to(device)
send_rel = send_rel.to(device)

## 2. Load and prepare the topology estimator model blocks

### Encoder

In [7]:
from topology_estimation.config import TopologyEstimatorConfig
from topology_estimation.encoder_blocks import Encoder
from torchinfo import summary

tp_config = TopologyEstimatorConfig()
tp_config.set_encoder_params()

encoder = Encoder(n_timesteps=n_timesteps, 
                  n_dims=n_dims,
                  pipeline=tp_config.encoder_pipeline, 
                  n_edge_types=tp_config.n_edge_types, 
                  is_residual_connection=tp_config.is_residual_connection,
                  edge_emd_configs=tp_config.edge_emb_configs_enc, 
                  node_emd_configs=tp_config.node_emb_configs_enc, 
                  drop_out_prob=tp_config.dropout_prob_enc,
                  batch_norm=tp_config.batch_norm_enc, 
                  attention_output_size=tp_config.attention_output_size)

encoder.set_input_graph(rec_rel, send_rel)
enocder = encoder.to(device)

# print(summary(encoder, (64, 5, n_timesteps, n_dims)))
print(encoder)

Encoder(
  (emb_fn_dict): ModuleDict(
    (1/node_emd1): MLP(
      (layers): ModuleList(
        (0): Linear(in_features=196, out_features=64, bias=True)
        (1): ReLU()
        (2): Dropout(p=0.0, inplace=False)
        (3): Linear(in_features=64, out_features=32, bias=True)
        (4): ReLU()
        (5): Dropout(p=0.0, inplace=False)
        (6): Linear(in_features=32, out_features=16, bias=True)
        (7): ReLU()
        (8): Dropout(p=0.0, inplace=False)
        (9): Linear(in_features=16, out_features=8, bias=True)
      )
    )
    (1/node_emd2): MLP(
      (layers): ModuleList(
        (0): Linear(in_features=8, out_features=64, bias=True)
        (1): ReLU()
        (2): Dropout(p=0.0, inplace=False)
        (3): Linear(in_features=64, out_features=32, bias=True)
        (4): ReLU()
        (5): Dropout(p=0.0, inplace=False)
        (6): Linear(in_features=32, out_features=16, bias=True)
        (7): ReLU()
        (8): Dropout(p=0.0, inplace=False)
        (9): Linear

### Decoder

In [8]:
from topology_estimation.decoder_blocks import Decoder

tp_config.set_decoder_params()

decoder = Decoder(n_dim=n_dims,
                  msg_out_size=tp_config.msg_out_size,
                  n_edge_types=tp_config.n_edge_types,
                  skip_first=tp_config.skip_first_edge_type,
                  edge_mlp_config=tp_config.edge_mlp_config_dec,
                  recurrent_emd_type=tp_config.recurrent_emd_type,
                  out_mlp_config=tp_config.out_mlp_config_dec,
                  do_prob=tp_config.dropout_prob_dec,
                  is_batch_norm=tp_config.is_batch_norm_dec)


# generate random edge matrix
edge_matrix = torch.rand((64, 20, 2))
edge_matrix = edge_matrix.to(device)

decoder.set_input_graph(rec_rel, send_rel)
decoder.set_edge_matrix(edge_matrix)
decoder.set_run_params()

decoder = decoder.to(device)

# print(summary(decoder, (64, 5, n_timesteps, n_dims)))
print(decoder)

Decoder(
  (edge_mlp_fn): ModuleList(
    (0-1): 2 x MLP(
      (layers): ModuleList(
        (0): Linear(in_features=128, out_features=64, bias=True)
        (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Tanh()
        (3): Dropout(p=0, inplace=False)
        (4): Linear(in_features=64, out_features=32, bias=True)
        (5): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (6): Tanh()
        (7): Dropout(p=0, inplace=False)
        (8): Linear(in_features=32, out_features=16, bias=True)
        (9): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (10): Tanh()
        (11): Dropout(p=0, inplace=False)
        (12): Linear(in_features=16, out_features=64, bias=True)
        (13): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
  )
  (recurrent_emb_fn): GRU(
    (input_u): Linear(in_features=4, out_features=64,

### NRI model (Combine Encoder and Decoder blocks)

In [9]:
from topology_estimation.nri import NRI


nri_model = NRI(encoder, decoder)
nri_model.set_run_params()
nri_model.set_input_graph(rec_rel, send_rel)

print(nri_model)

NRI(
  (encoder): Encoder(
    (emb_fn_dict): ModuleDict(
      (1/node_emd1): MLP(
        (layers): ModuleList(
          (0): Linear(in_features=196, out_features=64, bias=True)
          (1): ReLU()
          (2): Dropout(p=0.0, inplace=False)
          (3): Linear(in_features=64, out_features=32, bias=True)
          (4): ReLU()
          (5): Dropout(p=0.0, inplace=False)
          (6): Linear(in_features=32, out_features=16, bias=True)
          (7): ReLU()
          (8): Dropout(p=0.0, inplace=False)
          (9): Linear(in_features=16, out_features=8, bias=True)
        )
      )
      (1/node_emd2): MLP(
        (layers): ModuleList(
          (0): Linear(in_features=8, out_features=64, bias=True)
          (1): ReLU()
          (2): Dropout(p=0.0, inplace=False)
          (3): Linear(in_features=64, out_features=32, bias=True)
          (4): ReLU()
          (5): Dropout(p=0.0, inplace=False)
          (6): Linear(in_features=32, out_features=16, bias=True)
          (7): R

## 3. Training topology estimator model

In [10]:
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import RichProgressBar

tp_config.set_training_params()
nri_model.set_training_params()

logger = TensorBoardLogger('logs/trials', name='nri', version=version)

trainer = Trainer(
    max_epochs=tp_config.max_epochs,
    logger=logger,
    enable_progress_bar=True,
    log_every_n_steps=1,)

trainer.fit(model=nri_model, train_dataloaders=train_loader)

ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Anaconda3\envs\afd_env\Lib\site-packages\pytorch_lightning\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
You are using a CUDA device ('NVIDIA GeForce RTX 3050 Ti Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type    | Params | Mode 
----------------------------------

Epoch 4: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 5/5 [00:01<00:00,  2.97it/s, v_num=19, train_edge_accuracy=0.510, train_loss=109.0, train_loss_encoder=-2.73, train_loss_decoder=112.0]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 5/5 [00:01<00:00,  2.80it/s, v_num=19, train_edge_accuracy=0.510, train_loss=109.0, train_loss_encoder=-2.73, train_loss_decoder=112.0]


#### Plot and upload

In [None]:
from tensorboard.backend.event_processing import event_accumulator
import os

event_dir = os.path.join('model_logs', 'trials', 'nri_model_trial3', 'version_0')
print(event_dir)

ea = event_accumulator.EventAccumulator(event_dir)
ea.Reload()

# List all tags
# print(ea.Tags())

loss_events = ea.Scalars('train_loss')
losses = [event.value for event in loss_events]
steps = [event.step for event in loss_events]

print("Steps:", steps)
print("Losses:", losses)



In [None]:
import matplotlib
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter
import io
from pytorch_lightning.loggers import TensorBoardLogger
from PIL import Image
import numpy as np

fig, ax = plt.subplots()
ax.plot(steps, losses, label='Train Loss')
ax.set_xlabel('Steps')
ax.set_ylabel('Loss')
ax.legend()

# Convert matplotlib fig to image
buf = io.BytesIO()
fig.savefig(buf, format='png', dpi=1000)
buf.seek(0)
image = Image.open(buf)
image_np = np.array(image)

writer = SummaryWriter("model_logs\\trials\\nri_model_trial3")

writer.add_image("test_plot", image_np.transpose(2, 0, 1), global_step=0)
buf.close()
plt.close(fig)

### Trying the config file stuff

In [2]:
from topology_estimation.config import TopologyEstimatorConfig, SelectTopologyEstimatorModel
from data.config import DataConfig
from data.load import load_spring_particle_data
tp_config = TopologyEstimatorConfig()
data_config = DataConfig()

if tp_config.is_nri:
    tp_config.set_encoder_params()
    tp_config.set_decoder_params()
    
# load data
data_config = DataConfig()
data_config.set_train_valid_dataset()

# get node and edge dataset path from which data will be loaded
node_ds_paths, edge_ds_paths = data_config.get_dataset_paths()

# load datalaoders
train_loader, valid_loader, test_loader = load_spring_particle_data(node_ds_paths, edge_ds_paths)

dataiter = iter(train_loader)
data = next(dataiter)

n_nodes = data[0].shape[1]
n_timesteps = data[0].shape[2]
n_dims = data[0].shape[3]

In [3]:
import os

tp_config.set_log_path(data_config, n_timesteps)
print(tp_config.log_path)

os.makedirs(tp_config.log_path, exist_ok=True)

logs\spring_particles\P005\scenario_1\std_nri\enc=mlp_1_dec=gru\dp-49\healthy\H1_[OG]\_no_fex\v1


In [12]:
tp_config.check_if_version_exists()

Version 1 already exists in the log path 'logs\spring_particles\P005\scenario_1\std_nri\enc=mlp_1_dec=gru\dp-49\healthy\H1_[OG]\_no_fex\v2'.
Removed version 1 from the log path logs\spring_particles\P005\scenario_1\std_nri\enc=mlp_1_dec=gru\dp-49\healthy\H1_[OG]\_no_fex\v2.


In [9]:
print(tp_config.log_path)
os.makedirs(tp_config.log_path, exist_ok=True)

logs\spring_particles\P005\scenario_1\std_nri\enc=mlp_1_dec=gru\dp-49\healthy\H1_[OG]\_no_fex\v2


In [13]:
from topology_estimation.config import SelectTopologyEstimatorModel

model_selector = SelectTopologyEstimatorModel(application='spring_particles',
                                              machine='P005',
                                              scenario='scenario_1')

ckpt = model_selector.select_and_get_ckpt()
print(f"Selected checkpoint: {ckpt}")


Available version paths:
0: logs/spring_particles\P005\scenario_1\std_nri\enc=mlp1_dec=gru\dp-50\healthy\H1_[A1+A2+G]_+_H2_[G+A1]\_no_fex\v1
1: logs/spring_particles\P005\scenario_1\std_nri\enc=mlp1_dec=gru\dp-50\healthy\H1_[A1+A2+G]_+_H2_[G+A1]\_no_fex\v28
2: logs/spring_particles\P005\scenario_1\std_nri\enc=mlp1_dec=gru\dp-50\healthy\H1_[A1+A2+G]_+_H2_[G+A1]\fex_1\v1
3: logs/spring_particles\P005\scenario_1\std_nri\enc=mlp_1_dec=gru\dp-49\healthy\H1_[OG]\_no_fex\v1


ValueError: invalid literal for int() with base 10: ''