# Notebook

## Setup

In [1]:
In [1]: %load_ext kedro.ipython
import pandas as pd
import numpy as np

## Load Datasets

In [2]:
# Fetch the datasets used by this notebook from the data catalog, in the same
# order as before. `catalog` is not defined in this notebook; it is presumably
# provided by the `kedro.ipython` extension (TODO confirm).

# `clean_*` datasets
alarms, status = (
    catalog.load(name) for name in ("clean_alarms", "clean_station_overviews")
)
# Currently not loaded:
# sessions = catalog.load("clean_charging_sessions")

# `map_*` datasets
sn_map = catalog.load("map_serial_number")
# Currently not loaded:
# ss_map = catalog.load("map_station_status")
# ns_map = catalog.load("map_network_status")
# a_map = catalog.load("map_alarm")

# `domain_*` datasets
domain_alarms, domain_status, domain_observations = (
    catalog.load(name)
    for name in ("domain_alarms", "domain_status", "domain_observations")
)

## Hidden Markov Model (HMM)

In [3]:
# Display the category labels of the categorical `observation` column; these
# labels are the observation symbols referred to in the cells below.
domain_observations.observation.cat.categories


[1;35mIndex[0m[1m([0m[1m[[0m[32m'[0m[32m([0m[32m'[0mAvailable', [32m'Reachable'[0m[1m)[0m[32m', '[0m[1m([0m[32m'Faulted'[0m, [32m'Reachable'[0m[1m)[0m',
       [32m'[0m[32m([0m[32m'[0mUnavailable', [32m'Reachable'[0m[1m)[0m[32m', '[0m[1m([0m[32m'Unavailable'[0m, [32m'‚ä•'[0m[1m)[0m',
       [32m'[0m[32m([0m[32m'[0mUnreachable', [32m'Reachable'[0m[1m)[0m[32m', '[0m[1m([0m[32m'Unreachable'[0m, [32m'Unreachable'[0m[1m)[0m',
       [32m'Boot up'[0m, [32m'Bootup Due to POWER ON'[0m, [32m'Bootup Due to SOFT RESET'[0m,
       [32m'Bootup Due to SWITCH'[0m, [32m'Bootup Due to WATCHDOG'[0m,
       [32m'Circuit Sharing Current Reduced'[0m, [32m'Circuit Sharing Current Restored'[0m,
       [32m'Data Partition Full'[0m, [32m'Earth Fault Station In Service'[0m,
       [32m'Earth Fault Station Out Of Service'[0m, [32m'Fault Cleared'[0m, [32m'GFCI Hard Trip'[0m,
       [32m'IP Mismatch Detected'[0m, [32m'Mai

In [None]:
# Num of hidden states (num functional states * num network states * num events)
NUM_FUNCTIONAL_STATES = 3
NUM_NETWORK_STATES = 2
NUM_EVENTS = 7
N = NUM_FUNCTIONAL_STATES * NUM_NETWORK_STATES * NUM_EVENTS
print(f"Number of hidden states: N = {N}")

# Num of observation symbols.
# Use the size of the full category vocabulary rather than the number of
# distinct codes found in the data: `cat.codes.nunique()` skips categories that
# never occur and counts the -1 code pandas uses for missing values, either of
# which makes K disagree with the range of valid symbol indices.
K = len(domain_observations.observation.cat.categories)
print(f"Number of observation symbols: K = {K}")

# Base observation format
# NOTE(review): the sequence-preparation cell ("Step 2") reads a variable named
# `o_base`, but nothing in the visible notebook defines it. TODO: restore the
# code that builds `o_base` here so the notebook survives Restart & Run All.


Number of hidden states: N = 42
Number of observation symbols: K = 31


### Pomegranate Implementation

In [5]:
import os

# Configure PyTorch's CUDA allocator through its environment variable.
# NOTE(review): this is placed ahead of `import torch`, presumably so the setting
# is already in the environment when PyTorch initialises CUDA -- keep the order.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
import random
import torch
from torch.nn.utils.rnn import pad_sequence
from torch.masked import as_masked_tensor
from pomegranate.hmm import DenseHMM
from pomegranate.distributions import Categorical

In [6]:
num_states = N
num_observation_symbols = K

# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing on the first `.to(torch_device)`.
torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed torch's global RNG so the random initialisation below is reproducible
# across kernel restarts.
SEED = 42
torch.manual_seed(SEED)

# --- Step 1: Define Emission Distributions ---
# We create one Categorical emission distribution per hidden state
# (num_states in total), each over the num_observation_symbols symbols.
# They are initialised with random probabilities so the model has
# something to start with before training (EM algorithm).
dists = []
for _ in range(num_states):
    # Random positive weights, one row covering every observation symbol
    init_probs = torch.rand(1, num_observation_symbols)
    # Normalize so the row sums to 1.0
    init_probs = init_probs / init_probs.sum(dim=1, keepdim=True)

    # Create the distribution object on the selected device
    dists.append(Categorical(probs=init_probs).to(torch_device))
[d.probs for d in dists]


[1m[[0m
    Parameter containing:
[1;35mtensor[0m[1m([0m[1m[[0m[1m[[0m[1;36m0.0187[0m, [1;36m0.0504[0m, [1;36m0.0127[0m, [1;36m0.0367[0m, [1;36m0.0213[0m, [1;36m0.0311[0m, [1;36m0.0604[0m, [1;36m0.0093[0m, [1;36m0.0589[0m,
         [1;36m0.0150[0m, [1;36m0.0039[0m, [1;36m0.0141[0m, [1;36m0.0118[0m, [1;36m0.0343[0m, [1;36m0.0209[0m, [1;36m0.0433[0m, [1;36m0.0193[0m, [1;36m0.0609[0m,
         [1;36m0.0621[0m, [1;36m0.0196[0m, [1;36m0.0515[0m, [1;36m0.0564[0m, [1;36m0.0608[0m, [1;36m0.0272[0m, [1;36m0.0106[0m, [1;36m0.0106[0m, [1;36m0.0186[0m,
         [1;36m0.0071[0m, [1;36m0.0299[0m, [1;36m0.0613[0m, [1;36m0.0615[0m[1m][0m[1m][0m, [33mdevice[0m=[32m'cuda:0'[0m[1m)[0m,
    Parameter containing:
[1;35mtensor[0m[1m([0m[1m[[0m[1m[[0m[1;36m0.0456[0m, [1;36m0.0154[0m, [1;36m0.0379[0m, [1;36m0.0374[0m, [1;36m0.0138[0m, [1;36m0.0184[0m, [1;36m0.0218[0m, [1;36m0.0178[0m, [1;36m0.0407[0m,


In [8]:
# Sanity check: shape of the first state's emission-probability tensor.
dists[0].probs.shape

[1;35mtorch.Size[0m[1m([0m[1m[[0m[1;36m1[0m, [1;36m31[0m[1m][0m[1m)[0m

In [None]:
# --- Step 2: Prepare Observation Sequences ---
# Turn every sequence in `o_base` into an int64 tensor.
o = [torch.tensor(seq, dtype=torch.long) for seq in o_base]

# Training split: the leading 80% of the sequences, kept in their original order.
num_batches = int(len(o_base) * 0.8)
o_train = o[:num_batches]
# o_train = random.sample(o, num_batches)

# Right-pad the training sequences to a common length (batch dimension first),
# using -1 as the padding sentinel.
padded_o = pad_sequence(o_train, batch_first=True, padding_value=-1)

# True at real observations, False at padded positions.
# NOTE(review): a genuine -1 in the data would be masked out as well -- confirm
# that no sequence contains -1.
mask = padded_o.ne(-1)

# Bundle values and mask so padded positions can be ignored during training.
masked_o = as_masked_tensor(padded_o, mask)

# Append a trailing axis to match the expected input shape (batch_size, seq_length, 1).
X_train = masked_o.unsqueeze(-1)
print(f"Shape of training data: {X_train.shape}")

In [None]:
# --- Step 3: Initialize the HMM ---
# Build the DenseHMM from the list of emission distributions; verbose=True is
# kept so progress is reported during training.
model = DenseHMM(dists, verbose=True)
# Move the model to the same device as the distributions.
model = model.to(torch_device)

In [None]:
# --- Step 4: Train the HMM ---
# The training tensor is moved to `torch_device` first, the same device the
# model was moved to when it was created.
model.fit(X_train.to(torch_device))

In [None]:
# Save model
# The whole model object is serialised to a file in the current working
# directory. NOTE(review): files written this way are pickle-based -- only load
# them back from trusted sources.
torch.save(obj=model, f="hmm_model.pt")

In [None]:
# Get the Observation Matrix (State x Symbol)
# Each distribution's `probs` carries a leading axis of size 1 (the shape shown
# earlier for dists[0] is (1, 31)), so the rows are concatenated along dim 0 to
# get a 2-D (num_states, num_symbols) matrix. torch.stack would insert a new
# axis and produce (num_states, 1, num_symbols) instead.
obs_matrix = torch.cat([d.probs for d in model.distributions], dim=0)
print("Observation Matrix (State x Symbol):")
obs_matrix.round(decimals=3)

In [None]:
# Get the Transition Matrix (State x State)
# `model.edges` is exponentiated element-wise -- presumably because the model
# stores transitions as log-probabilities (TODO confirm against pomegranate).
trans_matrix = model.edges.exp()
print("Transition Matrix (State x State):")
trans_matrix.round(decimals=3)

In [None]:
# Show the observation categories as a Series, so each label appears next to
# its positional index.
pd.Series(domain_observations.observation.cat.categories)