This notebook studies how the ground-truth states change from pre-void → void → post-void and, based on the observed transitions, builds the transition matrix for the HMM implementation.


Window sizes being used:
1. 5s_0.5
2. 3s

In [1]:
import numpy as np
import pandas as pd
from hmmlearn import hmm
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GroupKFold, GroupShuffleSplit

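3s (no overlap) — note: this section was headed `5s_0.5`, but the cell below loads the `3s_no` feature file.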

In [19]:
# Load the pre-extracted, window-level feature table used by the rest of this notebook.
# NOTE(review): this is an absolute, machine-specific path, so the notebook will not run
# on another machine without editing it — consider a configurable data directory.
data_path = "/home/edumaba/Public/MPhil_Thesis/Code/wear_uropatch/pipeline/data_extracted_features/three_class_pp_3s_no.csv"
features = pd.read_csv(data_path)

In [20]:
# X: every column except the target ('label') and the grouping key ('experiment_id').
# NOTE(review): center_time / start_time / end_time are not dropped here, so they stay in X
# — confirm that is intended before X is used as model input.
X = features.drop(columns=['label', 'experiment_id'])
# label_group: the label plus the experiment/time metadata needed to rebuild
# each experiment's label sequence in time order.
label_group = features[['label', 'experiment_id', 'center_time', 'start_time', 'end_time']]

In [21]:
# Split by experiment so that all windows sharing an experiment_id fall on the same side
# of the split. One split, test_size=0.2, fixed random_state for reproducibility.
groups = features['experiment_id']
splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
# split() yields (train, test) positional index arrays; only the single split is consumed.
train_idx, test_idx = next(splitter.split(X, label_group, groups))

In [22]:
# Apply the positional split indices to the label/metadata frame (not to X):
# only labels, experiment ids and times are needed to estimate transitions.
df_train, df_test = label_group.iloc[train_idx], label_group.iloc[test_idx]

In [23]:
# Display the training rows (label plus experiment/time metadata).
df_train

Unnamed: 0,label,experiment_id,center_time,start_time,end_time
0,pre-void,1,1.5,0.0,3.0
1,pre-void,1,4.5,3.0,6.0
2,pre-void,1,7.5,6.0,9.0
3,pre-void,1,10.5,9.0,12.0
4,pre-void,1,13.5,12.0,15.0
...,...,...,...,...,...
704,void,39,22.5,21.0,24.0
705,void,39,25.5,24.0,27.0
706,void,39,28.5,27.0,30.0
707,post-void,39,31.5,30.0,33.0


In [7]:
def calculate_transition_matrix_3class(df_train, state_col='label',
                                    id_col='experiment_id', time_col='center_time',
                                    states=None):
    """
    Estimate an HMM transition matrix from ground-truth label sequences.

    Rows of ``df_train`` are grouped by ``id_col``; within each group they are
    ordered by ``time_col`` and every pair of consecutive rows adds one count to
    ``transition_counts[current_state, next_state]``. Transitions are never
    counted across two different groups. Each row of the count matrix is then
    normalised to sum to 1.

    Parameters
    ----------
    df_train : pandas.DataFrame
        Must contain the columns named by ``state_col``, ``id_col`` and
        ``time_col``.
    state_col : str, default 'label'
        Column holding the state label of each row.
    id_col : str, default 'experiment_id'
        Column identifying which sequence (experiment) a row belongs to.
    time_col : str, default 'center_time'
        Column used to order the rows inside one sequence.
    states : sequence, optional
        Ordered state names defining the row/column order of the matrices.
        Defaults to ``['pre-void', 'void', 'post-void']``.

    Returns
    -------
    transmat : numpy.ndarray, shape (n_states, n_states)
        Row-normalised transition probabilities. A state with no observed
        outgoing transition gets a self-transition probability of 1.0.
    transition_counts : numpy.ndarray, shape (n_states, n_states)
        Raw transition counts (float dtype).
    states : list
        The state order used for both matrices.

    Raises
    ------
    KeyError
        If a sequence with at least two rows contains a label not in ``states``.
    ValueError
        If ``states`` contains duplicate entries.
    """
    if states is None:
        states = ['pre-void', 'void', 'post-void']
    else:
        states = list(states)
    if len(set(states)) != len(states):
        raise ValueError(f"states must be unique, got {states}")

    n_states = len(states)
    state_to_idx = {state: i for i, state in enumerate(states)}

    transition_counts = np.zeros((n_states, n_states))

    # Count transitions across all training experiments
    for exp_id, group in df_train.groupby(id_col):
        # Stable sort: rows that share a timestamp keep their input order, so the
        # counted transitions do not depend on the sort algorithm's tie handling.
        labels = group.sort_values(time_col, kind='mergesort')[state_col].to_numpy()

        # A sequence of fewer than two rows contains no transition.
        if len(labels) < 2:
            continue

        unknown = sorted({str(lab) for lab in labels if lab not in state_to_idx})
        if unknown:
            raise KeyError(
                f"Unknown label(s) {unknown} in {id_col}={exp_id!r}; "
                f"expected one of {states}"
            )

        idx = np.fromiter((state_to_idx[lab] for lab in labels),
                          dtype=np.intp, count=len(labels))
        # np.add.at accumulates repeated (current, next) index pairs correctly,
        # unlike fancy-index `+=`, which would apply each distinct pair only once.
        np.add.at(transition_counts, (idx[:-1], idx[1:]), 1)

    # Convert counts to probabilities
    row_sums = transition_counts.sum(axis=1, keepdims=True)
    transmat = np.divide(
        transition_counts, row_sums,
        out=np.zeros_like(transition_counts), where=row_sums > 0,
    )
    # Default: stay in same state if no transitions observed
    unseen = np.flatnonzero(row_sums[:, 0] == 0)
    transmat[unseen, unseen] = 1.0

    return transmat, transition_counts, states

In [8]:
# Estimate the transition matrix from the training split only (df_test is not used here).
transmat, transition_counts, states = calculate_transition_matrix_3class(df_train)

In [9]:
# Row-normalised transition probabilities: rows = current state, columns = next state,
# both ordered as in `states`.
transmat

array([[0.81097561, 0.18902439, 0.        ],
       [0.        , 0.88447653, 0.11552347],
       [0.        , 0.        , 1.        ]])

In [10]:
# Raw consecutive-window transition counts (same row/column layout as transmat).
transition_counts

array([[133.,  31.,   0.],
       [  0., 245.,  32.],
       [  0.,   0., 189.]])

In [11]:
# State order that defines the rows and columns of both matrices.
states

['pre-void', 'void', 'post-void']

In [12]:
# Exploratory check: with no iteration or aggregation this only displays the
# GroupBy object's repr, not the groups themselves.
df_train.groupby('experiment_id')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fe3b7dd29f0>

In [16]:
# Recompute the transition statistics through calculate_transition_matrix_3class
# rather than a second inline copy of the same counting/normalisation loop, so the
# notebook has a single implementation that cannot drift out of sync.
transmat, transition_counts, states = calculate_transition_matrix_3class(df_train)

# Module-level helpers that the inline version also defined; kept so any later
# cell relying on them still finds them.
n_states = len(states)
state_to_idx = {state: i for i, state in enumerate(states)}

In [17]:
# Transition counts recomputed by the cell above (rows = current state, columns = next state).
transition_counts

array([[133.,  31.,   0.],
       [  0., 245.,  32.],
       [  0.,   0., 189.]])

In [18]:
# Transition probabilities recomputed by the cell above (each row sums to 1).
transmat

array([[0.81097561, 0.18902439, 0.        ],
       [0.        , 0.88447653, 0.11552347],
       [0.        , 0.        , 1.        ]])