In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler

In [3]:
# Load the per-subband feature table and drop every row that contains a NaN.
# (Alternative input kept for reference: "./data/preprocessed/data.csv".)
df = pd.read_csv("./data/preprocessed/subband_data.csv").dropna()

In [77]:
# Frequency bands for which feature columns are selected.
subbands = ["alpha", "beta", "delta", "theta", "gamma", "sigma"]

# Features used per band. A wider set was tried and is currently disabled:
# ["energy", "activity", "mobility", "complexity"].
features = ["energy"]

# Column names have the form "<feature>_<subband>", ordered by subband first.
# The "<feature>_<subband>_rel" variants are intentionally not included.
columns = [
    f"{feature}_{subband}"
    for subband in subbands
    for feature in features
]

In [5]:
# Labels are read from the last column of the frame
# (presumably the sleep stage; np.unique below prints the distinct values).
data = df.to_numpy()
Y = data[:,-1]

# Features are selected by column name. A positional slice of `data` used to be
# assigned to X first, but it was overwritten on the very next line.
X = df[columns].to_numpy()

# Standardize every feature column: subtract its mean, then divide by its
# standard deviation (np.std default, i.e. population std).
X = X-np.mean(X, axis=0)
X = X/np.std(X, axis=0)

print(X.shape)
print(np.unique(Y))

(45222, 6)
[0. 1. 2. 3. 4.]


In [6]:
from IOHMM import UnSupervisedIOHMM
from IOHMM import OLS, DiscreteMNL, CrossEntropyMNL

In [101]:
# Names of the one-hot label columns; position i corresponds to sleep_stage == i.
y_columns = ['Awake', 'Stage 1', 'Stage 2', 'Stage 3/4', 'REM']

# Build the model table on df's own index. df has had rows dropped, so its index
# may contain gaps; starting from an empty frame would give df_data a fresh
# 0..n-1 index and the label Series below (which carry df's index) would then be
# aligned by label, shifting rows and introducing NaNs.
df_data = pd.DataFrame(index=df.index)

# Standardized copy of each selected feature column (zero mean, unit std).
for c in columns:
    x = df[c].to_numpy()
    x = x-np.mean(x, axis=0)
    x = x/np.std(x, axis=0)
    df_data[c] = x

# One 0/1 indicator column per sleep stage.
for i, y_column in enumerate(y_columns):
    df_data[y_column] = (df['sleep_stage'] == i).astype(int)

In [102]:
# Split by pid rather than by row, so all rows of one pid end up on the same
# side of the training / validation split.
SPLIT_SEED = 0          # fixed seed: a fresh kernel reproduces the same split
TRAIN_FRACTION = 0.01   # share of pids used for training (at least one pid)

pids = np.unique(df['pid'])

# Seeded random ordering of the positions 0..len(pids)-1.
idxs = np.random.default_rng(SPLIT_SEED).permutation(len(pids))

Ntraining = max(1, int(len(pids)*TRAIN_FRACTION))
training_idxs = idxs[:Ntraining]
validation_idxs = idxs[Ntraining:]

training_pids = pids[training_idxs]
validation_pids = pids[validation_idxs]

In [103]:
# One DataFrame (sequence) per training pid. The mask is converted to a NumPy
# array so rows are selected by position; df_data is built row-for-row from df,
# and this avoids depending on the two frames sharing the same index labels.
y = [df_data[(df['pid'] == i).to_numpy()] for i in training_pids]

In [106]:
# Unsupervised IOHMM with three hidden states and the EM settings given here.
clf = UnSupervisedIOHMM(
    num_states=3,
    max_EM_iter=100,
    EM_tol=1e-3,
)

# The same model class (CrossEntropyMNL, lbfgs solver) is used for the initial
# state, the transitions and the single emission.
clf.set_models(
    model_initial=CrossEntropyMNL(solver='lbfgs'),
    model_transition=CrossEntropyMNL(solver='lbfgs'),
    model_emissions=[CrossEntropyMNL(solver='lbfgs')],
)

# No covariates anywhere: every list is empty. Passing `columns` (and
# `[columns]` for the emissions) instead is the disabled alternative.
clf.set_inputs(
    covariates_initial=[],
    covariates_transition=[],
    covariates_emissions=[[]],
)

# A single emission whose outputs are the one-hot sleep-stage columns
# (disabled alternative: the raw 'sleep_stage' column).
clf.set_outputs([y_columns])

# y is the list of per-pid training DataFrames.
clf.set_data(y)

In [107]:
# Fit the configured IOHMM on the sequences registered with clf.set_data.
clf.train()

In [97]:
# Sanity check: number of selected feature columns.
print(len(columns))

6


In [108]:
# Inspect the fitted transition models: how many there are, and the shape of
# the first one's coefficient array.
# NOTE(review): presumably one model per hidden state, intercept-only because
# no transition covariates were supplied; confirm against the IOHMM docs.
print(len(clf.model_transition))
print(clf.model_transition[0].coef.shape)

3
(3, 1)


In [110]:
# Inspect the fitted emission models: length of the outer list, length of the
# first inner list, and the coefficient shape of the first model.
# NOTE(review): presumably outer = hidden state, inner = emission; confirm.
print(len(clf.model_emissions))
print(len(clf.model_emissions[0]))
print(clf.model_emissions[0][0].coef.shape)

# Print the flattened coefficients of each entry's first emission model so the
# entries can be compared side by side.
for m in clf.model_emissions:
    print(m[0].coef.flatten())

3
1
(5, 1)
[ 0.89341363 -0.43026775  0.62790416 -0.82599908 -0.26505096]
[ 0.91395592 -0.42674055  0.61766986 -0.78550091 -0.31938432]
[ 0.91064275 -0.41086215  0.56814959 -0.77221531 -0.29571489]


In [109]:
# Shape of the initial-state model's coefficient array.
print(clf.model_initial.coef.shape)

(3, 1)


In [14]:
# Exploratory: list every attribute and method available on the fitted model.
dir(clf)

['EM_tol',
 'E_step',
 'M_step',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_from_setup',
 '_initialize',
 'covariates_emissions',
 'covariates_initial',
 'covariates_transition',
 'dfs_logStates',
 'from_config',
 'from_json',
 'inp_emissions',
 'inp_emissions_all_sequences',
 'inp_initials',
 'inp_initials_all_sequences',
 'inp_transitions',
 'inp_transitions_all_sequences',
 'log_epsilons',
 'log_gammas',
 'log_likelihood',
 'log_likelihoods',
 'max_EM_iter',
 'model_emissions',
 'model_initial',
 'model_transition',
 'num_emissions',
 'num_seqs',
 'num_states',
 'out_emissions',
 'out_emissions_all_sequences',
 'responses_emissions',
 'set_data',
 'set_inputs