/
HMM_MultiSeq_MultiObs_leftRight.py
89 lines (66 loc) · 3.09 KB
/
HMM_MultiSeq_MultiObs_leftRight.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
############################################################################################################
# IMPORTING LIBRARIES
# ##########################################################################################################
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from hmmlearn import hmm
# Feature scalers created once at module level. Only `minmax` is used by the
# code visible in this script; `standard` is kept available under its
# original name.
standard = StandardScaler()
minmax = MinMaxScaler()
############################################################################################################
# DATASET PREPARATION
# ##########################################################################################################
# The data file is expected in a `CMAPSSData` folder below the current
# working directory.
dir_path = os.getcwd()
dataset = 'train_FD001.txt'
data_file = dir_path + r'/CMAPSSData/' + dataset
# Space-separated file without a header row; any column that contains a NaN
# after parsing is discarded.
raw = pd.read_csv(data_file, sep=" ", header=None, skipinitialspace=True)
df = raw.dropna(axis=1)
# Name the first five columns; the remaining columns keep their integer labels.
leading_names = ['unit', 'cycle', 'W1', 'W2', 'W3']
df = df.rename(columns=dict(enumerate(leading_names)))
# df_A: the first two columns (unit, cycle).
# df_S: the columns at positions 5..25, used below as the HMM observations.
df_A = df.iloc[:, [0, 1]]
df_S = df.iloc[:, list(range(5, 26))]
############################################################################################################
# **********************************************************************************************************
# HIDDEN MARKOV MODEL (LIBRARY)
# **********************************************************************************************************
# ##########################################################################################################
# Min-max scale the selected columns (scaler fitted on the full data set) and
# wrap the result in a DataFrame so pandas helpers can be used for filtering.
df_hmm = pd.DataFrame(minmax.fit_transform(df_S))
# Discard columns that take only one or two distinct values. Counting distinct
# values once is sufficient: dropping a column does not change the counts of
# the remaining ones.
distinct_counts = df_hmm.nunique()
cols_to_drop = distinct_counts[distinct_counts.isin([1, 2])].index
df_hmm = df_hmm.drop(cols_to_drop, axis=1).to_numpy()
# One observation sequence per unit. Its length is the number of rows recorded
# for that unit, listed in file order so it lines up with the rows of df_hmm.
# Counting rows (instead of taking the highest cycle number) stays correct when
# cycle numbering does not start at 1 or has gaps, or when unit ids are not
# exactly 1..N, and it avoids re-filtering the whole frame once per unit.
lengths = df.groupby('unit', sort=False).size().tolist()
# Number of hidden states in the left-to-right chain.
num_states = 15
# init_params="cm": hmmlearn initialises only covariances and means, so the
# start probabilities and transition matrix assigned below are used as given.
# params="cmt": training updates covariances, means and transitions; the start
# probabilities are left untouched.
remodel = hmm.GaussianHMM(
    n_components=num_states,
    n_iter=500,
    verbose=True,
    init_params="cm",
    params="cmt",
)
# Left-to-right topology: every state either stays where it is or moves to its
# direct successor, each with probability 0.5 (main diagonal + first
# super-diagonal).
transmat = 0.5 * (np.eye(num_states) + np.eye(num_states, k=1))
# The last state has no successor, so all of its probability mass stays on it.
transmat[-1, -1] = 1.0
# Always start in the first state.
startprob = np.zeros(num_states)
startprob[0] = 1.0
remodel.startprob_ = startprob
remodel.transmat_ = transmat
# Train on all sequences at once; `lengths` marks where each one ends.
remodel = remodel.fit(df_hmm, lengths)
# Decode the most likely hidden-state path. `lengths` has to be passed here as
# well: without it the concatenated data is decoded as one single sequence, so
# each unit would continue from the state in which the previous unit ended
# instead of starting in the first state -- and in a left-to-right model there
# is no transition back to earlier states.
state_seq = remodel.predict(df_hmm, lengths)
# Cut the flat state array back into one array per unit, at the same sequence
# boundaries that were used for decoding.
pred = np.split(state_seq, np.cumsum(lengths)[:-1])
# Plot the decoded state trajectory of the first three units, one figure each.
for fig_num, unit_states in enumerate(pred[:3]):
    plt.figure(fig_num)
    plt.plot(unit_states)
    plt.xlabel('# Flights')
    plt.ylabel('HMM states')
plt.show()