In [1]:
import os
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from hmm import HMM
import re

In [5]:
def extract_prefix(text, pattern):
    match = re.match(pattern, text)
    if match:
        # If the first group (beat\d+_) is matched, use it; otherwise, use the second group
        prefix = match.group(1) if match.group(1) else match.group(2)
        return prefix
    
    return None


def quantization_model(data, n_clusters):
    X_all = []
    for v in data.values():
        X_all.extend(v)
    x_all = np.concatenate(X_all, axis=0)
    x_all = x_all[:,1:] # remove time column
    kmeans = KMeans(n_clusters=n_clusters, random_state=0, n_init="auto").fit(x_all)

    return kmeans

def train_model(data, key, n_clusters, n_hidden_states, q_model):
    X = np.concatenate(data[key], axis=0)
    X = X[:,1:] # remove time column

    y = q_model.predict(X)

    model = HMM(n_hidden=n_hidden_states, n_obs=n_clusters, verbose=False)
    loss = model.fit(y)
    model.save(filepath=f'pretrained_models/hmm_{key}.json')

    plt.plot(y)
    plt.savefig(f'plots/quantized_{key}.png')
    plt.clf()
    
    plt.plot(-np.array(loss))
    plt.savefig(f'plots/logloss_{key}.png')
    plt.clf()

The data contains six different motions: Wave, Infinity, Eight, Circle, Beat3, Beat4

In [3]:
data = {}
train_dir = 'data/train'

pattern = r"^(beat\d+)|([^\d]+)"

files = os.listdir(train_dir)
for fn in files:
    type = extract_prefix(fn, pattern)
    x = np.loadtxt(os.path.join(train_dir, fn))
    if type not in data:
        data[type] = [x]
    else:
        data[type].append(x)

ts , Wx, Wy, Wz, Ax, Ay, Az
(Time (millisecond), 3x Gyroscope (rad/sec), 3x Accelerometer (m/s2) )

In [6]:
# train models
n_clusters = 50
n_hidden_states = 10

q_model = quantization_model(data, n_clusters)

for key in data.keys():
    train_model(data, key, n_clusters, n_hidden_states, q_model)

<Figure size 640x480 with 0 Axes>

In [7]:
test_dir = 'data/val'
model_path = 'pretrained_models/'
model_name = ['hmm_beat3', 'hmm_beat4', 'hmm_circle', 'hmm_eight', 'hmm_inf', 'hmm_wave']  
results = {}

pattern = r"^(beat\d+)|([^\d]+)"
files = os.listdir(test_dir)
for fn in files:
    log_loss = []
    type = extract_prefix(fn, pattern)
    x = np.loadtxt(os.path.join(test_dir, fn))
    x = x[:,1:] # remove time column

    y = q_model.predict(x)
    for m in model_name:
        model = HMM(n_hidden=n_hidden_states, n_obs=n_clusters, verbose=False)
        model.load(filepath=f'{model_path}{m}.json')
        log_loss.append(model.predict(y))
                        
    results[fn] = log_loss

In [8]:
results

{'circle31.txt': [-5285.248467791619,
  -3692.729480671605,
  -233.20570070540225,
  -11568.187821202006,
  -11568.187821202006,
  -11568.187821202006],
 'beat3_31.txt': [-526.7971705886984,
  -764.7980122945303,
  -12450.459589257785,
  -6876.539848993832,
  -11522.776624889731,
  -6408.0567261040105],
 'inf31.txt': [-7214.001575112213,
  -8533.431762502509,
  -14629.21362424254,
  -3606.5779007096935,
  -551.5849858110192,
  -9909.444106972069],
 'beat4_31.txt': [-1660.593467886958,
  -766.3292184788799,
  -13521.889015969406,
  -13870.323738384723,
  -6141.987382377855,
  -7894.353832883502],
 'eight31.txt': [-7551.250290195274,
  -7928.065715852421,
  -10978.726021395534,
  -602.7805358537229,
  -3324.7213016509545,
  -6643.393489883381]}

In [9]:
for k,v in results.items():
    print(f'{k}: {model_name[np.argmax(v)]}')

circle31.txt: hmm_circle
beat3_31.txt: hmm_beat3
inf31.txt: hmm_inf
beat4_31.txt: hmm_beat4
eight31.txt: hmm_eight
