In [5]:
import os
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from hmm import HMM
import re
import pickle

In [2]:
# define useful functions
def extract_prefix(text, pattern):
    """Return the label captured at the start of ``text`` by ``pattern``.

    ``pattern`` is applied with ``re.match``, so it only matches at the
    beginning of ``text``. The first capture group is returned when it is
    non-empty; otherwise the second capture group is returned.

    Args:
        text: String to inspect (the notebook passes file names).
        pattern: Regular expression providing the capture groups.

    Returns:
        The captured prefix, or ``None`` when ``pattern`` does not match.
    """
    found = re.match(pattern, text)
    if found is None:
        return None

    # `or` short-circuits, so group 2 is only consulted when group 1 is empty/unset.
    return found.group(1) or found.group(2)


def quantization_model(data, n_clusters):
    """Fit one k-means quantizer on the recordings of every class in ``data``.

    Args:
        data: Mapping whose values are iterables of 2-D arrays; the first
            column of each array is treated as time and discarded.
        n_clusters: Number of k-means clusters to fit.

    Returns:
        The fitted ``KMeans`` instance.
    """
    # Flatten {label: [recording, ...]} into a single list of recordings.
    recordings = [rec for group in data.values() for rec in group]

    # Stack all rows and drop the time column.
    features = np.concatenate(recordings, axis=0)[:, 1:]

    quantizer = KMeans(n_clusters=n_clusters, random_state=0, n_init="auto")
    quantizer.fit(features)
    return quantizer

def train_model(data, key, n_clusters, n_hidden_states, q_model):
    """Train, save and plot diagnostics for the HMM of one gesture class.

    All recordings under ``data[key]`` are concatenated, the first (time)
    column is dropped, and the remaining columns are converted to discrete
    symbols with ``q_model.predict``. An ``HMM`` is fitted on that symbol
    sequence and saved to ``pretrained_models/hmm_{key}.json``. Two figures are
    written to ``plots/``: the quantized sequence and the negated values
    returned by ``HMM.fit``.

    Args:
        data: Mapping from class label to a list of 2-D arrays whose first
            column is time.
        key: Label in ``data`` to train on; also used in the output file names.
        n_clusters: Passed to ``HMM`` as ``n_obs``.
            NOTE(review): presumably must equal the cluster count of
            ``q_model`` - confirm against the HMM implementation.
        n_hidden_states: Passed to ``HMM`` as ``n_hidden``.
        q_model: Fitted quantizer exposing ``predict``.

    Returns:
        None. Results are written to disk.
    """
    # savefig does not create missing directories (and HMM.save presumably
    # does not either), so make sure both output folders exist up front.
    os.makedirs('pretrained_models', exist_ok=True)
    os.makedirs('plots', exist_ok=True)

    X = np.concatenate(data[key], axis=0)
    X = X[:, 1:]  # remove time column

    y = q_model.predict(X)

    model = HMM(n_hidden=n_hidden_states, n_obs=n_clusters, verbose=False)
    loss = model.fit(y)
    model.save(filepath=f'pretrained_models/hmm_{key}.json')

    # Explicit figures that are closed after saving: nothing leaks into the
    # pyplot global state and no empty figure is left behind for the notebook
    # to render.
    fig, ax = plt.subplots()
    ax.plot(y)
    ax.set(title=f'quantized observations: {key}',
           xlabel='sample index', ylabel='cluster index')
    fig.savefig(f'plots/quantized_{key}.png')
    plt.close(fig)

    fig, ax = plt.subplots()
    ax.plot(-np.array(loss))  # plotted negated, as in the original notebook
    ax.set(title=f'negated fit loss: {key}')
    fig.savefig(f'plots/logloss_{key}.png')
    plt.close(fig)

# Training

The data contains six different motions: Wave, Infinity, Eight, Circle, Beat3, Beat4

In [6]:
# Group the training recordings by the gesture label parsed from each file name.
data = {}
train_dir = 'data/train'

# Label = "beat" followed by digits (e.g. beat3), otherwise the leading run of
# non-digit characters (e.g. circle).
pattern = r"^(beat\d+)|([^\d]+)"

# os.listdir returns entries in arbitrary order; sorting keeps the order of the
# recordings (and therefore of everything fitted on them) stable across runs.
files = sorted(os.listdir(train_dir))
for fn in files:
    # Named `label` rather than `type` so the builtin is not shadowed.
    label = extract_prefix(fn, pattern)
    x = np.loadtxt(os.path.join(train_dir, fn))
    data.setdefault(label, []).append(x)

Each recording has seven columns: `ts, Wx, Wy, Wz, Ax, Ay, Az`
(time in milliseconds, 3-axis gyroscope in rad/s, 3-axis accelerometer in m/s²).

In [7]:
# Hyperparameters used by both the training and the testing cells.
n_clusters = 50  # number of k-means clusters; also passed to HMM as n_obs
n_hidden_states = 10  # passed to HMM as n_hidden

# Fit a single quantizer on the recordings of every gesture class.
q_model = quantization_model(data, n_clusters)

In [8]:
# Persist the fitted quantizer; the context manager guarantees the file is
# flushed and closed even if pickling raises.
with open('pretrained_models/kmeans_50.pkl', 'wb') as f:
    pickle.dump(q_model, f)

In [13]:
# Train, save and plot one HMM per gesture label found in the training data.
for key in data:
    train_model(data, key, n_clusters, n_hidden_states, q_model)

<Figure size 640x480 with 0 Axes>

# Testing

In [9]:
# TODO: point test_dir at the directory that holds the recordings to classify
# (it currently targets 'data/val').
test_dir = 'data/val'

In [12]:
# Reload the quantizer that was pickled to this path earlier in the notebook.
# NOTE(review): pickle.load can execute arbitrary code - only load files you
# created yourself or otherwise trust.
with open("pretrained_models/kmeans_50.pkl", "rb") as f:
    q_model = pickle.load(f)

In [14]:
# Score every test recording against each pretrained HMM.
# Relies on n_hidden_states, n_clusters and q_model from the cells above.
model_path = 'pretrained_models/'
model_name = ['hmm_beat3', 'hmm_beat4', 'hmm_circle', 'hmm_eight', 'hmm_inf', 'hmm_wave']
results = {}

# Load each HMM once instead of re-reading all of them for every test file.
# NOTE(review): assumes HMM.predict does not modify the model - confirm.
models = []
for m in model_name:
    model = HMM(n_hidden=n_hidden_states, n_obs=n_clusters, verbose=False)
    model.load(filepath=f'{model_path}{m}.json')
    models.append(model)

# Sorted so the result order does not depend on the arbitrary os.listdir order.
files = sorted(os.listdir(test_dir))
for fn in files:
    x = np.loadtxt(os.path.join(test_dir, fn))
    x = x[:, 1:]  # remove time column

    y = q_model.predict(x)

    # One score per model, in the same order as model_name.
    results[fn] = [model.predict(y) for model in models]

In [15]:
# Show the raw per-file scores; each list follows the order of model_name.
results

{'circle31.txt': [-5570.778230347408,
  -4034.2529190469036,
  -266.95294916283655,
  -11568.187821202006,
  -11568.187821202006,
  -11568.187821202006],
 'beat3_31.txt': [-749.2817548369553,
  -921.0345745400232,
  -12449.981424876627,
  -7109.6727791937665,
  -11451.808963923431,
  -6352.691317430477],
 'inf31.txt': [-6950.617999659129,
  -8554.63179049122,
  -14663.080375151883,
  -4067.0538031143165,
  -490.22800139279667,
  -9352.333466865295],
 'beat4_31.txt': [-1720.7516574551764,
  -775.7216855536545,
  -13550.998035732659,
  -14018.925284817647,
  -6549.664136303634,
  -7400.683331013022],
 'eight31.txt': [-7483.572796925468,
  -7831.512305257039,
  -10978.726021395534,
  -792.7728915528071,
  -3578.3804842837285,
  -6481.384839868422]}

In [16]:
# report top scoring model for each test file
for k,v in results.items():
    print(f'{k}: {model_name[np.argmax(v)]}')

circle31.txt: hmm_circle
beat3_31.txt: hmm_beat3
inf31.txt: hmm_inf
beat4_31.txt: hmm_beat4
eight31.txt: hmm_eight
