This notebook demonstrates how poorly the clustering methods perform when used on their own.

In [1]:
import cov_aff_inv_util as caiu
import cov_util as cu
import SE3util as se3
from eeg_generator_augmented import DataGenerator
import numpy as np
import pandas as pd

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Load samples

In [2]:
# --- Data generator configuration -------------------------------------------
batch_size = 50
sample_dim = 32
steps_back = 1
# Plain int 6: parentheses without a trailing comma do not create a tuple.
# NOTE(review): confirm DataGenerator expects an int here rather than (6,).
target_dim = 6
# n_evals cannot exceed sample_dim
n_evals = 32
mode = 'unaugmented'
dist = 'gmm'

generator = DataGenerator(
    batch_size=batch_size,
    dim1=sample_dim,
    target_dim=target_dim,
    steps_back=steps_back,
    mode=mode,
    n_evals=n_evals,
    dist=dist,
)

# X_test / y_test are read as notebook-level globals by the test_* functions below.
X_test, y_test = generator.test_generation(1000)

list_IDs and test_IDs filled


Test with event combinations

In [26]:
# Load precomputed per-event arrays from .npy files in the current working directory.
# event_means[j] and event_covmat_invs[j] are indexed together by the test functions
# below; by their names they hold each event cluster's mean and inverse covariance
# (assumption -- the code that produced these files is not part of this notebook).
event_means = np.load('eeg_event_means.npy')
event_covmat_invs = np.load('eeg_event_covmat_invs.npy')
# event_evals[j] is multiplied together (np.prod) in test_gmm_clusters.
event_evals = np.load('eeg_event_evals.npy')
# `events` is read as a global by test_cov_clusters / test_gmm_clusters to turn a
# cluster index into a predicted label row.
events = np.load('eeg_events.npy')

In [34]:
def test_cov_clusters(n, event_means, event_covmat_invs, print_freq=1000):
    samples = X_test[:n]
    targets = y_test[:n]
        
    preds = np.empty((n,6), dtype=np.uint8)
    for i, vec in enumerate(samples):
        if i%print_freq==0:
            print(i, 'preds made')
        targ = np.array(targets[i])
        shortest, best = np.inf, None
        for j in range(len(event_covmat_invs)):
            mat = event_covmat_invs[j]
            mu = event_means[j]
            diff = vec-mu
            diff = diff.flatten()
            dist = (diff.T@mat@diff)**0.5
            if dist<shortest:
                shortest=dist
                best=j
        preds[i] = events[j]
    correct = float(0)
    for i,pred in enumerate(preds):
        t = targets[i]
        if np.array_equal(pred,t):
            correct += 1
    return correct, correct/n
        

In [35]:
# Evaluate test_cov_clusters on the first 10000 entries of X_test / y_test; the
# cell displays the returned (correct count, accuracy) tuple.
# NOTE(review): X_test comes from generator.test_generation(1000) -- confirm it
# holds at least 10000 samples, otherwise the slice is shorter than n.
test_cov_clusters(10000, event_means, event_covmat_invs)

0 preds made
1000 preds made
2000 preds made
3000 preds made
4000 preds made
5000 preds made
6000 preds made
7000 preds made
8000 preds made
9000 preds made


(0.0, 0.0)

In [37]:
def test_gmm_clusters(n, event_means, event_covmat_invs, event_evals, print_freq=1000):
    samples = X_test[:n]
    targets = y_test[:n]
        
    preds = np.empty((n,6), dtype=np.uint8)
    for i, vec in enumerate(samples):
        if i%print_freq==0:
            print(i, 'preds made')
        targ = np.array(targets[i])
        highest, best = -np.inf, None
        for j in range(len(event_covmat_invs)):
            mat = event_covmat_invs[j]
            mu = event_means[j]
            diff = vec-mu
            diff = diff.flatten()
            dist = (diff.T@mat@diff)**0.5
            prob = np.exp(-dist/2)/(np.prod(event_evals[j])**0.5)
            if prob>highest:
                highest=prob
                best=j
        preds[i] = events[j]
    correct = float(0)
    for i,pred in enumerate(preds):
        t = targets[i]
        if np.array_equal(pred,t):
            correct += 1
    return correct, correct/n

In [38]:
# Evaluate test_gmm_clusters on the first 10000 entries of X_test / y_test; the
# cell displays the returned (correct count, accuracy) tuple.
test_gmm_clusters(10000, event_means, event_covmat_invs, event_evals)

0 preds made
1000 preds made
2000 preds made
3000 preds made
4000 preds made
5000 preds made
6000 preds made
7000 preds made
8000 preds made
9000 preds made


(0.0, 0.0)

Test with sub_event combinations

In [39]:
# Load precomputed per-sub-event arrays from .npy files in the current working
# directory. The test_sub_* functions below unpack entry j of each array into a
# pair (index 0 and index 1), one per candidate value of label position j.
sub_event_means = np.load('eeg_sub_event_means.npy')
sub_event_covmat_invs = np.load('eeg_sub_event_covmat_invs.npy')
sub_event_evals = np.load('eeg_sub_event_evals.npy')


In [43]:
def test_sub_cov_clusters(n, sub_event_means, sub_event_covmat_invs, sub_event_evals, print_freq=1000):
    samples = X_test[:n]
    targets = y_test[:n]

    preds = np.empty((n,6), dtype=np.uint8)
    for i, vec in enumerate(samples):
        if i%print_freq==0:
            print(i, 'preds made')
        targ = targets[i]
        for j in range(len(sub_event_covmat_invs)):
            (mat0,mat1) = sub_event_covmat_invs[j]
            (mu0,mu1) = sub_event_means[j]
            diff0=(vec-mu0).flatten()
            diff1=(vec-mu1).flatten()
            dist0 = (diff0.T@mat0@diff0)**0.5
            dist1 = (diff1.T@mat1@diff1)**0.5
            if dist0<dist1:
                preds[i,j]=0
            else:
                preds[i,j]=1
    correct = float(0)

    for i,pred in enumerate(preds):
        if np.allclose(pred,targets[i]):
            correct += 1
    return correct, correct/n
        

In [44]:
# Evaluate test_sub_cov_clusters on the first 10000 entries of X_test / y_test;
# the cell displays the returned (correct count, accuracy) tuple.
test_sub_cov_clusters(10000, sub_event_means, sub_event_covmat_invs, sub_event_evals)

0 preds made
1000 preds made
2000 preds made
3000 preds made
4000 preds made
5000 preds made
6000 preds made
7000 preds made
8000 preds made
9000 preds made


(8.0, 0.0008)

In [45]:
def test_sub_gmm_clusters(n, sub_event_means, sub_event_covmat_invs, sub_event_evals, print_freq=1000):
    samples = X_test[:n]
    targets = y_test[:n]

    preds = np.empty((n,6), dtype=np.uint8)
    for i, vec in enumerate(samples):
        if i%print_freq==0:
            print(i, 'preds made')
        targ = targets[i]
        for j in range(len(sub_event_covmat_invs)):
            (mat0,mat1) = sub_event_covmat_invs[j]
            (mu0,mu1) = sub_event_means[j]
            (ev0,ev1) = sub_event_evals[j]
            ev0 = np.prod(ev0)
            ev1 = np.prod(ev1)
            diff0=(vec-mu0).flatten()
            diff1=(vec-mu1).flatten()
            dist0 = (diff0.T@mat0@diff0)**0.5
            dist1 = (diff1.T@mat1@diff1)**0.5
            prob0 = np.exp(-dist0/2)/(ev0**0.5)
            prob1 = np.exp(-dist1/2)/(ev1**0.5)            
            if prob0<prob1:
                preds[i,j]=0
            else:
                preds[i,j]=1
    correct = float(0)

    for i,pred in enumerate(preds):
        if np.allclose(pred,targets[i]):
            correct += 1
    return correct, correct/n

In [46]:
# Evaluate test_sub_gmm_clusters on the first 10000 entries of X_test / y_test;
# the cell displays the returned (correct count, accuracy) tuple.
test_sub_gmm_clusters(10000, sub_event_means, sub_event_covmat_invs, sub_event_evals)

0 preds made
1000 preds made
2000 preds made
3000 preds made
4000 preds made
5000 preds made
6000 preds made
7000 preds made
8000 preds made
9000 preds made


(0.0, 0.0)