# Estimate Gaussian mixture parameters
This notebook estimates Gaussian mixture parameters that are used to fill in the missing hand-crafted features of the original dataset.

# Load data

In [1]:
import numpy as np
import os

# Load the preprocessed ECG dataset.
# NOTE(review): allow_pickle=True unpickles Python objects stored in the archive,
# which can execute arbitrary code -- only load archives from a trusted source.
# The context manager closes the underlying .npz file handle once the arrays
# needed below have been read into memory.
with np.load(os.path.join('data', 'preprocessed', 'preprocessed_dataset_4.npz'),
             allow_pickle=True) as data:
    # training dataset
    train = data['x_train']
    # Train/test index arrays, keyed per modality; the GSR cell adds its own keys.
    idx_train = {'ECG_mask': data['idx_train']}
    idx_test = {'ECG_mask': data['idx_test']}

# tolist() unwraps the stored object so it can be indexed by feature-group name
# (presumably a 0-d object array wrapping a dict -- it is indexed by string here).
data_ecg = train.tolist()['ECG_features']

# Gaussian mixture estimation for ECG features

In [2]:
import matplotlib.pyplot as plt
from sklearn import mixture
import warnings
# NOTE(review): this blanket filter also hides sklearn's ConvergenceWarning, so a
# mixture that failed to converge is silently averaged in below.
warnings.filterwarnings('ignore')

N_COMPONENTS = 4  # number of Gaussians fitted to every (label, feature) series
RANDOM_STATE = 0  # fixed seed so Restart & Run All reproduces the same estimates

# One entry per ECG feature (last axis of data_ecg). Each entry holds the
# component parameters averaged over the first axis of data_ecg ("label").
c_m = []  # averaged covariances
w_m = []  # averaged weights
m_m = []  # averaged means
for feature_idx in range(data_ecg.shape[2]):  # feature
    means, weights, covariances = [], [], []
    for label_idx in range(data_ecg.shape[0]):  # label
        clf = mixture.GaussianMixture(n_components=N_COMPONENTS,
                                      covariance_type="full",
                                      random_state=RANDOM_STATE)
        # Each series is treated as a set of 1-D observations.
        clf.fit(data_ecg[label_idx, :, feature_idx].reshape(-1, 1))

        # Components come back in arbitrary order; sort them by ascending weight
        # so position k means "k-th lightest component" in every fit.
        # NOTE(review): rank-by-weight matching does not guarantee that the same
        # underlying mode is averaged across labels -- confirm this is intended.
        order = np.argsort(clf.weights_)
        means.append(clf.means_[order])
        weights.append(clf.weights_[order])
        covariances.append(clf.covariances_[order])

    # Average the rank-aligned parameters over all labels for this feature.
    c_m.append(np.mean(covariances, axis=0))
    w_m.append(np.mean(weights, axis=0))
    m_m.append(np.mean(means, axis=0))

    

In [3]:
# Collect the averaged ECG mixture parameters (one list entry per feature)
# under a single key of the estimator dictionary.
features_mixture = {
    'ECG_mixture': {
        'mean': m_m,
        'weight': w_m,
        'covariance': c_m,
    },
}

# Load data for GSR

In [4]:
# Load the preprocessed GSR dataset.
# NOTE(review): allow_pickle=True unpickles Python objects stored in the archive,
# which can execute arbitrary code -- only load archives from a trusted source.
# The context manager closes the underlying .npz file handle once the arrays
# needed below have been read into memory.
with np.load(os.path.join('data', 'preprocessed', 'preprocessed_dataset_5.npz'),
             allow_pickle=True) as data:
    # training dataset
    train = data['x_train']
    # Add the GSR index arrays next to the ECG ones created in the first cell.
    idx_train['GSR_mask'] = data['idx_train']
    idx_test['GSR_mask'] = data['idx_test']

# tolist() unwraps the stored object so it can be indexed by feature-group name
# (presumably a 0-d object array wrapping a dict -- it is indexed by string here).
data_gsr = train.tolist()['GSR_features']

# Gaussian mixture estimation for GSR features

In [5]:
import matplotlib.pyplot as plt
from sklearn import mixture

N_COMPONENTS = 4  # number of Gaussians fitted to every (label, feature) series
RANDOM_STATE = 0  # fixed seed so Restart & Run All reproduces the same estimates

# One entry per GSR feature (last axis of data_gsr). Each entry holds the
# component parameters averaged over the first axis of data_gsr ("label").
# These lists are rebound here, so the ECG results already stored in
# features_mixture are not affected.
c_m = []  # averaged covariances
w_m = []  # averaged weights
m_m = []  # averaged means
for feature_idx in range(data_gsr.shape[2]):  # feature
    means, weights, covariances = [], [], []
    for label_idx in range(data_gsr.shape[0]):  # label
        clf = mixture.GaussianMixture(n_components=N_COMPONENTS,
                                      covariance_type="full",
                                      random_state=RANDOM_STATE)
        # Each series is treated as a set of 1-D observations.
        clf.fit(data_gsr[label_idx, :, feature_idx].reshape(-1, 1))

        # Components come back in arbitrary order; sort them by ascending weight
        # so position k means "k-th lightest component" in every fit.
        # NOTE(review): rank-by-weight matching does not guarantee that the same
        # underlying mode is averaged across labels -- confirm this is intended.
        order = np.argsort(clf.weights_)
        means.append(clf.means_[order])
        weights.append(clf.weights_[order])
        covariances.append(clf.covariances_[order])

    # Average the rank-aligned parameters over all labels for this feature.
    c_m.append(np.mean(covariances, axis=0))
    w_m.append(np.mean(weights, axis=0))
    m_m.append(np.mean(means, axis=0))
    

In [6]:
# Store the averaged GSR mixture parameters (one list entry per feature)
# alongside the ECG ones.
features_mixture['GSR_mixture'] = {
    'mean': m_m,
    'weight': w_m,
    'covariance': c_m,
}

In [7]:
# np.savez does not create missing directories, so make sure the target exists.
out_dir = os.path.join('data', 'filled')
os.makedirs(out_dir, exist_ok=True)

# Persist the index masks and the estimated mixture parameters. np.savez appends
# the '.npz' extension. The dict arguments are stored as pickled object arrays,
# so reading them back requires np.load(..., allow_pickle=True).
np.savez(os.path.join(out_dir, 'estimators'),
         idx_train=idx_train,
         idx_test=idx_test,
         features_mixture=features_mixture)