# Missing features filling
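This notebook fills in the missing hand-crafted features (ECG and GSR) in both the training and test sets by sampling replacement values from previously estimated per-feature Gaussian mixtures.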

# Load estimators

In [9]:
import numpy as np
import os
# Open the archive of pre-computed estimators. Entries are fetched from it by
# key here and in later cells, so the handle is kept open.
# NOTE(review): allow_pickle=True unpickles arbitrary objects -- only load
# archives from a trusted source.
estimators_file = os.path.join('data', 'filled', 'estimators.npz')
estimators = np.load(estimators_file, allow_pickle=True)

# Mixture parameters; indexed later by 'ECG_mixture' and 'GSR_mixture'.
features_mixture = estimators['features_mixture']

# Train

### Load data

In [10]:
# Row masks for the training split ('ECG_mask' / 'GSR_mask' are read from it later).
idx_train = estimators['idx_train']

# The dataset is a pickled object stored in a .npy file; .item() unwraps the
# 0-d object array so it can be indexed by split name.
dataset_path = os.path.join('data', 'original', 'dataset_smile_challenge.npy')
data = np.load(dataset_path, allow_pickle=True).item()

# Training split and its hand-crafted feature groups
# ({'ECG_features', 'GSR_features'}).
train = data['train']
handcrafted_features_train = train['hand_crafted_features']
ecg = handcrafted_features_train['ECG_features']
gsr = handcrafted_features_train['GSR_features']

### ECG features

In [11]:
from sklearn.mixture import GaussianMixture
# Fill the training rows whose ECG features are missing by sampling from the
# stored per-feature Gaussian mixtures. `ecg` is modified in place.
ecg_mix = features_mixture.tolist()['ECG_mixture']
ecg_mask = idx_train.tolist()['ECG_mask']

# Rows that are NOT listed in the mask get overwritten (presumably the mask
# holds the indices of rows with valid data -- TODO confirm). The list does
# not depend on the feature, so it is built once rather than once per feature.
ecg_rows_to_fill = [row for row in range(ecg.shape[0]) if row not in ecg_mask]

# Number of values per row, taken from the array instead of a hard-coded 60.
ecg_n_steps = ecg.shape[1]

# NOTE(review): no random_state is set, so the filled values change from run
# to run.
for f in range(ecg.shape[2]):
    # Rebuild the mixture of feature f from its saved parameters; no fit() is
    # run, the parameters are assigned directly.
    clf_m = GaussianMixture(n_components=len(ecg_mix['mean'][f]), covariance_type="full")
    clf_m.weights_ = ecg_mix['weight'][f]
    clf_m.covariances_ = ecg_mix['covariance'][f]
    clf_m.means_ = ecg_mix['mean'][f]
    for row in ecg_rows_to_fill:
        # sample() returns (samples, component_labels); only the samples are
        # kept and flattened to one value per step.
        ecg[row, :, f] = clf_m.sample(ecg_n_steps)[0].reshape(ecg_n_steps)


### GSR features

In [12]:
# Fill the training rows whose GSR features are missing by sampling from the
# stored per-feature Gaussian mixtures. `gsr` is modified in place.
gsr_mix = features_mixture.tolist()['GSR_mixture']
gsr_mask = idx_train.tolist()['GSR_mask']

# Rows that are NOT listed in the mask get overwritten (presumably the mask
# holds the indices of rows with valid data -- TODO confirm). The list does
# not depend on the feature, so it is built once rather than once per feature.
gsr_rows_to_fill = [row for row in range(gsr.shape[0]) if row not in gsr_mask]

# Number of values per row, taken from the array instead of a hard-coded 60.
gsr_n_steps = gsr.shape[1]

# NOTE(review): no random_state is set, so the filled values change from run
# to run.
for f in range(gsr.shape[2]):
    # Rebuild the mixture of feature f from its saved parameters; no fit() is
    # run, the parameters are assigned directly.
    clf_m = GaussianMixture(n_components=len(gsr_mix['mean'][f]), covariance_type="full")
    clf_m.weights_ = gsr_mix['weight'][f]
    clf_m.covariances_ = gsr_mix['covariance'][f]
    clf_m.means_ = gsr_mix['mean'][f]
    for row in gsr_rows_to_fill:
        # sample() returns (samples, component_labels); only the samples are
        # kept and flattened to one value per step.
        gsr[row, :, f] = clf_m.sample(gsr_n_steps)[0].reshape(gsr_n_steps)

### Save

In [13]:
# Store the filled arrays back under their original keys and expose the
# feature container as the training inputs.
handcrafted_features_train['ECG_features'] = ecg
handcrafted_features_train['GSR_features'] = gsr
x_train = handcrafted_features_train

# Binarise the labels: anything strictly below the threshold maps to 0,
# everything else to 1.
threshold = 1
y_train = [1 if not (label < threshold) else 0 for label in train['labels']]

# Test

In [14]:
# Row masks for the test split.
idx_test = estimators['idx_test']

# Test split and its hand-crafted feature groups
# ({'ECG_features', 'GSR_features'}).
test = data['test']
handcrafted_features_test = test['hand_crafted_features']

# From here on `ecg` and `gsr` refer to the TEST arrays (the names are reused
# from the training section).
ecg = handcrafted_features_test['ECG_features']
gsr = handcrafted_features_test['GSR_features']

### ECG features

In [15]:
# Fill the test rows whose ECG features are missing by sampling from the
# stored per-feature Gaussian mixtures. `ecg` is modified in place.
# The mixture is re-read here (as the GSR test cell does for its own) so this
# cell does not rely on `ecg_mix` left over from the training ECG cell.
ecg_mix = features_mixture.tolist()['ECG_mixture']
ecg_mask = idx_test.tolist()['ECG_mask']

# Rows that are NOT listed in the mask get overwritten (presumably the mask
# holds the indices of rows with valid data -- TODO confirm). The list does
# not depend on the feature, so it is built once rather than once per feature.
ecg_rows_to_fill = [row for row in range(ecg.shape[0]) if row not in ecg_mask]

# Number of values per row, taken from the array instead of a hard-coded 60.
ecg_n_steps = ecg.shape[1]

# NOTE(review): no random_state is set, so the filled values change from run
# to run.
for f in range(ecg.shape[2]):
    # Rebuild the mixture of feature f from its saved parameters; no fit() is
    # run, the parameters are assigned directly.
    clf_m = GaussianMixture(n_components=len(ecg_mix['mean'][f]), covariance_type="full")
    clf_m.weights_ = ecg_mix['weight'][f]
    clf_m.covariances_ = ecg_mix['covariance'][f]
    clf_m.means_ = ecg_mix['mean'][f]
    for row in ecg_rows_to_fill:
        # sample() returns (samples, component_labels); only the samples are
        # kept and flattened to one value per step.
        ecg[row, :, f] = clf_m.sample(ecg_n_steps)[0].reshape(ecg_n_steps)

### GSR features

In [16]:
# Fill the test rows whose GSR features are missing by sampling from the
# stored per-feature Gaussian mixtures. `gsr` is modified in place.
gsr_mix = features_mixture.tolist()['GSR_mixture']
# Use the TEST mask: `gsr` holds the test array at this point, so the training
# mask would select the wrong rows (assumes idx_test carries a 'GSR_mask' key
# like idx_train does -- confirm against the estimators archive).
gsr_mask = idx_test.tolist()['GSR_mask']

# Rows that are NOT listed in the mask get overwritten (presumably the mask
# holds the indices of rows with valid data -- TODO confirm). The list does
# not depend on the feature, so it is built once rather than once per feature.
gsr_rows_to_fill = [row for row in range(gsr.shape[0]) if row not in gsr_mask]

# Number of values per row, taken from the array instead of a hard-coded 60.
gsr_n_steps = gsr.shape[1]

# NOTE(review): no random_state is set, so the filled values change from run
# to run.
for f in range(gsr.shape[2]):
    # Rebuild the mixture of feature f from its saved parameters; no fit() is
    # run, the parameters are assigned directly.
    clf_m = GaussianMixture(n_components=len(gsr_mix['mean'][f]), covariance_type="full")
    clf_m.weights_ = gsr_mix['weight'][f]
    clf_m.covariances_ = gsr_mix['covariance'][f]
    clf_m.means_ = gsr_mix['mean'][f]
    for row in gsr_rows_to_fill:
        # sample() returns (samples, component_labels); only the samples are
        # kept and flattened to one value per step.
        gsr[row, :, f] = clf_m.sample(gsr_n_steps)[0].reshape(gsr_n_steps)

### Save

In [17]:
# Store the filled test arrays back under their original keys and expose the
# feature container as the test inputs.
handcrafted_features_test['ECG_features'] = ecg
handcrafted_features_test['GSR_features'] = gsr
x_test = handcrafted_features_test

# Save dataset

In [18]:
# Write the filled training inputs, binarised training labels and filled test
# inputs into a single .npz archive (np.savez appends the extension).
filled_dataset_path = os.path.join('data', 'filled', 'filled_dataset')
np.savez(filled_dataset_path, x_train=x_train, y_train=y_train, x_test=x_test)