In [2]:
import numpy as np
from pandas import DataFrame


# NOTE(review): allow_pickle=True unpickles arbitrary Python objects, so this
# file must come from a trusted source.
inp = np.load("data/original/dataset_smile_challenge.npy", allow_pickle=True).item()
train, test = inp['train'], inp['test']

# Binarise the labels: values below the threshold mean "no stress" (class 0),
# everything else becomes class 1.
threshold = 1
test_label = [1 if not (i < threshold) else 0 for i in test['labels']]
train_label = [1 if not (i < threshold) else 0 for i in train['labels']]


## Masking

In [3]:
# NOTE(review): this cell overwrites the feature arrays stored inside `train`
# while the masking arrays keep their original length, so running it a second
# time in the same kernel presumably fails with a shape mismatch -- restart the
# kernel and run all cells instead.

# --- hand-crafted features -------------------------------------------------
data_hand_features = train['hand_crafted_features']
maskECG = data_hand_features['ECG_masking']
maskGSR = data_hand_features['GSR_masking']
# Keep a sample when the product of both masks equals 1 somewhere along axis 1.
mask = np.any(maskECG*maskGSR == 1, axis=1)
for key in ('ECG_features', 'GSR_features'):
    data_hand_features[key] = np.array(data_hand_features[key])[mask]
data_ecg = data_hand_features['ECG_features']
data_gsr = data_hand_features['GSR_features']
train['hand_crafted_features'] = data_hand_features

# --- deep features ---------------------------------------------------------
data_deep_features = train['deep_features']
# Same idea, using the deep features' own masking array.
mask_deep = np.any(data_deep_features['masking'] == 1, axis = 1)
for key in ('ECG_features_C', 'ECG_features_T'):
    data_deep_features[key] = data_deep_features[key][mask_deep]
dataECG_C = data_deep_features['ECG_features_C']
dataECG_T = data_deep_features['ECG_features_T']
train['deep_features'] = data_deep_features

In [4]:
def getFeatures(features, rangestart = 0, rangeend = 60):
    """Summarise every column of a 2-D feature matrix over a window of rows.

    Rows ``rangestart:rangeend`` are selected, NaN entries are replaced by 0,
    and five statistics are computed per column.  The statistics of all
    columns are returned in one flat list, column after column, in this order:
    mean, standard deviation, largest absolute difference between consecutive
    rows, largest absolute value, median.

    Parameters
    ----------
    features : 2-D array-like
        Matrix whose rows are indexed by the window and whose columns are the
        individual features.  It is converted to a float array before the
        statistics are computed.
    rangestart, rangeend : int
        Half-open row window to summarise.

    Returns
    -------
    list
        ``5 * n_columns`` numbers.

    Raises
    ------
    ValueError
        If the window selects no rows (there is nothing to summarise).
    """
    window = np.asarray(features, dtype=float)[rangestart:rangeend]
    n_rows, n_cols = window.shape[0], window.shape[1]
    if n_cols and not n_rows:
        raise ValueError(
            "getFeatures: rows %r:%r select no samples" % (rangestart, rangeend)
        )

    # Missing values are treated as 0 for every statistic.
    window = np.where(np.isnan(window), 0.0, window)

    fi = []
    for j in range(n_cols):
        column = window[:, j]
        # A one-row window has no consecutive differences; report 0 instead of
        # failing on the maximum of an empty array.
        if n_rows > 1:
            largest_step = np.max(np.abs(np.diff(column)))
        else:
            largest_step = 0.0
        fi.extend([
            np.mean(column),
            np.std(column),
            largest_step,
            np.max(np.abs(column)),
            np.median(column),
        ])
    return fi

In [5]:
def calcFeatures(data):
    """Build one summary-feature vector per sample from the hand-crafted features.

    For every sample the GSR feature matrix and the ECG feature matrix are
    joined column-wise (GSR columns first) and the result is summarised with
    ``getFeatures`` using its default window.

    Only ``data['hand_crafted_features']`` is read.  The deep-feature arrays
    used to be looked up and indexed here without ever being used; those reads
    were dropped because they could fail (missing key, or fewer deep samples
    than hand-crafted samples) without contributing to the result.

    Parameters
    ----------
    data : dict
        Must provide ``data['hand_crafted_features']['ECG_features']`` and
        ``['GSR_features']``, each indexable by sample.

    Returns
    -------
    list of list
        One ``getFeatures`` result per entry of the ECG feature sequence.
    """
    hand_crafted = data['hand_crafted_features']
    data_ecg = hand_crafted['ECG_features']
    data_gsr = hand_crafted['GSR_features']

    return [
        getFeatures(np.concatenate((data_gsr[i], data_ecg[i]), axis=1))
        for i in range(len(data_ecg))
    ]

In [6]:
# Switch: set to False to skip computing the training feature vectors.
dotrain = True
if dotrain:
    # `f` holds one summary-feature vector per (masked) training sample.
    f = calcFeatures(train)


In [7]:
# Feature selection: score every column of `f` against the masked training
# labels with chi2 and keep the 15 best-scoring columns.
select = True
if select:
    from sklearn.feature_selection import SelectKBest
    # The wildcard import below is what brings `chi2` into scope.
    from sklearn.feature_selection import *
    fs = SelectKBest(score_func=chi2, k=15)
    # fit() returns the selector itself, so fitting and transforming can be chained
    selected = fs.fit(f, np.array(train_label)[mask]).transform(f)
    selected_train_features = selected
    # Boolean column mask, reused later to pick the same columns for the test set
    mask_features = fs.get_support()
    

In [8]:
# Switch: set to False to skip computing the test feature vectors.
dotest = True
if dotest:
    # Store the test features under their own name.  Reusing `f` here would
    # overwrite the training features, and re-running the feature-selection
    # cell afterwards would then fit on test features.
    test_features = calcFeatures(test)

    # Keep only the columns chosen by the feature selector.
    selected_test_features = np.array(test_features)[:,mask_features]


## Logistic Regression

In [9]:
# Binary test labels: class 0 below the threshold, class 1 otherwise.
test_label = [1 if not (i < threshold) else 0 for i in inp['test']['labels']]

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import *

# Training targets restricted to the samples kept by the hand-crafted mask.
masked_train_label = np.array(train_label)[mask]

modelML = LogisticRegression()
modelML.fit(selected_train_features, masked_train_label)

# NOTE: this is the accuracy on the data the model was fitted on.
result = modelML.score(selected_train_features, masked_train_label)
print("Accuracy on train: %.3f%%" % (result*100.0))


Accuracy on train: 62.500%


In [11]:
import os
from zipfile import ZipFile

# Locations of the submission artefacts, built once so the text file and the
# archive always refer to the same paths.
submission_dir = os.path.join('submission', 'modelD')
answer_path = os.path.join(submission_dir, 'answer.txt')
archive_path = os.path.join(submission_dir, 'answer.zip')

predicted = modelML.predict(np.array(selected_test_features))

# Create the output directory if needed so a fresh checkout does not fail.
os.makedirs(submission_dir, exist_ok=True)
# One integer prediction per line.
np.savetxt(answer_path, predicted, fmt='%1i')

# The context manager closes the archive even if adding the file fails.
with ZipFile(archive_path, 'w') as zipObj:
    # Store the file at the archive root rather than under its directory path.
    zipObj.write(answer_path, arcname='answer.txt')

print('zip created')

zip created
