In [None]:
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
# Seed NumPy's legacy global RNG so any np.random-based step is repeatable across runs.
np.random.seed(2049)

## Load data

In [None]:
# Read the raw arrays from .npy files in the local data/ directory.
# NOTE(review): X_train / X_test appear to hold stacked rows with an id in column 0,
# sliced later in fixed blocks of 12 rows per id — confirm against the data source.
X_train = np.load("data/X_train.npy")
X_test = np.load("data/X_test.npy")
# Presumably the training targets; the cells shown only re-save y unchanged — verify.
y = np.load("data/y.npy")

## Processing

### Remove NaNs

In [None]:
def impute_partly(X, rows_per_patient=12):
    """Fill NaNs with block-local column medians wherever the block has data.

    ``X`` is treated as consecutive blocks of ``rows_per_patient`` rows, one
    block per distinct value found in column 0. Inside each block, every
    column with at least one non-NaN entry has its NaNs replaced by the median
    of that column computed over the block only. Columns that are NaN for the
    whole block are left untouched, because no block-local median exists.

    Parameters
    ----------
    X : numpy.ndarray
        2-D float array. It is modified in place.
    rows_per_patient : int, default 12
        Number of consecutive rows that make up one block.

    Returns
    -------
    numpy.ndarray
        The same array object that was passed in.
    """
    # The number of blocks is taken from the distinct ids in column 0; the rows
    # themselves are addressed positionally, so blocks must be contiguous.
    n_blocks = len(set(X[:, 0]))
    for i in range(n_blocks):
        start = i * rows_per_patient
        stop = start + rows_per_patient
        block = X[start:stop, :]

        # Keep only columns that have at least one observed value in this block.
        # Comparing against the block's real row count (instead of a constant)
        # keeps a short trailing block from passing all-NaN columns on.
        nan_counts = np.isnan(block).sum(axis=0)
        observed_cols = np.where(nan_counts < block.shape[0])[0]
        if observed_cols.size == 0:
            # Nothing to impute from (empty or fully-NaN block); the imputer
            # would reject a zero-feature array.
            continue

        imputer = SimpleImputer(missing_values=np.nan, strategy='median')
        X[start:stop, observed_cols] = imputer.fit_transform(block[:, observed_cols])
    return X

# NaN count in the training array before the per-block median fill.
print(np.count_nonzero(np.isnan(X_train)))
# Apply the per-block fill to both splits (train first, then test).
X_train, X_test = impute_partly(X_train), impute_partly(X_test)
# NaN count in the training array after the per-block fill.
print(np.count_nonzero(np.isnan(X_train)))

In [None]:
# NaN count in the training array going into the global median pass.
print(np.count_nonzero(np.isnan(X_train)))
# One median per column, estimated on train and test rows stacked together,
# fills every value that is still missing.
# NOTE(review): fitting on train+test lets test rows influence the fill values —
# confirm this is intended.
imputer = SimpleImputer(strategy='median', missing_values=np.nan).fit(
    np.vstack((X_train, X_test))
)
X_train, X_test = imputer.transform(X_train), imputer.transform(X_test)
# NaN count in the training array after the global pass.
print(np.count_nonzero(np.isnan(X_train)))

### Scaling


In [None]:
# Standardise every column except column 0 (the id column is left as-is).
# The scaler statistics come from train and test rows stacked together.
# NOTE(review): this lets test rows influence the scaling of the training data —
# confirm this is intended.
scaler = StandardScaler().fit(np.vstack((X_train[:, 1:], X_test[:, 1:])))
X_train[:, 1:], X_test[:, 1:] = (
    scaler.transform(X_train[:, 1:]),
    scaler.transform(X_test[:, 1:]),
)

### Extract features

In [None]:
def extract(X, rows_per_patient=12):
    """Collapse each fixed-size block of rows into a single feature vector.

    ``X`` is treated as consecutive blocks of ``rows_per_patient`` rows, one
    block per distinct value found in column 0. For each block the output row
    is laid out as::

        [col 0 of first row, col 2 of first row,
         mean(col 3), std(col 3), mean(col 4), std(col 4), ...]

    Column 1 is not used. The standard deviation is ``np.std`` with its
    default settings (population standard deviation, ``ddof=0``).

    Parameters
    ----------
    X : numpy.ndarray
        2-D array with at least 3 columns when it is non-empty.
    rows_per_patient : int, default 12
        Number of consecutive rows that make up one block.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_blocks, 2 + 2 * (n_columns - 3))``; an empty 1-D
        array when ``X`` has no rows.
    """
    # The number of blocks is taken from the distinct ids in column 0; the rows
    # themselves are addressed positionally, so blocks must be contiguous.
    n_blocks = len(set(X[:, 0]))
    features_list = []
    for i in range(n_blocks):
        start = i * rows_per_patient
        block = X[start:start + rows_per_patient, :]

        # Columns 0 and 2 are copied from the block's first row only.
        features = [block[0, 0], block[0, 2]]

        # Every remaining column is summarised over the block's rows.
        for j in range(3, block.shape[1]):
            column = block[:, j]
            features.append(np.mean(column))
            features.append(np.std(column))
        features_list.append(features)
    return np.array(features_list)

# Build one feature row per block for each split (train first, then test).
train_features, test_features = (extract(split) for split in (X_train, X_test))
# Show both resulting shapes, one per line.
print(train_features.shape, test_features.shape, sep="\n")

In [None]:
# Expose the extracted features under the names used by the saving step.
X_train_processed, X_test_processed = train_features, test_features

## Saving

In [None]:
# Write the extracted feature matrices to .npy files in data/.
np.save("data/X_train_processed.npy", X_train_processed)
np.save("data/X_test_processed.npy", X_test_processed)
# NOTE(review): y is written back to the path it was loaded from and is not modified
# in the cells shown — this rewrite looks redundant; confirm before keeping it.
np.save("data/y.npy", y)