In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [24]:
# Load the features and labels of each class ("alert" and "tired") from disk.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects;
# only load files from a trusted source.
# NOTE(review): the label files carry a .npy suffix but are parsed as text by
# np.loadtxt -- presumably they were written with np.savetxt; confirm.
features_alert = np.load('features_long_alert.npy', allow_pickle=True)
labels_alert = np.loadtxt('labels_long_alert.npy')
features_tired = np.load('features_long_tired.npy', allow_pickle=True)
labels_tired = np.loadtxt('labels_long_tired.npy')

# Each class must have exactly one label per feature entry (compared on axis 0).
if any(
    feats.shape[0] != labs.shape[0]
    for feats, labs in (
        (features_alert, labels_alert),
        (features_tired, labels_tired),
    )
):
    raise IndexError('For the same label, the features and labels must be of same size')


In [30]:
def unison_shuffled_copies(a, b, seed=None):
    """
    unison_shuffled_copies shuffles two equally long arrays with the same random
    permutation, so that a[i] and b[i] are still paired after the shuffle

    : a is the first array. It must support indexing with an integer array (e.g. np.ndarray)
    : b is the second array. It must have the same length as a
    : seed is optional. When given, the permutation is drawn from
      np.random.default_rng(seed), which makes the shuffle reproducible.
      When None (default) the global np.random state is used, so an earlier
      np.random.seed(...) call still controls the result

    return a tuple (shuffled_a, shuffled_b)

    raises AssertionError if a and b do not have the same length
    """
    assert len(a) == len(b), f"array in index 0 has length {len(a)} and array in index 1 has length {len(b)}"
    if seed is None:
        # Legacy global RNG: keeps the behaviour of existing two-argument calls.
        p = np.random.permutation(len(a))
    else:
        # Independent generator: reproducible without touching the global state.
        p = np.random.default_rng(seed).permutation(len(a))
    # The same index array is applied to both inputs, which keeps them aligned.
    return a[p], b[p]

def split (features :np.ndarray, labels:np.ndarray, splitp : float):
    """
    split shuffles a features dataset and its labels in unison and then splits
    them into a training part and a testing part

    : features is the features dataset
    : labels is the labels dataset. It must have the same length as features
    : splitp is the split percentage. Needs to range from 0 to 1. A split percentage of 0.8 will put 80% of set in train and 20% in test.
      The train size is int(splitp * len(labels)), i.e. it is rounded down

    return a list with [features_train, labels_train, features_test, labels_test]

    raises ValueError if splitp is not between 0 and 1
    """
    # A negative splitp would become a negative slice index and a splitp above 1
    # would leave the test set empty, so reject both instead of returning a
    # silently wrong split. (NaN also fails this comparison.)
    if not 0 <= splitp <= 1:
        raise ValueError(f"splitp must be between 0 and 1 (inclusive), got {splitp}")

    features, labels = unison_shuffled_copies(features, labels)
    split_index = int(splitp * len(labels))

    # Open-ended slices run up to and including the last sample. A slice ending
    # in -1 (e.g. [split_index:-1]) would drop the last sample.
    features_train = features[:split_index]
    features_test = features[split_index:]

    labels_train = labels[:split_index]
    labels_test = labels[split_index:]

    return [features_train, labels_train, features_test, labels_test]

def concatenation(split1: list, split2:list, *more_splits: list):
    """
    concatenation puts together the train datasets and test datasets that arise from the split function
    It accepts two or more splits, one per label present in the dataset

    : split1 is the split of the first label
    : split2 is the split of the second label
    : more_splits are the splits of any further labels (optional)

    Every split must have the structure [train_features, train_labels, test_features, test_labels]

    return a list with [train_features, train_labels, test_features, test_labels] where the datasets are shuffled
    """
    splits = (split1, split2) + more_splits

    # Merge the train parts of every split, then shuffle features and labels together.
    train_feat = np.concatenate([s[0] for s in splits])
    train_lab = np.concatenate([s[1] for s in splits])
    train_feat, train_lab = unison_shuffled_copies(train_feat, train_lab)

    # Same for the test parts. Train and test samples are never mixed.
    test_feat = np.concatenate([s[2] for s in splits])
    test_lab = np.concatenate([s[3] for s in splits])
    test_feat, test_lab = unison_shuffled_copies(test_feat, test_lab)

    return [train_feat, train_lab, test_feat, test_lab]


In [31]:
# Split each class on its own (80% train / 20% test) so that both classes are
# represented in the train part and in the test part.
split1 = split(features=features_alert, labels=labels_alert, splitp=0.8)
split2 = split(features=features_tired, labels=labels_tired, splitp=0.8)

# dataset is [train_features, train_labels, test_features, test_labels],
# with both classes merged and shuffled.
dataset = concatenation(split1=split1, split2=split2)

(19, 5, 8)
