###  Created by Luis Alejandro (alejand@umich.edu)

Verify proper dataset generation, splitting and k-fold generation.

In [1]:
import pandas as pd
import numpy as np
from aircraft_dataset import aircraft_dataset_split
from aircraft_dataset import AircraftDatasetFoldIterator

In [2]:
# Load the exported segment and separate it into predictors, responses and
# measurements.
dataset = pd.read_csv('exports/2020-01-31 16-02-04/segment_0.csv')

# The two trailing columns are read as the responses and the measurements;
# every column before them is taken as a predictor.
features = dataset.shape[1] - 2
predictors = dataset.iloc[:, :features].values
responses = dataset.iloc[:, features].values
measurements = dataset.iloc[:, features + 1].values

# Left as the final expression so the notebook displays the distinct classes.
np.unique(responses)

array(['A320-2xx (CFM56-5)', 'B737-7xx (CF56-7B22-)',
       'B737-8xx (CF56-7B22+)', 'ERJ190 (CF34-10E)'], dtype=object)

In [3]:
# Dataset split unit test: split the loaded arrays into a working part and a
# hold-out part, also requesting the measurement indexes of each part
# (return_measurements=True).
(
    X,
    y,
    X_holdout,
    y_holdout,
    indexes,
    holdout_indexes,
) = aircraft_dataset_split(
    predictors,
    responses,
    measurements,
    return_measurements=True,
)

In [4]:
# Measurements properly distributed between the two parts of the split
A = set(np.unique(indexes))
B = set(np.unique(holdout_indexes))
C = set(measurements)

# No value is repeated among the hold-out indexes.
assert len(holdout_indexes) == len(B)
# No measurement appears in both parts.
assert A.isdisjoint(B)
# Taken together, both parts account for every measurement in the dataset.
assert (A | B) == C

In [5]:
# All classes present in both datasets: the distinct labels of the working
# part and of the hold-out part must be exactly the same set.
A = set(np.unique(y))
B = set(np.unique(y_holdout))

assert A == B

In [6]:
# Number of correct measurements returned: the rows of both parts must add up
# to the rows of the full dataset.
# NOTE(review): the factor 12 presumably is the number of dataset rows behind
# each hold-out entry -- confirm against aircraft_dataset_split.
assert X.shape[0] + 12 * X_holdout.shape[0] == predictors.shape[0]

In [7]:
# Verifying every split of the k-fold validator
iterator = AircraftDatasetFoldIterator(X, y, indexes, folds=5)
iterator.build()

for train_obs, test_obs in iterator:
    A = set(np.unique(indexes[train_obs]))
    B = set(np.unique(indexes[test_obs]))
    C = set(indexes)

    # No value is repeated among the indexes selected for testing.
    assert len(test_obs) == len(B)
    # Training and testing observations never share a measurement.
    assert A.isdisjoint(B)
    # Each fold still accounts for every measurement of the working part.
    assert (A | B) == C