In [1]:
from noise import compressed_pickle, decompress_pickle
from sklearn.model_selection import StratifiedKFold
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# Root folder for the pickled inputs and for the generated fold files.
INPUT_DATA_PATH = '../input-data/'
# Loaded once here; open_data() iterates over this notebook-level variable.
data = decompress_pickle(f'{INPUT_DATA_PATH}cycle_data')

In [3]:
def format_dataframe(data):
    """Pack a flat signal into a one-row DataFrame of four nested Series.

    The input is cut into four equal, consecutive segments. Segment k is
    wrapped in a ``pd.Series`` and stored as the single cell of column
    'A', 'B', 'C' or 'Z' (in that order).

    Parameters
    ----------
    data : array exposing ``.shape`` and ``.reshape`` (e.g. ``numpy.ndarray``)
        Its total size must equal ``4 * int(data.shape[0] / 4)``; otherwise
        the reshape raises ``ValueError``.

    Returns
    -------
    pd.DataFrame
        One row, columns ['A', 'B', 'C', 'Z'], each cell holding a Series.
    """
    segment_len = int(data.shape[0] / 4)
    # Row k of the (4, segment_len) layout is the same data as column k of
    # its transpose, so no explicit transpose is needed.
    segments = data.reshape((4, segment_len))
    return pd.DataFrame(
        {label: [pd.Series(segment)] for label, segment in zip('ABCZ', segments)}
    )

In [4]:
def open_data(signal_type, cycle_name):
    """Assemble the dataset for one signal/cycle pair and store an 80/20 split.

    Every record of the notebook-level ``data`` contributes one row, built by
    ``format_dataframe`` from ``record[f'{signal_type}_{cycle_name}']``, and
    one label taken from ``record['fault_type']``. The rows are split 80/20,
    stratified on the labels with ``random_state=42``, and the four parts are
    handed to ``compressed_pickle`` under
    ``INPUT_DATA_PATH + 'folds/<signal_type>/<cycle_name>/'`` with the names
    ``X_train``, ``y_train``, ``X_val`` and ``y_val``.

    Parameters
    ----------
    signal_type : str
        Prefix of the record key and first folder level of the output path.
    cycle_name : str
        Suffix of the record key and second folder level of the output path.

    Returns
    -------
    tuple
        ``(X_train, y_train)`` -- the training part only; the 20% part is
        written out but not returned.
    """
    record_key = f'{signal_type}_{cycle_name}'
    out_dir = INPUT_DATA_PATH + f'folds/{signal_type}/{cycle_name}/'

    # Single pass over `data`, collecting features and labels side by side.
    frames, labels = [], []
    for record in data:
        frames.append(format_dataframe(record[record_key]))
        labels.append(record['fault_type'])

    X = pd.concat(frames).reset_index(drop=True)
    y = np.array(labels)

    X_train, X_holdout, y_train, y_holdout = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=42, shuffle=True
    )

    # The held-out 20% is stored under the "val" names.
    outputs = (
        ('X_train', X_train),
        ('y_train', y_train),
        ('X_val', X_holdout),
        ('y_val', y_holdout),
    )
    for file_stem, payload in outputs:
        compressed_pickle(out_dir + file_stem, payload)

    return X_train, y_train

In [5]:
def save_folds(signal_type, cycle_name):
    """Split the training part of one signal/cycle dataset into 10 stratified folds.

    Calls ``open_data`` (which also stores the holdout split), then runs a
    shuffled ``StratifiedKFold`` with ``n_splits=10`` and ``random_state=42``
    on the returned training data. For fold k (numbered from 1) four objects
    are handed to ``compressed_pickle`` in the same folder used by
    ``open_data``: ``X_train_fold_k``, ``X_test_fold_k``, ``y_train_fold_k``
    and ``y_test_fold_k``.

    Parameters
    ----------
    signal_type : str
        Forwarded to ``open_data``; also the first folder level of the output path.
    cycle_name : str
        Forwarded to ``open_data``; also the second folder level of the output path.
    """
    X_train, y_train = open_data(signal_type, cycle_name)
    fold_dir = INPUT_DATA_PATH + f'folds/{signal_type}/{cycle_name}/'
    splitter = StratifiedKFold(n_splits=10, random_state=42, shuffle=True)

    for fold_no, (train_idx, test_idx) in enumerate(splitter.split(X_train, y_train), start=1):
        # X_train is a DataFrame (positional .iloc); y_train is a numpy array.
        parts = (
            ('X_train', X_train.iloc[train_idx, :]),
            ('X_test', X_train.iloc[test_idx, :]),
            ('y_train', y_train[train_idx]),
            ('y_test', y_train[test_idx]),
        )
        for prefix, subset in parts:
            compressed_pickle(fold_dir + f'{prefix}_fold_{fold_no}', subset)

In [6]:
# Cycle tags 'cycle_1', 'cycle_2', 'cycle_4', ..., 'cycle_128' (powers of two).
cycle_list = [f'cycle_{2 ** exponent}' for exponent in range(8)]

# For every cycle, generate the holdout split and the folds for signal type
# 'v' first, then for 'i'.
for cycle_name in cycle_list:
    for signal_type in ('v', 'i'):
        save_folds(signal_type, cycle_name)

## Testes aleatórios

In [8]:
def drange(start, stop, step):
    """Yield a geometric progression: start, start*step, start*step**2, ...

    Values are produced while they are strictly below ``stop``. Despite the
    parameter name, ``step`` is a multiplicative ratio, not an additive
    increment.

    Parameters
    ----------
    start : number
        First value of the progression.
    stop : number
        Exclusive upper bound.
    step : number
        Ratio applied between consecutive values.

    Yields
    ------
    number
        The successive values of the progression.

    Raises
    ------
    ValueError
        If a value below ``stop`` is left unchanged by the multiplication
        (``start == 0`` or ``step == 1``), which would otherwise make the
        generator yield the same value forever. As with any generator, the
        error surfaces when the generator is advanced, not when it is created.
    """
    current = start
    while current < stop:
        following = current * step
        if following == current:
            # Fixed point below `stop`: the loop could never terminate.
            raise ValueError(
                f'drange cannot progress: {current!r} * {step!r} == {current!r}'
            )
        yield current
        current = following

# NOTE: this import and constant repeat the notebook's first cells; they are
# kept so this scratch cell can also be run on its own.
from noise import decompress_pickle, compressed_pickle
INPUT_DATA_PATH = '../input-data/'
all_data = decompress_pickle(INPUT_DATA_PATH + 'noise_data')

# Only the first record is inspected (hence the `break`). The loop variable is
# deliberately NOT called `data`: that name is the notebook-level dataset read
# by open_data(), and rebinding it here would break a later re-run of
# open_data()/save_folds().
for noise_sample in all_data:
    # Length of row 0 after dropping its first 64 columns, minus another 64.
    # Hoisted because it does not depend on n.
    reducible_len = noise_sample['i_noise'][0, 64:].shape[0] - 64
    for n in drange(1, 129, 2):
        size = int(reducible_len / n + 64)
        print(size)
    break

320
192
128
96
80
72
68
66


In [10]:
# Closed-form sizes 64 + 256 / n for n = 1, 2, 4, ..., 128, one per line.
print(*(64 + 256 / ratio for ratio in drange(1, 129, 2)), sep='\n')

320.0
192.0
128.0
96.0
80.0
72.0
68.0
66.0
