## Extract FL Data

In [76]:
import numpy as np
import pandas as pd

In [77]:
def _read_space_delimited(path, **read_csv_options):
    """Read a single-space-delimited text file into a DataFrame."""
    return pd.read_csv(path, sep=' ', **read_csv_options)


# Scaled sensor readings and their label files for the FD003 subset.
train_df = _read_space_delimited('../../data_analysis/fd003/fd003-scaled_train.csv')
test_df = _read_space_delimited('../../data_analysis/fd003/fd003-scaled_test.csv')
train_labels_df = _read_space_delimited('../../data_analysis/fd003/fd003-training_labels.csv')
test_labels_df = _read_space_delimited('../../data_analysis/fd003/fd003-testing_labels.csv')
# This file is read without a header row; its columns are named in a later cell.
test_labels_at_break_df = _read_space_delimited('../../TED/CMAPSSData/RUL_FD003.txt', header=None)

In [78]:
# Column labels of the training frame; not referenced again in the visible cells.
columns = train_df.columns
# Names of the sensor columns kept as features: sensors 1..21 minus the
# indices listed in the exclusion set below.
_excluded_sensor_indices = {1, 5, 6, 9, 10, 14, 16, 18, 19}
ms_used = [
    'SensorMeasure' + str(sensor_index)
    for sensor_index in range(1, 22)
    if sensor_index not in _excluded_sensor_indices
]

In [79]:
# One row per engine ID: the last value of every column within each ID group.
test_at_break_df = test_df.groupby(['ID']).last().reset_index()

# The header-less label file yields two columns; keep only the first as 'RUL'.
test_labels_at_break_df.columns = ['RUL', 'NaN']
test_labels_at_break_df.drop(columns=['NaN'], inplace=True)

# Append the selected sensor columns, then the engine ID, to each label frame
# (in place, so the resulting column order is labels, sensors, ID).
for _labels, _features in (
    (train_labels_df, train_df),
    (test_labels_at_break_df, test_at_break_df),
):
    _labels[ms_used] = _features[ms_used]
    _labels['ID'] = _features['ID']

# From here on train_df / test_df are the label+feature frames.
test_df = test_labels_at_break_df.copy()
train_df = train_labels_df.copy()
# Only the training RUL is capped at 125.
train_df['RUL'] = train_labels_df['RUL'].clip(upper=125)

In [80]:
# Attach the RUL column from test_labels_at_break_df and export the frame
# with its original column names.
test_at_break_df['RUL'] = test_labels_at_break_df['RUL']
test_at_break_df.to_csv('./tf/fd003/scaled/test_partition.csv', sep=',', index=False)

# Keep just the target followed by the selected sensors, then rename them to
# the generic y / x0..xN-1 header used by the second export.
test_at_break_df = test_at_break_df.reindex(columns=['RUL'] + ms_used)
new_cols = ['y', *('x' + str(position) for position in range(len(ms_used)))]
test_at_break_df.columns = new_cols

test_at_break_df.to_csv('./decision-trees/fd003/scaled/test_partition.csv', sep=',', index_label='id')

In [81]:
# Group both frames by engine ID, keeping groups in order of first appearance.
train_gb, test_gb = (
    frame.groupby(['ID'], sort=False) for frame in (train_df, test_df)
)

In [82]:
def _floor_scaled(fraction, scale):
    """Return ``floor(fraction * scale)`` while absorbing float round-off.

    ``int(0.29 * 100)`` is 28 because the product evaluates to
    28.999999999999996; nudging by a tiny epsilon before truncating gives the
    intended 29 and still truncates genuine fractions (2.9 -> 2).
    """
    return int(fraction * scale + 1e-9)


def split_data(pcts, mode = 'train', seed = None):
    """Randomly partition the engines among workers and write one CSV each.

    Every worker receives a disjoint random set of engine IDs. The rows of
    those engines are concatenated (in ascending ID order), the columns are
    renamed positionally to ``y, x0..xN-1, ID`` and the ``y``/``x*`` columns
    are written to
    ``./decision-trees/fd003/scaled/<n> workers/<p1-p2-...>/<mode>_partition_<i>.csv``.
    The positional rename assumes each group frame is laid out as
    target, ``len(ms_used)`` sensor columns, ``ID``.

    Args:
        pcts: Fraction of the engines for each worker, one entry per worker,
            e.g. ``[0.75, 0.2, 0.05]``. Counts are rounded down, so a few
            engines may remain unassigned.
        mode: ``'train'`` reads the module-level ``train_gb``; any other value
            reads ``test_gb``. It is also used as the output file prefix.
        seed: Optional seed for a private ``numpy.random.RandomState`` so a
            split can be reproduced. With ``None`` (the default) the global
            NumPy random state is used, exactly as before.

    Raises:
        ValueError: If any worker would end up with no engines.
    """
    import os  # function-scope import so the file's import cell stays untouched

    gb = train_gb if mode == 'train' else test_gb

    # Collect the rows of every engine by iterating the groupby instead of
    # calling get_group() with a bare scalar: recent pandas releases deprecate
    # scalar keys for groupers built from a one-element list, and iteration
    # may yield 1-tuples there, which are unwrapped here.
    frames_by_id = {}
    for key, frame in gb:
        frames_by_id[key[0] if isinstance(key, tuple) else key] = frame

    n_workers = len(pcts)
    counts = [_floor_scaled(pct, len(frames_by_id)) for pct in pcts]
    if any(count < 1 for count in counts):
        # Fail before anything is written rather than midway through the export.
        raise ValueError(
            'every worker needs at least one engine; got counts '
            + str(counts) + ' for pcts ' + str(list(pcts))
        )

    rng = np.random if seed is None else np.random.RandomState(seed)

    # Draw without replacement from the IDs that are still unassigned. The
    # real IDs are used, so they need not be exactly 1..N.
    idx_remaining = sorted(frames_by_id)
    indices_concat = []
    for count in counts:
        idx_worker = np.sort(rng.choice(idx_remaining, count, replace = False))
        indices_concat.append(idx_worker)
        idx_remaining = np.setdiff1d(idx_remaining, idx_worker)

    if not indices_concat:
        return

    # Loop-invariant pieces of the output location, e.g. '50-40-10'.
    folder_name = '-'.join(str(_floor_scaled(pct, 100)) for pct in pcts)
    out_dir = './decision-trees/fd003/scaled/' + str(n_workers) + ' workers/' + folder_name
    os.makedirs(out_dir, exist_ok = True)

    new_cols = ['y'] + ['x' + str(num) for num in range(len(ms_used))]
    for i, id_list in enumerate(indices_concat):
        # A single concat avoids re-copying the growing frame once per engine.
        df = pd.concat([frames_by_id[engine_id] for engine_id in id_list])
        # NOTE: export with the original column names is currently disabled:
        # df.to_csv('./tf/fd003/scaled/' + str(n_workers) + ' workers/' + folder_name + '/' + mode + '_partition_' + str(i) + '.csv', sep=',', index = False)
        df.columns = new_cols + ['ID']
        df[new_cols].to_csv(out_dir + '/' + mode + '_partition_' + str(i) + '.csv', sep=',', index_label='id')

# Share configurations to export, grouped by the number of workers. Each inner
# list holds one fraction per worker.
_shares_by_worker_count = {
    2: [
        [0.5, 0.5],
        [0.6, 0.4],
        [0.7, 0.3],
        [0.8, 0.2],
        [0.9, 0.1],
    ],
    3: [
        [0.4, 0.3, 0.3],
        [0.4, 0.4, 0.2],
        [0.5, 0.4, 0.1],
        [0.6, 0.3, 0.1],
        [0.7, 0.2, 0.1],
        [0.8, 0.1, 0.1],
    ],
    4: [
        [0.3, 0.3, 0.2, 0.2],
        [0.3, 0.3, 0.3, 0.1],
        [0.4, 0.3, 0.2, 0.1],
        [0.4, 0.4, 0.1, 0.1],
        [0.5, 0.3, 0.1, 0.1],
        [0.6, 0.2, 0.1, 0.1],
        [0.7, 0.1, 0.1, 0.1],
    ],
    5: [
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.3, 0.2, 0.2, 0.2, 0.1],
        [0.3, 0.3, 0.2, 0.1, 0.1],
        [0.4, 0.3, 0.1, 0.1, 0.1],
        [0.5, 0.2, 0.1, 0.1, 0.1],
        [0.6, 0.1, 0.1, 0.1, 0.1],
    ],
}

# Flatten in insertion order: all 2-worker splits first, then 3, 4 and 5.
pct_list_total = [
    shares
    for worker_group in _shares_by_worker_count.values()
    for shares in worker_group
]

# Export every share configuration, first for the training groups and then
# for the test groups, echoing the configuration being processed.
for pct_list in pct_list_total:
    print(pct_list)
    for _mode in ('train', 'test'):
        split_data(pct_list, mode=_mode)

[0.5, 0.5]
[0.6, 0.4]
[0.7, 0.3]
[0.8, 0.2]
[0.9, 0.1]
[0.4, 0.3, 0.3]
[0.4, 0.4, 0.2]
[0.5, 0.4, 0.1]
[0.6, 0.3, 0.1]
[0.7, 0.2, 0.1]
[0.8, 0.1, 0.1]
[0.3, 0.3, 0.2, 0.2]
[0.3, 0.3, 0.3, 0.1]
[0.4, 0.3, 0.2, 0.1]
[0.4, 0.4, 0.1, 0.1]
[0.5, 0.3, 0.1, 0.1]
[0.6, 0.2, 0.1, 0.1]
[0.7, 0.1, 0.1, 0.1]
[0.2, 0.2, 0.2, 0.2, 0.2]
[0.3, 0.2, 0.2, 0.2, 0.1]
[0.3, 0.3, 0.2, 0.1, 0.1]
[0.4, 0.3, 0.1, 0.1, 0.1]
[0.5, 0.2, 0.1, 0.1, 0.1]
[0.6, 0.1, 0.1, 0.1, 0.1]
