## Extract FL Data

In [103]:
import numpy as np
import pandas as pd

In [104]:
# Read the four space-delimited FD003 tables in one pass: the raw train and
# test frames, followed by their matching label tables.
train_df, test_df, train_labels_df, test_labels_df = (
    pd.read_csv(csv_path, sep=' ')
    for csv_path in (
        '../../data_analysis/fd003/fd003-raw_train.csv',
        '../../data_analysis/fd003/fd003-raw_test.csv',
        '../../data_analysis/fd003/fd003-training_labels.csv',
        '../../data_analysis/fd003/fd003-testing_labels.csv',
    )
)

In [105]:
# Column index of the training frame, captured before the 'RUL' column is added.
# NOTE(review): not referenced again in the visible part of this notebook.
columns = train_df.columns
# Build ms_used: 'RUL' first, then 'SensorMeasure<k>' for k = 1..21, leaving
# out the sensor numbers listed in the filter below.
ms_used = ['RUL'] + [
    'SensorMeasure' + str(sensor_no)
    for sensor_no in range(1, 22)
    if sensor_no not in (1, 5, 6, 9, 10, 14, 16, 18, 19)
]

In [106]:
# Attach the 'RUL' label column to each raw frame.
# Training labels are capped at 125 via clip(upper=125); test labels are copied
# unchanged (no cap is applied to them here).
# NOTE(review): Series assignment aligns on the index, so this assumes each
# label table has the same row index as its raw frame — confirm.
train_df['RUL'] = train_labels_df['RUL'].clip(upper=125)
test_df['RUL'] = test_labels_df['RUL']

In [107]:
# Group each frame's rows by the 'ID' column. sort=False keeps the groups in
# first-appearance order instead of sorting the keys.
# split_data() reads these two module-level objects to build its partitions.
train_gb = train_df.groupby(['ID'], sort = False)
test_gb = test_df.groupby(['ID'], sort = False)

In [108]:
def _scaled_floor(fraction, scale):
    """Return floor(fraction * scale), ignoring binary floating-point noise."""
    # e.g. 0.29 * 100 == 28.999999999999996: rounding first gives 29, not 28.
    return int(round(fraction * scale, 6))


def _frames_by_id(gb):
    """Map every group key of *gb* to its sub-frame, unwrapping 1-tuple keys."""
    frames = {}
    for key, frame in gb:
        # pandas >= 2.0 yields 1-tuples when the frame was grouped by a
        # one-element list such as ['ID']; older versions yield the bare value.
        if isinstance(key, tuple) and len(key) == 1:
            key = key[0]
        frames[key] = frame
    return frames


# Args: List of pcts -> ex: [0.75, 0.2, 0.05]
def split_data(pcts, mode='train', seed=None):
    """Randomly split the grouped data into disjoint per-worker CSV files.

    Whole groups (one per distinct 'ID' key of the module-level group-by
    object) are assigned to workers; a group is never split across workers.
    Worker ``i`` receives ``floor(pcts[i] * n_groups)`` groups, drawn without
    replacement from the groups not yet assigned.  Groups left over after
    flooring are not written anywhere.

    Each partition is written to
    ``./data/fd003/raw/<n> workers/<p0>-<p1>-.../<mode>_partition_<i>.csv``
    where ``<pk>`` is ``pcts[k]`` expressed as a whole percentage.  The
    directory is created if it does not exist.

    Args:
        pcts: Sequence of fractions, one per worker, e.g. ``[0.75, 0.2, 0.05]``.
            An empty sequence writes nothing.
        mode: ``'train'`` selects ``train_gb``; any other value selects
            ``test_gb``.  The value is also used verbatim as the file-name
            prefix.
        seed: Optional seed for a private ``np.random.RandomState`` so the
            split is reproducible.  ``None`` (default) draws from the global
            NumPy random state, as before.

    Raises:
        ValueError: If a fraction maps to zero groups, or (from NumPy) if the
            requested counts exceed the number of groups available.
    """
    import os  # local import: the notebook's import cell does not provide os

    gb = train_gb if mode == 'train' else test_gb

    n_workers = len(pcts)
    if n_workers == 0:
        return

    # Use the real group keys rather than assuming they are exactly 1..N.
    frames = _frames_by_id(gb)
    n_units = len(frames)

    counts = [_scaled_floor(pct, n_units) for pct in pcts]
    for pct, count in zip(pcts, counts):
        if count < 1:
            raise ValueError(
                'pct ' + str(pct) + ' selects no groups out of ' + str(n_units)
            )

    rng = np.random if seed is None else np.random.RandomState(seed)

    # Draw each worker's groups from what is left, keeping partitions disjoint.
    remaining = np.sort(np.array(list(frames)))
    assignments = []
    for count in counts:
        chosen = np.sort(rng.choice(remaining, count, replace=False))
        assignments.append(chosen)
        remaining = np.setdiff1d(remaining, chosen)

    # The folder name depends only on pcts, so build it (and the folder) once.
    folder_name = '-'.join(str(_scaled_floor(pct, 100)) for pct in pcts)
    out_dir = './data/fd003/raw/' + str(n_workers) + ' workers/' + folder_name + '/'
    os.makedirs(out_dir, exist_ok=True)

    for worker, unit_ids in enumerate(assignments):
        # One concat per partition; groups appear in ascending key order.
        partition = pd.concat([frames[unit_id] for unit_id in unit_ids])
        partition.to_csv(
            out_dir + mode + '_partition_' + str(worker) + '.csv',
            sep=',',
            index=False,
        )

# Every worker-share configuration to generate, grouped by number of workers.
pct_list_total = [
    # 2 workers
    [0.5, 0.5],
    [0.6, 0.4],
    [0.7, 0.3],
    [0.8, 0.2],
    [0.9, 0.1],
    # 3 workers
    [0.4, 0.3, 0.3],
    [0.4, 0.4, 0.2],
    [0.5, 0.4, 0.1],
    [0.6, 0.3, 0.1],
    [0.7, 0.2, 0.1],
    [0.8, 0.1, 0.1],
    # 4 workers
    [0.3, 0.3, 0.2, 0.2],
    [0.3, 0.3, 0.3, 0.1],
    [0.4, 0.3, 0.2, 0.1],
    [0.4, 0.4, 0.1, 0.1],
    [0.5, 0.3, 0.1, 0.1],
    [0.6, 0.2, 0.1, 0.1],
    [0.7, 0.1, 0.1, 0.1],
    # 5 workers
    [0.2, 0.2, 0.2, 0.2, 0.2],
    [0.3, 0.2, 0.2, 0.2, 0.1],
    [0.3, 0.3, 0.2, 0.1, 0.1],
    [0.4, 0.3, 0.1, 0.1, 0.1],
    [0.5, 0.2, 0.1, 0.1, 0.1],
    [0.6, 0.1, 0.1, 0.1, 0.1],
]

# Echo each configuration, then write its train partitions followed by its
# test partitions.
for pct_list in pct_list_total:
    print(pct_list)
    for split_mode in ('train', 'test'):
        split_data(pct_list, mode=split_mode)

[0.5, 0.5]
[0.6, 0.4]
[0.7, 0.3]
[0.8, 0.2]
[0.9, 0.1]
[0.4, 0.3, 0.3]
[0.4, 0.4, 0.2]
[0.5, 0.4, 0.1]
[0.6, 0.3, 0.1]
[0.7, 0.2, 0.1]
[0.8, 0.1, 0.1]
[0.3, 0.3, 0.2, 0.2]
[0.3, 0.3, 0.3, 0.1]
[0.4, 0.3, 0.2, 0.1]
[0.4, 0.4, 0.1, 0.1]
[0.5, 0.3, 0.1, 0.1]
[0.6, 0.2, 0.1, 0.1]
[0.7, 0.1, 0.1, 0.1]
[0.2, 0.2, 0.2, 0.2, 0.2]
[0.3, 0.2, 0.2, 0.2, 0.1]
[0.3, 0.3, 0.2, 0.1, 0.1]
[0.4, 0.3, 0.1, 0.1, 0.1]
[0.5, 0.2, 0.1, 0.1, 0.1]
[0.6, 0.1, 0.1, 0.1, 0.1]


### Test data - convert format

In [109]:
train_df

Unnamed: 0,ID,Cycle,SensorMeasure2,SensorMeasure3,SensorMeasure4,SensorMeasure6,SensorMeasure7,SensorMeasure8,SensorMeasure9,SensorMeasure10,SensorMeasure11,SensorMeasure12,SensorMeasure13,SensorMeasure14,SensorMeasure15,SensorMeasure17,SensorMeasure20,SensorMeasure21,RUL
0,1,1,642.36,1583.23,1396.84,21.61,553.97,2387.96,9062.17,1.3,47.30,522.31,2388.01,8145.32,8.4246,391,39.11,23.3537,125
1,1,2,642.50,1584.69,1396.89,21.61,554.55,2388.00,9061.78,1.3,47.23,522.42,2388.03,8152.85,8.4403,392,38.99,23.4491,125
2,1,3,642.18,1582.35,1405.61,21.61,554.43,2388.03,9070.23,1.3,47.22,522.03,2388.00,8150.17,8.3901,391,38.85,23.3669,125
3,1,4,642.92,1585.61,1392.27,21.61,555.21,2388.00,9064.57,1.3,47.24,522.49,2388.08,8146.56,8.3878,392,38.96,23.2951,125
4,1,5,641.68,1588.63,1397.65,21.61,554.74,2388.04,9076.14,1.3,47.15,522.58,2388.03,8147.80,8.3869,392,39.14,23.4583,125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24715,100,148,643.78,1596.01,1424.11,21.61,551.86,2388.25,9070.82,1.3,48.27,519.66,2388.30,8138.08,8.5036,394,38.44,22.9631,4
24716,100,149,643.29,1596.38,1429.14,21.61,551.86,2388.23,9064.60,1.3,48.13,519.91,2388.28,8144.36,8.5174,395,38.50,22.9746,3
24717,100,150,643.84,1604.53,1431.41,21.61,551.30,2388.25,9063.45,1.3,48.18,519.44,2388.24,8135.95,8.5223,396,38.39,23.0682,2
24718,100,151,643.94,1597.56,1426.57,21.61,550.69,2388.26,9062.22,1.3,48.05,520.01,2388.26,8141.24,8.5148,395,38.31,23.0753,1
