In [15]:
import numpy as np
import pandas as pd
import pickle
import os
import matplotlib.pyplot as plt

In [19]:
# Render matplotlib figures directly in the notebook output.
%matplotlib inline
# Seed NumPy's global RNG so that later np.random calls (e.g. the random
# validation/test engine split) give the same result on every fresh run.
np.random.seed(42)

In [49]:
def import_data(filepath):
    """Load a space-delimited, header-less data file into a DataFrame.

    Columns are named ``engine_id``, ``cycle``, ``setting1``..``setting3``
    and ``sensor1``..``sensor21``. A 27th placeholder column named ``dummy``
    is read as well and removed before returning.

    Parameters
    ----------
    filepath : str or path-like
        Location of the file; passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed data with the 26 named columns listed above.
    """
    id_cols = ['engine_id', 'cycle']
    setting_cols = ['setting%d' % k for k in range(1, 4)]
    sensor_cols = ['sensor%d' % k for k in range(1, 22)]
    # Placeholder name for one extra field per line -- presumably the empty
    # field produced by a trailing delimiter; TODO confirm against the data.
    header = id_cols + setting_cols + sensor_cols + ['dummy']

    frame = pd.read_csv(filepath, sep=' ', index_col=False, names=header, header=None)
    return frame.drop(columns='dummy')

In [108]:
def adjust_RUL(df):
    """Turn a per-engine RUL offset into a per-row remaining-useful-life value.

    For every row the new value is
    ``RUL + (largest cycle recorded for that row's engine_id) - cycle``,
    truncated to an integer. The ``RUL`` column is overwritten on ``df``
    itself (the frame is mutated in place) and the same frame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``engine_id``, ``cycle`` and ``RUL`` columns.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with its ``RUL`` column replaced by int64 values.
    """
    # A single grouped pass gives each row the last cycle of its engine,
    # instead of re-filtering the whole frame once per row.
    last_cycle = df.groupby('engine_id')['cycle'].transform('max')
    df['RUL'] = (df['RUL'] + last_cycle - df['cycle']).astype('int64')
    return df


def add_warnings(df, w0, w1):
    """Add threshold-based label columns derived from ``RUL``.

    Two integer columns are written onto ``df`` (the frame is mutated in
    place) and the same frame is returned:

    * ``w1`` -- 1 where ``RUL <= w1``, otherwise 0.
    * ``w0`` -- the ``w1`` flag plus 1 where ``RUL <= w0``. When ``w0 <= w1``
      this gives 0 (above both thresholds), 1 (within ``w1`` only) or
      2 (within ``w0`` as well).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``RUL`` column.
    w0 : number
        Threshold for the second (additive) flag.
    w1 : number
        Threshold for the first flag.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with ``w1`` and ``w0`` columns set.
    """
    rul = df['RUL']
    # Vectorized comparisons instead of row-wise apply: same values, works on
    # an empty frame, and avoids a Python-level call per row.
    within_w1 = (rul <= w1).astype('int64')
    within_w0 = (rul <= w0).astype('int64')
    df['w1'] = within_w1
    df['w0'] = within_w1 + within_w0
    return df


def label_df(df, w0=15, w1=30):
    """Run both labelling steps on ``df`` and return the labelled frame.

    ``adjust_RUL`` is applied first, then ``add_warnings`` with the given
    thresholds.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to label; handed to ``adjust_RUL`` and then ``add_warnings``.
    w0, w1 : number
        Thresholds forwarded to ``add_warnings`` (defaults 15 and 30).

    Returns
    -------
    pandas.DataFrame
        Whatever ``add_warnings`` returns for the adjusted frame.
    """
    with_rul = adjust_RUL(df)
    return add_warnings(with_rul, w0, w1)

In [109]:
# Load the training file and start every row at RUL = 0, so that label_df's
# adjustment leaves RUL = (engine's largest cycle) - (row's cycle).
train = import_data('./data/PM_train.txt').assign(RUL=0)
train = label_df(train)
print('Train DF shape:', train.shape)

Train DF shape: (20631, 29)


In [90]:
# Readings for the held-out engines (later split into validation and test).
val_test = import_data('./data/PM_test.txt')

# Ground-truth file: a single RUL column, read without a header row.
RULs = pd.read_csv('./data/PM_truth.txt', names=['RUL'])
# Number the rows 1..N in file order (row i is treated as engine i). The count
# comes from the file itself rather than a hard-coded 100, so a truth file of
# a different length is not silently truncated or padded with NaN ids.
RULs['engine_id'] = range(1, len(RULs) + 1)

# Attach each engine's RUL offset to all of its rows, then derive the labels.
val_test = val_test.merge(RULs, on='engine_id')
val_test = label_df(val_test)

In [91]:
# Draw 50 of the engine ids 1..100 (without repeats) for the validation set;
# every remaining engine goes to the test set.
val_ids = np.random.choice(list(range(1, 101)), size=50, replace=False)

# Membership mask computed once and reused for both halves of the split.
is_val_engine = val_test['engine_id'].isin(val_ids)
val = val_test[is_val_engine]
test = val_test[~is_val_engine]

print('Validation DF shape:', val.shape)
print('Test DF shape:', test.shape)

Validation DF shape: (6927, 29)
Test DF shape: (6169, 29)
