In [1]:
import pycaret
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt

In [2]:
# Names of the 21 measured output columns, in file order.
output_variables = ['T2','T24','T30','T50','P2','P15','P30','Nf','Nc','epr','Ps30','phi','NRf','NRc','BPR','farB','htBleed','Nf_dmd','PCNfR_dmd','W31','W32']

# Feature columns: 'o1'..'o3' followed by every output variable.
features = [f'o{n}' for n in (1, 2, 3)] + output_variables
# Full column layout of a data file: unit id, cycle counter, then the features.
column_names = ['id', 'cycle', *features]
# File names of the four training subsets (FD001 .. FD004).
train_files = ['train_FD00%d.txt' % i for i in range(1, 5)]

In [3]:
# Read every training file listed in `train_files` into its own DataFrame,
# keeping the same order as `train_files`.
# The files are parsed as header-less, whitespace-separated tables, so the
# column names are supplied explicitly via `column_names`.
# The separator is written as a raw string: '\s' is not a valid Python escape
# sequence, and a non-raw literal emits a warning on current Python versions.
train_dfs = [
    pd.read_csv(f'data/{train_file}', header=None, sep=r'\s+', names=column_names)
    for train_file in train_files
]

In [5]:
## Split between train and validation ##
# The split is done per unit (the 'id' column), not per row, so every row of a
# given unit lands in exactly one of the two sets.
split = 0.7        # fraction of units assigned to the training set
SPLIT_SEED = 42    # fixed seed so Restart & Run All reproduces the same split
train_df = train_dfs[2]   # third entry of train_files (train_FD003.txt)

# random.sample() requires a sequence: passing a set was deprecated in
# Python 3.9 and raises TypeError from 3.11. Sorting the ids also makes the
# draw independent of set iteration order.
unit_ids = sorted(train_df['id'].unique())
units = set(unit_ids)

# A dedicated Random instance keeps the split reproducible without touching
# the global `random` state used elsewhere.
split_rng = random.Random(SPLIT_SEED)
train_units = split_rng.sample(unit_ids, k=int(split*len(unit_ids)))
val_units = units - set(train_units)

# Materialise both subsets as independent frames with a fresh 0..n-1 index.
train_set = train_df[train_df['id'].isin(train_units)].copy().reset_index(drop=True)
val_set = train_df[train_df['id'].isin(val_units)].copy().reset_index(drop=True)

In [6]:
## RUL Calculation ##
# RUL of a row = highest cycle recorded for its unit minus the row's own
# cycle, so the last recorded cycle of every unit gets RUL 0.
# transform('max') broadcasts each unit's maximum cycle back onto its rows,
# which replaces the original per-row Python lambda with one vectorised
# subtraction and avoids aggregating columns other than 'cycle'.
last_cycle = train_set.groupby('id')['cycle'].transform('max')

# 'id' and 'cycle' are removed once RUL is derived so that they are not among
# the remaining columns of the training frame.
# NOTE: this cell consumes 'id' and 'cycle'; re-run the split cell before
# re-running it.
train_set = (
    train_set
    .assign(RUL=last_cycle - train_set['cycle'])
    .drop(columns=['id', 'cycle'])
)

# Data Pre-Processing

In [7]:
from sklearn.preprocessing import StandardScaler

In [8]:
# Scaling
# Keep only the columns whose variance exceeds 1e-10 (drops constant columns),
# then separate the feature matrix from the 'RUL' target.
has_variance = train_set.var() > 10**-10
x_train = train_set.loc[:, has_variance].drop(columns=['RUL'])
y_train = train_set['RUL']

# Fit the standardiser on the training features and replace them with their
# scaled values, preserving the column labels.
scaler = StandardScaler()
scaler.fit(x_train)
x_train = pd.DataFrame(scaler.transform(x_train), columns=x_train.columns)

In [9]:
## Saving mean and variance
# Keep the per-feature statistics learned by the fitted scaler in their own
# variables.
scaler_mean, scaler_var = scaler.mean_, scaler.var_

# Sklearn Pipeline