In [1]:
from jupyter_dash import JupyterDash

In [2]:
import dash
from dash import dcc
from dash import html

In [3]:
from pathlib import Path

import pandas as pd
import vaex

In [4]:
# pandas display limits: render at most 30 columns and 50 rows of a frame
pd.set_option(
    'display.max_columns', 30,
    'display.max_rows', 50,
)

# data viz libraries
import matplotlib.pyplot as plt
import seaborn as sns

# seaborn plotting context: "notebook" preset with explicit font sizes
# for general text, axes titles and axes labels
sns.set_context(
    "notebook",
    rc={
        "font.size": 8,
        "axes.titlesize": 8,
        "axes.labelsize": 13,
    },
)

# library containing utility functions
import utils
# library containing data exploration functions
import exploration
# library containing data processing functions
import processing
# library containing data visualization functions
import visualization

## Read data and create train and test datasets

### Train dataset

In [5]:
# Load the data files and select the training frame.
# The cell output lists row/column counts and dtypes for each loaded frame.

# Column names handed to the loader, in file order.
dataschema = [
    'Engine_no', 'Cycle',
    'Altitude', 'Mach', 'TRA',
    'T2', 'T24', 'T30', 'T50',
    'P2', 'P15', 'P30',
    'Nf', 'Nc', 'epr', 'Ps30', 'phi', 'NRf', 'NRc', 'BPR', 'farB', 'htBleed',
    'Nf_dmd', 'PCNfR_dmd', 'W31', 'W32',
]

# Load every frame, then run the RUL extraction step over the loaded frames.
dataframes = processing.extract_rul(
    utils.load_data('RUL', ['RUL'], dataschema)
)

# Indicator columns: everything after the two leading columns (Engine_no, Cycle).
indicators = dataschema[2:]

# Number of the dataset to analyze; it selects the 'train_FD00<n>' frame below.
dataset_no = 3
df_train = dataframes[f'train_FD00{dataset_no}']

  0%|          | 0/3 [00:00<?, ?it/s]


------------------------------
---------train_FD003----------
------------------------------
Number of rows : 24720 
Number of columns : 26 
------------------------------
Engine_no      int64
Cycle          int64
Altitude     float64
Mach         float64
TRA          float64
T2           float64
T24          float64
T30          float64
T50          float64
P2           float64
P15          float64
P30          float64
Nf           float64
Nc           float64
epr          float64
Ps30         float64
phi          float64
NRf          float64
NRc          float64
BPR          float64
farB         float64
htBleed        int64
Nf_dmd         int64
PCNfR_dmd    float64
W31          float64
W32          float64

------------------------------
----------RUL_FD003-----------
------------------------------
Number of rows : 100 
Number of columns : 1 
------------------------------
RUL    int64

------------------------------
----------test_FD003----------
------------------------------
Numb

In [6]:
# Preview the first five rows of the training frame
df_train.head(n=5)

Unnamed: 0,Engine_no,Cycle,Altitude,Mach,TRA,T2,T24,T30,T50,P2,P15,P30,Nf,Nc,epr,Ps30,phi,NRf,NRc,BPR,farB,htBleed,Nf_dmd,PCNfR_dmd,W31,W32,RUL
0,1,1,-0.0005,0.0004,100.0,518.67,642.36,1583.23,1396.84,14.62,21.61,553.97,2387.96,9062.17,1.3,47.3,522.31,2388.01,8145.32,8.4246,0.03,391,2388,100.0,39.11,23.3537,258
1,1,2,0.0008,-0.0003,100.0,518.67,642.5,1584.69,1396.89,14.62,21.61,554.55,2388.0,9061.78,1.3,47.23,522.42,2388.03,8152.85,8.4403,0.03,392,2388,100.0,38.99,23.4491,257
2,1,3,-0.0014,-0.0002,100.0,518.67,642.18,1582.35,1405.61,14.62,21.61,554.43,2388.03,9070.23,1.3,47.22,522.03,2388.0,8150.17,8.3901,0.03,391,2388,100.0,38.85,23.3669,256
3,1,4,-0.002,0.0001,100.0,518.67,642.92,1585.61,1392.27,14.62,21.61,555.21,2388.0,9064.57,1.3,47.24,522.49,2388.08,8146.56,8.3878,0.03,392,2388,100.0,38.96,23.2951,255
4,1,5,0.0016,0.0,100.0,518.67,641.68,1588.63,1397.65,14.62,21.61,554.74,2388.04,9076.14,1.3,47.15,522.58,2388.03,8147.8,8.3869,0.03,392,2388,100.0,39.14,23.4583,254


### Test dataset

In [7]:
# Select the test frame of the dataset chosen via dataset_no
# (same key pattern as the 'train_FD00<n>' lookup used for the training frame).
df_test = dataframes[f'test_FD00{dataset_no}']

In [8]:
# Preview the first five rows of the test frame
df_test.head(n=5)

Unnamed: 0,Engine_no,Cycle,Altitude,Mach,TRA,T2,T24,T30,T50,P2,P15,P30,Nf,Nc,epr,Ps30,phi,NRf,NRc,BPR,farB,htBleed,Nf_dmd,PCNfR_dmd,W31,W32,RUL
0,1,1,-0.0017,-0.0004,100.0,518.67,641.94,1581.93,1396.93,14.62,21.58,554.56,2387.93,9048.65,1.3,47.09,521.89,2387.94,8133.48,8.376,0.03,391,2388,100.0,39.07,23.4468,276
1,1,2,0.0006,-0.0002,100.0,518.67,642.02,1584.86,1398.9,14.62,21.58,554.1,2387.94,9046.53,1.3,47.08,521.85,2388.01,8137.44,8.4062,0.03,391,2388,100.0,39.04,23.4807,275
2,1,3,0.0014,-0.0003,100.0,518.67,641.68,1581.78,1391.92,14.62,21.58,554.41,2387.97,9054.92,1.3,47.15,522.1,2387.94,8138.25,8.3553,0.03,391,2388,100.0,39.1,23.4244,274
3,1,4,0.0027,0.0001,100.0,518.67,642.2,1584.53,1395.34,14.62,21.59,554.58,2387.94,9055.04,1.3,47.26,522.45,2387.96,8137.07,8.3709,0.03,392,2388,100.0,38.97,23.4782,273
4,1,5,-0.0001,0.0001,100.0,518.67,642.46,1589.03,1395.86,14.62,21.58,554.16,2388.01,9048.59,1.3,46.94,521.91,2387.97,8134.2,8.4146,0.03,391,2388,100.0,39.09,23.395,272


## Export datasets to HDF5

In [9]:
# Convert both pandas frames to vaex DataFrames (train first, then test)
df_vaex_train, df_vaex_test = map(vaex.from_pandas, (df_train, df_test))

In [13]:
# Export the training frame to HDF5.
# The output folder is created first (no-op if it already exists) so the export
# does not depend on 'data/vaex' having been created by hand beforehand.
Path('data/vaex').mkdir(parents=True, exist_ok=True)
df_vaex_train.export_hdf5('data/vaex/data_train.hdf5')

In [14]:
# Export the test frame to HDF5.
# The output folder is created first (no-op if it already exists) so this cell
# also works when run on its own, without 'data/vaex' existing beforehand.
Path('data/vaex').mkdir(parents=True, exist_ok=True)
df_vaex_test.export_hdf5('data/vaex/data_test.hdf5')