In [1]:
import os
import h5py
import time
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
from pandas import DataFrame
import matplotlib.pyplot as plt
from matplotlib import gridspec
%matplotlib inline

In [10]:
!wget "https://ti.arc.nasa.gov/m/project/prognostic-repository/data_set.zip"

--2021-03-23 06:46:26--  https://ti.arc.nasa.gov/m/project/prognostic-repository/data_set.zip
Resolving ti.arc.nasa.gov (ti.arc.nasa.gov)... 128.102.105.66, 2001:4d0:6311:2227:14b6:372b:2078:2a94
Connecting to ti.arc.nasa.gov (ti.arc.nasa.gov)|128.102.105.66|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 15840929269 (15G) [application/zip]
Saving to: ‘data_set.zip’


2021-03-23 06:52:54 (39.0 MB/s) - ‘data_set.zip’ saved [15840929269/15840929269]



In [12]:
!unzip data_set.zip

Archive:  data_set.zip
   creating: data_set/
  inflating: data_set/N-CMAPSS_DS01-005.h5  
  inflating: data_set/N-CMAPSS_DS02-006.h5  
  inflating: data_set/N-CMAPSS_DS03-012.h5  
  inflating: data_set/N-CMAPSS_DS04.h5  
  inflating: data_set/N-CMAPSS_DS05.h5  
  inflating: data_set/N-CMAPSS_DS06.h5  
  inflating: data_set/N-CMAPSS_DS07.h5  
  inflating: data_set/N-CMAPSS_DS08a-009.h5  
  inflating: data_set/N-CMAPSS_DS08c-008.h5  
  inflating: data_set/N-CMAPSS_DS08d-010.h5  
  inflating: data_set/N-CMAPSS_Example_data_loading_and_exploration.ipynb  
  inflating: data_set/Run_to_Failure_Simulation_Under_Real_Flight_Conditions_Dataset.pdf  


In [11]:
# List the contents of the current working directory.
!ls

data_set.zip  sample_data


In [17]:
### Set-up - Define file location
filename = 'data_set/N-CMAPSS_DS02-006.h5'


def _read_dataset(hdf, key):
    """Return the HDF5 dataset stored under `key` as a NumPy array.

    The file is indexed directly rather than through `hdf.get`: a missing
    dataset then raises a KeyError naming `key`, whereas `hdf.get` would
    return None and `np.array(None)` would silently yield a 0-d object array.
    """
    return np.array(hdf[key])


def _read_varnames(hdf, key):
    """Return the variable names stored under `key` as a list of unicode strings (dtype U20)."""
    return list(np.array(_read_dataset(hdf, key), dtype='U20'))


# Time tracking: wall-clock time, so that time spent waiting on disk I/O is
# included (CPU-only time under-reports an I/O-bound load).
t = time.perf_counter()

# Load data
with h5py.File(filename, 'r') as hdf:
    # Development set
    W_dev = _read_dataset(hdf, 'W_dev')             # W
    X_s_dev = _read_dataset(hdf, 'X_s_dev')         # X_s
    X_v_dev = _read_dataset(hdf, 'X_v_dev')         # X_v
    T_dev = _read_dataset(hdf, 'T_dev')             # T
    Y_dev = _read_dataset(hdf, 'Y_dev')             # RUL
    A_dev = _read_dataset(hdf, 'A_dev')             # Auxiliary

    # Test set
    W_test = _read_dataset(hdf, 'W_test')           # W
    X_s_test = _read_dataset(hdf, 'X_s_test')       # X_s
    X_v_test = _read_dataset(hdf, 'X_v_test')       # X_v
    T_test = _read_dataset(hdf, 'T_test')           # T
    Y_test = _read_dataset(hdf, 'Y_test')           # RUL
    A_test = _read_dataset(hdf, 'A_test')           # Auxiliary

    # Variable names, converted from the stored arrays to lists of strings
    W_var = _read_varnames(hdf, 'W_var')
    X_s_var = _read_varnames(hdf, 'X_s_var')
    X_v_var = _read_varnames(hdf, 'X_v_var')
    T_var = _read_varnames(hdf, 'T_var')
    A_var = _read_varnames(hdf, 'A_var')

# Stack development and test rows (development first) into one array per group
W = np.concatenate((W_dev, W_test), axis=0)
X_s = np.concatenate((X_s_dev, X_s_test), axis=0)
X_v = np.concatenate((X_v_dev, X_v_test), axis=0)
T = np.concatenate((T_dev, T_test), axis=0)
Y = np.concatenate((Y_dev, Y_test), axis=0)
A = np.concatenate((A_dev, A_test), axis=0)

print('')
print("Operation time (min): " , (time.perf_counter()-t)/60)
print('')
print ("W shape: " + str(W.shape))
print ("X_s shape: " + str(X_s.shape))
print ("X_v shape: " + str(X_v.shape))
print ("T shape: " + str(T.shape))
print ("A shape: " + str(A.shape))


Operation time (min):  0.11340095329999998

W shape: (6517190, 4)
X_s shape: (6517190, 14)
X_v shape: (6517190, 14)
T shape: (6517190, 10)
A shape: (6517190, 4)


In [18]:
# Wrap the auxiliary array in a labelled DataFrame (column names taken from
# A_var) and display per-column summary statistics.
df_A = pd.DataFrame(A, columns=A_var)
df_A.describe()

Unnamed: 0,unit,cycle,Fc,hs
count,6517190.0,6517190.0,6517190.0,6517190.0
mean,11.66711,36.92344,2.885376,0.2424109
std,6.039334,21.87185,0.3867783,0.4285416
min,2.0,1.0,1.0,0.0
25%,5.0,18.0,3.0,0.0
50%,11.0,36.0,3.0,0.0
75%,18.0,54.0,3.0,0.0
max,20.0,89.0,3.0,1.0


In [20]:
# Preview the first ten rows of the auxiliary table.
df_A.iloc[:10]

Unnamed: 0,unit,cycle,Fc,hs
0,2.0,1.0,3.0,1.0
1,2.0,1.0,3.0,1.0
2,2.0,1.0,3.0,1.0
3,2.0,1.0,3.0,1.0
4,2.0,1.0,3.0,1.0
5,2.0,1.0,3.0,1.0
6,2.0,1.0,3.0,1.0
7,2.0,1.0,3.0,1.0
8,2.0,1.0,3.0,1.0
9,2.0,1.0,3.0,1.0
