In [77]:
import os

from pymongo import MongoClient
from pymongo.errors import OperationFailure
import pandas as pd

import h5py
import time
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
from pandas import DataFrame
import matplotlib.pyplot as plt
from matplotlib import gridspec

In [78]:
def check_mongo_connection(client_uri):
    connection = MongoClient(client_uri)

    try:
        connection.database_names
        print('\n')
        print(f'Data Base Connection to {client_uri} Established........')
        print('DBs available:')
        for db_name in connection.list_databases():
            print(' DB Name:', db_name['name'])
            db = connection[db_name['name']]
            collections =[]
            for collection_name in db.list_collection_names():
                collections.append(collection_name)
            print('   Colections:', collections)
          
        
    except OperationFailure as err:
        print(f"Data Base Connection failed. Error: {err}")

    return connection

client_uri = "mongodb://192.168.1.20:27017"
server = check_mongo_connection(client_uri)




Data Base Connection to mongodb://192.168.1.20:27017 Established........
DBs available:
 DB Name: admin
   Colections: ['system.version']
 DB Name: config
   Colections: ['system.sessions']
 DB Name: local
   Colections: ['startup_log']


## Source Dataset 1
6. Turbofan Engine Degradation Simulation
Engine degradation simulation was carried out using the Commercial Modular Aero-Propulsion System Simulation (C-MAPSS). Four different sets were simulated under different combinations of operational conditions and fault modes. This records several sensor channels to characterize fault evolution. The data set was provided by the NASA Ames Prognostics Center of Excellence (PCoE).

https://data.nasa.gov/Aerospace/CMAPSS-Jet-Engine-Simulated-Data/ff5v-kuh6
#### Read and Store Raw Data to DB

In [79]:
# Raw files descriptions from website notes 
meta_data = [
    {
        'Data Set': 'FD001',
        'Train trajectories': 100,
        'Test trajectories': 100,
        'Conditions': 'ONE (Sea Level)',
        'Fault Modes': 'ONE (HPC Degradation)'
    },
    {
        'Data Set': 'FD002',
        'Train trajectories': 260,
        'Test trajectories': 259,
        'Conditions': 'SIX',
        'Fault Modes': 'ONE (HPC Degradation)'
    },
    {
        'Data Set': 'FD003',
        'Train trajectories': 100,
        'Test trajectories': 100,
        'Conditions': 'ONE (Sea Level)',
        'Fault Modes': 'TWO (HPC Degradation, Fan Degradation)'
    },
    {
        'Data Set': 'FD004',
        'Train trajectories': 248,
        'Test trajectories': 249,
        'Conditions': 'SIX',
        'Fault Modes': 'TWO (HPC Degradation, Fan Degradation)'
    },

]

column_names = [
    'unit #', 'time (cycles)', 'op. setting 1', 'op. setting 2', 'op. setting 3',
    'sensor 01', 'sensor 02', 'sensor 03', 'sensor 04', 'sensor 05',
    'sensor 06', 'sensor 07', 'sensor 08', 'sensor 09', 'sensor 10',
    'sensor 11', 'sensor 12', 'sensor 13', 'sensor 14', 'sensor 15',
    'sensor 16', 'sensor 17', 'sensor 18', 'sensor 19', 'sensor 20', 'sensor 21',
]

data_sets = pd.DataFrame.from_records(meta_data)

data_sets.set_index('Data Set', inplace = True)
# data_sets

In [81]:
# Read downloaded data and create DB tables (MongoDB Collections)
def data_records(data_sets):

    file_path = '/Volumes/share/Datasets/6_TurbofanEngineDegradationSimulationDataSet/CMAPSSData/'
    file_prefixes = ['train', 'test']
    column_groups = {}
    database = server[db_set1]
    for data_set, row in data_sets.iterrows():
        columns = {}
        for prefix in file_prefixes:
            full_path = ''.join([file_path, prefix, '_', data_set,'.txt'])
            df = pd.read_csv(full_path, sep=" ")
            df.dropna(axis=1, how='all', inplace=True)
            df.columns = column_names

            # Store dataset in the DB

            colection = database[''.join([prefix, '_', data_set])]
            new_data = colection.insert_many(df.to_dict('records'))
        column_groups[data_set] = columns

    return column_groups


db_set1 = 'TurboFanDegradation_Set1'

file_prefix = ['train', 'test']
record_columns = data_records(data_sets)
new_columns = pd.DataFrame.from_dict(record_columns,orient='index')

data_sets = pd.concat([data_sets, new_columns], axis=1)
data_sets



Unnamed: 0_level_0,Train trajectories,Test trajectories,Conditions,Fault Modes
Data Set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
FD001,100,100,ONE (Sea Level),ONE (HPC Degradation)
FD002,260,259,SIX,ONE (HPC Degradation)
FD003,100,100,ONE (Sea Level),"TWO (HPC Degradation, Fan Degradation)"
FD004,248,249,SIX,"TWO (HPC Degradation, Fan Degradation)"


In [82]:
# List the new created tables (Mongo DB collections)
# list of strings
collections = server[db_set1].list_collection_names()
collections = sorted(collections)
documents = []
for collection in collections:
    documents.append(server[db_set1][collection].estimated_document_count())
   
# Calling DataFrame constructor after zipping
# both lists, with columns specified
mongo_collections_df = pd.DataFrame(list(zip(collections, documents)),
               columns =['Collection', 'Documents (Tot. Records)'])
mongo_collections_df



Unnamed: 0,Collection,Documents (Tot. Records)
0,test_FD001,26190
1,test_FD002,67980
2,test_FD003,16595
3,test_FD004,41213
4,train_FD001,41260
5,train_FD002,107516
6,train_FD003,24719
7,train_FD004,61248


## Source Dataset 2
17. Turbofan Engine Degradation Simulation-2
The generation of data-driven prognostics models requires the availability of data sets with run-to-failure trajectories. To contribute to the development of these methods, the data set provides a new realistic data set of run-to-failure trajectories for a small fleet of aircraft engines under realistic flight conditions. The damage propagation modelling used for the generation of this synthetic data set builds on the modeling strategy from previous work. The data set was generated with the Commercial Modular Aero-Propulsion System Simulation (C-MAPSS) dynamical model. The data set has been provided by the NASA Prognostics Center of Excellence (PCoE) in collaboration with ETH Zurich and PARC.

https://phm-datasets.s3.amazonaws.com/NASA/17.+Turbofan+Engine+Degradation+Simulation+Data+Set+2.zip
#### Read and Store Raw Data to DB

In [None]:
# Data files are very large and scored in H5 format
# The following function opends de files and 
def read_h5_file(filename):
    # Time tracking, Operation time (min):  0.003
    t = time.process_time()  

    # Load data
    with h5py.File(filename, 'r') as hdf:
            # Development set
            W_dev = np.array(hdf.get('W_dev'))             # W
            X_s_dev = np.array(hdf.get('X_s_dev'))         # X_s
            X_v_dev = np.array(hdf.get('X_v_dev'))         # X_v
            T_dev = np.array(hdf.get('T_dev'))             # T
            Y_dev = np.array(hdf.get('Y_dev'))             # RUL  
            A_dev = np.array(hdf.get('A_dev'))             # Auxiliary

            # Test set
            W_test = np.array(hdf.get('W_test'))           # W
            X_s_test = np.array(hdf.get('X_s_test'))       # X_s
            X_v_test = np.array(hdf.get('X_v_test'))       # X_v
            T_test = np.array(hdf.get('T_test'))           # T
            Y_test = np.array(hdf.get('Y_test'))           # RUL  
            A_test = np.array(hdf.get('A_test'))           # Auxiliary
            
            # Varnams
            W_var = np.array(hdf.get('W_var'))
            X_s_var = np.array(hdf.get('X_s_var'))  
            X_v_var = np.array(hdf.get('X_v_var')) 
            T_var = np.array(hdf.get('T_var'))
            A_var = np.array(hdf.get('A_var'))
            
            # from np.array to list dtype U4/U5
            W_var = list(np.array(W_var, dtype='U20'))
            X_s_var = list(np.array(X_s_var, dtype='U20'))  
            X_v_var = list(np.array(X_v_var, dtype='U20')) 
            T_var = list(np.array(T_var, dtype='U20'))
            A_var = list(np.array(A_var, dtype='U20'))
                            
    W = np.concatenate((W_dev, W_test), axis=0)  
    X_s = np.concatenate((X_s_dev, X_s_test), axis=0)
    X_v = np.concatenate((X_v_dev, X_v_test), axis=0)
    T = np.concatenate((T_dev, T_test), axis=0)
    Y = np.concatenate((Y_dev, Y_test), axis=0) 
    A = np.concatenate((A_dev, A_test), axis=0) 
        
    print('')
    print("Operation time (min): " , (time.process_time()-t)/60)
    print('')
    print ("W shape: " + str(W.shape))
    print ("X_s shape: " + str(X_s.shape))
    print ("X_v shape: " + str(X_v.shape))
    print ("T shape: " + str(T.shape))
    print ("A shape: " + str(A.shape))

    return A, A_var