In [6]:

import os
import numpy as np
from os import path
import pandas as pd
import random

# NOTE(review): machine-specific absolute path. load_dataset() reads its files from
# "<current working directory>/data/", so this call decides where the data is looked up.
# The recorded run of this notebook ended with "train_FD001.txt not found" -- confirm that
# the directory set here actually contains the data/ folder before re-running.
os.chdir('/gneven/ETH/Sem2/Predictive_maintenance/Assignment/Assignment4/')


In [13]:
def sliding_window(data, N_tw = 30, stride = 1, f=1.0):
    """Count the sliding windows belonging to labeled and unlabeled engines.

    A fraction ``f`` of the engines is drawn at random (fixed seed) and treated
    as "labeled"; every other engine is "unlabeled". For each engine with
    ``n_H`` rows the number of windows of length ``N_tw`` taken every
    ``stride`` rows is ``(n_H - N_tw) // stride + 1`` (zero when the engine has
    fewer than ``N_tw`` rows).

    Parameters
    ----------
    data : 2-D numpy array whose first column holds the engine id. Engine ids
        are compared against 1..N_en, i.e. they are assumed to be consecutive
        integers starting at 1.
    N_tw : window length in rows.
    stride : step between the starts of two consecutive windows.
    f : fraction of engines to select as labeled; ``int(f * N_en)`` are drawn.

    Returns
    -------
    mL : int, total number of windows over the labeled engines.
    mU : int, total number of windows over the unlabeled engines.
    N_en : int, number of distinct engine ids in ``data``.
    id_en : sorted numpy array with the ids of the labeled engines.

    Side effects
    ------------
    Re-seeds both ``numpy.random`` and ``random`` with 229 on every call so the
    labeled/unlabeled split is reproducible.
    """
    # Set seed
    np.random.seed(229)
    random.seed(229)

    engine_col = data[:, 0]
    N_en = np.unique(engine_col).shape[0]                           # Number of engines (N_en)
    id_en = np.sort(random.sample([i+1 for i in range(N_en)], int(f*N_en)))
    labeled = set(id_en.tolist())                                   # O(1) membership instead of scanning the array

    mL = 0
    mU = 0
    for engine_id in range(1, N_en + 1):
        n_H = int(np.count_nonzero(engine_col == engine_id))        # Rows recorded for this engine
        # Closed-form window count; clamp so a too-short trajectory contributes 0.
        N_sw = max(0, int((n_H - N_tw) // stride) + 1)
        if engine_id in labeled:
            mL += N_sw
        else:
            mU += N_sw

    return mL, mU, N_en, id_en


def _engine_windows(engine_x, engine_y, N_tw, stride):
    """Cut one engine's trajectory into sliding windows.

    engine_x is the (rows, features) input matrix of a single engine and
    engine_y the matching (rows, 1) target column. Returns a tuple
    ``(windows_x, windows_y)`` where ``windows_x`` has shape
    (n_windows, N_tw, features) and ``windows_y`` holds, for each window, the
    target of the window's last row. An engine with fewer than N_tw rows
    yields zero windows.
    """
    n_windows = max(0, int((engine_x.shape[0] - N_tw) // stride) + 1)
    starts = (np.arange(n_windows) * stride).astype(int)
    windows_x = np.empty((n_windows, N_tw, engine_x.shape[1]), float)
    for w, start in enumerate(starts):
        windows_x[w] = engine_x[start:start + N_tw, :]
    # Label of a window = target at its final time step.
    windows_y = engine_y[starts + N_tw - 1, 0]
    return windows_x, windows_y


def load_dataset(N_tw, stride, sel, R_early, f=1.0):
    """Load the FD001 files from ``<cwd>/data`` and build windowed arrays.

    Reads ``train_FD001.txt``, ``test_FD001.txt`` and ``RUL_FD001.txt`` from the
    ``data`` folder under the current working directory. Column 0 of the train
    and test files is used as the engine id and column 1 as the cycle counter;
    ``sel`` picks the feature columns.

    Features are min-max scaled to [-1, 1] using the minimum and maximum of the
    training set (the same bounds are applied to the test set). A feature that
    is constant over the training set would give a zero range; its range is
    replaced by 1 so it maps to -1 instead of NaN.

    The target of a training row is ``last_cycle_of_engine - cycle``, capped at
    ``R_early``. Engines are split into labeled / unlabeled groups by
    ``sliding_window`` (fraction ``f`` labeled); each engine is cut into windows
    of ``N_tw`` rows every ``stride`` rows, labeled with the target of the
    window's last row. For the test set only the last ``N_tw`` rows of every
    engine are kept.

    Parameters
    ----------
    N_tw : window length in rows.
    stride : step between consecutive training windows.
    sel : numpy array of column indices used as input features.
    R_early : upper cap applied to training and test targets.
    f : fraction of training engines treated as labeled.

    Returns
    -------
    train_x : (mL, N_tw, N_ft) windows of the labeled engines.
    train_y : (mL,) targets of the labeled windows.
    train_xU : (mU, N_tw, N_ft) windows of the unlabeled engines.
    train_yU : (mU,) targets of the unlabeled windows.
    test_x : (N_en_ts, N_tw, N_ft) last window of every test engine.
    test_y : (N_en_ts, 1) capped targets read from RUL_FD001.txt.
    d_tr : DataFrame of the un-normalised selected training features.
    d_ts : DataFrame of the un-normalised selected test features.

    Raises
    ------
    OSError : if one of the data files is missing (from ``np.loadtxt``).
    ValueError : if a test engine has fewer than ``N_tw`` rows.
    """
    data_dir = os.path.join(os.getcwd(), "data")

    # ---- Training data -------------------------------------------------
    train_set       = np.loadtxt(os.path.join(data_dir, "train_FD001.txt"))  # Training set
    train_set_x_org = train_set[:, sel]                              # Training set input space (x)
    train_set_c     = train_set[:, np.array([1])]                    # Training set cycles (c), kept 2-D

    d_tr = pd.DataFrame(data = train_set_x_org)

    # Normalize the data with training-set bounds
    ub = train_set_x_org.max(0)
    lb = train_set_x_org.min(0)
    span = ub - lb
    span[span == 0] = 1.0                                            # Constant feature: avoid 0/0 -> NaN
    train_set_x = 2 * (train_set_x_org - lb) / span - 1

    N_ft = sel.shape[0]                                              # Number of features (N_ft)
    mL, mU, N_en, id_en = sliding_window(train_set, N_tw, stride, f) # Window counts & labeled engine ids
    labeled_ids = set(np.asarray(id_en).tolist())

    train_x  = np.empty((mL, N_tw, N_ft), float)
    train_y  = np.empty((mL), float)
    train_xU = np.empty((mU, N_tw, N_ft), float)
    train_yU = np.empty((mU), float)

    n_lab = 0                                                        # Next free row in train_x / train_y
    n_unl = 0                                                        # Next free row in train_xU / train_yU
    for i in range(N_en):
        idx      = train_set[:, 0] == i+1                            # Rows of engine number 'i+1'
        engine_c = train_set_c[idx]                                  # Engine cycles (c)
        # RUL: remaining cycles until the engine's last recorded cycle, capped at R_early
        engine_y = np.minimum(engine_c[-1] - engine_c, R_early)
        win_x, win_y = _engine_windows(train_set_x[idx, :], engine_y, N_tw, stride)
        n_win = win_y.shape[0]
        if i+1 in labeled_ids:
            train_x[n_lab:n_lab + n_win] = win_x
            train_y[n_lab:n_lab + n_win] = win_y
            n_lab += n_win
        else:
            train_xU[n_unl:n_unl + n_win] = win_x
            train_yU[n_unl:n_unl + n_win] = win_y
            n_unl += n_win

    # ---- Test data -----------------------------------------------------
    test_set       = np.loadtxt(os.path.join(data_dir, "test_FD001.txt"))
    test_set_x_org = test_set[:, sel]                                # Test set input space (x)
    test_y         = np.loadtxt(os.path.join(data_dir, "RUL_FD001.txt")).reshape(-1, 1)  # Test set RUL
    test_y[test_y > R_early] = R_early                               # Same cap as the training targets

    d_ts = pd.DataFrame(data = test_set_x_org)

    # Normalize the data (training-set bounds, so train and test share one scale)
    test_set_x = 2 * (test_set_x_org - lb) / span - 1

    # Only the engine count is needed here; the call is kept (rather than counting
    # directly) because it also re-seeds the global RNGs exactly as before.
    _, _, N_en_ts, _ = sliding_window(test_set, N_tw, stride, f=1.0)

    test_x = np.empty((N_en_ts, N_tw, N_ft), float)

    for ii in range(N_en_ts):
        engine_rows = test_set_x[test_set[:, 0] == ii+1, :]          # Engine 'ii+1' test data
        if engine_rows.shape[0] < N_tw:
            raise ValueError(
                "test engine %d has only %d rows, fewer than N_tw=%d"
                % (ii + 1, engine_rows.shape[0], N_tw)
            )
        test_x[ii] = engine_rows[-N_tw:]                             # Last N_tw rows of the engine

    return train_x, train_y, train_xU, train_yU, test_x, test_y, d_tr, d_ts

def score_cal(y_hat, Y_test):
    """Per-sample asymmetric exponential penalty of the prediction error.

    With ``d = y_hat - Y_test`` (2-D, one row per sample, error read from
    column 0) the penalty is ``exp(d/10) - 1`` for ``d >= 0`` and
    ``exp(-d/13) - 1`` otherwise. Returns a 1-D array with one penalty per row;
    the values are not summed here.
    """
    residual  = y_hat - Y_test
    n_samples = residual.shape[0]
    penalties = np.zeros(n_samples)
    for row in range(n_samples):
        err = residual[row, 0]
        # Non-negative errors grow with scale 10, negative ones with scale 13.
        growth = np.exp(err / 10) if err >= 0 else np.exp(-err / 13)
        penalties[row] = growth - 1
    return penalties

In [14]:
# In[2]: Set-up simulation
# Print the working directory: load_dataset() looks for its files in "<cwd>/data/".
print(os.getcwd())
N_tw     = 30     # Time Window (N_tw)
R_early  = 125    # Max RUL in training set
stride   = 1      # Stride for slide window
sel      = np.array([6, 7, 8, 11, 12, 13, 15, 16, 17, 18, 19, 21, 24, 25])  # Index of input features

# f is left at its default (1.0), so every engine is drawn into the labeled set.
# The two DataFrames returned last are discarded here.
X_train, Y_train, X_trainU, Y_trainU, X_test, Y_test, _, _ = load_dataset(N_tw, stride, sel, R_early)
print ("number of labeled training examples = " + str(X_train.shape[0]))
print ("number of unlabeled training examples = " + str(X_trainU.shape[0]))
print ("number of test examples = " + str(X_test.shape[0]))


/media/gneven/BAE632C5E6328225/drivesyc/drive tot/ETH/Sem2/Predictive_maintenance/Assignment/Assignment4
/media/gneven/BAE632C5E6328225/drivesyc/drive tot/ETH/Sem2/Predictive_maintenance/Assignment


OSError: /media/gneven/BAE632C5E6328225/drivesyc/drive tot/ETH/Sem2/Predictive_maintenance/Assignment/data/train_FD001.txt not found.