In [None]:
# Mount Google Drive at /content/drive (Colab-only helper) so files stored there can be read below.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# IPython automagic `cd`: change the kernel's working directory so the relative paths used in later cells resolve.
cd /content/drive/My\ Drive/Colab\ Notebooks/summer_coop/ai4i2020

/content/drive/My Drive/Colab Notebooks/summer_coop/ai4i2020


In [None]:
import pandas as pd
# Read the dataset CSV from the current working directory.
ai4i_df = pd.read_csv('./ai4i2020.csv')
print(ai4i_df.columns)

# Map every column name to its positional index so later cells can slice the
# NumPy version of the table by name instead of by hard-coded numbers.
ai4i_col = {name: position for position, name in enumerate(ai4i_df.columns)}
print(ai4i_col)

Index(['UDI', 'Product ID', 'Type', 'Air temperature [K]',
       'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]',
       'Tool wear [min]', 'Machine failure', 'TWF', 'HDF', 'PWF', 'OSF',
       'RNF'],
      dtype='object')
{'UDI': 0, 'Product ID': 1, 'Type': 2, 'Air temperature [K]': 3, 'Process temperature [K]': 4, 'Rotational speed [rpm]': 5, 'Torque [Nm]': 6, 'Tool wear [min]': 7, 'Machine failure': 8, 'TWF': 9, 'HDF': 10, 'PWF': 11, 'OSF': 12, 'RNF': 13}


## Binary Classification
### Extract features and class
- features: Air temperature, Process temperature, Rotational speed, Torque, Tool wear
- class: machine failure

In [None]:
# Work on a NumPy copy of the whole table (mixed column types, so presumably an object array — TODO confirm).
ai4i_arr = ai4i_df.to_numpy()

# The feature columns are contiguous: they start at 'Air temperature [K]' and
# stop right before 'Machine failure', which is the binary label column.
first_feature, label_col = ai4i_col['Air temperature [K]'], ai4i_col['Machine failure']
input_arr = ai4i_arr[:, first_feature:label_col]
target_arr = ai4i_arr[:, label_col]

### Normalize the data

In [None]:
import numpy as np

# Cast once to a real float array so the mean and the standard deviation are
# computed from the same numeric data. The incoming slice comes from
# DataFrame.to_numpy() on a mixed-type table (presumably object dtype), which
# previously made the mean a Python-level reduction while the std used a
# separate float32 copy, and left the normalised result as object dtype.
features = np.asarray(input_arr, dtype=np.float64)

# Column-wise (per-feature) statistics.
mean = features.mean(axis=0)
std = features.std(axis=0)

# Z-score normalisation of every feature column.
# NOTE(review): the statistics are taken over the full dataset, i.e. including
# the rows that the later train/test split assigns to the test set.
input_arr = (features - mean) / std

### Split train data and test data
- feature: (10000, 5, 1) -> (8000, 5, 1), (2000, 5, 1)
- class: (10000,) -> (8000,), (2000,)

In [None]:
from sklearn.model_selection import train_test_split
# Cast the features to float32 and the labels to integers before splitting.
input_arr = np.array(input_arr, dtype = np.float32)
# `np.int` was deprecated in NumPy 1.20 and removed in 1.24 (AttributeError);
# use an explicit fixed-width integer dtype instead.
target_arr = np.array(target_arr, dtype = np.int64)
# Add a trailing axis of length 1: (samples, 5) -> (samples, 5, 1).
input_arr = input_arr.reshape(-1, 5, 1)

# Stratified 80/20 split. The fixed random_state makes the held-out set, and
# therefore every score logged from it, reproducible across runs.
train_input, test_input, train_target, test_target = train_test_split(
    input_arr, target_arr, stratify = target_arr, test_size = 0.2, random_state = 42
)

In [None]:
# Sanity-check the shapes produced by the split.
print(train_input.shape, test_input.shape, train_target.shape, test_target.shape)

(8000, 5, 1) (2000, 5, 1) (8000,) (2000,)


In [None]:
from tensorflow import keras
# Restore the four saved binary classifiers from .h5 files in the working directory.
# NOTE(review): the CNN file name uses hyphens while the other three use
# underscores — confirm it matches the file on disk.
cnn, rnn, lstm, gru = (
    keras.models.load_model(checkpoint)
    for checkpoint in (
        './best-cnn-model.h5',
        './best_rnn_model.h5',
        './best_lstm_model.h5',
        './best_gru_model.h5',
    )
)


### Test model

In [None]:
# Evaluate the CNN on the held-out binary test split; entry [1] of the result is written to the results file later.
cnn_score = cnn.evaluate(test_input, test_target)



In [None]:
# Evaluate the RNN on the held-out binary test split; entry [1] of the result is written to the results file later.
rnn_score = rnn.evaluate(test_input, test_target)



In [None]:
# Evaluate the LSTM on the held-out binary test split; entry [1] of the result is written to the results file later.
lstm_score = lstm.evaluate(test_input, test_target)



In [None]:
# Evaluate the GRU on the held-out binary test split; entry [1] of the result is written to the results file later.
gru_score = gru.evaluate(test_input, test_target)



In [None]:
# Collect every model's prediction for the test split as a flat vector
# (one score per test sample); these are what the ensemble averages.
cnn_predict, rnn_predict, lstm_predict, gru_predict = (
    model.predict(test_input).flatten() for model in (cnn, rnn, lstm, gru)
)

In [None]:
# Confirm the flattened prediction vector is one-dimensional.
cnn_predict.shape

(2000,)

In [None]:
# Stack the four prediction vectors into one array: axis 0 indexes the model, axis 1 the test sample.
outputs = np.array([cnn_predict, rnn_predict, lstm_predict, gru_predict])
print(outputs.shape)

(4, 2000)


### Voted Ensemble

In [None]:
class SoftEnsemble:
    """Soft-voting ensemble for binary classifiers.

    Each model contributes one score per sample; the ensemble averages the
    scores across models and thresholds the mean at 0.5.
    """

    def soft_ensemble(self, outputs):
        """Average the per-model scores for every sample.

        Args:
            outputs: array-like of shape (n_models, n_samples).

        Returns:
            np.ndarray of shape (n_samples,) holding the mean score per sample.

        Raises:
            ValueError: if ``outputs`` is not 2-D or contains no models.
        """
        outputs = np.asarray(outputs)
        if outputs.ndim != 2 or outputs.shape[0] == 0:
            raise ValueError(
                "outputs must have shape (n_models, n_samples) with at least one model"
            )
        # The mean divides by the actual number of models, so the ensemble is
        # correct for any model count rather than only for exactly four.
        return outputs.mean(axis=0)

    def decision(self, average):
        """Turn averaged scores into hard 0/1 labels.

        Args:
            average: array-like of averaged scores, one per sample.

        Returns:
            list[int]: 1 where the score is >= 0.5, otherwise 0.
        """
        return (np.asarray(average) >= 0.5).astype(int).tolist()

    def scoring(self, prediction, answer):
        """Return the fraction of predictions that equal the true labels.

        Args:
            prediction: sequence of predicted labels.
            answer: sequence of true labels, same length as ``prediction``.

        Returns:
            float: accuracy in the range [0, 1].

        Raises:
            ValueError: if the inputs are empty or differ in length.
        """
        prediction = np.asarray(prediction).ravel()
        answer = np.asarray(answer).ravel()
        if prediction.size == 0:
            raise ValueError("cannot score an empty prediction list")
        if prediction.size != answer.size:
            raise ValueError("prediction and answer must have the same length")
        return float(np.mean(prediction == answer))

In [None]:
# Soft-vote the four binary models: average their scores per sample,
# threshold the mean at 0.5, then measure accuracy against the true labels.
se = SoftEnsemble()
average = se.soft_ensemble(outputs)
decision = se.decision(average)
ensemble_score = se.scoring(decision, test_target)
print(ensemble_score)

0.9705


In [None]:
# Append this run's binary-classification scores (CNN, RNN, LSTM, GRU, ensemble)
# as one tab-separated row. The handle is deliberately left open: the final
# cell of the notebook writes the multiclass row through the same `infile`
# and then closes it.
infile = open('ai4i_result.tsv', 'a')
infile.write('binary_test:\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\n'.format(cnn_score[1], rnn_score[1], lstm_score[1], gru_score[1], ensemble_score))
# Push the row to disk immediately so it is not lost if the kernel stops
# before the handle is closed.
infile.flush()

43

## Multiclass Classification

### Extract features and class
- features: Air temperature, Process temperature, Rotational speed, Torque, Tool wear
- class: TWF, HDF, PWF, OSF, RNF (detailed machine failure modes)

In [None]:
# Multiclass labels: every column from 'TWF' through the last column of the table (the individual failure flags).
target_arr = ai4i_arr[:, ai4i_col['TWF']:]
print(input_arr.shape, target_arr.shape)

(10000, 5, 1) (10000, 5)


### Apply Priority Encoder to targets
- TensorFlow's target should be 1-dimensional [ref](https://stackoverflow.com/questions/49083984/valueerror-can-not-squeeze-dim1-expected-a-dimension-of-1-got-3-for-sparse)
    - convert the one-hot encoded target to an integer class id
    - e.g. [0 0 1 0] -> 3
    - this works like a priority encoder: when several flags are set, the first one wins
    - e.g. [0 0 1 1] -> 3 (not 3.5 or 4)
    - the target shape changes from (10000, 5) to (10000,)
    - 0: no failure
    - 1: TWF 
    - 2: HDF
    - 3: PWF
    - 4: OSF
    - 5: RNF
    - total 6 classes to recognize

In [None]:
# Priority-encode the failure-flag columns into a single class id per row:
# 0 means no flag is set, otherwise the id is (index of the first set flag) + 1,
# so when several flags are set the left-most column wins.
failure_flags = np.asarray(target_arr == 1, dtype=bool)

# argmax on a boolean row returns the position of the first True.
first_failure = failure_flags.argmax(axis=1) + 1

# Rows with no flag set fall back to class 0. `np.int` was removed in
# NumPy 1.24, so an explicit integer dtype is used for the result.
target_int = np.where(failure_flags.any(axis=1), first_failure, 0).astype(np.int64)

### Split train data and test data

In [None]:
# 80/20 split of the multiclass data. The fixed random_state makes the held-out
# set, and therefore every score logged from it, reproducible across runs.
# NOTE(review): unlike the binary split earlier in this notebook, this split is
# not stratified, so rare failure classes may be absent from the test set —
# consider passing stratify=target_int.
train_input, test_input, train_target, test_target = train_test_split(
    input_arr, target_int, test_size = 0.2, random_state = 42
)
print(train_input.shape, test_input.shape, train_target.shape, test_target.shape)

(8000, 5, 1) (2000, 5, 1) (8000,) (2000,)


In [None]:
# Make the dtypes explicit: float32 features and integer class ids.
# `np.int` was deprecated in NumPy 1.20 and removed in 1.24 (AttributeError),
# so a fixed-width integer dtype is used instead.
train_input = np.array(train_input, dtype = np.float32)
train_target = np.array(train_target, dtype = np.int64)
test_input = np.array(test_input, dtype = np.float32)
test_target = np.array(test_target, dtype = np.int64)

### Test model

In [None]:
# Restore the four saved multiclass classifiers from .h5 files in the working directory.
cnn_m, rnn_m, lstm_m, gru_m = (
    keras.models.load_model(checkpoint)
    for checkpoint in (
        './best_cnn_m_model.h5',
        './best_rnn_m_model.h5',
        './best_lstm_m_model.h5',
        './best_gru_m_model.h5',
    )
)

In [None]:
# Evaluate the multiclass CNN on the held-out split; entry [1] of the result is written to the results file later.
cnn_m_score = cnn_m.evaluate(test_input, test_target)



In [None]:
# Evaluate the multiclass RNN on the held-out split; entry [1] of the result is written to the results file later.
rnn_m_score = rnn_m.evaluate(test_input, test_target)



In [None]:
# Evaluate the multiclass LSTM on the held-out split; entry [1] of the result is written to the results file later.
lstm_m_score = lstm_m.evaluate(test_input, test_target)



In [None]:
# Evaluate the multiclass GRU on the held-out split; entry [1] of the result is written to the results file later.
gru_m_score = gru_m.evaluate(test_input, test_target)



In [None]:
# Collect every multiclass model's per-sample output vectors for the test split;
# these are what the ensemble averages.
cnn_m_predict, rnn_m_predict, lstm_m_predict, gru_m_predict = (
    model.predict(test_input) for model in (cnn_m, rnn_m, lstm_m, gru_m)
)

In [None]:
# Stack the four prediction arrays along a new leading axis, so axis 0 indexes the model.
outputs_mul = np.array([cnn_m_predict, rnn_m_predict, lstm_m_predict, gru_m_predict])
print(outputs_mul.shape)

(4, 2000, 6)


In [None]:
# Peek at the CNN's output vector for the first test sample.
cnn_m_predict[0, :]

array([9.2525828e-01, 3.5425209e-02, 4.0870538e-10, 1.0156809e-04,
       3.8301304e-02, 9.1361691e-04], dtype=float32)

### Voted Ensemble

In [None]:
class SoftEnsembleMul:
    """Soft-voting ensemble for multiclass classifiers.

    Each model contributes one score vector per sample; the ensemble averages
    the vectors across models and picks the class with the highest mean score.
    """

    def soft_ensemble_multi(self, outputs):
        """Average the per-model class scores for every sample.

        Args:
            outputs: array-like of shape (n_models, n_samples, n_classes).

        Returns:
            np.ndarray of shape (n_samples, n_classes) with the mean scores.

        Raises:
            ValueError: if ``outputs`` is not 3-D or contains no models.
        """
        outputs = np.asarray(outputs)
        if outputs.ndim != 3 or outputs.shape[0] == 0:
            raise ValueError(
                "outputs must have shape (n_models, n_samples, n_classes) "
                "with at least one model"
            )
        # One vectorised reduction over the model axis replaces the nested loops.
        return outputs.mean(axis=0)

    def decision(self, average):
        """Pick the highest-scoring class for every sample.

        Args:
            average: array-like of shape (n_samples, n_classes).

        Returns:
            list: one class index per sample (ties resolve to the lowest index).
        """
        average = np.asarray(average)
        if average.shape[0] == 0:
            return []
        # Flatten anything beyond the sample axis so each row is searched as a whole.
        return list(average.reshape(average.shape[0], -1).argmax(axis=1))

    def scoring(self, prediction, answer, verbose=True):
        """Return the fraction of predictions that equal the true labels.

        Args:
            prediction: sequence of predicted class ids.
            answer: sequence of true class ids, same length as ``prediction``.
            verbose: when True (the default), print every correctly predicted
                non-zero class as ``<predicted> <actual>``; pass False to
                silence the diagnostic output.

        Returns:
            float: accuracy in the range [0, 1].

        Raises:
            ValueError: if the inputs are empty or differ in length.
        """
        prediction = np.asarray(prediction).ravel()
        answer = np.asarray(answer).ravel()
        if prediction.size == 0:
            raise ValueError("cannot score an empty prediction list")
        if prediction.size != answer.size:
            raise ValueError("prediction and answer must have the same length")

        correct = prediction == answer
        if verbose:
            # Show which non-zero classes were recognised correctly.
            shown = correct & (prediction != 0)
            for predicted, actual in zip(prediction[shown], answer[shown]):
                print(predicted, actual)
        return float(correct.mean())

In [None]:
# Soft-vote the four multiclass models: average their score vectors per sample,
# take the arg-max class, then measure accuracy against the true class ids.
se_m = SoftEnsembleMul()
average_m = se_m.soft_ensemble_multi(outputs_mul)
print(average_m.shape)
decision_m = se_m.decision(average_m)
print(len(decision_m))
ensemble_score_m = se_m.scoring(decision_m, test_target)
print(ensemble_score_m)

(2000, 6)
2000
3 3
2 2
3 3
4 4
0.9715


In [None]:
# Inspect which class ids the ensemble actually predicted (largest id and the set of distinct ids).
decision_arr = np.array(decision_m)
print(decision_arr.max())
print(np.unique(decision_arr))

4
[0 2 3 4]


In [None]:
# Append the multiclass scores (CNN, RNN, LSTM, GRU, ensemble) as one
# tab-separated row through the handle opened by the binary-results cell.
# try/finally guarantees the handle is closed even if the write raises.
try:
    infile.write('multi_test:\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\n'.format(cnn_m_score[1], rnn_m_score[1], lstm_m_score[1], gru_m_score[1], ensemble_score_m))
finally:
    infile.close()