In [1]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary
from torch.utils.data import TensorDataset, DataLoader

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [3]:
def create_data(X, y):  
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.1)
    return X_train, X_valid, y_train, y_valid  

def create_datasets(X1, X2, X3, X4, X5, X6, X7, X8, X9, y):
    X1 = torch.tensor(X1, dtype=torch.float32) 
    X2 = torch.tensor(X2, dtype=torch.float32) 
    X3 = torch.tensor(X3, dtype=torch.float32) 
    X4 = torch.tensor(X4, dtype=torch.float32) 
    X5 = torch.tensor(X5, dtype=torch.float32) 
    X6 = torch.tensor(X6, dtype=torch.float32)     
    X7 = torch.tensor(X7, dtype=torch.float32) 
    X8 = torch.tensor(X8, dtype=torch.float32) 
    X9 = torch.tensor(X9, dtype=torch.float32)     
    
    y = torch.tensor(y, dtype=torch.long)
    data_ds = TensorDataset(X1, X2, X3, X4, X5, X6, X7, X8, X9, y)
    return data_ds

def create_dataloaders(train_ds, valid_ds, bs=128):
    train_dl = DataLoader(train_ds, bs, shuffle=True)
    valid_dl = DataLoader(valid_ds, bs, shuffle=False)
    return train_dl, valid_dl  

In [4]:
class MLPClassifier(nn.Module):
    def __init__(self, input_dim):
        super(MLPClassifier, self).__init__()
        self.fc1 = nn.Sequential(nn.Linear(input_dim,1000), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(1000,100), nn.ReLU())
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.fc2(x)
        return x     


class MyEnsemble(nn.Module):
    def __init__(self, MLPClassifier, output_dim):
        super(MyEnsemble, self).__init__()        
        self.MLP = MLPClassifier  
        self.fc = nn.Sequential(nn.Linear(900,100), nn.ReLU())
        self.classifier = nn.Sequential(nn.Linear(100, output_dim), nn.Softmax(dim=1))
        
    def forward(self, x1, x2, x3, x4, x5, x6, x7, x8, x9):
        x1 = self.MLP(x1)
        x2 = self.MLP(x2)
        x3 = self.MLP(x3)
        x4 = self.MLP(x4)
        x5 = self.MLP(x5)
        x6 = self.MLP(x6)
        x7 = self.MLP(x7)
        x8 = self.MLP(x8)
        x9 = self.MLP(x9)        
        x = torch.cat((x1, x2, x3, x4, x5, x6, x7, x8, x9), dim=1)
        x = self.fc(x)
        x = self.classifier(x)
        return x

In [5]:
def rms(y):
    return np.sqrt(np.mean(y**2))    

**DATA PREPARATION**
---

In [7]:
work = []
signal = []
list_file = ['time_serie_1.p', 'time_serie_2.p', 'time_serie_3.p', 'time_serie_4.p', 'time_serie_5.p', 'time_serie_6.p']

for file in list_file:
  with open(file,'rb') as f:
        while True:
          try:
            u = pickle._Unpickler(f)
            u.encoding = 'latin1' 
            p = u.load()      
            A = p['acc']
            signal.append(A)
            work.append(p['jobname'])

          except EOFError:
            break            


In [150]:
df = pd.read_csv('compare_single.csv')
s = df['label_l'].to_list()
job = df['jobname'].to_list()

sensor = [2,5,68,11,14,17,20,23,26]
label = {}
for i in range(len(job)):
    lab = -1
    if s[i] in sensor:
        lab = s[i]

    label[int(job[i])] = int(lab)


In [151]:
y = []
for i in range(len(work)):
    y.append(label[int(work[i])])

y = np.array(y)    
signal0 = np.array(signal)[:,:6000,:]

list_id = np.where(y!=-1)

signal0 = signal0[list_id]
y = y[list_id]

le = LabelEncoder()
le.fit(np.unique(y))
y = le.transform(y)

In [152]:
Xmax = np.max(signal0)
Xmin = np.min(signal0)
signal0 = (signal0 - Xmin) / (Xmax-Xmin)

In [153]:
v_rms= rms(signal0)

In [154]:
X = signal0
yy = y

In [161]:
signal1 = signal0 + 0.001*v_rms*np.random.normal(0.0, 1.0, np.shape(signal0))
signal2 = signal0 + 0.001*v_rms*np.random.normal(0.0, 1.0, np.shape(signal0))
signal3 = signal0 + 0.001*v_rms*np.random.normal(0.0, 1.0, np.shape(signal0))
signal4 = signal0 + 0.001*v_rms*np.random.normal(0.0, 1.0, np.shape(signal0))
signal5 = signal0 + 0.001*v_rms*np.random.normal(0.0, 1.0, np.shape(signal0))
signal6 = signal0 + 0.001*v_rms*np.random.normal(0.0, 1.0, np.shape(signal0))
signal7 = signal0 + 0.001*v_rms*np.random.normal(0.0, 1.0, np.shape(signal0))

In [162]:
# X = np.concatenate((signal0, signal1, signal2, signal3),axis=0)
X = np.concatenate((signal0, signal1, signal2, signal3, signal4, signal5, signal6, signal7),axis=0)

In [163]:
# yy = np.concatenate((y,y,y,y),axis=0)
yy = np.concatenate((y,y,y,y,y,y,y,y),axis=0)

In [164]:
print(np.shape(X), np.shape(yy))

(5544, 6000, 9) (5544,)


In [165]:
X_train, X_valid, y_train, y_valid = create_data(X, yy)

X1_train = X_train[:,:,0] 
X2_train = X_train[:,:,1] 
X3_train = X_train[:,:,2] 
X4_train = X_train[:,:,3] 
X5_train = X_train[:,:,4] 
X6_train = X_train[:,:,5] 
X7_train = X_train[:,:,6] 
X8_train = X_train[:,:,7] 
X9_train = X_train[:,:,8] 

X1_valid = X_valid[:,:,0] 
X2_valid = X_valid[:,:,1] 
X3_valid = X_valid[:,:,2] 
X4_valid = X_valid[:,:,3] 
X5_valid = X_valid[:,:,4] 
X6_valid = X_valid[:,:,5] 
X7_valid = X_valid[:,:,6] 
X8_valid = X_valid[:,:,7] 
X9_valid = X_valid[:,:,8] 

train_ds = create_datasets(X1_train, X2_train, X3_train, X4_train, X5_train, X6_train, X7_train, X8_train, X9_train, y_train)
valid_ds = create_datasets(X1_valid, X2_valid, X3_valid, X4_valid, X5_valid, X6_valid, X7_valid, X8_valid, X9_valid, y_valid)
train_dl, valid_dl = create_dataloaders(train_ds, valid_ds)

In [166]:
input_dim = np.shape(X)[1]
output_dim = len(np.unique(yy))

MLP = MLPClassifier(input_dim)
model = MyEnsemble(MLP, output_dim)
model = model.cuda()

lr = 0.0001
n_epochs = 300
best_acc = 0

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

for epoch in range(1, n_epochs + 1):   
    for i, (x1_train, x2_train, x3_train, x4_train, x5_train, x6_train, x7_train, x8_train, x9_train, y_train) in enumerate(train_dl):
        model.train()
        x1_train, x2_train, x3_train, x4_train, x5_train, x6_train, x7_train, x8_train, x9_train, y_train = [t.cuda() for t in (x1_train, x2_train, x3_train, x4_train, x5_train, x6_train, x7_train, x8_train, x9_train, y_train)]
        optimizer.zero_grad()
        output = model(x1_train, x2_train, x3_train, x4_train, x5_train, x6_train, x7_train, x8_train, x9_train)
        loss = criterion(output, y_train)
        loss.backward()
        optimizer.step()
    
    model.eval()
    Ncorrect, Nsample = 0, 0
    for x1_val, x2_val, x3_val, x4_val, x5_val, x6_val, x7_val, x8_val, x9_val, y_val in valid_dl:
        x1_val, x2_val, x3_val, x4_val, x5_val, x6_val, x7_val, x8_val, x9_val, y_val = [t.cuda() for t in (x1_val, x2_val, x3_val, x4_val, x5_val, x6_val, x7_val, x8_val, x9_val, y_val)]
        out = model(x1_val, x2_val, x3_val, x4_val, x5_val, x6_val, x7_val, x8_val, x9_val)
        preds = F.log_softmax(out, dim=1).argmax(dim=1)
        Nsample += y_val.size(0)
        Ncorrect += (preds == y_val).sum().item()
    
    acc = Ncorrect / Nsample

    if epoch % 3 == 0:
        print(f'Epoch: {epoch:3d}. Loss: {loss.item():.4f}. Acc.: {acc:2.2%}')

#     if acc > best_acc:
#         best_acc = acc
#         torch.save(model.state_dict(), 'best.pth')
#         print(f'Epoch {epoch} best model saved with accuracy: {best_acc:2.2%}')


Epoch:   3. Loss: 2.0860. Acc.: 17.12%
Epoch:   6. Loss: 2.0798. Acc.: 17.12%
Epoch:   9. Loss: 2.0624. Acc.: 17.12%
Epoch:  12. Loss: 2.0747. Acc.: 17.12%
Epoch:  15. Loss: 2.0711. Acc.: 17.12%
Epoch:  18. Loss: 2.0843. Acc.: 17.12%
Epoch:  21. Loss: 2.0870. Acc.: 17.12%
Epoch:  24. Loss: 2.0759. Acc.: 17.12%
Epoch:  27. Loss: 2.0771. Acc.: 18.02%
Epoch:  30. Loss: 2.0776. Acc.: 17.30%
Epoch:  33. Loss: 2.0459. Acc.: 23.06%
Epoch:  36. Loss: 2.0328. Acc.: 27.03%
Epoch:  39. Loss: 1.9968. Acc.: 32.97%
Epoch:  42. Loss: 1.9265. Acc.: 33.51%
Epoch:  45. Loss: 1.9288. Acc.: 36.94%
Epoch:  48. Loss: 1.9286. Acc.: 36.58%
Epoch:  51. Loss: 1.9085. Acc.: 37.30%
Epoch:  54. Loss: 1.8620. Acc.: 44.14%
Epoch:  57. Loss: 1.9125. Acc.: 45.23%
Epoch:  60. Loss: 1.7566. Acc.: 46.31%
Epoch:  63. Loss: 1.7776. Acc.: 50.27%
Epoch:  66. Loss: 1.7866. Acc.: 50.63%
Epoch:  69. Loss: 1.8014. Acc.: 52.07%
Epoch:  72. Loss: 1.7639. Acc.: 52.79%
Epoch:  75. Loss: 1.7244. Acc.: 56.22%
Epoch:  78. Loss: 1.5860.

**VISUALIZATION**
---

In [72]:
nb_classes = output_dim

confusion_matrix = torch.zeros(nb_classes, nb_classes)

train_dl, valid_dl = create_dataloaders(train_ds, valid_ds, bs=len(y_valid))

with torch.no_grad():
    for x1_val, x2_val, x3_val, x4_val, x5_val, x6_val, x7_val, x8_val, x9_val, y_val in valid_dl:
        x1_val, x2_val, x3_val, x4_val, x5_val, x6_val, x7_val, x8_val, x9_val, y_val = [t.cuda() for t in (x1_val, x2_val, x3_val, x4_val, x5_val, x6_val, x7_val, x8_val, x9_val, y_val)]
        out = model(x1_val, x2_val, x3_val, x4_val, x5_val, x6_val, x7_val, x8_val, x9_val)
        _, preds = torch.max(out, 1)
        for t, p in zip(y_val.view(-1), preds.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1

print(confusion_matrix)

tensor([[285.,   4.],
        [  2., 323.]])


In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

import seaborn as sns
import matplotlib.pyplot as plt

print(classification_report(y_valid, preds.tolist()))
cm =confusion_matrix(y_valid, preds.tolist())  

index = np.arange(0,output_dim)
columns = np.arange(0,output_dim)
# index = ['Healthy', 'Minor', 'Moderate', 'Severe']
# columns = ['Healthy', 'Minor', 'Moderate', 'Severe']


cm_df = pd.DataFrame(cm,columns,index)                      
plt.figure(figsize=(12,9))  
cm_df.index.name = 'Actual'
cm_df.columns.name = 'Predicted'
sns.set(font_scale=1.6)
sns.heatmap(cm_df, annot=True, cmap= "YlGnBu", fmt='g')

plt.savefig('severity.png')

In [None]:
plt.hist(yy)