# Coil Data Preparation

In [27]:
# Starting point:
#   data_norm.pkl :  
#       Row1 .- struct scale ZnMin and ZnMax
#       Row2 .- dosD  DB table
#       Row3 .- coils DB table
#       Row4 .- nclmaps (struct of coils with specific labels nzn and nzne; each coil entry holds the maps 1234 and 1243 plus 'props')


In [28]:
import sys, os, datetime, pickle, time
import string, pdb, tqdm
import random, cv2, keras, os.path
from timeit import default_timer as timer
from datetime import datetime, timedelta, date
#
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.metrics import classification_report, confusion_matrix
#
from PIL import Image
from matplotlib import pyplot as plt
from IPython.display import Image
%matplotlib inline
#
# TensorFlow runtime configuration: quiet logging and on-demand GPU memory.
tf.autograph.set_verbosity(0)
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print(physical_devices)
# Enable memory growth on every visible GPU. Looping (instead of indexing [0])
# keeps the cell runnable on CPU-only machines, where the list is empty, and
# applies the same setting to all GPUs on multi-GPU machines.
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)
# NOTE(review): TensorFlow is already imported at this point, so this variable
# may be read too late to silence the native start-up messages — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
tf.get_logger().setLevel('ERROR')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [29]:
# Loading datasets
# NOTE: pickle can execute arbitrary code while loading; only open trusted files.
with open('data_norm.pkl', 'rb') as pkl_file:
    # Four objects are stored back-to-back; they are read in this fixed order.
    GlblFrm, dosD, coils, nclmaps = [pickle.load(pkl_file) for _ in range(4)]
#

In [30]:
# Show the global frame and the structure of the first entry of nclmaps.
first_map_key = list(nclmaps)[0]
first_map = nclmaps[first_map_key]
print('GlblFrm:',GlblFrm)
print('nclmaps keys:',first_map.keys())
print('nclmaps props:',first_map['props'])

GlblFrm: {'NormMn': -279.38157, 'NormMx': 507.54791, 'MaxTiles': 264}
nclmaps keys: dict_keys([1234, 1243, 'props'])
nclmaps props: {'ntiles': 64, 'Len': 638.0, 'Wid': 1108.0, 'Label': 1, 'ZnMn': 138, 'ZnMx': 158, 'nsns': 9, 'callib_1234': {0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: []}, 'nchks': {0: 1, 1: 2, 2: 5, 3: 5, 4: 7, 5: 3, 6: 0, 7: 0, 8: 0}, 'nfltls': {0: 4, 1: 4, 2: 44, 3: 42, 4: 30, 5: 4, 6: 0, 7: 0, 8: 0}, 'callib_1243': {0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: []}, '1234_mx': 37.35920999999999, '1234_mn': -75.1079, '1243_mx': 54.614810000000006, '1243_mn': -10.365899999999996, 'MxVal': 54.614810000000006, 'MnVal': -75.1079}


In [31]:
# Number of coils per value of the 'Label' column (only labels 1 and 2 are used below).
coils['Label'].value_counts()

Label
1    547
0    261
3    212
2    140
Name: count, dtype: int64

### Building Sets

In [32]:
#  Selecting coils OK (Label 1) and NOK (Label 2)
SPLIT_SEED = 42   # fixed seed so that Restart & Run All reproduces the same partition
N_ASSESS   = 30   # coils per class held out for the independent assessment
split_rng  = random.Random(SPLIT_SEED)  # private generator: global `random` state is untouched
#
lsids1 = coils.loc[coils['Label']==1,'SID'].tolist()
lsids2 = coils.loc[coils['Label']==2,'SID'].tolist()
#
# Select and extract N_ASSESS coil sids per class for independent assessment
ass1 = split_rng.sample(lsids1, N_ASSESS)
ass2 = split_rng.sample(lsids2, N_ASSESS)
#
# The remaining are organized for building the model.
# sorted() fixes the starting order, because set iteration order is not guaranteed;
# the shuffle is done on plain lists before converting to arrays.
rest1 = sorted(set(lsids1) - set(ass1))
rest2 = sorted(set(lsids2) - set(ass2))
split_rng.shuffle(rest1)
split_rng.shuffle(rest2)
res1 = np.array(rest1)
res2 = np.array(rest2)
# 70 % train / 20 % validation / 10 % test, split separately for each class
train1, validate1, test1 = np.split(res1,[int(.7*len(res1)), int(.9*len(res1))])
train2, validate2, test2 = np.split(res2,[int(.7*len(res2)), int(.9*len(res2))])
#
# Building the full sets (class 1 sids first, then class 2 sids)
train = train1.tolist() + train2.tolist()
valid = validate1.tolist() + validate2.tolist()
test  = test1.tolist()  + test2.tolist()
ass   = ass1   + ass2
#

In [33]:
def featureMap(id, nlcmaps):
    """Build the feature map of one coil by joining its two maps side by side.

    Parameters
    ----------
    id : hashable
        Key of the coil inside ``nlcmaps`` (the notebook passes the coil SID).
    nlcmaps : mapping
        Per-coil maps. ``nlcmaps[id][1234]['nzne']`` and
        ``nlcmaps[id][1243]['nzne']`` must provide ``to_numpy()`` and yield
        2-D arrays with the same number of rows.

    Returns
    -------
    numpy.ndarray
        The 1234 map followed by the 1243 map along axis 1.
    """
    # Read from the mapping that was passed in, not from a notebook-level
    # global, so the function works with any map collection.
    coil_maps = nlcmaps[id]
    arr1 = coil_maps[1234]['nzne'].to_numpy()
    arr2 = coil_maps[1243]['nzne'].to_numpy()
    return np.concatenate((arr1, arr2), axis=1)
#
def prep_dataset(setd,nlcmaps):
    """Assemble features and one-hot labels for a list of coil sids.

    Parameters
    ----------
    setd : iterable
        Coil SIDs; each is looked up in the notebook-level ``coils`` table.
    nlcmaps : mapping
        Per-coil maps, forwarded to ``featureMap``.

    Returns
    -------
    list
        ``[features, labels]`` as numpy arrays. Label 1 is encoded as
        ``[1., 0.]`` and label 2 as ``[0., 1.]``.

    Raises
    ------
    ValueError
        If a coil has a label other than 1 or 2. Such a coil would otherwise
        add a feature map without a label and misalign the two arrays.
    """
    one_hot = {1: [1.,0.], 2: [0.,1.]}
    setd_f = []
    setd_l = []
    for i in setd:
        lbl = coils.loc[coils['SID']==i,'Label'].values[0]
        if lbl not in one_hot:
            raise ValueError('Coil %r has Label %r; only labels 1 and 2 are supported' % (i, lbl))
        setd_f.append(featureMap(i, nlcmaps))
        setd_l.append(one_hot[lbl])
    setd_f = np.array(setd_f)
    setd_l = np.array(setd_l)
    return([setd_f,setd_l])
#
def factory_rep(arr,step=0):
    """Augment one feature map with mirrored and cyclically shifted copies.

    The notebook feeds maps with 264 rows and 18 columns (9 face A + 9 face B)
    of normalized data, but any 2-D array with an even number of columns is
    accepted: the first half of the columns is treated as one face and the
    second half as the other.

    Parameters
    ----------
    arr : array-like, 2-D
        Feature map.
    step : int, default 0
        Row shift used for the extra copies. When ``step > 0``,
        ``rows // step`` copies are appended, the k-th one being ``arr``
        shifted up cyclically by ``k * step`` rows.

    Returns
    -------
    numpy.ndarray
        Stack ``[arr, column mirror, row mirror, both mirrors, shifts...]``
        of shape ``(4 + n_shifts, rows, cols)``.

    Raises
    ------
    ValueError
        If ``arr`` is not 2-D or has an odd number of columns.

    Notes
    -----
    When ``rows`` is a multiple of ``step`` the last shift is a full turn and
    therefore equals ``arr`` itself (e.g. 264 rows with step 8). This is kept
    so the number of augmented samples stays the same.
    """
    arr = np.asarray(arr)
    if arr.ndim != 2 or arr.shape[1] % 2:
        raise ValueError('factory_rep expects a 2-D array with an even number of columns')
    nrows, ncols = arr.shape
    half = ncols // 2
    # mirror per face over x axis: reverse the channel order inside each face,
    # e.g. for 9 channels 0-3 <=> 8-5 while channel 4 stays in place
    col_idx = np.concatenate((np.arange(half - 1, -1, -1),
                              np.arange(ncols - 1, half - 1, -1)))
    arr1 = arr[:, col_idx]
    # mirror along the rows, for the plain and the column-mirrored map
    arr2 = arr[::-1, :]
    arr3 = arr1[::-1, :]
    res  = [arr, arr1, arr2, arr3]
    if step > 0:
        # cumulative cyclic shifts: copy k has row j taken from row (j + k*step) % rows
        for k in range(1, nrows // step + 1):
            res.append(np.roll(arr, -k * step, axis=0))
    res = np.array(res)
    return(res)
#
#
def prep_dataset_aug(setd,nlcmaps,tlab=-1,step=8):
    """Assemble an augmented set of features and one-hot labels.

    Every coil contributes the copies returned by ``factory_rep``. Coils whose
    label equals ``tlab`` additionally get the cyclic row shifts, which gives
    that class a higher augmentation factor.

    Parameters
    ----------
    setd : iterable
        Coil SIDs; each is looked up in the notebook-level ``coils`` table.
    nlcmaps : mapping
        Per-coil maps, forwarded to ``featureMap``.
    tlab : int, default -1
        Label of the class that receives the extra shifted copies. The default
        matches no class.
    step : int, default 8
        Row shift passed to ``factory_rep`` for coils of class ``tlab``.

    Returns
    -------
    list
        ``[features, labels]`` as numpy arrays. Label 1 is encoded as
        ``[1., 0.]`` and label 2 as ``[0., 1.]``.

    Raises
    ------
    ValueError
        If a coil has a label other than 1 or 2. Such a coil would otherwise
        add feature maps without labels and misalign the two arrays.
    """
    one_hot = {1: [1.,0.], 2: [0.,1.]}
    setd_f = []
    setd_l = []
    for i in setd:
        lbl = coils.loc[coils['SID']==i,'Label'].values[0]
        if lbl not in one_hot:
            raise ValueError('Coil %r has Label %r; only labels 1 and 2 are supported' % (i, lbl))
        arrimg = featureMap(i, nlcmaps)
        if lbl == tlab: # if lower class => higher augmentation
            res = factory_rep(arrimg,step=step)
        else:
            res = factory_rep(arrimg)
        for j in range(res.shape[0]):
            setd_f.append(res[j,:,:])
            setd_l.append(one_hot[lbl])
    setd_f = np.array(setd_f)
    setd_l = np.array(setd_l)
    return([setd_f,setd_l])

In [34]:
# i=train[0]
# tmpimg = featureMap(i, nclmaps)
# tmpimg.shape

In [35]:
goal = './ZN_1D_imgs/orig/'
# np.savez does not create missing folders; make sure the target exists first.
os.makedirs(goal, exist_ok=True)
# Only the training set is augmented; label 2 receives the extra shifted copies.
train_f, train_l = prep_dataset_aug(train,nclmaps,2)
np.savez(os.path.join(goal, 'train.npz'), features=train_f, labels=train_l)
#
valid_f, valid_l = prep_dataset(valid,nclmaps)
np.savez(os.path.join(goal, 'validation.npz'), features=valid_f, labels=valid_l)
#
test_f, test_l = prep_dataset(test,nclmaps)
np.savez(os.path.join(goal, 'test.npz'), features=test_f, labels=test_l)
#
ass_f, ass_l = prep_dataset(ass,nclmaps)
np.savez(os.path.join(goal, 'assess.npz'), features=ass_f, labels=ass_l)
#

In [36]:
#
# Read the training archive back and report the shape of both stored arrays.
npzfile = np.load(goal+'train.npz')
for array_name in ('features', 'labels'):
    print(npzfile[array_name].shape)

(4293, 264, 18)
(4293, 2)


In [37]:
# Value counts of each one-hot column of the stored training labels.
for label_col in (0, 1):
    print(np.unique(npzfile['labels'][:,label_col],return_counts=True))

(array([0., 1.]), array([2849, 1444]))
(array([0., 1.]), array([1444, 2849]))


## Definition of the DQ_Model

In [38]:
#
# Definition of the DQCnnNet
#
#
class DQCnnNet(tf.keras.Model):
    """
    Original DQCnnNet

    Subclassed Keras model: five Conv1D layers (32 filters each) with batch
    normalisation, spatial dropout and one average pooling, followed by a
    three-layer dense head and a 2-unit softmax output.
    """
    def __init__(self, inp_shape = (264,18)):
        """Create all layers.

        inp_shape is stored and forwarded to the first Conv1D as `input_shape`.
        # assumes inputs are (batch, 264, 18) feature maps by default — TODO confirm
        """
        super(DQCnnNet, self).__init__()
        self.inp_shape = inp_shape

        # Hyper-parameters shared by the layers below
        self.kernel_size_0 = 20   # kernel of the first two convolutions
        self.kernel_size_1 = 6    # kernel of the last three convolutions
        self.drop_rate = 0.4      # rate used by every (spatial) dropout layer

        # Convolutional feature extractor.
        # NOTE: do not reorder the layer creation — it determines the automatic
        # layer names and the order of the weights.
        self.conv1 = tf.keras.layers.Conv1D(filters=32,
                                            kernel_size=self.kernel_size_0,
                                            activation='relu',
                                            padding= "same",
                                            input_shape=self.inp_shape)
        self.batch_n_1 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv1D(filters=32,
                                            kernel_size=self.kernel_size_0,
                                            activation='relu',
                                            padding= "valid")
        self.batch_n_2 = tf.keras.layers.BatchNormalization()
        self.spatial_drop_1 = tf.keras.layers.SpatialDropout1D(self.drop_rate)
        self.conv3 = tf.keras.layers.Conv1D(filters=32,
                                            kernel_size=self.kernel_size_1,
                                            activation='relu',
                                            padding= "valid")
        self.avg_pool1 = tf.keras.layers.AvgPool1D(pool_size=2)
        self.conv4 = tf.keras.layers.Conv1D(filters=32,
                                            kernel_size=self.kernel_size_1,
                                            activation='relu',
                                            padding= "valid")
        self.conv5 = tf.keras.layers.Conv1D(filters=32,
                                            kernel_size=self.kernel_size_1,
                                            activation='relu',
                                            padding= "valid")
        self.spatial_drop_2 = tf.keras.layers.SpatialDropout1D(self.drop_rate)
        # Dense classification head: 296 -> 148 -> 74 units, dropout after each
        self.flat = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(296, activation='relu')
        self.dropout1 = tf.keras.layers.Dropout(self.drop_rate)
        self.dense2 = tf.keras.layers.Dense(148, activation='relu')
        self.dropout2 = tf.keras.layers.Dropout(self.drop_rate)
        self.dense3 = tf.keras.layers.Dense(74, activation='relu')
        self.dropout3 = tf.keras.layers.Dropout(self.drop_rate)
        # Two-class softmax output (matches the one-hot labels with two columns)
        self.out = tf.keras.layers.Dense(2, activation='softmax')

    def call(self, input_tensor):
        """Forward pass: apply the layers in sequence and return the softmax output."""
        # Convolutional block 1: conv -> BN -> conv -> BN -> spatial dropout
        conv1 = self.conv1(input_tensor)
        batch_n_1 = self.batch_n_1(conv1)
        conv2 = self.conv2(batch_n_1)
        batch_n_2 = self.batch_n_2(conv2)
        spatial_drop_1 = self.spatial_drop_1(batch_n_2)
        # Convolutional block 2: conv -> average pooling -> conv -> conv -> spatial dropout
        conv3 = self.conv3(spatial_drop_1)
        avg_pool1 = self.avg_pool1(conv3)
        conv4 = self.conv4(avg_pool1)
        conv5 = self.conv5(conv4)
        spatial_drop_2 = self.spatial_drop_2(conv5)
        # Dense head on the flattened features
        flat = self.flat(spatial_drop_2)
        dense1 = self.dense1(flat)
        dropout1 = self.dropout1(dense1)
        dense2 = self.dense2(dropout1)
        dropout2 = self.dropout2(dense2)
        #
        dense3   = self.dense3(dropout2)
        dropout3 = self.dropout3(dense3)
        return self.out(dropout3)
#

### Training the model

In [53]:
#
learning_rate   = 2e-4
BATCH_SIZE      = 50
# Batches per epoch. len() counts samples, whereas .size would also count the
# two one-hot columns and double the value.
STEPS_PER_EPOCH = int(np.ceil(len(train_l) / BATCH_SIZE))
SAVE_PERIOD     = 1  # checkpoint interval in epochs (the checkpoint below runs every epoch)
#
loss = tf.keras.losses.categorical_crossentropy
# loss = tf.keras.losses.binary_crossentropy
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
model = DQCnnNet()
save_path = os.path.join(os.getcwd(), 'ZN_1D_imgs/')
modelPath = os.path.join(os.getcwd(), 'ZN_1D_imgs/bestModel.h5')
# Make sure the output folder exists before anything is written into it.
os.makedirs(save_path, exist_ok=True)

model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

checkpoint = ModelCheckpoint( # set model saving checkpoints
    modelPath, # set path to save model weights
    monitor='val_loss', # set monitor metrics
    verbose=1, # set training verbosity
    save_best_only=True, # set if want to save only best weights
    # Weights only: a subclassed model cannot be written as a full model to an
    # HDF5 (.h5) file. The full model is saved by model.save(save_path) below.
    save_weights_only=True,
    mode='auto', # set if save min or max in metrics
    # 'epoch' instead of a batch count: val_loss only exists at epoch end, so a
    # batch-based checkpoint never sees the monitored metric and never saves.
    save_freq='epoch'
    )

earlystopping = EarlyStopping(
    monitor='val_loss', # set monitor metrics
    min_delta=0.0001, # set minimum metrics delta
    patience=25, # number of epochs to stop training
    restore_best_weights=True, # set if use best weights or last weights
    )
callbacksList = [checkpoint, earlystopping] # build callbacks list
#%%
hist = model.fit(train_f, train_l, epochs=100, batch_size=BATCH_SIZE,
                validation_data=(valid_f, valid_l), callbacks=callbacksList)

# Keep the training curves next to the saved model.
with open(os.path.join(save_path, "hist.pkl"), "wb") as file:
    pickle.dump(hist.history, file)

model.save(save_path)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100


In [54]:
# Number of label-2 coils in the training split (before augmentation).
len(train2)

77

In [55]:
# Class balance of the validation set: column 0 of the one-hot labels is 1 for label-1 coils.
np.unique(valid_l[:,0],return_counts=True)

(array([0., 1.]), array([ 22, 104]))

In [56]:
# path = "YOUR MODEL PATH"
# model = tf.keras.models.load_model(path, custom_objects={"CustomModel": DQCnnNet})
# input_shape = (None, 640, 2)
# model.build(input_shape)

In [57]:
# Write the trained model to save_path (the same call already ends the training cell).
model.save(save_path)

In [58]:
# Drop the in-memory model so the evaluation below uses the copy reloaded from disk.
del model

In [59]:
# Reload the model that was written to save_path.
# NOTE(review): the custom_objects key is "CustomModel" while the class is named
# DQCnnNet, so the mapping probably never matches by name — confirm which key
# the loader expects before relying on the custom class being restored.
model = tf.keras.models.load_model(save_path, custom_objects={"CustomModel": DQCnnNet})

In [60]:
# Loss and accuracy of the reloaded model on the test split.
testLoss, testAcc = model.evaluate(x=test_f, y=test_l)
for caption, metric_value in (('\nAccuracy:', testAcc), ('\nLoss: ', testLoss)):
    print(caption, metric_value)


Accuracy: 0.7936508059501648

Loss:  0.5865824818611145


In [67]:
# Class probabilities for the test split.
# (classification_report / confusion_matrix are imported in the import cell at the top.)
ypredt= model.predict(test_f)



In [68]:
# Side by side: the two predicted probabilities followed by the two one-hot label columns.
pred_vs_true_test = np.concatenate([ypredt, test_l], axis=1)
pred_vs_true_test

array([[9.83520448e-01, 1.64795872e-02, 1.00000000e+00, 0.00000000e+00],
       [9.99999523e-01, 4.28680380e-07, 1.00000000e+00, 0.00000000e+00],
       [9.91097569e-01, 8.90241750e-03, 1.00000000e+00, 0.00000000e+00],
       [8.91398311e-01, 1.08601682e-01, 1.00000000e+00, 0.00000000e+00],
       [9.99630451e-01, 3.69580666e-04, 1.00000000e+00, 0.00000000e+00],
       [7.95970023e-01, 2.04029992e-01, 1.00000000e+00, 0.00000000e+00],
       [9.99961972e-01, 3.80547026e-05, 1.00000000e+00, 0.00000000e+00],
       [1.83391094e-01, 8.16608846e-01, 1.00000000e+00, 0.00000000e+00],
       [9.30745780e-01, 6.92541674e-02, 1.00000000e+00, 0.00000000e+00],
       [9.98050213e-01, 1.94980484e-03, 1.00000000e+00, 0.00000000e+00],
       [7.87257910e-01, 2.12742090e-01, 1.00000000e+00, 0.00000000e+00],
       [3.09347481e-01, 6.90652490e-01, 1.00000000e+00, 0.00000000e+00],
       [4.59104270e-01, 5.40895700e-01, 1.00000000e+00, 0.00000000e+00],
       [9.05272663e-01, 9.47273597e-02, 1.00000000e

In [70]:
# Collapse one-hot labels and predicted probabilities to class indices (0 or 1).
yTestClassT, yPredClassT = (np.argmax(scores, axis=1) for scores in (test_l, ypredt))

In [71]:
# Per-class precision / recall / F1 on the test split; index 0 is named "OK", index 1 "NoK".
report_test = classification_report(yTestClassT, yPredClassT, target_names=["OK", "NoK"])
print('\n Classification report Test\n\n', report_test)

# Confusion matrix: rows are true classes, columns are predicted classes.
cm_test = confusion_matrix(yTestClassT, yPredClassT)
print('\n Confusion matrix Test\n\n', cm_test)


 Classification report Test

               precision    recall  f1-score   support

          OK       0.95      0.79      0.86        52
         NoK       0.45      0.82      0.58        11

    accuracy                           0.79        63
   macro avg       0.70      0.80      0.72        63
weighted avg       0.87      0.79      0.81        63


 Confusion matrix Test

 [[41 11]
 [ 2  9]]


In [61]:
# Loss and accuracy on the held-out assessment coils.
# The names testLoss / testAcc are reused, so the test-split values are overwritten here.
testLoss, testAcc = model.evaluate(x=ass_f, y=ass_l)
for caption, metric_value in (('\nAccuracy:', testAcc), ('\nLoss: ', testLoss)):
    print(caption, metric_value)


Accuracy: 0.8333333134651184

Loss:  0.5959100127220154


In [62]:
# Class probabilities for the held-out assessment coils.
# (classification_report / confusion_matrix are imported in the import cell at the top.)
ypred = model.predict(ass_f)



In [63]:
# Side by side: the two predicted probabilities followed by the two one-hot label columns.
pred_vs_true_assess = np.concatenate([ypred, ass_l], axis=1)
pred_vs_true_assess

array([[9.14371848e-01, 8.56281295e-02, 1.00000000e+00, 0.00000000e+00],
       [9.93306935e-01, 6.69307262e-03, 1.00000000e+00, 0.00000000e+00],
       [3.77716541e-01, 6.22283459e-01, 1.00000000e+00, 0.00000000e+00],
       [9.99545276e-01, 4.54663124e-04, 1.00000000e+00, 0.00000000e+00],
       [5.30799448e-01, 4.69200522e-01, 1.00000000e+00, 0.00000000e+00],
       [9.98821914e-01, 1.17807707e-03, 1.00000000e+00, 0.00000000e+00],
       [3.90517861e-01, 6.09482110e-01, 1.00000000e+00, 0.00000000e+00],
       [9.02728379e-01, 9.72715840e-02, 1.00000000e+00, 0.00000000e+00],
       [3.41671586e-01, 6.58328474e-01, 1.00000000e+00, 0.00000000e+00],
       [9.99201715e-01, 7.98276043e-04, 1.00000000e+00, 0.00000000e+00],
       [9.96552467e-01, 3.44757154e-03, 1.00000000e+00, 0.00000000e+00],
       [8.92485738e-01, 1.07514322e-01, 1.00000000e+00, 0.00000000e+00],
       [9.09464717e-01, 9.05352905e-02, 1.00000000e+00, 0.00000000e+00],
       [3.88021410e-01, 6.11978590e-01, 1.00000000e

In [64]:
# Collapse one-hot labels and predicted probabilities to class indices (0 or 1).
yTestClass, yPredClass = (np.argmax(scores, axis=1) for scores in (ass_l, ypred))

In [65]:
# Per-class precision / recall / F1 on the assessment coils; index 0 is named "OK", index 1 "NoK".
report_assess = classification_report(yTestClass, yPredClass, target_names=["OK", "NoK"])
print('\n Classification report \n\n', report_assess)

# Confusion matrix: rows are true classes, columns are predicted classes.
cm_assess = confusion_matrix(yTestClass, yPredClass)
print('\n Confusion matrix \n\n', cm_assess)


 Classification report 

               precision    recall  f1-score   support

          OK       0.83      0.83      0.83        30
         NoK       0.83      0.83      0.83        30

    accuracy                           0.83        60
   macro avg       0.83      0.83      0.83        60
weighted avg       0.83      0.83      0.83        60


 Confusion matrix 

 [[25  5]
 [ 5 25]]


In [66]:
# Layer-by-layer overview (names, output shapes, parameter counts) of the reloaded model.
model.summary()

Model: "dq_cnn_net_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_10 (Conv1D)          multiple                  11552     
                                                                 
 batch_normalization_4 (Bat  multiple                  128       
 chNormalization)                                                
                                                                 
 conv1d_11 (Conv1D)          multiple                  20512     
                                                                 
 batch_normalization_5 (Bat  multiple                  128       
 chNormalization)                                                
                                                                 
 spatial_dropout1d_4 (Spati  multiple                  0         
 alDropout1D)                                                    
                                                      