In [1]:
# One folder of raw recordings per bearing condition (normal, inner-race
# fault, outer-race fault, ball fault).
normal_folder, inner_folder, outer_folder, ball_folder = (
    "/home/biswajit/data/ims/normal/",
    "/home/biswajit/data/ims/inner/",
    "/home/biswajit/data/ims/outer/",
    "/home/biswajit/data/ims/ball/",
)

In [2]:
import os
import glob
import itertools

In [3]:
# glob.glob returns matches in arbitrary, filesystem-dependent order. Sort each
# listing so that the positional slices and seeded splits applied to these
# lists later in the notebook pick the same files on every run and machine.
# os.path.join also avoids the doubled "/" the folder names would otherwise
# produce, since they already end with a slash.
normal_files = sorted(glob.glob(os.path.join(normal_folder, "*")))
print("Total number of normal files: ", len(normal_files))

inner_files = sorted(glob.glob(os.path.join(inner_folder, "*")))
print("Total number of inner_fault files: ", len(inner_files))

outer_files = sorted(glob.glob(os.path.join(outer_folder, "*")))
print("Total number of outer_fault files: ", len(outer_files))

ball_files = sorted(glob.glob(os.path.join(ball_folder, "*")))
print("Total number of ball_fault files: ", len(ball_files))

Total number of normal files:  750
Total number of inner_fault files:  750
Total number of outer_fault files:  750
Total number of ball_fault files:  750


In [4]:
from sklearn.model_selection import train_test_split

In [5]:
# Hold out files 650-749 of every class as the test set, keeping the classes
# in the order normal, inner, outer, ball.
test_files = [
    path
    for class_files in (normal_files, inner_files, outer_files, ball_files)
    for path in class_files[650:750]
]

In [6]:
# Split the first 650 files of every class into training and validation files.
# test_size = 100 is an absolute count (100 validation files per class), and
# each class gets its own fixed random_state so the split is repeatable.
normal_files_train, normal_files_validation = train_test_split(normal_files[:650], test_size = 100, random_state = 32)
inner_files_train, inner_files_validation = train_test_split(inner_files[:650], test_size = 100, random_state = 323)
outer_files_train, outer_files_validation = train_test_split(outer_files[:650], test_size = 100, random_state = 123)
ball_files_train, ball_files_validation = train_test_split(ball_files[:650], test_size = 100, random_state = 285)

print("For normal:", len(normal_files_train), len(normal_files_validation))
print("For inner:", len(inner_files_train), len(inner_files_validation))
print("For outer:", len(outer_files_train), len(outer_files_validation))
# Report the ball-fault split itself (this line previously repeated the outer counts).
print("For ball:", len(ball_files_train), len(ball_files_validation))

For normal: 550 100
For inner: 550 100
For outer: 550 100
For ball: 550 100


In [7]:
# Pool the per-class splits into a single training list and a single
# validation list (class order: normal, inner, outer, ball).
train_files = [
    *normal_files_train,
    *inner_files_train,
    *outer_files_train,
    *ball_files_train,
]
validation_files = [
    *normal_files_validation,
    *inner_files_validation,
    *outer_files_validation,
    *ball_files_validation,
]

print("Total train_files:", len(train_files))
print("Total validation_files:", len(validation_files))
print("Total test_files:", len(test_files))

Total train_files: 2200
Total validation_files: 400
Total test_files: 400


In [8]:
import numpy as np

In [9]:
# Mix the classes within each list, in place, so that consecutive files no
# longer all belong to the same class.
for file_group in (train_files, validation_files):
    np.random.shuffle(file_group)

In [10]:
import tensorflow as tf
import pandas as pd
import re

In [11]:
def tf_data_generator(file_list,  batch_size = 4):
    """Endlessly yield shuffled batches of 32x32 signal segments with class labels.

    Every file is read as a whitespace-separated table without a header row.
    One column, selected by the fault type named in the file path, is cut into
    consecutive 1024-sample segments (a trailing remainder is dropped) and each
    segment is reshaped to ``(32, 32, 1)``. Once every file has been served,
    the file order is reshuffled and the generator starts over, so it never
    terminates on its own.

    Parameters
    ----------
    file_list : sequence of str or bytes
        Paths of the data files. Each path must contain one of "normal",
        "inner", "outer" or "ball"; they are tested in that order and the first
        hit decides the class. The sequence is copied, so the caller's
        object is never reordered.
    batch_size : int, default 4
        Number of *files* (not segments) combined into one yielded batch.

    Yields
    ------
    data : numpy.ndarray of shape (n_segments, 32, 32, 1)
        Segments of all files in the batch, in random order.
    labels : numpy.ndarray of shape (n_segments,)
        Class index per segment: 0-normal, 1-inner, 2-outer, 3-ball.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1, if ``file_list`` is empty, or if a
        path matches none of the four class names.
    """
    batch_size = int(batch_size)
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Private, decoded copy: accepts bytes as well as str paths and keeps the
    # reshuffling below from reordering the caller's sequence.
    file_list = [os.fsdecode(path) for path in file_list]
    if not file_list:
        # Without this guard the loop below would spin forever without yielding.
        raise ValueError("file_list must contain at least one file")

    # (pattern on the file path, column holding that signal). The position in
    # this tuple is the class label: 0-normal, 1-inner, 2-outer, 3-ball.
    fault_classes = (
        (re.compile(".*(normal)"), 0),
        (re.compile(".*(inner)"), 4),
        (re.compile(".*(outer)"), 2),
        (re.compile(".*(ball)"), 6),
    )

    i = 0
    while True:
        if i*batch_size >= len(file_list):
            # All files served: start a new pass in a fresh order.
            i = 0
            np.random.shuffle(file_list)
            continue

        file_chunk = file_list[i*batch_size:(i+1)*batch_size]
        data = []
        labels = []
        for file in file_chunk:
            label = next(
                (k for k, (pattern, _) in enumerate(fault_classes) if pattern.match(file)),
                None,
            )
            if label is None:
                # Previously this fell through to a NameError or silently
                # reused the column of the preceding file.
                raise ValueError(
                    "Cannot infer the fault class from file name: {!r}".format(file)
                )
            column_number = fault_classes[label][1]

            # Passing the path lets pandas open and close the file itself.
            temp = pd.read_csv(file, sep = r"\s+", header = None)
            num = len(temp) // 1024   # whole 1024-sample segments in this file
            signal = temp[column_number].to_numpy()[:num*1024]
            data.append(signal.reshape(num, 32, 32, 1))
            labels.append(np.repeat(label, num))

        data = np.concatenate(data)
        labels = np.concatenate(labels)

        # Shuffle segments and labels together so files are mixed within the batch.
        index = np.random.permutation(len(data))
        data, labels = data[index], labels[index]

        yield data, labels
        i = i + 1

In [12]:
batch_size = 50


def _dataset_from_files(file_list):
    """Wrap tf_data_generator over ``file_list`` in a tf.data.Dataset.

    Each element is a (data, labels) pair: data has shape (None, 32, 32, 1),
    labels has shape (None,), and both are declared as float32.
    """
    return tf.data.Dataset.from_generator(
        tf_data_generator,
        args=[file_list, batch_size],
        output_shapes=((None, 32, 32, 1), (None,)),
        output_types=(tf.float32, tf.float32),
    )


train_dataset = _dataset_from_files(train_files)

validation_dataset = _dataset_from_files(validation_files)

test_dataset = _dataset_from_files(test_files)

In [13]:
# Let tf.data pick the prefetch buffer size for the training and validation
# pipelines; the test pipeline is left without prefetching.
train_dataset, validation_dataset = (
    dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    for dataset in (train_dataset, validation_dataset)
)

In [14]:
from tensorflow.keras import layers

In [15]:
# Small CNN: two convolution + max-pooling stages, then a stack of dense
# layers ending in a 4-way softmax (one output per bearing condition).
model = tf.keras.Sequential(
    [
        layers.Conv2D(filters=32, kernel_size=5, activation='relu', input_shape=(32, 32, 1)),
        layers.MaxPool2D(pool_size=2),
        layers.Conv2D(filters=16, kernel_size=5, activation='relu'),
        layers.MaxPool2D(pool_size=2),
        layers.Flatten(),
        layers.Dense(units=120, activation='relu'),
        layers.Dense(units=84, activation='relu'),
        layers.Dense(units=16, activation='relu'),
        layers.Dense(units=4, activation='softmax'),
    ]
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 32)        832       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 10, 10, 16)        12816     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 16)          0         
_________________________________________________________________
flatten (Flatten)            (None, 400)               0         
_________________________________________________________________
dense (Dense)                (None, 120)               48120     
_________________________________________________________________
dense_1 (Dense)              (None, 84)                1

In [16]:
# Number of generator batches needed to see every file once. np.ceil returns a
# float, but Keras documents step counts as integers, so cast explicitly.
steps_per_epoch = int(np.ceil(len(train_files)/batch_size))
validation_steps = int(np.ceil(len(validation_files)/batch_size))
steps = int(np.ceil(len(test_files)/batch_size))
print(steps_per_epoch, validation_steps, steps)

44.0 8.0 8.0


In [17]:
# Labels are integer class indices, hence the sparse categorical loss.
model.compile(
    optimizer="adam",
    loss='sparse_categorical_crossentropy',
    metrics=["accuracy"],
)
# The datasets repeat forever, so the step counts define where an epoch ends.
model.fit(
    train_dataset,
    epochs=10,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_dataset,
    validation_steps=validation_steps,
)

Train for 44.0 steps, validate for 8.0 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f838c152810>

In [18]:
# Evaluate on the held-out test files; `steps` bounds the otherwise endless dataset.
(test_loss, test_acc) = model.evaluate(x=test_dataset, steps=steps)



Finally, the model can be saved using the following command.

In [19]:
# model.save("IAI_IMS_final.h5")

We have commented out the line above because different runs may lead to different accuracies. We saved a model with all its parameters when the test accuracy was 99.84%. Readers can run this notebook several times, with different numbers of epochs, and save their best model.