In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications.resnet_v2 import ResNet50V2
from tensorflow.keras.utils import Sequence
import pickle
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Input, GlobalAveragePooling2D, Flatten
import orjson
from threading import Thread, Lock
from queue import Queue

import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
# Unpickle the object stored in the test-files pickle and preview its first rows.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('../dataset_test_files.pkl', mode='rb') as pkl_file:
    df_test = pickle.load(pkl_file)
df_test.head()

Unnamed: 0,filename,kind,defect,size,class_name,class
3,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,G,RS,3
4,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,G,RS,3
11,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,G,RS,3
15,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,G,RS,3
20,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,G,RS,3


In [3]:
# Unpickle the object stored in the train-files pickle and preview its first rows.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('../dataset_train_files.pkl', mode='rb') as pkl_file:
    df_train = pickle.load(pkl_file)
df_train.head()

Unnamed: 0,filename,kind,defect,size,class_name,class
0,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,G,RS,3
1,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,G,RS,3
2,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,M,RS,3
5,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,M,RS,3
6,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,G,RS,3


In [4]:
class ImageGeneratorSplited(Sequence):
    """Keras ``Sequence`` whose batches mix raw and augmented images.

    Two ``flow_from_dataframe`` iterators are built over the same dataframe:
    one from ``ImageDataGenerator(**kwargs)`` (the augmented stream) and one
    from a default ``ImageDataGenerator()`` (the raw stream). Two background
    threads keep one bounded queue per stream filled, and every item returned
    by this sequence stacks one raw batch on top of one augmented batch.

    Parameters
    ----------
    df : dataframe handed to ``flow_from_dataframe`` for both streams.
    y_col : label column forwarded to ``flow_from_dataframe``.
    batch_size : total number of samples requested per combined batch.
    split : fraction of ``batch_size`` taken from the augmented stream;
        must be strictly between 0 and 1.
    queue_size : maximum number of prefetched batches per stream.
    **kwargs : forwarded to the augmenting ``ImageDataGenerator``.

    Raises
    ------
    ValueError
        If ``split`` is not in the open interval (0, 1), or if
        ``int(batch_size * split)`` is smaller than 1.

    Notes
    -----
    ``__getitem__`` ignores its index: batches are served in the order the
    background threads produced them.
    """

    def __init__(self, df, y_col='class_name', batch_size=64, split=0.5, queue_size=100, **kwargs):
        # Fail early with a clear message: split == 1 would divide by zero in
        # the length computation, and an empty sub-batch would otherwise only
        # be handed to the underlying iterator instead of being rejected here.
        if not 0 < split < 1:
            raise ValueError('split must be strictly between 0 and 1, got {!r}'.format(split))
        batch_da = int(batch_size * split)
        if batch_da < 1:
            raise ValueError(
                'batch_size={!r} with split={!r} leaves no augmented samples per batch'.format(
                    batch_size, split))

        self.gen_da = ImageDataGenerator(**kwargs)
        self.gen_raw = ImageDataGenerator()
        # Number of batches reported by __len__.
        self.length = int(len(df.index) / (batch_size * (1 - split)))
        self.batch_size = batch_size
        self.batch_da = batch_da
        self.flow_da = self.gen_da.flow_from_dataframe(df, y_col=y_col, batch_size=self.batch_da)
        # The raw stream supplies whatever the augmented stream does not.
        self.batch_raw = self.batch_size - self.batch_da
        self.flow_raw = self.gen_raw.flow_from_dataframe(df, y_col=y_col, batch_size=self.batch_raw)
        # One raw batch is pulled (and discarded) only to learn the per-image shape.
        first_batch = next(self.flow_raw)
        self.shape = (self.length, *first_batch[0].shape[1:])
        self._next_da = Queue(queue_size)
        self._next_raw = Queue(queue_size)
        self._next_da_thread = self.prepare_da()
        self._next_raw_thread = self.prepare_raw()

    def __len__(self):
        """Return the number of batches per epoch."""
        return self.length

    @staticmethod
    def _fill_queue(flow, queue):
        """Worker loop: push batches from ``flow`` into ``queue`` until ``flow`` fails.

        If ``next(flow)`` raises, the exception object itself is queued so the
        consumer can re-raise it instead of blocking forever, and the worker stops.
        """
        while True:
            try:
                batch = next(flow)
            except Exception as exc:
                queue.put(exc)
                return
            queue.put(batch)

    @staticmethod
    def _take(queue):
        """Block for the next queued item; re-raise it if it is a worker exception."""
        item = queue.get()
        if isinstance(item, Exception):
            raise item
        return item

    def get_next_da(self):
        """Return the next prefetched augmented batch (blocks until one is ready)."""
        return self._take(self._next_da)

    def get_next_raw(self):
        """Return the next prefetched raw batch (blocks until one is ready)."""
        return self._take(self._next_raw)

    def _prepare_next_da(self):
        """Thread target that keeps the augmented queue filled."""
        self._fill_queue(self.flow_da, self._next_da)

    def _prepare_next_raw(self):
        """Thread target that keeps the raw queue filled."""
        self._fill_queue(self.flow_raw, self._next_raw)

    def prepare_da(self):
        """Start and return the thread that prefetches augmented batches."""
        # daemon=True: the loop never ends on its own, so a non-daemon thread
        # would keep the interpreter alive after the main thread finishes.
        t = Thread(target=self._prepare_next_da, daemon=True)
        t.start()
        return t

    def prepare_raw(self):
        """Start and return the thread that prefetches raw batches."""
        t = Thread(target=self._prepare_next_raw, daemon=True)
        t.start()
        return t

    def __iter__(self):
        """Yield ``len(self)`` combined batches."""
        for _ in range(len(self)):
            yield self[None]

    def __getitem__(self, item):
        """Return ``(x, y)`` with the raw batch stacked above the augmented batch.

        ``item`` is ignored; the next prefetched batch of each stream is used.
        """
        x_raw, y_raw = self.get_next_raw()
        x_da, y_da = self.get_next_da()
        return np.vstack((x_raw, x_da)), np.vstack((y_raw, y_da))
    

In [5]:
# Value passed as `split` to ImageGeneratorSplited: the share of each training
# batch drawn from the augmented stream.
SPLIT = 0.7

# Augmentation options forwarded to the ImageDataGenerator inside ImageGeneratorSplited.
augmentation_options = dict(
    rotation_range=90,
    width_shift_range=0.2,
    height_shift_range=0.2,
    # zoom_range=[1, 3],
    horizontal_flip=True,
    vertical_flip=True,
)
gen_train = ImageGeneratorSplited(df_train, batch_size=64, split=SPLIT, **augmentation_options)
print(gen_train)

# Validation data comes from a default (non-augmenting) ImageDataGenerator.
gen_test = ImageDataGenerator().flow_from_dataframe(
    df_test,
    y_col='class_name',
    batch_size=128,
)
gen_test

Found 840 validated image filenames belonging to 4 classes.
Found 840 validated image filenames belonging to 4 classes.
<__main__.ImageGeneratorSplited object at 0x000001DA65635588>
Found 360 validated image filenames belonging to 4 classes.


<keras.preprocessing.image.DataFrameIterator at 0x1da66623148>

In [6]:
# ResNet50V2 backbone on a 256x256x3 input, built without the classification
# top (include_top=False) and with weights=None.
input_tensor = Input(shape=(256, 256, 3))
base_model = ResNet50V2(
    include_top=False,
    weights=None,
    input_tensor=input_tensor,
)
base_model.summary()

Model: "resnet50v2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 256, 256, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 262, 262, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 128, 128, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
pool1_pad (ZeroPadding2D)       (None, 130, 130, 64) 0           conv1_conv[0][0]                 
_________________________________________________________________________________________

In [7]:
# Classification head on top of the backbone: global average pooling followed
# by two fully-connected stages, each Dense(256, relu) + Dropout(0.5).
x = GlobalAveragePooling2D()(base_model.output)
for units in (256, 256):
    x = Dense(units, activation='relu')(x)
    x = Dropout(0.5)(x)
# Softmax output layer with one unit per class (4 classes).
predictions = Dense(4, activation='softmax')(x)

# This is the model we will train.
model = Model(inputs=base_model.input, outputs=predictions)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 256, 256, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 262, 262, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 128, 128, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
pool1_pad (ZeroPadding2D)       (None, 130, 130, 64) 0           conv1_conv[0][0]                 
______________________________________________________________________________________________

In [8]:
model.compile(
    # Optimization routine: tells the model how to adjust the parameter values to minimize the error.
    optimizer='adam',
    # Error function: tells how wrong our predictions are.
    loss='categorical_crossentropy',
    # List of metrics used to evaluate the model.
    metrics=['accuracy'],
)

In [9]:
# Save the JSON string returned by model.to_json() to disk.
with open('model_da_resnet50v2.json', mode='w') as json_file:
    model_json = model.to_json()
    json_file.write(model_json)

In [10]:
# Callbacks for the first training run:
# - save weights only, and only when val_accuracy improves;
# - write TensorBoard logs.
checkpoint_cb = ModelCheckpoint(
    'model_da_resnet50v2.{epoch:03d}-{val_accuracy:.4f}.h5',
    monitor='val_accuracy',
    save_best_only=True,
    save_weights_only=True,
)
tensorboard_cb = TensorBoard(log_dir='logs_da_resnet50v2')

history = model.fit(
    gen_train,
    validation_data=gen_test,
    epochs=100,
    verbose=2,
    callbacks=[checkpoint_cb, tensorboard_cb],
)

Train for 43 steps, validate for 3 steps
Epoch 1/100
43/43 - 40s - loss: 1.4460 - accuracy: 0.3286 - val_loss: 4590.2284 - val_accuracy: 0.2500
Epoch 2/100
43/43 - 14s - loss: 1.2983 - accuracy: 0.4068 - val_loss: 4.8924 - val_accuracy: 0.2444
Epoch 3/100
43/43 - 19s - loss: 1.2097 - accuracy: 0.4499 - val_loss: 19.0616 - val_accuracy: 0.2500
Epoch 4/100
43/43 - 20s - loss: 1.1765 - accuracy: 0.4663 - val_loss: 31.6380 - val_accuracy: 0.2556
Epoch 5/100
43/43 - 19s - loss: 1.1482 - accuracy: 0.4802 - val_loss: 20.5391 - val_accuracy: 0.2500
Epoch 6/100
43/43 - 19s - loss: 1.1558 - accuracy: 0.4897 - val_loss: 38.5619 - val_accuracy: 0.2500
Epoch 7/100
43/43 - 21s - loss: 1.1157 - accuracy: 0.5045 - val_loss: 24.1424 - val_accuracy: 0.2500
Epoch 8/100
43/43 - 20s - loss: 1.0991 - accuracy: 0.5232 - val_loss: 10.8039 - val_accuracy: 0.2500
Epoch 9/100
43/43 - 21s - loss: 1.0901 - accuracy: 0.5172 - val_loss: 124.5974 - val_accuracy: 0.2500
Epoch 10/100
43/43 - 20s - loss: 1.0629 - accura

In [11]:
# Dump the metric series recorded by the first fit() call to JSON.
# Each series goes through np.array(...).tolist() before orjson serializes it.
with open('train_da_history.json', mode='wb') as history_file:
    history_payload = {}
    for metric_name, series in history.history.items():
        history_payload[metric_name] = np.array(series).tolist()
    history_file.write(orjson.dumps(history_payload))

In [13]:
# Continue training the same model from epoch 100 up to epoch 120, with a new
# ModelCheckpoint instance and TensorBoard logging to the same directory.
resume_checkpoint_cb = ModelCheckpoint(
    'model_da_resnet50v2.{epoch:03d}-{val_accuracy:.4f}.h5',
    monitor='val_accuracy',
    save_best_only=True,
    save_weights_only=True,
)
resume_tensorboard_cb = TensorBoard(log_dir='logs_da_resnet50v2')

history2 = model.fit(
    gen_train,
    validation_data=gen_test,
    initial_epoch=100,
    epochs=120,
    verbose=2,
    callbacks=[resume_checkpoint_cb, resume_tensorboard_cb],
)

Train for 43 steps, validate for 3 steps
Epoch 101/120
43/43 - 16s - loss: 0.4992 - accuracy: 0.8181 - val_loss: 1.6065 - val_accuracy: 0.4889
Epoch 102/120
43/43 - 16s - loss: 0.4208 - accuracy: 0.8421 - val_loss: 1.0885 - val_accuracy: 0.6361
Epoch 103/120
43/43 - 18s - loss: 0.3417 - accuracy: 0.8705 - val_loss: 0.8922 - val_accuracy: 0.7250
Epoch 104/120
43/43 - 19s - loss: 0.3717 - accuracy: 0.8533 - val_loss: 1.2079 - val_accuracy: 0.6306
Epoch 105/120
43/43 - 21s - loss: 0.4429 - accuracy: 0.8271 - val_loss: 2.2696 - val_accuracy: 0.5417
Epoch 106/120
43/43 - 21s - loss: 0.4449 - accuracy: 0.8338 - val_loss: 2.6457 - val_accuracy: 0.5333
Epoch 107/120
43/43 - 20s - loss: 0.3935 - accuracy: 0.8522 - val_loss: 1.7570 - val_accuracy: 0.5917
Epoch 108/120
43/43 - 20s - loss: 0.3576 - accuracy: 0.8600 - val_loss: 1.4980 - val_accuracy: 0.5917
Epoch 109/120
43/43 - 20s - loss: 0.3598 - accuracy: 0.8641 - val_loss: 1.1419 - val_accuracy: 0.6278
Epoch 110/120
43/43 - 20s - loss: 0.3111 

In [14]:
# Dump the metric series recorded by the second fit() call to JSON.
# Each series goes through np.array(...).tolist() before orjson serializes it.
with open('train_da_history2.json', mode='wb') as history_file:
    history2_payload = {}
    for metric_name, series in history2.history.items():
        history2_payload[metric_name] = np.array(series).tolist()
    history_file.write(orjson.dumps(history2_payload))