In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications.resnet_v2 import ResNet50V2
from tensorflow.keras.utils import Sequence
import pickle
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Input, GlobalAveragePooling2D, Flatten
import orjson
from threading import Thread, Lock
from queue import Queue

import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
with open('../dataset_test_files.pkl', 'rb') as fp:
    df_test = pickle.load(fp)
df_test.head()

Unnamed: 0,filename,kind,defect,size,class_name,class
3,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,G,RS,3
4,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,G,RS,3
11,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,G,RS,3
15,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,G,RS,3
20,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,G,RS,3


In [3]:
with open('../dataset_train_files.pkl', 'rb') as fp:
    df_train = pickle.load(fp)
df_train.head()

Unnamed: 0,filename,kind,defect,size,class_name,class
0,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,G,RS,3
1,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,G,RS,3
2,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,M,RS,3
5,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,M,RS,3
6,D:\desenvolvimento\datasets\TRAIN1200 -1200_CO...,R,S,G,RS,3


In [6]:
class ImageGeneratorSplited(Sequence):
    def __init__(self, df, y_col='class_name', batch_size=64, split=0.5, queue_size=100, **kwargs):
        self.gen_da = ImageDataGenerator(**kwargs)
        self.gen_raw = ImageDataGenerator()
        self.length = int(len(df.index) / (batch_size * (1 - split)))
        self.batch_size = batch_size
        self.batch_da = int(batch_size * split)
        self.flow_da = self.gen_da.flow_from_dataframe(df, y_col=y_col, batch_size=self.batch_da)
        self.batch_raw = self.batch_size - self.batch_da
        self.flow_raw = self.gen_raw.flow_from_dataframe(df, y_col=y_col, batch_size=self.batch_raw)
        temp = next(self.flow_raw)
        self.shape = (self.length, *temp[0].shape[1:])
        self._next_da = Queue(queue_size)
        self._next_raw = Queue(queue_size)
        self._next_da_thread = self.prepare_da()
        self._next_raw_thread = self.prepare_raw()
        
    def __len__(self):
        return self.length
        
    def get_next_da(self):
        return self._next_da.get()
    
    def get_next_raw(self):
        return self._next_raw.get()
    
    def _prepare_next_da(self):
        while True:
            self._next_da.put(next(self.flow_da))
    
    def _prepare_next_raw(self):
        while True:
            self._next_raw.put(next(self.flow_raw))
            
    def prepare_da(self):
        t = Thread(target=self._prepare_next_da)
        t.start()
        return t
    
    def prepare_raw(self):
        t = Thread(target=self._prepare_next_raw)
        t.start()
        return t
    
    
    def __iter__(self):
        for _ in range(len(self)):
            yield self[None]

    def __getitem__(self, item):
        x_raw, y_raw = self.get_next_raw()
        x_da, y_da = self.get_next_da()
        return np.vstack((x_raw, x_da)), np.vstack((y_raw, y_da))
    

In [7]:
SPLIT = 0.7
gen_train = ImageGeneratorSplited(df_train,
    rotation_range=90,
    width_shift_range=0.2,
    height_shift_range=0.2,
    # zoom_range=[1, 3],
    horizontal_flip=True,
    vertical_flip=True,
    split=SPLIT
)
print(gen_train)
gen_test = ImageGeneratorSplited(df_test, split=SPLIT)
gen_test

Found 840 validated image filenames belonging to 4 classes.
Found 840 validated image filenames belonging to 4 classes.
<__main__.ImageGeneratorSplited object at 0x000001F3DCAD5408>
Found 360 validated image filenames belonging to 4 classes.
Found 360 validated image filenames belonging to 4 classes.


<__main__.ImageGeneratorSplited at 0x1f3dcab5ec8>

In [8]:
input_tensor = Input(shape=(256, 256, 3))
base_model = ResNet50V2(False, None, input_tensor=input_tensor)
base_model.summary()

Model: "resnet50v2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 256, 256, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 262, 262, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 128, 128, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
pool1_pad (ZeroPadding2D)       (None, 130, 130, 64) 0           conv1_conv[0][0]                 
_________________________________________________________________________________________

In [9]:
x = base_model.output
x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
# and a logistic layer -- let's say we have 4 classes
predictions = Dense(4, activation='softmax')(x)

# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 256, 256, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 262, 262, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 128, 128, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
pool1_pad (ZeroPadding2D)       (None, 130, 130, 64) 0           conv1_conv[0][0]                 
______________________________________________________________________________________________

In [10]:
model.compile(optimizer='adam', # Rotina de otimização, que informa a ml como ajustar o valor dos parâmetros para minimizar o erro.
              loss='categorical_crossentropy', # Função de erro que diz o quão erradas estão nossas predições
              metrics=['accuracy']) # Lista de métricas para avaliar o nosso modelo

In [11]:
with open('model_da_resnet50v2.json', 'w') as fp:
    fp.write(model.to_json())

In [12]:
history = model.fit(
    gen_train, 
    epochs=100, 
    verbose=2, 
    validation_data=gen_test, 
    callbacks=[
    ModelCheckpoint(
        'model_da_resnet50v2.{epoch:03d}-{val_accuracy:.4f}.h5',
        monitor='val_accuracy',
        save_best_only=True,
        save_weights_only=True,
    ),
    TensorBoard(log_dir='logs_da_resnet50v2'),
])

Train for 43 steps, validate for 18 steps
Epoch 1/100
43/43 - 40s - loss: 1.5183 - accuracy: 0.3151 - val_loss: 29857.3570 - val_accuracy: 0.2435
Epoch 2/100
43/43 - 16s - loss: 1.3064 - accuracy: 0.3945 - val_loss: 590.5282 - val_accuracy: 0.2444
Epoch 3/100
43/43 - 21s - loss: 1.2705 - accuracy: 0.4031 - val_loss: 224.0049 - val_accuracy: 0.2519
Epoch 4/100
43/43 - 21s - loss: 1.2484 - accuracy: 0.4113 - val_loss: 99.4598 - val_accuracy: 0.2537
Epoch 5/100
43/43 - 21s - loss: 1.2018 - accuracy: 0.4476 - val_loss: 5.7814 - val_accuracy: 0.2120
Epoch 6/100
43/43 - 21s - loss: 1.1479 - accuracy: 0.4829 - val_loss: 33.5133 - val_accuracy: 0.2565
Epoch 7/100
43/43 - 21s - loss: 1.1385 - accuracy: 0.4914 - val_loss: 15.5099 - val_accuracy: 0.2231
Epoch 8/100
43/43 - 21s - loss: 1.1727 - accuracy: 0.4790 - val_loss: 5.6113 - val_accuracy: 0.3630
Epoch 9/100
43/43 - 20s - loss: 1.0826 - accuracy: 0.5397 - val_loss: 2.2462 - val_accuracy: 0.3213
Epoch 10/100
43/43 - 21s - loss: 1.0820 - accur

In [13]:
with open('train_da_history.json', 'wb') as fp:
    fp.write(orjson.dumps({k: np.array(v).tolist() for k, v in history.history.items()}))

In [14]:
history2 = model.fit(
    gen_train, 
    epochs=100, 
    verbose=2, 
    validation_data=gen_test, 
    callbacks=[
    ModelCheckpoint(
        'model_da_resnet50v2.{epoch:03d}-{val_accuracy:.4f}.h5',
        monitor='val_accuracy',
        save_best_only=True,
        save_weights_only=True,
    ),
    TensorBoard(log_dir='logs_da_resnet50v2'),
])

Train for 43 steps, validate for 18 steps
Epoch 1/100
43/43 - 15s - loss: 0.3276 - accuracy: 0.8784 - val_loss: 2.2194 - val_accuracy: 0.5611
Epoch 2/100
43/43 - 15s - loss: 0.3461 - accuracy: 0.8705 - val_loss: 1.2041 - val_accuracy: 0.5963
Epoch 3/100
43/43 - 20s - loss: 0.3516 - accuracy: 0.8683 - val_loss: 1.9452 - val_accuracy: 0.4296
Epoch 4/100
43/43 - 21s - loss: 0.3264 - accuracy: 0.8689 - val_loss: 1.1884 - val_accuracy: 0.6389
Epoch 5/100
43/43 - 21s - loss: 0.3412 - accuracy: 0.8653 - val_loss: 1.0302 - val_accuracy: 0.6833
Epoch 6/100
43/43 - 21s - loss: 0.3014 - accuracy: 0.8896 - val_loss: 0.9010 - val_accuracy: 0.6704
Epoch 7/100
43/43 - 21s - loss: 0.2880 - accuracy: 0.8903 - val_loss: 1.1642 - val_accuracy: 0.6565
Epoch 8/100
43/43 - 22s - loss: 0.2641 - accuracy: 0.9001 - val_loss: 1.8715 - val_accuracy: 0.4630
Epoch 9/100
43/43 - 20s - loss: 0.2783 - accuracy: 0.8990 - val_loss: 0.7289 - val_accuracy: 0.7491
Epoch 10/100
43/43 - 21s - loss: 0.2533 - accuracy: 0.9091

In [15]:
with open('train_da_history2.json', 'wb') as fp:
    fp.write(orjson.dumps({k: np.array(v).tolist() for k, v in history2.history.items()}))