In [1]:
%matplotlib inline

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys, cv2, os, shutil, random, string

In [3]:
import keras, h5py
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.optimizers import SGD
from keras.utils import plot_model
from keras.callbacks import TensorBoard

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [4]:
class NeuClassificationSurfaceDefects:
    source_data_dir = 'sources/NEU-CLS'
    formatted_data = 'neu-data'
    
    train_dir = os.path.join(formatted_data, 'train')
    val_dir = os.path.join(formatted_data, 'val')
    test_dir = os.path.join(formatted_data, 'test')
    
    test_data_portion = 0.15
    val_data_portion = 0.15
    
    classes = ['rolled-in scale', 'patches', 'crazing', 'pitted surface', 'inclusion', 'scratches']
    short_classes = ['RS', 'Pa', 'Cr','PS', 'In', 'Sc']
    nb_images = 300
    
    img_width, img_height = 200, 200
    
    input_shape = (img_width, img_height, 3)
    total_classes = len(classes)
    
    nb_train_samples = 1254
    nb_validation_samples = 270    
    nb_test_samples = 276
    
    @classmethod
    def get_generators(cls, batch_size, use_samplewise=True):
        datagen = ImageDataGenerator(
            samplewise_center=use_samplewise, 
            samplewise_std_normalization=use_samplewise,
            featurewise_center=not use_samplewise,
            featurewise_std_normalization=not use_samplewise,
            rotation_range=40,
            width_shift_range=0.2,
            height_shift_range=0.2,
            rescale=1. / 255,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True,
            fill_mode='nearest')
        train_generator = datagen.flow_from_directory(
            cls.train_dir,
            target_size=(cls.img_width, cls.img_height),
            batch_size=batch_size,
            class_mode='categorical')
        val_generator = datagen.flow_from_directory(
            cls.val_dir,
            target_size=(cls.img_width, cls.img_height),
            batch_size=batch_size,
            class_mode='categorical')
        test_generator = datagen.flow_from_directory(
            cls.test_dir,
            target_size=(cls.img_width, cls.img_height),
            batch_size=batch_size,
            class_mode='categorical')
        return train_generator, val_generator, test_generator
    
    @classmethod
    def prepare_data(cls):
        
        def create_directory(dir_name):
            if os.path.exists(dir_name):
                shutil.rmtree(dir_name)
            os.makedirs(dir_name)
            for c in cls.classes: 
                os.makedirs(os.path.join(dir_name, c))
            print('Directory "' + dir_name + '" was created.')
        
        create_directory(cls.train_dir)
        create_directory(cls.val_dir)
        create_directory(cls.test_dir)
    
        def copy_images(start_index, end_index, source_dir, dest_dir):
            for i in range(start_index, end_index):
                for j in range(len(cls.classes)):
                    shutil.copy2(os.path.join(source_dir, cls.short_classes[j] + "_" + str(i) + ".bmp"), 
                                 os.path.join(dest_dir, cls.classes[j]))
                    
        start_val_data_idx = int(cls.nb_images * (1 - cls.val_data_portion - cls.test_data_portion))
        start_test_data_idx = int(cls.nb_images * (1 - cls.test_data_portion))
        print("Train[{}, {}], Val[{}, {}], Test[{}, {}]".format(1, start_val_data_idx - 1, start_val_data_idx, start_test_data_idx - 1, start_test_data_idx, cls.nb_images ))
        copy_images(1, start_val_data_idx, cls.source_data_dir, cls.train_dir)
        print("Train data prepared")
        copy_images(start_val_data_idx, start_test_data_idx, cls.source_data_dir, cls.val_dir)
        print("Val data prepared")
        copy_images(start_test_data_idx, cls.nb_images + 1, cls.source_data_dir, cls.test_dir)
        print("Test data prepared")

Запускать при изменении параметров в `NeuClassificationSurfaceDefects`

In [5]:
# NeuClassificationSurfaceDefects.prepare_data()

In [6]:
network_name = 'Network-{}'.format(''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10)))
model_dir = os.path.join('models', network_name)
model_file_path = os.path.join(model_dir, '{}.model'.format(network_name))
os.makedirs(model_dir)

### Основные параметры

In [7]:
batch_size = 8
epochs = 1000
steps_per_epoch = NeuClassificationSurfaceDefects.nb_train_samples // batch_size
validation_steps = NeuClassificationSurfaceDefects.nb_validation_samples // batch_size
input_shape = NeuClassificationSurfaceDefects.input_shape

При передаче параметра `use_samplewise=False`, будет использована featurewise-нормализация.

In [8]:
train_generator, val_generator, test_generator = NeuClassificationSurfaceDefects.get_generators(batch_size)

Found 1254 images belonging to 6 classes.
Found 270 images belonging to 6 classes.
Found 276 images belonging to 6 classes.


### Callbacks

In [9]:
check_point = keras.callbacks.ModelCheckpoint(model_file_path, 
                                              monitor='val_acc', verbose=1,
                                              save_best_only=True,
                                              save_weights_only=False,
                                              mode='auto', 
                                              period=1)
early_stopping = keras.callbacks.EarlyStopping(monitor='val_acc', 
                                               min_delta=0, 
                                               patience=10, 
                                               verbose=1, 
                                               mode='auto')
tb_callback = TensorBoard(log_dir=os.path.join(model_dir,'Graph'),
                          histogram_freq=0,
                          write_graph=True,
                          write_images=True)

### Архитектура сети

In [10]:
model = Sequential()

model.add(Conv2D(128, (3, 3), input_shape=input_shape, padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(128, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(6))
model.add(Activation('softmax'))


In [11]:
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [12]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 200, 200, 128)     3584      
_________________________________________________________________
activation_1 (Activation)    (None, 200, 200, 128)     0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 100, 100, 128)     0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 100, 100, 32)      36896     
_________________________________________________________________
activation_2 (Activation)    (None, 100, 100, 32)      0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 50, 50, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 50, 50, 64)        18496     
__________

In [13]:
model.fit_generator(
        train_generator,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        validation_data=val_generator,
        validation_steps=validation_steps,
        verbose=1, 
        callbacks=[check_point, 
                   early_stopping, 
                   tb_callback])

Epoch 1/1000

Epoch 00001: val_acc improved from -inf to 0.43182, saving model to models\Network-3OP0MWPAY7\Network-3OP0MWPAY7.model
Epoch 2/1000

Epoch 00002: val_acc improved from 0.43182 to 0.57955, saving model to models\Network-3OP0MWPAY7\Network-3OP0MWPAY7.model
Epoch 3/1000

Epoch 00003: val_acc did not improve
Epoch 4/1000

Epoch 00004: val_acc improved from 0.57955 to 0.71212, saving model to models\Network-3OP0MWPAY7\Network-3OP0MWPAY7.model
Epoch 5/1000

Epoch 00005: val_acc did not improve
Epoch 6/1000

Epoch 00006: val_acc did not improve
Epoch 7/1000

Epoch 00007: val_acc improved from 0.71212 to 0.73106, saving model to models\Network-3OP0MWPAY7\Network-3OP0MWPAY7.model
Epoch 8/1000

Epoch 00008: val_acc improved from 0.73106 to 0.79924, saving model to models\Network-3OP0MWPAY7\Network-3OP0MWPAY7.model
Epoch 9/1000

Epoch 00009: val_acc did not improve
Epoch 10/1000

Epoch 00010: val_acc did not improve
Epoch 11/1000

Epoch 00011: val_acc did not improve
Epoch 12/1000




Epoch 00042: val_acc did not improve
Epoch 43/1000

Epoch 00043: val_acc did not improve
Epoch 44/1000

Epoch 00044: val_acc did not improve
Epoch 45/1000

Epoch 00045: val_acc improved from 0.92045 to 0.94318, saving model to models\Network-3OP0MWPAY7\Network-3OP0MWPAY7.model
Epoch 46/1000

Epoch 00046: val_acc did not improve
Epoch 47/1000

Epoch 00047: val_acc did not improve
Epoch 48/1000

Epoch 00048: val_acc did not improve
Epoch 49/1000

Epoch 00049: val_acc did not improve
Epoch 50/1000

Epoch 00050: val_acc did not improve
Epoch 51/1000

Epoch 00051: val_acc did not improve
Epoch 52/1000

Epoch 00052: val_acc did not improve
Epoch 53/1000

Epoch 00053: val_acc did not improve
Epoch 54/1000

Epoch 00054: val_acc did not improve
Epoch 55/1000

Epoch 00055: val_acc did not improve
Epoch 00055: early stopping


<keras.callbacks.History at 0x26a78bb99e8>

In [14]:
model = keras.models.load_model(model_file_path)
scores = model.evaluate_generator(test_generator, NeuClassificationSurfaceDefects.nb_test_samples // batch_size)
acc = scores[1]*100
print("Точность на тестовых данных: %.2f%%" % (acc))


Точность на тестовых данных: 95.22%


In [15]:
with open(os.path.join(model_dir, 'README.md'), "w") as fh:
    print("Accuracy:  %.2f%%\n" % (acc), file=fh)
    model.summary(print_fn=lambda x: fh.write(x + '\n'))

In [16]:
with open('README.md', 'a') as fh:
    print('| %s | %.2f%% | [models/%s/%s.model](https://github.com/Alkapov/Steel-Surface-NN/tree/master/models/%s/) |\n' % (network_name, acc,  network_name, network_name, network_name), file=fh)