In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random
import os


### Подключаем plaidml для видеокарт AMD

In [2]:
os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"
import plaidml.keras
plaidml.keras.install_backend()

In [3]:
import keras
import keras.applications as kapp


In [5]:
print(os.listdir("C:/Users/admin/Google Диск/Colab Notebooks/"))

['004-regression.ipynb', '006-dogs-vs-cats.ipynb', '007-detection (1).ipynb', '007-detection (2).ipynb', '007-detection.ipynb', '007.hdf5', '007new.hdf5', '007_3.hdf5', 'c-d-1.hdf5', 'cats-dogs-vgg16_2.hdf5', 'cats-dogs-vgg16_4.hdf5', 'cats-dogs-vgg16_5.hdf5', 'cats-dogs-vgg16_6.hdf5', 'cnn_predictions.csv', 'Copy of 006-dogs-vs-cats (1).ipynb', 'Copy of 006-dogs-vs-cats.ipynb', 'Copy of Untitled0 (1).ipynb', 'Copy of Untitled0.ipynb', 'd and c.ipynb', 'data', 'sample_submission_stg1.csv', 'submit2.txt', 'submit3.txt', 'submit4.txt', 'submit5.txt', 'submit6.txt', 'test', 'test_stg1', 'train', 'Untitled0.ipynb']


In [6]:
filenames = os.listdir("C:/Users/admin/Google Диск/Colab Notebooks/train")
print(len(filenames))

25000


#### Создаем датафрейм с данныеми

In [7]:

categories = []
for filename in filenames:
    category = filename.split('.')[0]
    if category == 'dog':
        categories.append("1")
    else:
        categories.append("0")

df = pd.DataFrame({
    'filename': filenames,
    'category': categories
})
df.head()

Unnamed: 0,filename,category
0,cat.0.jpg,0
1,cat.1.jpg,0
2,cat.10.jpg,0
3,cat.100.jpg,0
4,cat.1000.jpg,0


In [8]:
from keras.models import Sequential
from keras import layers
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation,GlobalMaxPooling2D
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
#from keras.applications import VGG16
from keras.models import Model


### Подключаем VGG16 предобученую на imagenet

In [21]:
image_size = 224
input_shape = (image_size, image_size, 3)

epochs = 5
batch_size = 16

pre_trained_model = kapp.VGG16(input_shape=input_shape, include_top=False, weights="imagenet")

In [22]:
for layer in pre_trained_model.layers[:15]:
    layer.trainable = False

for layer in pre_trained_model.layers[15:]:
    layer.trainable = True
    
last_layer = pre_trained_model.get_layer('block5_pool')
last_output = last_layer.output
    

### Добовляем слои

In [23]:
# Flatten the output layer to 1 dimension
x = GlobalMaxPooling2D()(last_output)
# Add a fully connected layer with 512 hidden units and ReLU activation
x = Dense(512, activation='relu')(x)
# Add a dropout rate of 0.5
x = Dropout(0.5)(x)
# Add a final sigmoid layer for classification
x = layers.Dense(1, activation='sigmoid')(x)

model = Model(pre_trained_model.input, x)

model.compile(loss='binary_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

### Делим выборку

In [24]:
train_df, validate_df = train_test_split(df, test_size=0.1)
train_df = train_df.reset_index()
validate_df = validate_df.reset_index()

# validate_df = validate_df.sample(n=100).reset_index() # use for fast testing code purpose
# train_df = train_df.sample(n=1800).reset_index() # use for fast testing code purpose

total_train = train_df.shape[0]
total_validate = validate_df.shape[0]

### Создаем генераторы

In [25]:
train_datagen = ImageDataGenerator(
    rotation_range=15,
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    width_shift_range=0.1,
    height_shift_range=0.1
)



### Генеротор обучающей выборки

In [26]:
train_generator = train_datagen.flow_from_dataframe(
    train_df, 
    "C:/Users/admin/Google Диск/Colab Notebooks/train/",
    x_col="filename",
    y_col="category",
    class_mode='binary',
    target_size=(image_size, image_size),
    batch_size=batch_size
    )

Found 22500 validated image filenames belonging to 2 classes.


### Генеротор валидационной выборки

In [27]:
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_dataframe(
    validate_df, 
    "C:/Users/admin/Google Диск/Colab Notebooks/train/", 
    x_col='filename',
    y_col='category',
    class_mode='binary',
    target_size=(image_size, image_size),
    batch_size=batch_size
)

Found 2500 validated image filenames belonging to 2 classes.


### Обучаем модель

In [28]:
history = model.fit_generator(
    train_generator,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=total_validate//batch_size,
    steps_per_epoch=total_train//batch_size)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [34]:
loss, accuracy = model.evaluate_generator(validation_generator, total_validate//batch_size, workers=12)
print("Test: accuracy = %f  ;  loss = %f " % (accuracy, loss))

Test: accuracy = 0.967147  ;  loss = 0.083347 


In [29]:
model.save('C:/Users/admin/Google Диск/Colab Notebooks/c-d-2.hdf5')

### Создаем датафрейм тестовых данных

In [30]:
test_filenames = os.listdir("C:/Users/admin/Google Диск/Colab Notebooks/test")
test_df = pd.DataFrame({
    'filename': test_filenames
})
nb_samples = test_df.shape[0]

### Генератор тестовых данных

In [31]:
test_gen = ImageDataGenerator(rescale=1./255)
test_generator = test_gen.flow_from_dataframe(
    test_df, 
    "C:/Users/admin/Google Диск/Colab Notebooks/test/", 
    x_col='filename',
    y_col=None,
    class_mode=None,
    batch_size=batch_size,
    target_size=(image_size, image_size),
    shuffle=False
)

Found 12500 validated image filenames.


### Предсказание

In [32]:
predict = model.predict_generator(test_generator, steps=np.ceil(nb_samples/batch_size))
threshold = 0.5
test_df['category'] = np.where(predict > threshold, 1,0)

### Записываем в самбит

In [33]:
submission_df = test_df.copy()
submission_df['id'] = submission_df['filename'].str.split('.').str[0]
submission_df['label'] = submission_df['category']
submission_df.drop(['filename', 'category'], axis=1, inplace=True)
submission_df.to_csv('C:/Users/admin/Google Диск/Colab Notebooks/submission.csv', index=False)