# Kütüphaneleri import ediyoruz

In [None]:
import numpy as np 
import pandas as pd # CSV dosyası okuma

from keras.preprocessing.image import ImageDataGenerator, load_img
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random

import os

# Train Verisini Oluşturma

In [None]:
# Build the training table: one row per file in the training directory,
# labelled from the filename prefix (the text before the first '.').
filenames = os.listdir("../input/train/train")

# 1 marks a 'dog' prefix; every other prefix is labelled 0.
categories = [
    1 if filename.split('.')[0] == 'dog' else 0
    for filename in filenames
]

df = pd.DataFrame({'filename': filenames, 'category': categories})
df.head()

### Kategori Toplamını Görüntüleme

In [None]:
# Bar chart of how many rows carry each category label.
category_counts = df['category'].value_counts()
category_counts.plot.bar()

# Örnek Bir Resim Görüntüleme

In [None]:
# Pick one training file at random, load it and display it.
sample = random.choice(filenames)
sample_path = "../input/train/train/" + sample
image = load_img(sample_path)
plt.imshow(image)

# Model Oluşturma

Inception'ın Keras'taki sürümünü (InceptionV3) import ediyoruz, epochs = 5 olarak ayarlıyoruz, "relu" aktivasyonlu 512 birimli bir gizli katman ekliyoruz ve son olarak da sınıflandırma için sigmoid aktivasyonlu tek birimli bir çıkış katmanı ekliyoruz.

In [None]:
from keras.models import Sequential
from keras import layers
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation,GlobalMaxPooling2D
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.applications import InceptionV3
from keras.models import Model

# Side length of the square RGB images fed to the network.
image_size = 224
input_shape = (image_size, image_size, 3)

# Training hyper-parameters shared by the generators and the fit call.
epochs = 5
batch_size = 16

# Convolutional base with ImageNet weights and without its classifier head.
pre_trained_model = InceptionV3(input_shape=input_shape, include_top=False, weights="imagenet")

# Freeze the first 15 layers; everything after them is fine-tuned.
for layer in pre_trained_model.layers[:15]:
    layer.trainable = False

for layer in pre_trained_model.layers[15:]:
    layer.trainable = True

# Use the final layer of the base model as the feature extractor output.
# The previous lookup of 'block5_pool' is a VGG16 layer name; InceptionV3
# has no layer with that name, so get_layer() raised ValueError.
last_layer = pre_trained_model.layers[-1]
last_output = last_layer.output

# Reduce every feature map to its maximum value (one value per channel).
x = GlobalMaxPooling2D()(last_output)
# Hidden layer with 512 units.
x = Dense(512, activation='relu')(x)
# Dropout rate: 0.5
x = Dropout(0.5)(x)
# Single sigmoid unit for the binary classification output.
x = layers.Dense(1, activation='sigmoid')(x)

model = Model(pre_trained_model.input, x)

model.compile(loss='binary_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])

model.summary()

# Test ve Train Veri Kümelerini Hazırlama

Burada etiketli verinin %10'unu (test_size=0.1) validasyona ayırıyoruz; kalan %90 train için kullanılır. Test verisi ise ayrı bir klasörden (test1) okunur.

In [None]:
# Hold out 10% of the labelled rows for validation; the rest is for training.
train_df, validate_df = train_test_split(df, test_size=0.1)
# drop=True discards the shuffled original index instead of keeping it as
# an extra 'index' column in both frames.
train_df = train_df.reset_index(drop=True)
validate_df = validate_df.reset_index(drop=True)

# Row counts, used later to derive the number of steps per epoch.
total_train = train_df.shape[0]
total_validate = validate_df.shape[0]

# Training Oluşturucu

Train resimleri için veri artırma (augmentation) ayarlarını içeren bir ImageDataGenerator oluşturduk ve Keras'ın "flow_from_dataframe" metoduyla train DataFrame'ini modelin kullanacağı train oluşturucuya (generator) dönüştürdük.

In [None]:
# Augmenting generator for the training images: pixel values are rescaled
# by 1/255 and random rotation/shear/zoom/shift/flip transforms are applied.
train_datagen = ImageDataGenerator(
    rotation_range=15,
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    width_shift_range=0.1,
    height_shift_range=0.1
)

# flow_from_dataframe requires string labels for class_mode='binary', but
# the 'category' column holds ints. The cast is done on a copy (assign), so
# train_df keeps its integer labels; '0' and '1' sort to class indices 0
# and 1, preserving the original label meaning.
train_generator = train_datagen.flow_from_dataframe(
    train_df.assign(category=train_df['category'].astype(str)),
    "../input/train/train/",
    x_col='filename',
    y_col='category',
    class_mode='binary',
    target_size=(image_size, image_size),
    batch_size=batch_size
)

# Validasyon Oluşturucu

Yukarıdaki işlemin aynısını validasyon için de yapıyoruz; fakat buradaki datagen değişkenimize yukarıdaki veri artırma (augmentation) parametrelerini vermemize gerek yok, yalnızca rescale uyguluyoruz. Validasyon kümesi verinin sadece %10'luk bir dilimini kapsar ve modelin değiştirilmemiş resimler üzerindeki başarımını ölçmek için kullanılır.

In [None]:
# Validation images are only rescaled; no augmentation is applied.
validation_datagen = ImageDataGenerator(rescale=1./255)

# - Labels are cast to str on a copy because class_mode='binary' requires
#   string labels; '0'/'1' sort to class indices 0/1.
# - shuffle=False keeps batches in validate_df row order, so predictions
#   made from this generator line up with validate_df['category'].
validation_generator = validation_datagen.flow_from_dataframe(
    validate_df.assign(category=validate_df['category'].astype(str)),
    "../input/train/train/",
    x_col='filename',
    y_col='category',
    class_mode='binary',
    target_size=(image_size, image_size),
    batch_size=batch_size,
    shuffle=False
)

# Oluşturulan örnek resimleri görme

In [None]:
# Preview the augmentation: draw nine random variations of one training image.
example_df = train_df.sample(n=1).reset_index(drop=True)
# class_mode='binary' requires string labels, so cast them on a copy.
example_generator = train_datagen.flow_from_dataframe(
    example_df.assign(category=example_df['category'].astype(str)),
    "../input/train/train/",
    x_col='filename',
    y_col='category',
    class_mode='binary'
)
plt.figure(figsize=(12, 12))
for i in range(9):
    plt.subplot(3, 3, i + 1)
    # Each call yields a freshly augmented batch; only its first image is shown.
    X_batch, Y_batch = next(example_generator)
    image = X_batch[0]
    plt.imshow(image)
plt.tight_layout()
plt.show()

# Modeli Fit etme (Uyarlama)

In [None]:
# Fine-tuning: train on the augmented generator and validate after each
# epoch. Step counts are the number of whole batches in each split.
steps_per_epoch = total_train // batch_size
validation_steps = total_validate // batch_size

history = model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=validation_steps,
)

In [None]:
# Evaluate on the validation generator over its whole batches and report
# the resulting loss and accuracy.
evaluation_steps = total_validate // batch_size
loss, accuracy = model.evaluate_generator(
    validation_generator,
    steps=evaluation_steps,
    workers=12,
)
print("Test: accuracy = %f  ;  loss = %f " % (accuracy, loss))

In [None]:
def _resolve_metric_key(history, key):
    """Return ``key`` if it is logged, else its 'acc'/'accuracy' counterpart when that one is."""
    if key in history:
        return key
    if key.endswith('accuracy'):
        alternative = key[:-len('accuracy')] + 'acc'
    elif key.endswith('acc'):
        alternative = key[:-len('acc')] + 'accuracy'
    else:
        return key
    # Fall back to the original key so a genuinely missing metric still
    # surfaces as a KeyError naming what the caller asked for.
    return alternative if alternative in history else key


def _plot_metric(ax, history, train_key, val_key, title, ylabel):
    """Draw the train and validation curves of one metric on ``ax``."""
    train_values = history[train_key]
    val_values = history[val_key]
    ax.plot(range(1, len(train_values) + 1), train_values)
    ax.plot(range(1, len(val_values) + 1), val_values)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Epoch')
    # About ten ticks at most. The step belongs inside np.arange; it used to
    # be passed as the second positional argument of set_xticks, which is
    # not a step parameter.
    n_epochs = len(train_values)
    tick_step = max(1, n_epochs // 10)
    ax.set_xticks(np.arange(1, n_epochs + 1, tick_step))
    ax.legend(['train', 'val'], loc='best')


def plot_model_history(model_history, acc='acc', val_acc='val_acc'):
    """Plot accuracy and loss curves (train vs. validation) side by side.

    Args:
        model_history: object whose ``history`` attribute maps metric names
            to per-epoch value lists and contains 'loss' and 'val_loss'.
        acc: key of the training accuracy. If it is not logged, the
            alternative spelling ('acc' <-> 'accuracy') is tried.
        val_acc: key of the validation accuracy, resolved the same way.

    Raises:
        KeyError: if a requested metric is not present under either spelling.
    """
    history = model_history.history
    acc = _resolve_metric_key(history, acc)
    val_acc = _resolve_metric_key(history, val_acc)

    fig, axs = plt.subplots(1, 2, figsize=(15, 5))
    _plot_metric(axs[0], history, acc, val_acc, 'Model Accuracy', 'Accuracy')
    _plot_metric(axs[1], history, 'loss', 'val_loss', 'Model Loss', 'Loss')
    plt.show()


plot_model_history(history)

In [None]:
# True labels of the validation rows, in validate_df row order.
Y_val = validate_df['category']

# Predict from a dedicated generator with shuffle=False so that row i of
# y_pred belongs to row i of Y_val; a shuffled generator returns the
# predictions in a random order and makes any comparison with Y_val
# meaningless. Labels are not needed for prediction, hence class_mode=None.
# NOTE(review): assumes every filename in validate_df is a readable image;
# rows the generator filters out would break the alignment.
ordered_validation_generator = validation_datagen.flow_from_dataframe(
    validate_df,
    "../input/train/train/",
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=(image_size, image_size),
    batch_size=batch_size,
    shuffle=False
)
y_pred = model.predict_generator(ordered_validation_generator)

In [None]:
# Turn predicted scores into hard labels: strictly above the threshold -> 1,
# otherwise 0.
threshold = 0.5
above_threshold = y_pred > threshold
y_final = np.where(above_threshold, 1, 0)

In [None]:
# Display the total number of elements in the thresholded predictions.
y_final.size

In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix
# Compare the thresholded validation predictions (y_final) with the true
# labels (Y_val).

# Compute the confusion matrix.
confusion_mtx = confusion_matrix(Y_val, y_final)
# Visualise the confusion matrix as an annotated heat map. The cells are
# integer counts, so they are formatted with 'd' rather than as floats.
f, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(confusion_mtx, annot=True, linewidths=0.01, cmap="Greens", linecolor="gray", fmt='d', ax=ax)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

In [None]:
from sklearn.metrics import classification_report

# Build the classification report for the validation predictions, naming
# the two classes '0' and '1', and print it.
report = classification_report(
    y_true=Y_val,
    y_pred=y_final,
    target_names=['0', '1'],
)

print(report)

# Test Verisini Hazırlama

In [None]:
# One row per file found in the test directory; these rows have no labels.
test_filenames = os.listdir("../input/test1/test1")
test_df = pd.DataFrame({'filename': test_filenames})
# Number of test rows, used to derive the prediction step count.
nb_samples = len(test_df)

# Test oluşturucu işlemleri

In [None]:
# Test images are only rescaled. The generator yields images without labels
# (class_mode=None) and in DataFrame order (shuffle=False).
test_gen = ImageDataGenerator(rescale=1./255)
test_generator = test_gen.flow_from_dataframe(
    dataframe=test_df,
    directory="../input/test1/test1/",
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=(image_size, image_size),
    batch_size=batch_size,
    shuffle=False,
)

# Tahmin

In [None]:
# Round the batch count up so the final, partial batch is predicted too.
# np.ceil returns a float, while a step count has to be an integer.
prediction_steps = int(np.ceil(nb_samples / batch_size))
predict = model.predict_generator(test_generator, steps=prediction_steps)
threshold = 0.5
# The model ends in a single sigmoid unit, so `predict` has one column per
# sample; flatten it to 1-D before storing it as a DataFrame column.
test_df['category'] = np.where(predict > threshold, 1, 0).ravel()

# Tahmin edilmiş sonucu görelim

In [None]:
# Show up to nine randomly chosen test images with their predicted label.
# min() keeps sample() from raising when fewer than nine rows exist, and
# drop=True avoids carrying the old index along as an extra column.
sample_count = min(9, len(test_df))
sample_test = test_df.sample(n=sample_count).reset_index(drop=True)
plt.figure(figsize=(12, 12))
for index, row in sample_test.iterrows():
    filename = row['filename']
    category = row['category']
    img = load_img("../input/test1/test1/" + filename, target_size=(256, 256))
    # After reset_index the index runs 0..8, matching the 3x3 grid slots.
    plt.subplot(3, 3, index + 1)
    plt.imshow(img)
    plt.xlabel("{}({})".format(filename, category))
plt.tight_layout()
plt.show()

# Sonuç

Sonuç olarak 1 saat 23 dakikada model Google Colab üzerinde train edilmiş ve %87 accuracy değerine ulaşılmıştır.

<font color=yellow>100/100 [==============================] - ETA: 0s - loss: nan - accuracy: 0.8732  WARNING:tensorflow:Early stopping conditioned on metric `val_acc` which is not available. Available metrics are: loss,accuracy,val_loss,val_accuracy</font>

In [None]:
# Build the submission table: 'id' is the filename up to its first '.',
# 'label' is the predicted category.
submission_df = test_df.copy()
submission_df['id'] = submission_df['filename'].str.split('.').str[0]
submission_df['label'] = submission_df['category']
submission_df = submission_df.drop(columns=['filename', 'category'])
submission_df.to_csv('submission_13010030.csv', index=False)

# Distribution of predicted labels on the test data. The Series is passed
# as x= explicitly: the first positional parameter of countplot is `data`
# in current seaborn releases, not the x vector.
plt.figure(figsize=(10, 5))
sns.countplot(x=submission_df['label'])
plt.title("(Test data)")

# SHAP ile açıklama

SHAP'ın PermutationExplainer metoduyla, modelin resimleri sınıflandırırken yaptığı hataları görselleştirme işlemleri yapılmıştır.

In [None]:
# NOTE(review): `shap`, `X_train` and `class_labels` are not imported or
# defined in the visible part of this file — confirm where they come from.
# Image masker configured with the "blur(28,28)" setting and the shape of
# the first X_train sample.
masker = shap.maskers.Image("blur(28,28)", X_train[0].shape)

# Permutation explainer over `model` using the masker above; outputs are
# named after class_labels.
explainer = shap.PermutationExplainer(model, masker, output_names=class_labels)

In [None]:
# Predict the first four X_test samples and, for each sample, order the
# output indices from highest to lowest score.
Y_preds = model.predict(X_test[:4])
Y_preds = np.argsort(Y_preds)[:, ::-1]

# Translate every ranked output index into its entry of class_labels.
Y_labels = np.array([
    [class_labels[class_index] for class_index in ranked_row]
    for ranked_row in Y_preds
])

In [None]:
# NOTE(review): `shap_values` is never assigned in the visible part of this
# file — presumably it should be produced by calling `explainer` on the
# same samples used for Y_labels; confirm before running.
# Plot the SHAP values with the ranked labels, first for all outputs and
# then restricted to the first five entries of the last axis.
shap.image_plot(shap_values, labels=Y_labels)
shap.image_plot(shap_values[:,:,:,:,:5], labels=Y_labels[:,:5])

# LIME ile Açıklama

Aşağıda Colab'ın CUDA özelliğinden faydalanarak LIME ile labelleri (etiketleri) açıklama işlemleri yapılmıştır.

In [None]:
def batch_predict(images):
    """Return softmax probabilities of the global ``model`` for ``images``.

    Every element of ``images`` is converted with ``to_pil()`` and passed
    through the global ``transform``; the results are stacked into a single
    batch, moved to the "cuda" device together with the model, and the
    model outputs are soft-maxed along dimension 1.

    NOTE(review): ``torch``, ``transform`` and a PyTorch ``model`` are not
    defined in the visible part of this file — confirm they exist before use.

    Returns:
        The probabilities as a NumPy array (detached and copied to the CPU).
    """
    target_device = "cuda"
    model.eval()
    model.to(target_device)

    tensors = [transform(item.to_pil()) for item in images]
    stacked = torch.stack(tensors).to(target_device)

    scores = model(stacked)
    probabilities = torch.nn.functional.softmax(scores, dim=1)
    return probabilities.detach().cpu().numpy()

# NOTE(review): `LimeImage`, `img` and `idx2label` are not imported or
# defined in the visible part of this file — confirm where they come from.
# Create a LIME image explainer that scores images with batch_predict.
explainer = LimeImage(predict_function=batch_predict)
# Explain `img` with hide_color=0 and 1000 samples.
explanations = explainer.explain(img, hide_color=0, num_samples=1000)
# Render the explanation at index 0, using idx2label for the class names.
explanations.ipython_plot(index=0, class_names=idx2label)