In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os, cv2, re, random
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/dogs-vs-cats-redux-kernels-edition/sample_submission.csv
/kaggle/input/dogs-vs-cats-redux-kernels-edition/train.zip
/kaggle/input/dogs-vs-cats-redux-kernels-edition/test.zip


In [2]:
import zipfile
zip_df = zipfile.ZipFile("/kaggle/input/dogs-vs-cats-redux-kernels-edition/train.zip", 'r')
zip_df.extractall("/kaggle/working/")
zip_df.close()

In [3]:
zip_df = zipfile.ZipFile("/kaggle/input/dogs-vs-cats-redux-kernels-edition/test.zip", 'r')
zip_df.extractall("/kaggle/working/")
zip_df.close()

In [4]:
#  we need all the images to be at the same size as we don't want errors in the input shape
img_width = 150  
img_height = 150

TRAIN_DIR = './train/'  # here is the trianing directory to the folder where we have the images
TEST_DIR = './test/'  # here is the testing directory to the folder where we have the images

#this will return a list containing all the images inside the directory you passed.
# print(os.listdir(TRAIN_DIR))

# These two lines will make two lists one for training and one for testing each containing
# all the directory for every image.
train_images_dogs_cats = [TRAIN_DIR+i for i in os.listdir(TRAIN_DIR)] 
test_images_dogs_cats = [TEST_DIR+i for i in os.listdir(TEST_DIR)]

In [5]:
import re

# Helper function to sort the image files based on the numeric value in each file name.
# you don't have to apply them and the code will work and everything will be fine
def atoi(text):
    return int(text) if text.isdigit() else text

def natural_keys(text):
    return [ atoi(c) for c in re.split('(\d+)', text) ]

# print(train_images_dogs_cats[:5])
train_images_dogs_cats.sort(key=natural_keys)

test_images_dogs_cats.sort(key=natural_keys)

In [6]:
# There are 25000 images for training and testing 12500 for dogs 12500 for cats.
train_images_dogs_cats = train_images_dogs_cats[0:1000] + train_images_dogs_cats[12500:13500] 
# valid_images_dogs_cats = train_images_dogs_cats[3000:4000] + train_images_dogs_cats[15500:16500]

In [7]:
# img = train_images_dogs_cats[0]
# print(img)
# img = cv2.imread(img)
# img
# # cv2.imshow('image', img)

In [8]:
def prepare_data(list_of_images):
    """
    Returns two arrays: 
    x is an array of resized images
    y is an array of labels
    """
    x = [] # images as arrays
    y = [] # labels

    # this will append each image in x list with resizing it.
    for image in list_of_images:
        x.append(cv2.resize(cv2.imread(image), (img_width,img_height), interpolation=cv2.INTER_CUBIC))
    
    # As we can't deal with categorical labels we need to change the labels to numerical such 0, 1
    # This code does this for us as it changes dog to 1, cat to 0
    for i in list_of_images:
        if 'dog' in i:
            y.append(1)
        elif 'cat' in i:
            y.append(0)

    return x, y

In [9]:
X, Y = prepare_data(train_images_dogs_cats)

In [10]:
from sklearn.model_selection import train_test_split

X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.3, shuffle=1)

In [11]:
from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(
        rescale=1 / 255.0,
        rotation_range=20,
        zoom_range=0.3,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        horizontal_flip=True,
        fill_mode="nearest",
        validation_split=0.20
 )

val_datagen = ImageDataGenerator(
        rescale=1 / 255.0,
        rotation_range=20,
        zoom_range=0.3,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        horizontal_flip=True,
        fill_mode="nearest",
        validation_split=0.20
        )

In [12]:
train_generator = train_datagen.flow(np.array(X_train), Y_train, batch_size=10)

validation_generator = val_datagen.flow(np.array(X_val), Y_val, batch_size=10)

In [13]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.optimizers import RMSprop

def create_inception_model():
    base_model = InceptionV3(input_shape = (150, 150, 3), include_top = False, weights = 'imagenet') 
    
    for layer in base_model.layers:
        layer.trainable = False
        
    x = layers.Flatten()(base_model.output)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(256, activation='relu')(x)
    # Add a final sigmoid layer with 1 node for classification output
    x = layers.Dense(1, activation='sigmoid')(x)
    model = tf.keras.models.Model(base_model.input, x)
    model.compile(optimizer = RMSprop(learning_rate=0.0001), loss = 'binary_crossentropy', metrics = ['accuracy'])
    return model

In [14]:
inception_model = create_inception_model()

2022-07-06 23:21:12.124138: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-06 23:21:12.310693: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-06 23:21:12.311606: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-06 23:21:12.313150: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [15]:
# inception_model.summary()

In [16]:
history = inception_model.fit(
            train_generator,
            steps_per_epoch=50,
            epochs=5,
            validation_data=validation_generator,
            validation_steps=30,
            verbose=2)

2022-07-06 23:21:17.585154: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/5


2022-07-06 23:21:22.810828: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


50/50 - 17s - loss: 1.0373 - accuracy: 0.7980 - val_loss: 0.1332 - val_accuracy: 0.9367
Epoch 2/5
50/50 - 4s - loss: 0.4877 - accuracy: 0.8600 - val_loss: 0.2819 - val_accuracy: 0.8967
Epoch 3/5
50/50 - 5s - loss: 0.4396 - accuracy: 0.8740 - val_loss: 0.5661 - val_accuracy: 0.8533
Epoch 4/5
50/50 - 4s - loss: 0.4626 - accuracy: 0.8780 - val_loss: 0.3163 - val_accuracy: 0.8900
Epoch 5/5
50/50 - 5s - loss: 0.4338 - accuracy: 0.8940 - val_loss: 0.2504 - val_accuracy: 0.9400


In [17]:
X_test, Y_test = prepare_data(test_images_dogs_cats)  # Y_test in this case will be []

In [18]:
test_datagen = ImageDataGenerator(rescale=1. / 255)

test_generator = val_datagen.flow(np.array(X_test), batch_size=30)

prediction_probabilities = inception_model.predict_generator(test_generator, verbose=1)


counter = range(1, len(test_images_dogs_cats) + 1)

solution = pd.DataFrame({"id": counter, "label":list(prediction_probabilities)})

cols = ['label']



for col in cols:
    solution[col] = solution[col].map(lambda x: str(x).lstrip('[').rstrip(']')).astype(float)



solution.to_csv("dogsVScats.csv", index = False)




