In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import shutil
import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Extract (unpack) the zip files

In [None]:
import zipfile

# Root directory that the archives in this notebook are unpacked into.
path = '/kaggle/content'

# Unpack the training archive into `path`.
with zipfile.ZipFile('/kaggle/input/dogs-vs-cats/train.zip', 'r') as train_archive:
    train_archive.extractall(path=path)

In [None]:


# Training split: a root folder plus one sub-folder per class.
train_path = f'{path}/train'
train_dog_data = f'{train_path}/dog'
train_cat_data = f'{train_path}/cat'

# Validation split: same layout as the training split.
valid_path = f'{path}/valid'
valid_dog_data = f'{valid_path}/dog'
valid_cat_data = f'{valid_path}/cat'

In [None]:
# Number of entries directly inside the extracted training folder.
train_entry_count = len(os.listdir(train_path))
print(train_entry_count)
# print(len(os.listdir(valid_path)))  # raises at this point: the 'valid' folder does not exist yet

In [None]:
#print(os.listdir(train_path))   # lists every image file name, e.g. 'cat.2914.jpg'

## Split the cat and dog images into training and validation sets

In [None]:
from sklearn.model_selection import train_test_split

# Split the cat and dog images into training and validation sets.
# Keep regular files only: once the images have been moved, train_path also
# contains the 'dog' and 'cat' class folders, whose names would otherwise match
# the prefix filters below. Sorting makes the split independent of the
# arbitrary order in which os.listdir returns entries, so random_state=42
# really does give the same split on every run.
img_filenames = sorted(
    fn for fn in os.listdir(train_path)
    if os.path.isfile(os.path.join(train_path, fn))
)
print('Num of images:', len(img_filenames))

dog_filenames = [fn for fn in img_filenames if fn.startswith('dog')]
cat_filenames = [fn for fn in img_filenames if fn.startswith('cat')]

# Splitting both lists in one call returns
# [dog_train, dog_valid, cat_train, cat_valid]; the two lists must have the
# same length for this call to succeed.
dataset_filenames = train_test_split(
    dog_filenames, cat_filenames, test_size=0.1, shuffle=True, random_state=42
)

train_dog_total, valid_dog_total, train_cat_total, valid_cat_total = [len(fns) for fns in dataset_filenames]
train_total = train_dog_total + train_cat_total
valid_total = valid_dog_total + valid_cat_total
# The held-out 10% is used as the validation set, so report it under that name.
print('Train: {}, valid: {}'.format(train_total, valid_total))

## After splitting the data, move the images into per-class folders

In [None]:
# Move each group of files into its class folder. The order of make_dirs must
# match the order of dataset_filenames: dog-train, dog-valid, cat-train, cat-valid.
make_dirs = [train_dog_data, valid_dog_data, train_cat_data, valid_cat_data]
# The loop variable is deliberately not called `dir`: at notebook top level that
# name would shadow the builtin dir() for the rest of the session.
for target_dir, fns in zip(make_dirs, dataset_filenames):
    os.makedirs(target_dir, exist_ok=True)
    for fn in tqdm.tqdm(fns):
        shutil.move(os.path.join(train_path, fn), target_dir)
    print('elements in {}: {}'.format(target_dir, len(os.listdir(target_dir))))

In [None]:
# Side length (in pixels) that images are resized to, and the number of images per batch.
img_size, batch_size = 224, 64

## **Preprocessing**
Decode the images \
Rescale pixel values from [0, 255] to [0, 1]

In [None]:
# Random augmentation settings applied to the training images as they are loaded.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",  # areas exposed by a transform repeat the nearest edge pixel
)
# On top of the augmentation, pixel values are scaled by 1/255.
train_gen = ImageDataGenerator(rescale=1./255, **augmentation_options)

In [None]:
# Validation images are only rescaled; no augmentation options are set.
valid_gen = ImageDataGenerator(rescale=1 / 255)

In [None]:
# Loading options shared by the training and validation iterators.
loader_options = dict(
    batch_size=batch_size,
    target_size=(img_size, img_size),
    class_mode="binary",
)

# train_path and valid_path each hold one 'dog' and one 'cat' sub-folder.
train_set = train_gen.flow_from_directory(directory=train_path, **loader_options)
valid_set = valid_gen.flow_from_directory(directory=valid_path, **loader_options)

## Visualize some images to make sure the data was read correctly

In [None]:
# Show the first 15 images of the next training batch on a 3 x 5 grid.
batch_images = next(train_set)[0]  # element 0 of the batch holds the images
some_pets = batch_images[:15]
fig, axes = plt.subplots(nrows=3, ncols=5, figsize=(20, 20))
for ax, img in zip(axes.ravel(), some_pets):
    ax.imshow(img)
plt.tight_layout()
plt.show()

## Visualize data augmentation

In [None]:
import matplotlib.pyplot as plt

def show_img(image):
    """Display the given images side by side in a single row.

    Args:
        image: Iterable of images, each in a form accepted by ``Axes.imshow``.

    The row gets exactly one panel per image, so the function is not tied to
    a fixed count of five. Nothing is drawn when the input is empty.
    """
    images = list(image)
    if not images:
        return
    # squeeze=False keeps `axes` two-dimensional even when there is one image,
    # so it can always be flattened and zipped with the images.
    fig, axes = plt.subplots(1, len(images), figsize=(20, 20), squeeze=False)
    for img, ax in zip(images, axes.ravel()):
        ax.imshow(img)
    plt.tight_layout()
    plt.show()

In [None]:
# Read the first image of batch 0 five times and show the results side by side;
# the intent is to see five random augmentations of the same picture.
aug_img = []
for _ in range(5):
    first_batch_images = train_set[0][0]
    aug_img.append(first_batch_images[0])
show_img(aug_img)

## Create the model

In [None]:
import tensorflow as tf
import tensorflow_hub as hub

img_size = 224

# MobileNetV2 feature-vector module loaded from TensorFlow Hub.
# trainable=False freezes its weights, so only the Dense head below is trained.
# output_shape=[1280] declares the size of the feature vector it produces; it
# plays the role a Flatten layer would play at the end of a hand-built model.
feature_extractor = hub.KerasLayer(
    "https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/5",
    trainable=False,
    output_shape=[1280],
)
# Two-unit softmax head: one output per class.
classifier_head = tf.keras.layers.Dense(2, activation='softmax')

model = tf.keras.Sequential([feature_extractor, classifier_head])
model.build([None, 224, 224, 3])  # Batch input shape.


In [None]:
# Adam optimizer configured with 0.00115 (its first positional argument is the learning rate).
optimizer = tf.keras.optimizers.Adam(0.00115)
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Only the Dense head is trainable: 1280 * 2 weights + 2 biases = 2,562 parameters.
model.summary()

In [None]:
# Train for 4 epochs, passing the validation iterator as validation data.
fit_options = {'validation_data': valid_set, 'epochs': 4}
hist = model.fit(train_set, **fit_options)

In [None]:
def show_graphs(hist):
    """Plot training and validation curves for accuracy and loss side by side.

    Args:
        hist: Object whose ``history`` mapping contains the series
            'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
    """
    # One row per panel: (train key, validation key, legend location, title).
    panels = (
        ('accuracy', 'val_accuracy', 'lower right', 'Accuracy'),
        ('loss', 'val_loss', 'upper left', 'Loss (sparse_categorical_crossentropy)'),
    )

    plt.figure(figsize=(12, 8))
    for position, (train_key, valid_key, legend_loc, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(hist.history[train_key], label='train')
        plt.plot(hist.history[valid_key], label='valid')
        plt.legend(loc=legend_loc)
        plt.title(title)

    plt.show()


show_graphs(hist)

In [None]:
import zipfile

# The test archive is unpacked into its own folder under `path`.
test_data = f'{path}/test'

with zipfile.ZipFile('/kaggle/input/dogs-vs-cats/test1.zip', 'r') as test_archive:
    test_archive.extractall(path=test_data)

In [None]:
# Test images are only rescaled by 1/255; no augmentation options are set.
test_generator = ImageDataGenerator(rescale=1./255)

# shuffle=False keeps the predictions in the same order as test_set.filenames;
# batch_size=1 means one prediction step per image.
test_set = test_generator.flow_from_directory(
    directory=test_data,
    target_size=(img_size, img_size),
    batch_size=1,
    class_mode='binary',
    shuffle=False
)
test_set.reset()

test_total = len(test_set.filenames)
# Model.predict accepts the iterator directly. Model.predict_generator is
# deprecated and is not available in recent TensorFlow/Keras releases.
predict = model.predict(test_set, steps=test_total, verbose=1)


In [None]:
submission = pd.read_csv('/kaggle/input/dogs-vs-cats/sampleSubmission.csv', index_col='id')

# Predictions follow the order of test_set.filenames. That order comes from
# file names sorted as text ('1.jpg', '10.jpg', '100.jpg', ...), which does not
# match a submission file ordered by numeric id, so a positional assignment
# would attach labels to the wrong ids. Recover each image id from its file
# name and align on id instead.
# Assumes test images are named '<id>.<ext>' -- TODO confirm against the archive.
test_ids = [int(os.path.splitext(os.path.basename(fn))[0]) for fn in test_set.filenames]
predicted_labels = pd.Series(np.argmax(predict, axis=1), index=test_ids)

# Fail loudly rather than writing NaN labels if some ids have no prediction.
missing_ids = submission.index.difference(predicted_labels.index)
if len(missing_ids) > 0:
    raise ValueError('No prediction for {} submission ids'.format(len(missing_ids)))
submission['label'] = predicted_labels.reindex(submission.index)

# Write into the current working directory instead of a hard-coded local
# Windows path, which does not exist in the Kaggle environment.
submission.to_csv('Cat_dogs_MahmoudOsama_sub.csv')