# Example of CNN Object detection
- The purpose of this notebook is to serve as a reference for CNN object detection
- We also want to be able to export the trained CNN parameters and hyperparameters to a data file so that the CNN can later be run to detect objects

## Other Notes
- Using tensorflow and scikit-learn
- Dataset of cats vs dogs from kaggle (https://www.kaggle.com/c/dogs-vs-cats/data)

In [38]:
# Preprocessing script to run to organize training and testing images into their respective directories
# From extracting the dataset, we have the following data:
# |-train/
# |   |-(25000 files named cat.#.jpg and dog.#.jpg mixed together)
# |-test1/
# |   |-(images of cats and dogs named #.jpg)
# -----------------------------------------------------------
# We want to put the cat images and dog images in different directories in the train/ directory, like this:
# |-train/
# |   |-train/
# |   |   |-dogs/
# |   |   |   |-(80% of the dog.#.jpg files, # being a random number between 0 and 12499: 10000 images)
# |   |   |-cats/
# |   |   |   |-(80% of the cat.#.jpg files, # being a random number between 0 and 12499: 10000 images)
# |   |-test/
# |   |   |-dogs/
# |   |   |   |-(20% of the dog.#.jpg files, # being a random number between 0 and 12499: 2500 images)
# |   |   |-cats/
# |   |   |   |-(20% of the cat.#.jpg files, # being a random number between 0 and 12499: 2500 images)
# . . .
# The python script here does that job: (only run it once after extracting the dataset for the first time)

# Load the data
# Guide from https://www.tensorflow.org/tutorials/images/classification
import fnmatch
import os
import random

# Root of the extracted dogs-vs-cats dataset, resolved relative to the
# notebook's working directory (../datasets/dogsvscats/).
PATH = os.path.join(
    os.path.dirname(os.path.realpath('../datasets/')),
    'datasets/dogsvscats/',
)

# parent_dir is the extracted train/ folder that initially holds all the
# unsorted cat.#.jpg / dog.#.jpg images.
parent_dir = os.path.join(PATH, 'train')

# Split directories inside parent_dir, each with one sub-directory per class.
train_dir, test_dir = [os.path.join(parent_dir, split) for split in ('train', 'test')]
train_dogs_dir, train_cats_dir = [os.path.join(train_dir, label) for label in ('dogs', 'cats')]
test_dogs_dir, test_cats_dir = [os.path.join(test_dir, label) for label in ('dogs', 'cats')]

# Create training and testing directories
# The extracted dataset must already be present: fail early with a clear
# message instead of building an empty directory tree for a missing dataset.
if not os.path.isdir(parent_dir):
    raise FileNotFoundError('Extracted dataset directory not found: ' + parent_dir)

# os.makedirs also creates train_dir / test_dir as intermediate directories,
# and exist_ok=True makes re-running this cell a no-op (no separate
# exists-check that could race with the creation).
for class_dir in (train_dogs_dir, train_cats_dir, test_dogs_dir, test_cats_dir):
    os.makedirs(class_dir, exist_ok=True)

# Move random images to their respective test locations
TEST_PAIRS = 2500      # dog/cat pairs held out for testing (2500 * 2 = 5000 images)
MAX_IMG_INDEX = 12499  # highest # used in the dog.#.jpg / cat.#.jpg filenames

# Only indices whose dog AND cat image are both still unsorted in parent_dir
# can be moved. Listing them up front (instead of drawing random indices until
# 2500 hits are found) guarantees termination even when fewer pairs remain,
# e.g. when this cell is re-run after the images have already been sorted.
available_indices = [
    img_index
    for img_index in range(MAX_IMG_INDEX + 1)
    if os.path.exists(os.path.join(parent_dir, 'dog.' + str(img_index) + '.jpg'))
    and os.path.exists(os.path.join(parent_dir, 'cat.' + str(img_index) + '.jpg'))
]
if len(available_indices) < TEST_PAIRS:
    print('Only ' + str(len(available_indices)) + ' of ' + str(TEST_PAIRS)
          + ' requested dog/cat pairs are available in ' + parent_dir)

# Number of pairs actually moved to the test directories
counting_index = 0

# random.sample draws without replacement, so every chosen index is unique
for img_index in random.sample(available_indices, min(TEST_PAIRS, len(available_indices))):
    dog_img_filename = 'dog.' + str(img_index) + '.jpg'
    cat_img_filename = 'cat.' + str(img_index) + '.jpg'
    os.rename(os.path.join(parent_dir, dog_img_filename),
              os.path.join(test_dogs_dir, dog_img_filename))
    os.rename(os.path.join(parent_dir, cat_img_filename),
              os.path.join(test_cats_dir, cat_img_filename))
    counting_index += 1

# Move the rest of the files into their respective train directories.
# Done in pure Python rather than by shelling out to `mv`: this works on paths
# containing spaces or shell metacharacters, is portable, and raises on failure
# instead of silently returning an exit status.
for pattern, destination_dir in (('cat.*.jpg', train_cats_dir), ('dog.*.jpg', train_dogs_dir)):
    # The train/ and test/ sub-directories of parent_dir never match the pattern
    for img_filename in fnmatch.filter(os.listdir(parent_dir), pattern):
        os.rename(os.path.join(parent_dir, img_filename),
                  os.path.join(destination_dir, img_filename))


    

0

In [22]:
# Import necessary Python, sklearn and/or tensorflow/keras modules for loading the dataset
import tensorflow
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import matplotlib.pyplot as plt

