In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from PIL import Image
import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import time
import os
import shutil

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Record the TensorFlow version this notebook ran against
print(tf.__version__)
# Show what is available in the read-only input directory
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
%%bash
# Long listing of the daisy image folder, oldest-modified first (-t sorts by time, -r reverses)
ls -lrt ../input/flowers/flowers/daisy

In [None]:
# Where do we start!?
# 1. Access images
# 2. Load images, and check on how they look (a sample set) (<Create a function>)
# 3. Split dataset (<Create a function>)
# 4. Preprocessing:
#  a. Resize images to a standard size
#  b. Apply a few transforms on the loaded standard size images
# 5. Convert image into a format that can be fed into a CNN network
# 6. Train model
# 7. Test model

In [None]:
# Function to show a sample of the flowers
def show_samples(directory):
    """Display up to 16 images from ``directory`` in rows of four thumbnails.

    Parameters
    ----------
    directory : str
        Folder holding the image files. A trailing slash is optional.

    Returns
    -------
    None
        The number of entries found is printed and the thumbnails are drawn
        with matplotlib. Nothing is drawn when the folder is empty.
    """
    # os.listdir order is arbitrary; sorting shows the same sample on every run
    lstImages = sorted(os.listdir(directory))
    print('Number of Images {}'.format(len(lstImages)))

    per_row = 4
    # Slicing caps the sample at 4 rows and never indexes past the end of a
    # folder that holds fewer than 16 files
    sample = lstImages[:4 * per_row]

    for start in range(0, len(sample), per_row):
        row = sample[start:start + per_row]
        f, ax = plt.subplots(1, per_row, figsize = (15, 15))
        for j in range(per_row):
            if j < len(row):
                # Context manager releases the file handle once the thumbnail is built
                with Image.open(os.path.join(directory, row[j])) as img:
                    ax[j].imshow(img.resize((300, 300)))
            else:
                # Blank out unused panels in a partially filled last row
                ax[j].axis('off')

        plt.show()
        # Short pause between rows, kept from the original pacing
        time.sleep(1)

In [None]:
# Function to split dataset (into Train and Test datasets)
def create_test_train_split(rootDir):
    """Copy the images under ``rootDir`` into fresh ``Train/`` and ``Test/`` trees.

    ``rootDir`` is expected to contain one sub-folder per class. For each class
    the ``.jpg`` files are sorted by name; the first 80% are copied to
    ``Train/<class>/`` and the remainder to ``Test/<class>/``. Both output trees
    are created in the current working directory and are wiped first if they
    already exist.

    Parameters
    ----------
    rootDir : str
        Directory holding the per-class image folders. A trailing slash is optional.

    Returns
    -------
    None
    """
    # Start from a clean slate so a rerun never mixes old and new copies.
    # The default mkdir mode is used: the previous literal 777 was decimal
    # (== 0o1411), which leaves the owner without write permission.
    for split in ('Train', 'Test'):
        if os.path.isdir(split):
            shutil.rmtree(split)
        os.mkdir(split)

    # Mirror every class folder of the source into both output trees
    for folder in sorted(os.listdir(rootDir)):
        path = os.path.join(rootDir, folder)
        # Stray files next to the class folders are not classes
        if not os.path.isdir(path):
            continue

        # Only '.jpg' images are being considered for this model. Filtering
        # before computing the cutoff keeps the split at 80/20 of the files
        # actually copied; sorting makes the split reproducible.
        lstFiles = sorted(
            name for name in os.listdir(path) if name.lower().endswith('.jpg')
        )
        nTrain = int(len(lstFiles) * 0.8)

        os.mkdir(os.path.join('Train', folder))
        os.mkdir(os.path.join('Test', folder))

        for i, name in enumerate(lstFiles):
            # First 80% of the files go to training, the rest to testing
            split = 'Train' if i < nTrain else 'Test'
            shutil.copy(os.path.join(path, name), os.path.join(split, folder))

In [None]:
# Root of the dataset and the class folder to preview
fixedPath = '../input/flowers/flowers/'
flowerType = 'tulip/'  # Lower-case folder name, with trailing slash
targetDir = ''.join((fixedPath, flowerType))

In [None]:
# Preview a grid of images from the selected flower folder
show_samples(directory=targetDir)

In [None]:
# Build the Train/ and Test/ folder trees from the full dataset
create_test_train_split(rootDir=fixedPath)

-----------------------------------------------------MODEL TRAINING--------------------------------------------------

In [None]:
def load_images(HEIGHT, WIDTH, MODE):
    """Load every image below ``MODE/<class>/`` into one array with integer labels.

    Class folders are sorted by name and numbered from 0. As a side effect the
    module-level dict ``flower_type_mapping`` is rebuilt to map each label
    number to its folder name.

    Parameters
    ----------
    HEIGHT, WIDTH : int
        Target size every image is resized to.
    MODE : str
        Directory containing one sub-folder per class (e.g. ``'Train'`` or
        ``'Test'``; case sensitive). A trailing slash is optional.

    Returns
    -------
    arrImg : numpy.ndarray
        Float array of shape ``(n_images, HEIGHT, WIDTH, 3)`` holding the raw
        (unscaled) RGB pixel values.
    arrLabel : numpy.ndarray
        Integer array of shape ``(n_images,)`` with the class label of each image.
    """
    global flower_type_mapping
    flower_type_mapping = {}

    # Only directories are classes; stray files next to them are ignored
    classes = sorted(
        name for name in os.listdir(MODE)
        if os.path.isdir(os.path.join(MODE, name))
    )

    # First pass: collect (path, label) pairs so the output can be allocated once
    samples = []
    for flower_type, f in enumerate(classes):
        flower_type_mapping[flower_type] = f
        folder1 = os.path.join(MODE, f)
        # Sorted for a reproducible sample order
        for name in sorted(os.listdir(folder1)):
            samples.append((os.path.join(folder1, name), flower_type))

    # Second pass: fill a preallocated array instead of growing it with
    # np.append, which re-copies everything loaded so far on each image
    arrImg = np.zeros((len(samples), HEIGHT, WIDTH, 3))
    for idx, (path, _) in enumerate(samples):
        with Image.open(path) as img:
            # convert('RGB') guarantees 3 channels for grayscale/RGBA/palette files.
            # PIL's resize takes (width, height); the resulting array is
            # (height, width, channels).
            arrImg[idx] = np.asarray(img.convert('RGB').resize((WIDTH, HEIGHT)))

    arrLabel = np.array([label for _, label in samples], dtype = np.int64)

    return arrImg, arrLabel

In [None]:
# Height and Width are set to 28 for testing, since 299 was taking a long time.
# Maybe with better hardware a comparison can be made as to what happens with 299 pixels.
HEIGHT = WIDTH = 28

# Mode is case sensitive. It takes either "Train" or "Test"
MODE = 'Train'
x_images, y_labels = load_images(HEIGHT = HEIGHT, WIDTH = WIDTH, MODE = MODE)

In [None]:
# Sanity check: image array shape, then label array shape
print(x_images.shape, y_labels.shape, sep = '\n')

In [None]:
# Small CNN: two conv/pool stages followed by a dense classifier head
model = tf.keras.models.Sequential()

# load_images returns raw pixel values (0-255); scale them to [0, 1] inside the
# model so fit, evaluate and predict all see the same preprocessing
model.add(tf.keras.layers.Lambda(lambda pixels: pixels / 255.0))

model.add(tf.keras.layers.Conv2D(filters = 10, kernel_size = (3, 3), padding = 'same', activation = tf.nn.relu))
model.add(tf.keras.layers.MaxPool2D(pool_size = (2, 2)))
model.add(tf.keras.layers.Conv2D(filters = 20, kernel_size = (3, 3), padding = 'same', activation = tf.nn.relu))
model.add(tf.keras.layers.MaxPool2D(pool_size = (2, 2)))

model.add(tf.keras.layers.Flatten())

model.add(tf.keras.layers.Dense(256, activation = tf.nn.relu))
model.add(tf.keras.layers.Dropout(rate = 0.25))
# One output unit per flower class
model.add(tf.keras.layers.Dense(5, activation = tf.nn.softmax))

# The labels are integer class ids (not one-hot vectors), which is what the
# sparse variant of the loss expects. Accuracy is tracked so fit/evaluate
# report more than the loss.
model.compile(loss = 'sparse_categorical_crossentropy', optimizer = 'SGD', metrics = ['accuracy'])

In [None]:
# Train on the images loaded from the Train folder
model.fit(x = x_images, y = y_labels, epochs = 10, batch_size = 32)

In [None]:
# Swap in the held-out images; x_images / y_labels now refer to the Test set
MODE = 'Test'
x_images, y_labels = load_images(HEIGHT = HEIGHT, WIDTH = WIDTH, MODE = MODE)
print(x_images.shape, y_labels.shape, sep = '\n')

In [None]:
# Evaluate the trained model on the Test images
score = model.evaluate(x = x_images, y = y_labels, batch_size = 32)

In [None]:
# Count how many test images the model classifies correctly.
# The predicted class is the index of the highest softmax probability. Casting
# the probabilities to int (as done previously) truncates them to 0, so no
# class would be found unless a probability were exactly 1.0.
if len(x_images) > 0:
    # One batched call instead of one predict() per image
    probabilities = model.predict(x_images)
    predicted_labels = np.argmax(probabilities, axis = 1)
else:
    predicted_labels = np.array([], dtype = int)

is_correct = predicted_labels == y_labels

# Kept as lists (one entry per image) so the variable names and types match
# what the rest of the notebook may expect
correct_prediction = [1] * int(np.sum(is_correct))
wrong_prediction = [0] * int(np.sum(~is_correct))

print('Number of correct predictions: {}'.format(len(correct_prediction)))
print('Number of wrong predictions: {}'.format(len(wrong_prediction)))

In [None]:
# List the class folders created under Test/
!ls Test/
# Number of daisy images that were copied into the training split
len(os.listdir('Train/daisy/'))