In [0]:
# Colab-only setup (creates the input folders, downloads and unzips the data) - keep commented out when running on Kaggle

#!mkdir /content/input
#!mkdir /content/input/evaluate
#!mkdir /content/input/natural_images
#!wget "https://ai-camp-content.s3.amazonaws.com/natural_images.zip" -P /content/input
#!wget "https://ai-camp-content.s3.amazonaws.com/evaluate.zip" -P /content/input
#!unzip /content/input/natural_images.zip -d /content/input/natural_images 
#!unzip /content/input/evaluate.zip -d /content/input/evaluate 

In [0]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import keras as ks # neural network models

# For working with images
import cv2 as cv
import matplotlib.image as mpimg
import tqdm

# Potentially useful tools - you do not have to use these
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.models import Sequential, Model 
from keras import applications
from keras import optimizers
from keras.layers import Activation, Convolution2D, Flatten, Dense, Dropout, MaxPooling2D
from keras.optimizers import SGD

import os

# Input data files are available in the "../input/" directory.
# Any results you write to the current directory are saved as output.

In [0]:
# CONSTANTS
# Shared settings for the notebook. Not all of them have to be used, and
# further ones can be added as needed.

# The eight category names that are valid labels in the results file
CATEGORIES = [
    'airplane',
    'car',
    'cat',
    'dog',
    'flower',
    'fruit',
    'motorbike',
    'person',
]

# Nominal image size in pixels (square)
IMG_WIDTH = IMG_HEIGHT = 100

# Data locations; the trailing slash matters because paths are built by
# plain string concatenation (TRAIN_PATH + folder)
TRAIN_PATH = '/content/input/natural_images/natural_images/'
TEST_PATH = '/content/input/evaluate/evaluate/'

In [0]:
# Build an index of every training image, labelled by the folder it came from.

# Only sub-directories are treated as class folders: calling os.listdir on a
# stray file (an archive, a hidden file, ...) would raise NotADirectoryError.
# Sorting makes the index reproducible, since os.listdir order is arbitrary.
folders = sorted(
    entry for entry in os.listdir(TRAIN_PATH)
    if os.path.isdir(os.path.join(TRAIN_PATH, entry))
)

# One (class, filename, location relative to TRAIN_PATH) record per image
images = [
    (folder, file, folder + '/' + file)
    for folder in folders
    for file in sorted(os.listdir(os.path.join(TRAIN_PATH, folder)))
]

# data structure is three-column table
# first column is class, second column is filename, third column is image address relative to TRAIN_PATH
image_locs = pd.DataFrame(images, columns=('class','filename','file_loc'))

# Numpy array holding the class label of every image, in the same order as image_locs
image_class = np.array(image_locs['class'])

# TODO: split the data into training and testing data

# Kept as the last expression so the notebook actually renders the preview
image_locs.head()


### Over to you

Now it is your turn to create your own solution to the problem. To get the file containing your results, `commit` the kernel, then go to [kaggle.com/kernels](https://www.kaggle.com/kernels/) and open the 'Your Work' tab, where you will find a list of your notebooks. Click on this notebook and scroll down to the `Output` section.

In [0]:
#Consider using a KNN algorithm

#Number of classes, taken from the category list
num_categories = len(CATEGORIES)

#Desired size of each image (square); potential variable to be changed
image_width = image_height = 150

#Image data generator that diversifies the training data:
#pixel values are scaled by 1/255, images may be flipped horizontally
#and rotated by anything up to a full turn
datagen = ImageDataGenerator(
    rescale = 1./255,
    rotation_range = 360,
    horizontal_flip = True,
)

In [0]:
#Create the model:

def create_model():
  """Build and compile a small convolutional image classifier.

  Architecture: two (3x3 convolution with 32 filters -> ReLU -> 2x2 max
  pooling) stages, then Flatten -> Dense(32) -> ReLU -> Dropout(0.5) ->
  Dense(num_categories) -> softmax.

  Reads the module-level names image_width, image_height (input size) and
  num_categories (number of output units).

  Returns:
    A compiled keras Sequential model (SGD optimizer, categorical
    cross-entropy loss, accuracy metric).
  """
  #The input shape below puts the 3 colour channels last, so every
  #convolution and pooling layer must use 'channels_last'. Pooling with
  #'channels_first' would treat the first spatial axis as the channel axis
  #and downsample across the filter axis instead.
  data_format = 'channels_last'

  #create a sequential model
  model = Sequential()

  #Stage 1: convolution to identify key features, relu to map all negative
  #values to 0, pooling to reduce the spatial dimensions
  model.add(Convolution2D(32, (3, 3),
                          data_format=data_format,
                          input_shape=(image_width, image_height, 3)))
  model.add(Activation('relu'))
  model.add(MaxPooling2D(data_format=data_format, pool_size=(2,2)))

  #Stage 2: same pattern; input_shape is only meaningful on the first layer
  model.add(Convolution2D(32, (3, 3), data_format=data_format))
  model.add(Activation('relu'))
  model.add(MaxPooling2D(data_format=data_format, pool_size=(2,2)))

  #Flatten the feature maps into a 1-dimensional vector per image
  model.add(Flatten())

  #Generic dense layer with relu, followed by dropout for regularisation
  model.add(Dense(32))
  model.add(Activation('relu'))
  model.add(Dropout(0.5))

  #Output layer: one unit per category
  model.add(Dense(num_categories))

  #Softmax (not sigmoid): each image has exactly one class and
  #categorical_crossentropy expects the outputs to form a probability
  #distribution over the classes
  model.add(Activation('softmax'))

  #Compile the model
  model.compile(optimizer = 'SGD',
               loss = 'categorical_crossentropy',
               metrics = ['accuracy'])

  return model

In [19]:
#Defining the batch size and the number of indexed images
batch_size = 14
train_samples = len(images)  #all indexed images, i.e. before the train/validation split

#Fraction of each class folder held out for validation; it must be the same
#for both generators below so that their subsets do not overlap
validation_fraction = 0.2

#This is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
  rescale = 1./255,
  shear_range = 0.2,
  zoom_range = 0.2,
  horizontal_flip = True,
  validation_split = validation_fraction)

#Validation images are only rescaled: applying random shear/zoom/flip to them
#would make the validation metrics noisy and not comparable between epochs
validation_datagen = ImageDataGenerator(
  rescale = 1./255,
  validation_split = validation_fraction)

#Passing the training images through augmentation
train_generator = train_datagen.flow_from_directory(
  TRAIN_PATH,
  target_size = (image_width, image_height),
  batch_size = batch_size,
  class_mode = 'categorical',
  subset = 'training')

#Held-out images, read from the same directory without augmentation
validation_generator = validation_datagen.flow_from_directory(
  TRAIN_PATH,
  target_size = (image_width, image_height),
  batch_size = batch_size,
  class_mode = 'categorical',
  subset = 'validation')


Found 5362 images belonging to 8 classes.
Found 1337 images belonging to 8 classes.


In [0]:
model = create_model()

#Steps are counted in batches, not samples. len() of a Keras directory
#iterator is the number of batches needed to cover its data once, so each
#epoch sees every training image once and validation runs once over the
#held-out set (instead of 1337 batches, i.e. about 14 passes over it).
steps_epoch = len(train_generator)
validation_steps = len(validation_generator)

#Keep the returned History object so the per-epoch metrics can be inspected later
history = model.fit_generator(train_generator,
                   steps_per_epoch = steps_epoch,
                   epochs = 10,
                   validation_data = validation_generator,
                   validation_steps = validation_steps)

Epoch 1/10

In [0]:
# Example values: placeholder filenames and, at the same positions,
# the category predicted for each of them
filenames, predictions = (
    ['test001','test002','test003','test004'],
    ['car','cat','fruit','motorbike'],
)

In [0]:
# Save results

# The results table has two columns: 'filename' (image filename) and
# 'label' (category name).
# category names are: airplane, car, cat, dog, flower, fruit, motorbike, person
df = pd.DataFrame({'filename': filenames, 'label': predictions})

# Rows are ordered by filename before being written out
df = df.sort_values(by='filename')

# Written with a header row and without the index column
df.to_csv('results.csv', index=False, header=True)