In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)import tensorflow as tf
import tensorflow as tf
import random 
import math
from sklearn.model_selection import train_test_split
import os
import sys
from tensorflow.keras.applications.nasnet import NASNetLarge

#Notebook-wide settings and shared state

# Directory that getDataset() scans for the training images.
pathTraining = '../input/dataset/Dataset/train'

# Training hyper-parameters.
BATCH_SIZE = 64
EPOCHS = 150
INITIALRATE = 0.01  # starting learning rate; step_decay() scales it down

# Module-level lists. images, labels and breeds are appended to by getDataset().
images, labels, breeds = [], [], []
images_val, labels_val = [], []

# Seed Python's RNG; the same value is passed as `seed` to the TensorFlow
# random ops used further down.
seed = 456
random.seed(seed)
random.random()  # draws and discards one value, advancing the RNG state

#Preprocessing function
def preprocess(img_path, label):
    """Read one image from disk and one-hot encode its label.

    Args:
        img_path: path of the image file to read.
        label: integer class index of the image.

    Returns:
        A pair ``(image, label)``: the image decoded as 3-channel JPEG,
        resized to 331x331 and divided by 255, and the label cast to int64
        and one-hot encoded with depth 35.
    """
    raw_bytes = tf.read_file(img_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize_images(decoded, [331, 331])
    scaled = resized / 255.0  # normalize to [0,1] range
    class_index = tf.cast(label, tf.int64)
    one_hot_label = tf.one_hot(class_index, 35)
    return scaled, one_hot_label

#Data augmentation function
def augment(image, label):
    """Randomly perturb an image; the label is passed through untouched.

    The perturbations are applied in this order: horizontal flip, saturation,
    brightness, contrast. Every op receives the notebook-level ``seed``.

    Args:
        image: image tensor to augment.
        label: label belonging to the image.

    Returns:
        A pair ``(augmented_image, label)``.
    """
    flipped = tf.image.random_flip_left_right(image, seed=seed)
    saturated = tf.image.random_saturation(flipped, lower=0.8, upper=1.8, seed=seed)
    brightened = tf.image.random_brightness(saturated, max_delta=0.08, seed=seed)
    contrasted = tf.image.random_contrast(brightened, lower=0.8, upper=1.5, seed=seed)
    return contrasted, label

#Get all dataset values
def getDataset():
    """Collect every training image path together with its integer breed label.

    Each *immediate* sub-directory of ``pathTraining`` is treated as one breed.
    Breed directories are visited in sorted order, so the label assigned to a
    breed (its position in ``breeds``) is the same on every run and on every
    file system. All files found anywhere below a breed directory receive
    that breed's label.

    The module-level lists ``images``, ``labels`` and ``breeds`` are emptied
    and refilled in place, so calling this function again does not append
    duplicates.

    Returns:
        The pair ``(images, labels)``: the list of image file paths and the
        parallel list of integer labels.
    """
    # Start from a clean slate so a re-run of the cell stays idempotent.
    images.clear()
    labels.clear()
    breeds.clear()

    # Only the first level of the walk lists the breed folders. Descending
    # further here would register nested folders as extra (empty) breeds and
    # shift the label numbering. A missing directory yields no breeds.
    _, breed_dirs, _ = next(os.walk(pathTraining), (pathTraining, [], []))

    for label, breed in enumerate(sorted(breed_dirs)):
        breeds.append(breed)
        breed_path = os.path.join(pathTraining, breed)
        for root, subdirs, files in os.walk(breed_path):
            subdirs.sort()  # deterministic traversal of nested folders
            for file in sorted(files):
                images.append(os.path.join(root, file))
                labels.append(label)
    return images, labels

#Learning rate decay function
def step_decay(epoch):
    """Step-wise learning-rate schedule based on INITIALRATE.

    The rate is multiplied by 0.5 every time ``1 + epoch`` reaches a further
    multiple of 10.

    Args:
        epoch: epoch index handed in by the LearningRateScheduler callback.

    Returns:
        The learning rate to use for that epoch.
    """
    halving_factor = 0.5
    epochs_per_step = 10.0
    completed_steps = math.floor((1 + epoch) / epochs_per_step)
    return INITIALRATE * math.pow(halving_factor, completed_steps)

#Save the model with the highest accuracy and lowest loss
best_val_acc = 0
best_val_loss = sys.float_info.max 
def saveModel(epoch,logs):
    """Checkpoint the global ``model`` to "big.h5" whenever it improves.

    Intended as an ``on_epoch_end`` hook. The model is saved when the
    validation accuracy beats the best one seen so far, or when it ties the
    best accuracy with a lower validation loss.

    Args:
        epoch: epoch index (unused).
        logs: metrics dict; must contain 'val_acc' and 'val_loss'.
    """
    global best_val_acc
    global best_val_loss
    val_acc = logs['val_acc']
    val_loss = logs['val_loss']
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        # Remember the loss that belongs to this checkpoint. Without this,
        # a later epoch that ties the accuracy with a *worse* loss would
        # still compare against a stale value and overwrite the better model.
        best_val_loss = val_loss
        model.save("big.h5")
        print('Save model')
    elif val_acc == best_val_acc and val_loss < best_val_loss:
        best_val_loss = val_loss
        model.save("big.h5")
        print('Save model with low ')
#Split the dataset in train and validation set
images,labels=getDataset()
# random_state pins the 80/20 split so it is reproducible, in line with the
# fixed seed used everywhere else in this notebook.
imagesTrain, imagesVal, labelsTrain, labelsVal = train_test_split(images, labels, test_size=0.20, random_state=seed)
datasetTrain = tf.data.Dataset.from_tensor_slices((imagesTrain, labelsTrain))
datasetVal = tf.data.Dataset.from_tensor_slices((imagesVal, labelsVal))
trainSize = len(imagesTrain)
valSize=len(imagesVal)
#Shuffle datasets
# Dataset.shuffle() returns a new dataset rather than modifying the receiver,
# so the result has to be assigned back or no shuffling takes place.
# Shuffling happens before map(), i.e. on the cheap (path, label) pairs, with
# a buffer as large as the dataset.
datasetTrain = datasetTrain.shuffle(trainSize, seed=seed)
datasetVal = datasetVal.shuffle(valSize, seed=seed)
#Apply preprocessing and augmentation functions(only for the training set)
datasetTrain = datasetTrain.map(preprocess)
datasetTrain = datasetTrain.map(augment)
datasetVal = datasetVal.map(preprocess)
#Set batch size parameter and epochs
# drop_remainder=True discards the final incomplete batch of each pass.
datasetTrain = datasetTrain.batch(BATCH_SIZE, drop_remainder=True)
datasetTrain = datasetTrain.repeat(EPOCHS)
datasetVal = datasetVal.batch(BATCH_SIZE, drop_remainder=True)
datasetVal = datasetVal.repeat(EPOCHS)
#Initialize the iterators
iterator_train = datasetTrain.make_initializable_iterator()
iterator_val=  datasetVal.make_initializable_iterator()

In [None]:
#Define the architecture
# NASNetLarge with ImageNet weights and without its classification head.
convBase = NASNetLarge(
    input_shape=None,
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    pooling=None,
    classes=1000,
)
# Classification head stacked on top of the convolutional base:
# pooling -> 35-way dense layer (L2-regularized) -> batch norm -> noise -> softmax.
model = tf.keras.Sequential([
    convBase,
    tf.keras.layers.GlobalMaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(35, kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.GaussianNoise(0.5),
    tf.keras.layers.Activation('softmax'),
])
#Set the convBase layer not trainable
model.layers[0].trainable = False

#Compile the model
optimizer = tf.keras.optimizers.Adagrad(lr=INITIALRATE, epsilon=None, decay=0.0)
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

#Plot the model architecture in a file
tf.keras.utils.plot_model(model, to_file='model.png', show_shapes=True)

# Both iterators are initializable, so run their initializers before use.
session = tf.keras.backend.get_session()
session.run(iterator_train.initializer)
session.run(iterator_val.initializer)

#Train model
# Whole batches available per pass over the training / validation data.
stepsTrain = math.floor(trainSize / BATCH_SIZE)
stepsVal = math.floor(valSize / BATCH_SIZE)
trainingCallbacks = [
    # Stop once val_acc has not improved for 10 epochs.
    tf.keras.callbacks.EarlyStopping(
        monitor='val_acc',
        min_delta=0,
        patience=10,
        verbose=0,
        mode='auto',
        baseline=None,
        restore_best_weights=True,
    ),
    # Learning rate follows step_decay().
    tf.keras.callbacks.LearningRateScheduler(step_decay),
    # saveModel() writes "big.h5" whenever the validation metrics improve.
    tf.keras.callbacks.LambdaCallback(on_epoch_end=saveModel),
]
model.fit(
    iterator_train,
    validation_data=iterator_val,
    steps_per_epoch=stepsTrain,
    validation_steps=stepsVal,
    callbacks=trainingCallbacks,
    epochs=EPOCHS,
)

#Load the best model
del model
model = tf.keras.models.load_model('big.h5')

#Evaluate the model on validation set
model.evaluate(
    iterator_val,
    batch_size=BATCH_SIZE,
    verbose=1,
    sample_weight=None,
    steps=stepsVal,
    callbacks=None,
)

In [None]:
#Create the file for the submission
imagesT = []
labelsT = []
pathTest='../input/dataset/Dataset/test'
x = 0
#A placeholder label is attached to every test image only because
#preprocess() works on (path, label) pairs; it plays no part in the
#prediction of the class.
for r, d, f in os.walk(pathTest):
    for file in f:
        imagesT.append(os.path.join(r, file))
        labelsT.append(x)
    x+=1
# Batches of one image and no shuffling, so predictions come back in the same
# order as imagesT.
datasetTest = tf.data.Dataset.from_tensor_slices((imagesT, labelsT))
datasetTest = datasetTest.map(preprocess)
datasetTest = datasetTest.batch(1, drop_remainder=True)
datasetTest = datasetTest.repeat(1)
iteratorTest = datasetTest.make_initializable_iterator()
tf.keras.backend.get_session().run(iteratorTest.initializer)
testSize=len(imagesT)
#Predict the classes
predictionClasses = model.predict(iteratorTest, steps=testSize)
#Get the file id: the file name without directory and without extension.
#os.path handles any extension length (".jpg", ".jpeg", ...) and the
#platform's path separator, unlike slicing off the last four characters.
fileIds = [os.path.splitext(os.path.basename(image))[0] for image in imagesT]
#Get the predicted breed: the breed whose score is highest for each image
classes = [breeds[np.argmax(prediction)] for prediction in predictionClasses]

#Submission
submission = pd.DataFrame({'id':fileIds,'breed':classes})
filename = 'result.csv'
submission.to_csv(filename,index=False)
# Last expression of the cell, so the preview is actually displayed.
submission.head()
