In [None]:
# Import all libs.
import os
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import statistics
import numpy as np
import json
from keras.models import Sequential, Model
from keras.applications import InceptionV3
from keras.layers import Dense, Flatten, Activation, Dropout, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers, applications
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping
from keras import backend as K 

In [None]:
# Parse the training annotation JSON into `train_annot`.
annot_file = '../input/inaturalist-2019-fgvc6/train2019.json'
with open(annot_file) as annot_fh:
    train_annot = json.load(annot_fh)


In [None]:
# Labels: one (image_id, category_id) pair per annotation entry.
df_train_raw = pd.DataFrame(train_annot['annotations']).loc[:, ['image_id', 'category_id']]

# Image records, with 'id' renamed so it can be joined against the labels.
df_train_img = (
    pd.DataFrame(train_annot['images'])
    .loc[:, ['id', 'file_name']]
    .rename(columns={'id': 'image_id'})
)

# Join on image_id -> columns: image_id, file_name, category_id.
df_train_cons = df_train_img.merge(df_train_raw, on='image_id')

# Store the class labels as strings (the frame is later passed to
# flow_from_dataframe with class_mode="categorical").
df_train_cons['category_id'] = df_train_cons['category_id'].astype(str)


In [None]:
# Report the size of the training set and how many distinct classes it contains.
print('Number of train images {}'.format(len(df_train_cons['image_id'])))
print('Number of classes      {}'.format(len(df_train_cons['category_id'].unique())))

# x: sorted unique class labels, y: number of training images for each label in x.
x, y = np.unique(df_train_cons['category_id'], return_counts=True)


In [None]:
# Summary statistics of the per-class image counts `y` (the `statistics`
# module is already imported in the first cell). Displayed in this order:
#   mean, std, std/mean, max, min, total,
#   position of the largest count, position of the smallest count,
#   most frequent count.
(
    y.mean(),
    y.std(),
    y.std() / y.mean(),
    y.max(),
    y.min(),
    y.sum(),
    np.argmax(y),
    np.argmin(y),
    statistics.mode(y),
)



In [None]:
# Save the per-class training image counts (class id, count) to ImageCount.csv.
List = pd.DataFrame.from_dict({"id": x, "count": y})
List.to_csv("ImageCount.csv", index=False)

In [None]:

# Stem plot of the number of training images per class
# (one stem per entry of `y`, stem lines and baseline hidden).
fig, ax = plt.subplots(figsize=(16, 10), facecolor='white', dpi=150)

ax.set_title(' Number of Training Images vs Classes')
ax.set_xlabel('Classes')
ax.set_ylabel('Number of Training Images per Class')
ax.stem(y, linefmt=' ', basefmt=' ')


In [None]:
# Parse the validation annotation JSON into `valid_annot`.
# The path is kept under its own name so that `valid_annot` only ever refers
# to the parsed data (previously the same name held the path string first).
valid_annot_file = '../input/inaturalist-2019-fgvc6/val2019.json'
with open(valid_annot_file) as f:
    valid_annot = json.load(f)

In [None]:
# Labels: one (image_id, category_id) pair per validation annotation entry.
df_valid_raw = pd.DataFrame(valid_annot['annotations']).loc[:, ['image_id', 'category_id']]

# Image records, with 'id' renamed so it can be joined against the labels.
df_valid_img = (
    pd.DataFrame(valid_annot['images'])
    .loc[:, ['id', 'file_name']]
    .rename(columns={'id': 'image_id'})
)

# Join on image_id -> columns: image_id, file_name, category_id.
df_valid_cons = df_valid_img.merge(df_valid_raw, on='image_id')

# Store the class labels as strings (the frame is later passed to
# flow_from_dataframe with class_mode="categorical").
df_valid_cons['category_id'] = df_valid_cons['category_id'].astype(str)


In [None]:
# Report the size of the validation set and how many distinct classes it contains.
print('Number of Valid images {}'.format(len(df_valid_cons['image_id'])))
print('Number of classes      {}'.format(len(df_valid_cons['category_id'].unique())))

# xi: sorted unique class labels, yi: number of validation images for each label in xi.
xi, yi = np.unique(df_valid_cons['category_id'], return_counts=True)


In [None]:
# Stem plot of the number of validation images per class
# (one stem per entry of `yi`, stem lines and baseline hidden).
fig, ax = plt.subplots(figsize=(16, 10), facecolor='white', dpi=150)

ax.set_title(' Number of Validation Images vs Classes')
ax.set_xlabel('Classes')
ax.set_ylabel('Number of Validation Images per Class')
ax.stem(yi, linefmt=' ', basefmt=' ')


In [None]:
# Training configuration shared by the cells below.
# Side length (pixels) of the square images fed to the network; used as the
# generators' target_size and the InceptionV3 input_shape.
IMG_SIZE = 299
# Width of the final softmax layer, i.e. the number of output classes.
nCLASSES = 1010
# Images per batch for the train and validation generators.
BATCH_SIZE = 32
# Upper bound on training epochs (early stopping may end training sooner).
N_EPOCHS = 20


In [None]:
# Training input pipeline: pixel values are multiplied by 1/255 and images are
# randomly augmented (horizontal flip, zoom_range 0.3, width/height shift 0.3).
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    zoom_range=0.3,
    width_shift_range=0.3,
    height_shift_range=0.3,
    horizontal_flip=True,
)

# Stream shuffled batches of (image, one-hot label) built from df_train_cons;
# file names are resolved relative to the train_val2019 directory.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=df_train_cons,
    directory="../input/inaturalist-2019-fgvc6/train_val2019",
    x_col="file_name",
    y_col="category_id",
    class_mode="categorical",    # class_mode="raw" would be the choice for regression
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    shuffle=True,
)


In [None]:
# Validation input pipeline: same 1/255 rescaling as training, no augmentation.
valid_datagen = ImageDataGenerator(rescale=1. / 255)

# Stream batches of (image, one-hot label) built from df_valid_cons;
# file names are resolved relative to the train_val2019 directory.
valid_generator = valid_datagen.flow_from_dataframe(
    dataframe=df_valid_cons,
    directory="../input/inaturalist-2019-fgvc6/train_val2019",
    x_col="file_name",
    y_col="category_id",
    class_mode="categorical",
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    shuffle=True,
)


In [None]:
# Number of batches that make up one full pass over each dataset.
# len() of a Keras data iterator already returns the batch count
# (ceil(n_samples / batch_size)), so it must not be divided by BATCH_SIZE a
# second time -- doing so made every epoch cover only ~1/BATCH_SIZE of the
# available batches for both training and validation.
train_steps = len(train_generator)
valid_steps = len(valid_generator)

In [None]:
# Build the full InceptionV3 network (classification top included) with
# uninitialised weights, then load the weights from the local .h5 file.
inception_weights_path = '../input/inceptionv3/inception_v3_weights_ai.h5'

original_model = applications.InceptionV3(
    include_top=True,
    weights=None,
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)
original_model.load_weights(inception_weights_path)


In [None]:
# Print the layer-by-layer summary of the loaded InceptionV3 model.
original_model.summary()

In [None]:
# Input tensor of the first layer of the loaded network; it becomes the input
# of the truncated (bottleneck) model built in the next cell.
bottleneck_input  = original_model.get_layer(index=0).input

In [None]:
# Cut the network just before its last layer: the output of the
# second-to-last layer is used as the feature ("bottleneck") vector.
penultimate_layer = original_model.get_layer(index=-2)
bottleneck_output = penultimate_layer.output

# Feature extractor mapping the original input to the bottleneck features.
bottleneck_model = Model(inputs=bottleneck_input, outputs=bottleneck_output)


In [None]:
# Print the layer-by-layer summary of the truncated feature-extractor model.
bottleneck_model.summary()

In [None]:
# Classifier = bottleneck feature extractor followed by a single softmax layer
# with one unit per class.
# Disabled experiment: two Dense(1024, 'ReLU') + Dropout(0.2) pairs between the
# bottleneck and the softmax layer.
new_model = Sequential([
    bottleneck_model,
    Dense(nCLASSES, activation='softmax', input_dim=2048),
])


In [None]:
# Print the summary of the assembled classifier (bottleneck model + softmax layer).
new_model.summary()


In [None]:
# Training callbacks and optimizer setup.

# Save the complete model (not just weights) to mINCEPTION.h5 whenever
# val_loss improves; checked every epoch.
checkpoint = ModelCheckpoint(
    "mINCEPTION.h5",
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    save_weights_only=False,
    period=1,
    verbose=1,
)

# Stop training once val_loss has gone 5 epochs without improving.
ES = EarlyStopping(monitor='val_loss', mode='auto', min_delta=0, patience=5, verbose=1)

# Adam with explicit learning rate and betas; epsilon and decay stay at their defaults.
adam = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999)
new_model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:

# Train the classifier from the generators; `perf` keeps the returned history
# object, whose `.history` dict is saved and plotted in the next cell.
perf = new_model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=N_EPOCHS,
    validation_data=valid_generator,
    validation_steps=valid_steps,
    callbacks=[checkpoint, ES],
    verbose=2,
)


In [None]:
# Save the training history to perf.json and plot the loss / accuracy curves.

# Cast every logged value to a built-in float: depending on the Keras version
# the history may contain NumPy scalars (e.g. float32) that json cannot
# serialize.
history = {name: [float(v) for v in values] for name, values in perf.history.items()}

with open('perf.json', 'w') as f:
    json.dump(history, f)

df_metric = pd.DataFrame(history)
df_metric[['loss', 'val_loss']].plot()

# metrics=['accuracy'] is logged as 'acc' by older Keras releases and as
# 'accuracy' by newer ones; plot whichever key this run produced instead of
# failing with a KeyError.
acc_key = 'acc' if 'acc' in df_metric.columns else 'accuracy'
df_metric[[acc_key, 'val_' + acc_key]].plot()



In [None]:
# Parse the test annotation JSON into `test_annot`.
# The path is kept under its own name so that `test_annot` only ever refers
# to the parsed data (previously the same name held the path string first).
test_annot_file = '../input/inaturalist-2019-fgvc6/test2019.json'
with open(test_annot_file) as f:
    test_annot = json.load(f)
        

In [None]:
# Test image records (image_id, file_name); 'id' is renamed to 'image_id'
# to match the naming used for the train/validation frames.
df_test_img = (
    pd.DataFrame(test_annot['images'])
    .loc[:, ['id', 'file_name']]
    .rename(columns={'id': 'image_id'})
)


In [None]:
# Prediction input pipeline: same 1/255 rescaling as training, no augmentation.
test_datagen = ImageDataGenerator(rescale=1. / 255)

# Yield test images one at a time, unshuffled and without labels
# (class_mode=None), so the prediction order follows the generator's
# `filenames` list.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=df_test_img,
    directory="../input/inaturalist-2019-fgvc6/test2019",
    x_col="file_name",
    class_mode=None,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=1,
    shuffle=False,
)


In [None]:
# Class label -> output index mapping used by the training generator,
# and its inverse (output index -> class label) for decoding predictions.
label2index = train_generator.class_indices
idx2label = {index: label for label, index in label2index.items()}


In [None]:
# Rewind the test iterator, then run one prediction step per test file
# (the test generator was created with batch_size=1).
# For a quick smoke test, pass a small step count (e.g. steps=5) instead.
test_generator.reset()
n_test_steps = len(test_generator.filenames)
prediction = new_model.predict_generator(test_generator, steps=n_test_steps)


In [None]:
# Pick the highest-scoring output index for every test image ...
predicted_classes = np.argmax(prediction, axis=1)
# ... and translate each index back to its class label.
# NOTE(review): `prediction` is rebound from the score array to the list of
# labels here, so this cell cannot be re-run without re-running the
# prediction cell first.
prediction = [idx2label[class_idx] for class_idx in predicted_classes]


In [None]:
# Preview the first few predicted labels instead of dumping the whole list
# (one entry per test image) into the notebook output.
prediction[:10]


In [None]:
# Pair every test file name with its predicted label, in generator order.
filenames = test_generator.filenames
results = pd.DataFrame({"file_name": filenames, "predicted": prediction})

# Look up each file's image id and keep only the submission columns
# (image id as 'id', plus 'predicted').
df_result = (
    df_test_img.merge(results, on='file_name')
    .loc[:, ['image_id', 'predicted']]
    .rename(columns={'image_id': 'id'})
)


In [None]:
# Write the (id, predicted) table to submission.csv without the index column.
df_result.to_csv("submission.csv",index=False)
