<a href="https://colab.research.google.com/github/chiranjeet14/ML_Projects/blob/master/Competition/holiday_season_detect_objects.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Installing dependencies and notebook GPU setup

In [1]:
# !pip uninstall -y tensorflow &> /dev/null
# !pip install tensorflow &> /dev/null
# !pip install --upgrade tensorflow &> /dev/null

## Importing dependencies for the project

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import tensorflow as tf
import pandas as pd
import numpy as np
import datetime

%matplotlib inline
tf.__version__

# from tensorflow.keras.preprocessing import image_dataset_from_directory

'2.4.0'

## Data download

In [3]:
import pathlib

# Download the competition archive through the Keras file cache and ask Keras to extract it.
dataset_url = "https://he-s3.s3.amazonaws.com/media/hackathon/hackerearth-deep-learning-challenge-holidays/holiday-season-11-2c924626/14feeca248c811eb.zip"
data_dir = pathlib.Path(
    tf.keras.utils.get_file('holiday-season', origin=dataset_url, extract=True)
)
# Show the location that get_file reported for the download.
print(f"Data directory : {data_dir}")

Data directory : /root/.keras/datasets/holiday-season


In [4]:
# data_dir = pathlib.Path(data_dir)
# print("Data directory : " + str(data_dir))
# # sub_directories = [x for x in data_dir.iterdir() if x.is_dir()]
# # print(sub_directories)
# # data_dir

In [5]:
# Locate the extracted `dataset` folder relative to the download location held in
# `data_dir`, instead of hard-coding an absolute, machine-specific path.
_download_location = pathlib.Path(data_dir)
_dataset_candidates = (
    _download_location,                     # already the dataset root (this cell was re-run)
    _download_location / 'dataset',         # archive unpacked inside the returned location
    _download_location.parent / 'dataset',  # archive unpacked next to the downloaded file
)
# The dataset root is recognised by its `train` sub-folder.
_dataset_root = next(
    (candidate for candidate in _dataset_candidates if (candidate / 'train').is_dir()),
    None,
)
if _dataset_root is None:
    raise FileNotFoundError(
        "Could not find the extracted 'dataset' folder near " + str(_download_location)
    )

data_dir = _dataset_root
sub_directories = [x for x in data_dir.iterdir() if x.is_dir()]
print(sub_directories)

[PosixPath('/root/.keras/datasets/dataset/train'), PosixPath('/root/.keras/datasets/dataset/test')]


In [6]:
# Image folders inside the dataset root: one for training images, one for test images.
train_folder = data_dir / 'train'
test_folder = data_dir / 'test'
print(f"Train dir : {train_folder}")
print(f"Test dir : {test_folder}")

Train dir : /root/.keras/datasets/dataset/train
Test dir : /root/.keras/datasets/dataset/test


In [7]:
# Collect the regular files (sub-directories are skipped) in each image folder and report how many there are.
train_files = [entry for entry in train_folder.iterdir() if entry.is_file()]
test_files = [entry for entry in test_folder.iterdir() if entry.is_file()]
print(f"Train images count : {len(train_files)}")
print(f"Test images count : {len(test_files)}")

Train images count : 6469
Test images count : 3489


In [8]:
# Training labels: train.csv maps each image file name ("Image") to its class name ("Class").
# Only the training CSV is read; no test CSV is loaded.
train_csv_path = data_dir / 'train.csv'
df_train = pd.read_csv(train_csv_path)
df_train.head()

Unnamed: 0,Image,Class
0,image3476.jpg,Miscellaneous
1,image5198.jpg,Candle
2,image4183.jpg,Snowman
3,image1806.jpg,Miscellaneous
4,image7831.jpg,Miscellaneous


## Creating training and validation set

In [9]:
# Spatial size (img_height, img_width) every image is resized to.
IMG_SIZE = (224, 224)
# Full input shape: the spatial size followed by 3 colour channels.
IMG_SHAPE = (*IMG_SIZE, 3)

In [10]:
BATCH_SIZE = 32
# Fraction of the rows of df_train held out for validation.
VALIDATION_SPLIT = 0.08

# Input scaling that matches the InceptionResNetV2 backbone used below.
preprocess_input = tf.keras.applications.inception_resnet_v2.preprocess_input

# Training images: random augmentation followed by the backbone's preprocessing.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    preprocessing_function=preprocess_input,
    validation_split=VALIDATION_SPLIT,
)

# Validation images: same preprocessing and same split fraction, but NO augmentation,
# so the validation metrics are measured on images that look like the test images.
# NOTE(review): this relies on the training/validation split being taken by row position
# in the dataframe, so that both generators agree on which rows are held out - confirm
# against the Keras version in use.
valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=VALIDATION_SPLIT,
)

# Test images: preprocessing only.
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=preprocess_input,
)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=df_train,
    directory=train_folder,
    x_col="Image",
    y_col="Class",
    batch_size=BATCH_SIZE,
    shuffle=True,
    class_mode='categorical',
    target_size=IMG_SIZE,
    subset="training",
)

validation_generator = valid_datagen.flow_from_dataframe(
    dataframe=df_train,
    directory=train_folder,
    x_col="Image",
    y_col="Class",
    batch_size=BATCH_SIZE,
    # a fixed order keeps the evaluated images identical from epoch to epoch
    shuffle=False,
    class_mode='categorical',
    target_size=IMG_SIZE,
    subset="validation",
)

test_generator = test_datagen.flow_from_directory(
    # dataset root resolved earlier in the notebook (the parent of the `test` folder)
    str(data_dir),
    batch_size=10,
    # only read images from `test` directory
    classes=['test'],
    # don't generate labels
    class_mode=None,
    # don't shuffle, so predictions stay aligned with test_generator.filenames
    shuffle=False,
    # use same size as in training
    target_size=IMG_SIZE,
)

Found 5952 validated image filenames belonging to 6 classes.
Found 517 validated image filenames belonging to 6 classes.
Found 3489 images belonging to 1 classes.


## Creating the base model and adding extra layers to adapt it to our task

In [11]:
# InceptionResNetV2 backbone with ImageNet weights; include_top=False leaves out the
# original classification layers so a custom head can be attached.
base_model = tf.keras.applications.InceptionResNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=IMG_SHAPE,
)

### Which base model layers should be trained?

In [12]:
# Layer count used only by the commented-out partial-freezing code in the next cell.
fine_tune_layers = 100
# Mark the whole pretrained backbone as trainable.
base_model.trainable = True

In [13]:
# if base_model.trainable == True:
#   total_base_model_layers = len(base_model.layers)
#   print("Number of layers in the base model: ", str(total_base_model_layers))

#   for layer in base_model.layers[:total_base_model_layers-fine_tune_layers]:
#     layer.trainable = False

In [14]:
def _dense_block(inputs, units, n_layers, dropout_rate, l2_factor=0.03):
    """Stack `n_layers` L2-regularised ReLU Dense layers followed by one Dropout layer.

    Args:
        inputs: Keras tensor to build on.
        units: width of every Dense layer in the block.
        n_layers: number of Dense layers to stack.
        dropout_rate: rate of the Dropout layer that closes the block.
        l2_factor: L2 factor applied to both kernel and bias of every Dense layer.

    Returns:
        The output tensor of the closing Dropout layer.
    """
    outputs = inputs
    for _ in range(n_layers):
        outputs = tf.keras.layers.Dense(
            units,
            activation='relu',
            kernel_regularizer=tf.keras.regularizers.L2(l2_factor),
            bias_regularizer=tf.keras.regularizers.L2(l2_factor),
        )(outputs)
    return tf.keras.layers.Dropout(dropout_rate)(outputs)


# Size the output layer from the labels the training generator found,
# rather than hard-coding the number of classes.
num_classes = len(train_generator.class_indices)

# Classification head on top of the backbone's pooled features.
x = base_model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = _dense_block(x, units=1024, n_layers=3, dropout_rate=0.5)
x = _dense_block(x, units=512, n_layers=3, dropout_rate=0.3)
preds = tf.keras.layers.Dense(num_classes, activation='softmax')(x)
model = tf.keras.models.Model(inputs=base_model.input, outputs=preds)

## Training the model

In [15]:
# SGD with momentum; labels are one-hot encoded, hence categorical cross-entropy.
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# Alternative optimizer settings kept for reference:
# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001, epsilon=0.1), loss='categorical_crossentropy', metrics = ['accuracy'])
# model.compile(optimizer=tf.keras.optimizers.Adam(), loss='categorical_crossentropy', metrics = ['accuracy'])

In [None]:
EPOCHS = 100
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
# Ceiling division: include the final, smaller batch so every validation image is
# evaluated (floor division silently dropped the remainder).
STEP_SIZE_VALID = -(-validation_generator.n // validation_generator.batch_size)

# Keep a copy of the best model (by validation accuracy) on disk.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "test_model.h5",
    monitor='val_accuracy',
    verbose=0,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    save_freq='epoch',
)
# Stop after 10 epochs without improvement. restore_best_weights=True rolls the model
# back to its best epoch when early stopping fires; without it, `model.save` below and
# the later predictions would use weights from `patience` epochs after the best one.
early = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    min_delta=0,
    patience=10,
    verbose=0,
    mode='auto',
    restore_best_weights=True,
)

# Training the model
history = model.fit(
    train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=validation_generator,
    validation_steps=STEP_SIZE_VALID,
    epochs=EPOCHS,
    callbacks=[checkpoint, early],
)
model.save('model.h5')

Epoch 1/100

## Predicting on Test Data

In [None]:
# `Model.predict_generator` is deprecated; `Model.predict` accepts the generator directly.
predictions = model.predict(test_generator, verbose=1)

## Predictions to output.csv

In [None]:
# Keep only the file name of each test image. `os.path.basename` strips the directory
# part whatever the path separator is, and cannot touch a 'test/' substring that
# happens to appear elsewhere in the path.
test_filenames = [os.path.basename(filename) for filename in test_generator.filenames]

In [None]:
# Index of the highest-scoring class for every prediction (argmax over the last axis).
preds_cls_idx = np.argmax(predictions, axis=-1)

In [None]:
import numpy as np

# Invert the generator's {class name: index} mapping into {index: class name}.
idx_to_classes = dict(
    (index, label) for label, index in train_generator.class_indices.items()
)
# Translate every predicted index into its class name, element by element.
lookup_class = np.vectorize(idx_to_classes.get)
preds_classes = lookup_class(preds_cls_idx)
# Pair each test file name with its predicted class name.
filenames_to_classes = list(zip(test_filenames, preds_classes))

In [None]:
# filenames_to_classes

In [None]:
# Submission table: one row per test image, written without the index column.
# `filenames_to_classes` holds the same (file name, class name) pairs that were zipped above.
data = pd.DataFrame(filenames_to_classes, columns=['Image', 'Class'])
data.to_csv('output.csv', index=False)

In [None]:
# Read the written submission file back and show its first rows as a sanity check.
df_demo = pd.read_csv('output.csv')
df_demo.head()