# Initializing Libraries

In [1]:
import pandas as pd
import numpy as np
import os.path
import matplotlib.pyplot as plt
import seaborn as sns
import gc

from PIL import Image

%matplotlib inline
from keras.applications import ResNet50,ResNet101
import cv2
from tqdm import tqdm
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K
from keras import applications
from keras.models import Model
from keras import optimizers
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras.callbacks import EarlyStopping
from keras.preprocessing import image

In [2]:
from sklearn.model_selection import StratifiedKFold , KFold ,RepeatedKFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Getting the Data

In [3]:
# Load the training labels: one row per image, with the file name in `Image`
# and the target label in `Class`.
train = pd.read_csv('/kaggle/input/hackerearth-deep-learning-challenge-holidayseason/dataset/train.csv')
# Preview the first rows.
train.head()

Unnamed: 0,Image,Class
0,image3476.jpg,Miscellaneous
1,image5198.jpg,Candle
2,image4183.jpg,Snowman
3,image1806.jpg,Miscellaneous
4,image7831.jpg,Miscellaneous


# Building an enlarged copy of the dataset

In [4]:
# Work on a copy, so the original `train` frame stays untouched while `df`
# is enlarged and later divided into folds.
df = train.copy()

In [5]:
# Enlarge the dataset by appending one extra copy of the rows of each class
# listed below. Every listed class grows by the same factor, so the ratios
# between them are not disturbed.
#
# NOTE(review): the appended rows are exact duplicates of existing rows. When
# `df` is later split into folds, the same image can end up in both the
# training and the validation part, which makes validation scores optimistic.

Misce = train[train["Class"] == 'Miscellaneous']
Chris_tree = train[train["Class"] == 'Christmas_Tree']
Jacket = train[train["Class"] == 'Jacket']
Candle = train[train["Class"] == 'Candle']
Airplane = train[train["Class"] == 'Airplane']
Snowman = train[train["Class"] == 'Snowman']

# Build the result from `train` in a single concat. On a fresh run this yields
# the same rows, in the same order, as appending the pieces to `df` one by one,
# but re-running the cell no longer keeps growing `df`.
df = pd.concat([train, Misce, Chris_tree, Jacket, Candle, Airplane, Snowman])

In [6]:
df.shape

(12938, 2)

# Setting Path For Images Folder

In [7]:
# Directories holding the training and the test images (relative paths).
TRAIN_PATH = '../input/hackerearth-deep-learning-challenge-holidayseason/dataset/train'
TEST_PATH = '../input/hackerearth-deep-learning-challenge-holidayseason/dataset/test'

# Model

In [8]:
def get_model(IMG_SIZE, n_classes=6):
    """Build and compile a ResNet50-based image classifier.

    An ImageNet-initialised ResNet50 without its top layers is followed by a
    small head: Flatten -> Dropout(0.3) -> Dense(64, relu) -> Dropout(0.4)
    -> Dense(n_classes, softmax).

    Parameters
    ----------
    IMG_SIZE : int
        Side length of the square, 3-channel input images.
    n_classes : int, optional
        Number of units in the final softmax layer. Defaults to 6, the value
        that was previously hard-coded.

    Returns
    -------
    Model
        The model, compiled with categorical cross-entropy loss, SGD
        (learning rate 1e-4, momentum 0.9) and the accuracy metric.
    """
    base_model = applications.ResNet50(weights='imagenet', include_top=False,
                                       input_shape=(IMG_SIZE, IMG_SIZE, 3))

    # Classification head, sized from the backbone's output shape.
    add_model = Sequential()
    add_model.add(Flatten(input_shape=base_model.output_shape[1:]))
    add_model.add(Dropout(0.3))
    add_model.add(Dense(64, activation='relu'))
    add_model.add(Dropout(0.4))
    add_model.add(Dense(n_classes, activation='softmax'))

    model = Model(inputs=base_model.input, outputs=add_model(base_model.output))

    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
                  metrics=['accuracy'])
    return model

# Preparation for kfolds

In [9]:
# Containers for the cross-validation results.

main_pred = []  # not used by any of the cells shown in this notebook
data_kfold = pd.DataFrame()  # receives one column of predicted test labels per fold

In [10]:
# Split the frame into the labels (train_y) and the remaining columns
# (train_x); both are passed to the K-fold splitter in the training cell.

train_y = df.Class
train_x = df.drop(['Class'],axis=1)

**I used 7 splits because we have 6 labels, so even in the worst case at least one label will have 2 occurrences.**

Increase the `EPOCHS` variable if you are training for the competition.

In [11]:
IMG_SIZE = 224  # images are resized to IMG_SIZE x IMG_SIZE pixels
BATCH_SIZE = 32  # batch size used by the image generators
EPOCHS = 10  # training epochs per fold
N_SPLIT = 7  # number of stratified folds

# Training And Prediction

In [None]:
# Only the training images are augmented; validation images are just rescaled.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   zoom_range=0.3, rotation_range=50,
                                   width_shift_range=0.2, height_shift_range=0.2,
                                   shear_range=0.2, horizontal_flip=True,
                                   fill_mode="nearest")
validation_datagen = ImageDataGenerator(rescale=1./255)

# The test files are the same for every fold, so list them once.
# NOTE: the plain os.listdir order is kept on purpose. The later cell that
# builds `ans` lists the same directory again, and row i of `data_kfold` must
# line up with row i of `ans`.
name = os.listdir(TEST_PATH)

# NOTE(review): `df` contains duplicated rows (see the cell that enlarges the
# dataset), so an image can appear in both the training and the validation
# split of a fold; the reported validation accuracy is optimistic.
kfold = StratifiedKFold(n_splits=N_SPLIT, shuffle=True, random_state=42)
for j, (train_idx, val_idx) in enumerate(kfold.split(train_x, train_y), start=1):
    x_train_df = df.iloc[train_idx]
    x_valid_df = df.iloc[val_idx]

    training_set = train_datagen.flow_from_dataframe(
        dataframe=x_train_df, directory=TRAIN_PATH,
        x_col="Image", y_col="Class",
        class_mode="categorical",
        target_size=(IMG_SIZE, IMG_SIZE), batch_size=BATCH_SIZE)

    validation_set = validation_datagen.flow_from_dataframe(
        dataframe=x_valid_df, directory=TRAIN_PATH,
        x_col="Image", y_col="Class",
        class_mode="categorical",
        target_size=(IMG_SIZE, IMG_SIZE), batch_size=BATCH_SIZE)

    # Release the previous fold's model before building a new one; without
    # this every fold adds another ResNet50 to the same Keras session.
    K.clear_session()
    gc.collect()
    model_test = get_model(IMG_SIZE)

    history = model_test.fit_generator(training_set,
                                       validation_data=validation_set,
                                       epochs=EPOCHS,
                                       steps_per_epoch=x_train_df.shape[0] // BATCH_SIZE)

    # Map the softmax index back to the class name used by the generator.
    labels = {index: label for label, index in training_set.class_indices.items()}

    # Predict every test image with this fold's model, using the same
    # 1/255 rescaling as the generators.
    y_pred = []
    for file_name in name:
        # target_size is (height, width); the channel count does not belong here.
        img = image.load_img(os.path.join(TEST_PATH, file_name),
                             target_size=(IMG_SIZE, IMG_SIZE))
        img = image.img_to_array(img) / 255
        pred = model_test.predict(img.reshape(1, IMG_SIZE, IMG_SIZE, 3))
        y_pred.append(labels[np.argmax(pred[0])])

    # One column of predicted labels per fold (columns are named 1..N_SPLIT).
    data_kfold[j] = y_pred
    gc.collect()

Found 11089 validated image filenames belonging to 6 classes.
Found 1849 validated image filenames belonging to 6 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Found 11089 validated image filenames belonging to 6 classes.
Found 1849 validated image filenames belonging to 6 classes.




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
data_kfold

# Creating DataFrame

In [None]:
gc.collect()

In [None]:
# Start the submission frame with the test file names, in the order the
# directory listing returns them.
name = list(os.listdir('../input/hackerearth-deep-learning-challenge-holidayseason/dataset/test/'))
ans = pd.DataFrame(name, columns=['Image'])

In [None]:
# Placeholder value; the real labels are filled in by the majority-vote cell.
ans["Class"] = -1

In [None]:
# Majority vote: for every test image take the label predicted by most folds.

import collections

# Each row of `data_kfold` holds the per-fold predictions for one test image.
# Counter.most_common keeps first-seen order among equal counts, so a tie is
# won by the label that appears first in the row (the earliest fold).
# The whole column is assigned at once: chained `ans.Class.loc[i] = ...`
# writes may go to a temporary copy and not reach `ans`.
ans["Class"] = [
    collections.Counter(votes).most_common(1)[0][0]
    for votes in data_kfold.itertuples(index=False, name=None)
]

In [None]:
ans

## Submission

In [None]:
# Write the submission file without the index column.
ans.to_csv('Kfold_submission.csv',index=False)