In [1]:
import pandas as pd

In [2]:
train_df=pd.read_csv('../input/hackereath-holiday-season-deep-learning-contest/dataset/train.csv')

In [3]:
train_df.head()

Unnamed: 0,Image,Class
0,image3476.jpg,Miscellaneous
1,image5198.jpg,Candle
2,image4183.jpg,Snowman
3,image1806.jpg,Miscellaneous
4,image7831.jpg,Miscellaneous


In [4]:
train_df['Class'].value_counts()

Miscellaneous     2801
Christmas_Tree    1539
Jacket             640
Candle             593
Airplane           535
Snowman            361
Name: Class, dtype: int64

In [5]:
from sklearn.model_selection import StratifiedKFold
from keras.preprocessing import image

In [6]:
train_df['Kfold']=-1

In [7]:
train_df.head()

Unnamed: 0,Image,Class,Kfold
0,image3476.jpg,Miscellaneous,-1
1,image5198.jpg,Candle,-1
2,image4183.jpg,Snowman,-1
3,image1806.jpg,Miscellaneous,-1
4,image7831.jpg,Miscellaneous,-1


In [8]:
train_df=train_df.sample(frac=1).reset_index(drop=True)

In [9]:
train_df.tail()

Unnamed: 0,Image,Class,Kfold
6464,image8004.jpg,Miscellaneous,-1
6465,image7891.jpg,Miscellaneous,-1
6466,image6963.jpg,Miscellaneous,-1
6467,image6619.jpg,Miscellaneous,-1
6468,image9955.jpg,Miscellaneous,-1


In [10]:
y=train_df['Class']

In [11]:
kf=StratifiedKFold(n_splits=5)

In [12]:
for f,(t_,v_) in enumerate(kf.split(X=train_df,y=y)):
    train_df.loc[v_,'Kfold']=f

In [13]:
train_df.head()

Unnamed: 0,Image,Class,Kfold
0,image2443.jpg,Airplane,0
1,image7600.jpg,Snowman,0
2,image7401.jpg,Candle,0
3,image7468.jpg,Miscellaneous,0
4,image1619.jpg,Christmas_Tree,0


In [14]:
train=train_df[train_df['Kfold']!=4]

In [15]:
valid=train_df[train_df['Kfold']==4]

In [16]:
valid.tail()

Unnamed: 0,Image,Class,Kfold
6464,image8004.jpg,Miscellaneous,4
6465,image7891.jpg,Miscellaneous,4
6466,image6963.jpg,Miscellaneous,4
6467,image6619.jpg,Miscellaneous,4
6468,image9955.jpg,Miscellaneous,4


In [17]:
valid['Class'].value_counts()

Miscellaneous     561
Christmas_Tree    307
Jacket            128
Candle            118
Airplane          107
Snowman            72
Name: Class, dtype: int64

# As the data is large so it will be better to use datagenerator, so I am using keras Imagedatagenerator and we can do data augmentation in this step

In [18]:
from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1./255)
#         shear_range=0.2,
#         zoom_range=0.2,
#         horizontal_flip=True,
#         width_shift_range=0.1,
#         height_shift_range=0.1)

train_generator=train_datagen.flow_from_dataframe(dataframe=train,
                                            directory="../input/hackereath-holiday-season-deep-learning-contest/dataset/train/",
                                            x_col="Image",
                                            y_col="Class",
                                            subset="training",
                                            batch_size=128,
                                            seed=42,
                                            shuffle=True,
                                            class_mode="categorical",
                                            target_size=(331,331))

Found 5176 validated image filenames belonging to 6 classes.


In [19]:
from keras.preprocessing.image import ImageDataGenerator

valid_datagen = ImageDataGenerator(rescale=1./255)
#         shear_range=0.2,
#         zoom_range=0.2,
#         horizontal_flip=True,
#         width_shift_range=0.1,
#         height_shift_range=0.1)

valid_generator=valid_datagen.flow_from_dataframe(dataframe=valid,
                                            directory="../input/hackereath-holiday-season-deep-learning-contest/dataset/train/",
                                            x_col="Image",
                                            y_col="Class",
                                            subset="training",
                                            batch_size=128,
                                            seed=42,
                                            shuffle=True,
                                            class_mode="categorical",
                                            target_size=(331,331))

Found 1293 validated image filenames belonging to 6 classes.


# Loading nasnet large model and setting all layers except last 35 as non trainable so we can generalise our model on our data and also do transfer learning for better results

In [20]:
from keras.applications.nasnet import NASNetLarge
# from keras.applications.resnet50 import preprocess_input,decode_predictions
from keras.layers import Input, Conv2D, MaxPool2D, Dense, Flatten
from keras.models import Model
from keras.utils import to_categorical

In [21]:
resnet=NASNetLarge(include_top=True,weights='imagenet')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/nasnet/NASNet-large.h5


In [22]:
x=resnet.layers[-2].output
fc1=Dense(6,activation='softmax')(x)

In [23]:
my_model=Model(inputs=resnet.input,outputs=fc1)

In [24]:
my_model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 331, 331, 3) 0                                            
__________________________________________________________________________________________________
stem_conv1 (Conv2D)             (None, 165, 165, 96) 2592        input_1[0][0]                    
__________________________________________________________________________________________________
stem_bn1 (BatchNormalization)   (None, 165, 165, 96) 384         stem_conv1[0][0]                 
__________________________________________________________________________________________________
activation (Activation)         (None, 165, 165, 96) 0           stem_bn1[0][0]                   
_______________________________________________________________________________________

In [25]:
from keras.optimizers import Adam

In [26]:
adam=Adam(learning_rate=0.0001)

In [27]:
for l in my_model.layers[:-5]:
    #print(l)
    l.trainable = False
my_model.compile(optimizer='adam',loss ="categorical_crossentropy",metrics=["accuracy"])

In [28]:
my_model.fit_generator(train_generator,steps_per_epoch=5176//128,validation_data=valid_generator,validation_steps=1293//128,epochs=2)

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x7ff04ecf3350>

In [29]:
my_model.save('model.h5')

In [30]:
test_datagen = ImageDataGenerator(rescale=1./255)

In [31]:
test_generator = test_datagen.flow_from_directory(
    directory='../input/hackereath-holiday-season-deep-learning-contest/dataset/test/',
    target_size=(331, 331),
    color_mode="rgb",
    batch_size=1,
    class_mode=None,
    shuffle=False,
    seed=42
)

Found 0 images belonging to 0 classes.


# above test datagenerator code is not working, reason is that we must have another folder inside the test image folder that contains all test images for datagenerator(read_from_directory), So let's do this

In [32]:
import os

In [33]:
name=[]
y_pred=[]

In [34]:
labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())

In [35]:
import numpy as np

In [36]:
s=0
for i in os.listdir('../input/hackereath-holiday-season-deep-learning-contest/dataset/test/'):
    name.append(i)
    i='../input/hackereath-holiday-season-deep-learning-contest/dataset/test/'+i
    img=image.load_img(i,target_size=(331,331,3))
    img=image.img_to_array(img)/255
    pred=my_model.predict(img.reshape(1,331,331,3))
    y_pred.append(labels[np.argmax(pred[0])])
    s+=1
    if s%100==0:
        print(s)  

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400


In [37]:
data=pd.DataFrame((zip(name,y_pred)),columns=['Image','Class'])

In [38]:
data.head()

Unnamed: 0,Image,Class
0,image7761.jpg,Miscellaneous
1,image3202.jpg,Miscellaneous
2,image688.jpg,Snowman
3,image233.jpg,Candle
4,image4332.jpg,Christmas_Tree


In [39]:
data.to_csv('result.csv',index=False)


In [40]:
data.shape

(3489, 2)