In [1]:
import warnings 
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, MaxPool2D, BatchNormalization,Dropout
from tensorflow.keras.applications import VGG16
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping,ModelCheckpoint

In [2]:
# Single root for the competition files; every path below is derived from it,
# so relocating the dataset only requires editing this one line.
DATA_ROOT = '/content/drive/MyDrive/MIIA Porthole/'

# Submission template, labelled training ids, and unlabelled test ids.
sample_sub = pd.read_csv(DATA_ROOT + 'sample_submission.csv')
train_df = pd.read_csv(DATA_ROOT + 'train_ids_labels.csv')
test_df = pd.read_csv(DATA_ROOT + 'test_ids_only.csv')

# Directory handed to the image generators as `directory=`; the trailing
# slash is kept so the value is identical to the previous hard-coded path.
data_folder = DATA_ROOT + 'all_data/'

In [3]:
# The CSV ids have no file extension, whereas the generators look files up by
# the exact name in `Image_ID`. Append '.JPG' only when it is missing so that
# re-running this cell does not produce names such as 'abc.JPG.JPG'.
# NOTE(review): assumes no raw id legitimately ends in '.JPG' - confirm.
train_df['Image_ID'] = train_df['Image_ID'].apply(
    lambda x: x if x.endswith('.JPG') else x + '.JPG'
)
test_df['Image_ID'] = test_df['Image_ID'].apply(
    lambda x: x if x.endswith('.JPG') else x + '.JPG'
)

In [4]:
# Store the class labels as strings; the generators are created with
# class_mode='binary', for which Keras expects string-valued labels.
label_as_text = train_df['Label'].astype(str)
train_df['Label'] = label_as_text

In [5]:
# Peek at the first five training rows (ids with extension, string labels).
train_df.head(n=5)

Unnamed: 0,Image_ID,Label
0,cVOfkSdqnWXUerr.JPG,0
1,EhnvIDPXFFjUhkR.JPG,0
2,tYKqoStvHsryFhS.JPG,1
3,eSpjlsZIwOMLmUS.JPG,1
4,uzxhIXjNENLyHwZ.JPG,1


In [6]:
# Number of distinct classes in the target column (missing values excluded).
train_df['Label'].nunique(dropna=True)

2

In [7]:
# List the column names of the training frame.
train_df.columns

Index(['Image_ID', 'Label'], dtype='object')

In [8]:
# Peek at the first five test rows (ids only, no labels).
test_df.head(n=5)

Unnamed: 0,Image_ID
0,nRhHxqLhDlJsiXc.JPG
1,gbSntVovxgHQrmO.JPG
2,nqOobGmvgEOiPms.JPG
3,oIkPTooLcIbZKlF.JPG
4,eSKxsTTJDQzkjgD.JPG


In [9]:
# Augmenting pipeline: pixel values are scaled by 1/255, images are randomly
# flipped on both axes, rotated by up to 90 degrees and brightness-shifted
# within [0.8, 1.2]. 15% of the rows are set aside via `validation_split`.
datagen = ImageDataGenerator(
    rescale=1./255.,
    validation_split=0.15,
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=90,
    brightness_range=(0.8, 1.2),
)

# Inference pipeline: the same 1/255 scaling, no random transforms.
test_datagen = ImageDataGenerator(rescale=1./255.)

In [10]:
# Arguments common to the training and validation iterators, kept in one
# place so the two subsets cannot drift apart.
labelled_flow_args = dict(
    dataframe=train_df,
    directory=data_folder,
    x_col='Image_ID',
    y_col='Label',
    batch_size=64,
    class_mode='binary',
    seed=42,
    target_size=(128, 128),
)

# Training subset: drawn from the augmenting generator.
train_generator = datagen.flow_from_dataframe(
    subset='training',
    **labelled_flow_args
)

# Validation subset: must NOT be augmented, otherwise val_loss is measured on
# randomly flipped/rotated/brightened images and becomes a noisy signal for
# EarlyStopping and ModelCheckpoint. A rescale-only generator is used instead.
# `validation_split` must equal the value given to `datagen`: Keras splits the
# dataframe by row position, so equal fractions keep the two subsets disjoint.
validation_datagen = ImageDataGenerator(rescale=1./255., validation_split=0.15)
validation_generator = validation_datagen.flow_from_dataframe(
    subset='validation',
    shuffle=False,  # evaluate the same images in the same order every epoch
    **labelled_flow_args
)

# Test iterator: no labels (class_mode=None) and no shuffling, so predictions
# come back in the row order of test_df. batch_size=1 guarantees every image
# is covered regardless of how the step count is computed.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=data_folder,
    x_col='Image_ID',
    seed=42,
    batch_size=1,
    shuffle=False,
    class_mode=None,
    target_size=(128, 128),
)

Found 3423 validated image filenames belonging to 2 classes.
Found 603 validated image filenames belonging to 2 classes.
Found 1650 validated image filenames.


In [11]:
# Convolutional backbone: VGG16 pre-trained on ImageNet, without its dense
# classification head, sized for 128x128 RGB inputs.
vgg_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(128, 128, 3),
)

# Stack: input batch-norm -> VGG16 features -> flatten -> dropout -> one
# sigmoid unit producing the probability of the positive class.
# No layer is frozen here, so the backbone is fine-tuned along with the head.
model = Sequential()
model.add(BatchNormalization(input_shape=(128, 128, 3)))
model.add(vgg_model)
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy matches the single sigmoid output; plain SGD, lr 0.01.
model.compile(
    optimizer=SGD(learning_rate=0.01),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [12]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization (BatchNo (None, 128, 128, 3)       12        
_________________________________________________________________
vgg16 (Functional)           (None, 4, 4, 512)         14714688  
_________________________________________________________________
flatten (Flatten)            (None, 8192)              0         
_________________________________________________________________
dropout (Dropout)            (None, 8192)              0         
_________________________________________________________________
dense (Dense)                (None, 1)                 8193      
Total params: 14,722,893
Trainable params: 14,722,887
Non-trainable params: 6
_________________________________________________________________


In [13]:
# Stop once val_loss has not improved for 4 consecutive epochs.
# restore_best_weights=True rolls the model back to the best epoch; without
# it the model keeps the weights of the last (already degraded) epoch and
# those would be used for the test predictions.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=4,
    restore_best_weights=True,
)

# Persist the weights of the best epoch to disk. The monitored metric is
# spelled out (it is also the Keras default) so both callbacks visibly agree
# on what "best" means.
modelcheckpoints = ModelCheckpoint(
    filepath='trainmodel.hdf5',
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
)

In [14]:
# Batches needed to cover each dataset exactly once. Ceiling division is used
# so that the final, smaller batch is included: with floor division the
# leftover samples (n % batch_size) are silently skipped every epoch, and a
# test batch size above 1 would yield fewer predictions than test rows.
STEP_SIZE_TRAIN = (train_generator.n + train_generator.batch_size - 1) // train_generator.batch_size
STEP_SIZE_VALID = (validation_generator.n + validation_generator.batch_size - 1) // validation_generator.batch_size
STEP_SIZE_TEST = (test_generator.n + test_generator.batch_size - 1) // test_generator.batch_size

In [33]:
# Train for at most 20 epochs; the callbacks may end the run earlier and
# write checkpoints along the way. The returned History object is left as the
# cell's last expression so the notebook displays it.
model.fit(
    x=train_generator,
    epochs=20,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=validation_generator,
    validation_steps=STEP_SIZE_VALID,
    callbacks=[early_stop, modelcheckpoints],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20


<tensorflow.python.keras.callbacks.History at 0x7fe5d15bc358>

In [34]:
# Rewind the iterator to its first batch so the predictions line up with the
# rows of test_df (the generator was created with shuffle=False).
test_generator.reset()

# Model.predict accepts generators directly; Model.predict_generator is
# deprecated and no longer exists in later TensorFlow/Keras releases.
result = model.predict(test_generator,
                       steps=STEP_SIZE_TEST)

In [35]:
# The network ends in a single sigmoid unit, so `result` has one column per
# row; flatten it to a plain 1-D vector before storing it as a column.
predictions = result.ravel()

# Fail with an explicit message if the prediction count and the submission
# template disagree, instead of relying on a generic pandas length error.
if len(predictions) != len(sample_sub):
    raise ValueError(
        'Got %d predictions for %d submission rows'
        % (len(predictions), len(sample_sub))
    )

# NOTE(review): predictions follow the row order of test_df; this assumes
# sample_sub lists the images in that same order - confirm before submitting.
sample_sub['Label'] = predictions
sample_sub.to_csv('/content/drive/MyDrive/MIIA Porthole/sub4.csv',index=False)