In [55]:
#Import library
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.layers import Dense, Activation, Conv2D, Flatten, Dropout, MaxPooling2D, MaxPool2D, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras import regularizers
from tensorflow.keras import optimizers
from sklearn.metrics import f1_score
from tensorflow.keras import layers
from tensorflow import keras
from keras.callbacks import ModelCheckpoint
from tensorflow.keras import regularizers

import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import zipfile
import tensorflow as tf

In [56]:
# #Extract compressed dataset
# with zipfile.ZipFile('/content/train.zip', 'r') as zip_ref:
#     zip_ref.extractall('/content/')

# with zipfile.ZipFile('/content/test.zip', 'r') as zip_ref:
#     zip_ref.extractall('/content/')

In [57]:
#Load dataset
traindf=pd.read_csv('../input/socs-hackathon-ai-preliminary-phase/train.csv',dtype=str)
testdf=pd.read_csv('../input/socs-hackathon-ai-preliminary-phase/test.csv',dtype=str)

In [58]:
#Create data generator
#Divide train data into train and validation data
# img_size = (224, 224)
datagen=ImageDataGenerator(
        rescale=1. / 255,
#         horizontal_flip=True,
#         vertical_flip=True,
#         zoom_range=0.1,
        validation_split=0.2)

train_generator=datagen.flow_from_dataframe(
      dataframe=traindf,
      directory="../input/socs-hackathon-ai-preliminary-phase/train/train",
      x_col="data",
      y_col="label",
      subset="training",
      batch_size=32,
      seed=42,
#       image_size=img_size,
      shuffle=True,
      class_mode="categorical",
      target_size=(224, 224))

valid_generator=datagen.flow_from_dataframe(
      dataframe=traindf,
      directory="../input/socs-hackathon-ai-preliminary-phase/train/train",
      x_col="data",
      y_col="label",
      subset="validation",
      batch_size=32,
      seed=42,
#       image_size=img_size,
      shuffle=True,
      class_mode="categorical",
      target_size=(224, 224))

test_datagen=ImageDataGenerator(rescale=1./255.)

test_generator=test_datagen.flow_from_dataframe(
      dataframe=testdf,
      directory="../input/socs-hackathon-ai-preliminary-phase/test/test",
      x_col="data",
      y_col=None,
      batch_size=1,
      seed=42,
#       image_size=img_size,
      shuffle=False,
      class_mode=None,
      target_size=(224, 224))

Found 11152 validated image filenames belonging to 11 classes.
Found 2788 validated image filenames belonging to 11 classes.
Found 9829 validated image filenames.


In [59]:
#Create evaluation metric function (F1-score)
from keras import backend as K

def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))


In [60]:
# AUTOTUNE = tf.data.AUTOTUNE

# train_ds = train_generator.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
# val_ds = val_generator.cache().prefetch(buffer_size=AUTOTUNE)
# test_ds = test_generator.cache().prefetch(buffer_size=AUTOTUNE)

In [61]:
# data_augmentation = keras.Sequential(
#   [
#     layers.RandomFlip("horizontal"),
#     layers.RandomRotation(0.1),
#     layers.RandomZoom(0.1),
#   ]
# )

In [62]:
# preprocess_input = keras.applications.vgg16.preprocess_input

In [63]:
IMG_SHAPE = (224, 224, 3)
base_model = keras.applications.vgg16.VGG16(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [64]:
base_model.trainable = True

In [65]:
#Define the machine learning model
inputs = keras.Input(shape=(224, 224, 3))
# x = data_augmentation(inputs)
# x = preprocess_input(inputs)
x = base_model(inputs)
x = layers.Flatten(name="flatten")(x)
x = layers.Dense(1024, activation="relu")(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(11, activation="softmax")(x)

model1 = keras.Model(inputs, outputs)

In [66]:
model1.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
vgg16 (Functional)           (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
dense_2 (Dense)              (None, 1024)              25691136  
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 11)                11275     
Total params: 40,417,099
Trainable params: 40,417,099
Non-trainable params: 0
_______________________________________________

In [67]:
model1.compile(optimizers.Adam(learning_rate=0.0001, decay=1e-6),loss="categorical_crossentropy",metrics=["accuracy", f1_m])

In [68]:
checkpoint = ModelCheckpoint("model1_weights.h5", monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

In [69]:
#Train the model
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
hist1=model1.fit(train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=valid_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=50,
                    callbacks = callbacks_list
)

Epoch 1/50

Epoch 00001: val_accuracy improved from -inf to 0.77299, saving model to model1_weights.h5
Epoch 2/50

Epoch 00002: val_accuracy improved from 0.77299 to 0.84734, saving model to model1_weights.h5
Epoch 3/50

Epoch 00003: val_accuracy improved from 0.84734 to 0.87105, saving model to model1_weights.h5
Epoch 4/50

Epoch 00004: val_accuracy improved from 0.87105 to 0.88326, saving model to model1_weights.h5
Epoch 5/50

Epoch 00005: val_accuracy improved from 0.88326 to 0.91056, saving model to model1_weights.h5
Epoch 6/50

Epoch 00006: val_accuracy did not improve from 0.91056
Epoch 7/50

Epoch 00007: val_accuracy improved from 0.91056 to 0.91990, saving model to model1_weights.h5
Epoch 8/50

Epoch 00008: val_accuracy did not improve from 0.91990
Epoch 9/50

Epoch 00009: val_accuracy did not improve from 0.91990
Epoch 10/50

Epoch 00010: val_accuracy did not improve from 0.91990
Epoch 11/50

Epoch 00011: val_accuracy did not improve from 0.91990
Epoch 12/50

Epoch 00012: val_

In [70]:
IMG_SHAPE = (224, 224, 3)
base_model2 = keras.applications.mobilenet.MobileNet(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')

In [71]:
base_model2.trainable = True

In [72]:
#Define the machine learning model
inputs = keras.Input(shape=(224, 224, 3))
# x = data_augmentation(inputs)
# x = preprocess_input(inputs)
x = base_model2(inputs)
x = layers.Flatten(name="flatten")(x)
x = layers.Dense(1024, activation="relu")(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(11, activation="softmax")(x)

model2 = keras.Model(inputs, outputs)

In [73]:
# model2 = Sequential()
# # model.add(keras.applications.ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3)))
# model2.add(Conv2D(32, (2, 2), padding='same',
#                  input_shape=(224,224,3)))
# model2.add(Activation('relu'))
# model2.add(BatchNormalization())
# model2.add(Conv2D(32, (2, 2)))
# model2.add(Activation('relu'))
# model2.add(BatchNormalization())
# model2.add(MaxPooling2D(pool_size=(2, 2)))
# model2.add(Dropout(0.25))
# model2.add(Conv2D(64, (2, 2), padding='same'))
# model2.add(Activation('relu'))
# model2.add(BatchNormalization())
# model2.add(Conv2D(64, (2, 2)))
# model2.add(Activation('relu'))
# model2.add(BatchNormalization())
# model2.add(MaxPooling2D(pool_size=(2, 2)))
# model2.add(Dropout(0.25))
# model2.add(Flatten())
# model2.add(Dense(512))
# model2.add(Activation('relu'))
# model2.add(Dropout(0.5))
# model2.add(Dense(11, activation='softmax'))

In [74]:
model2.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
mobilenet_1.00_224 (Function (None, 7, 7, 1024)        3228864   
_________________________________________________________________
flatten (Flatten)            (None, 50176)             0         
_________________________________________________________________
dense_4 (Dense)              (None, 1024)              51381248  
_________________________________________________________________
dropout_2 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 11)                11275     
Total params: 54,621,387
Trainable params: 54,599,499
Non-trainable params: 21,888
__________________________________________

In [75]:
model2.compile(optimizers.Adam(learning_rate=0.0001, decay=1e-6),loss="categorical_crossentropy",metrics=["accuracy", f1_m])

In [76]:
checkpoint = ModelCheckpoint("model2_weights.h5", monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

In [77]:
#Train the model
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
hist2=model2.fit(train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=valid_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=50,
                    callbacks = callbacks_list
)

Epoch 1/50

Epoch 00001: val_accuracy improved from -inf to 0.77730, saving model to model2_weights.h5
Epoch 2/50

Epoch 00002: val_accuracy improved from 0.77730 to 0.85309, saving model to model2_weights.h5
Epoch 3/50

Epoch 00003: val_accuracy improved from 0.85309 to 0.86746, saving model to model2_weights.h5
Epoch 4/50

Epoch 00004: val_accuracy improved from 0.86746 to 0.88182, saving model to model2_weights.h5
Epoch 5/50

Epoch 00005: val_accuracy improved from 0.88182 to 0.89116, saving model to model2_weights.h5
Epoch 6/50

Epoch 00006: val_accuracy improved from 0.89116 to 0.89332, saving model to model2_weights.h5
Epoch 7/50

Epoch 00007: val_accuracy did not improve from 0.89332
Epoch 8/50

Epoch 00008: val_accuracy did not improve from 0.89332
Epoch 9/50

Epoch 00009: val_accuracy improved from 0.89332 to 0.90050, saving model to model2_weights.h5
Epoch 10/50

Epoch 00010: val_accuracy improved from 0.90050 to 0.90553, saving model to model2_weights.h5
Epoch 11/50

Epoch 0

In [92]:
IMG_SHAPE = (224, 224, 3)
base_model3 = keras.applications.resnet50.ResNet50(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')

In [93]:
base_model3.trainable = True

In [94]:
#Define the machine learning model
inputs = keras.Input(shape=(224, 224, 3))
# x = data_augmentation(inputs)
# x = preprocess_input(inputs)
x = base_model3(inputs)
x = layers.Flatten(name="flatten")(x)
x = layers.Dense(1024, activation="relu")(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(11, activation="softmax")(x)

model3 = keras.Model(inputs, outputs)

In [95]:
model3.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
resnet50 (Functional)        (None, 7, 7, 2048)        23587712  
_________________________________________________________________
flatten (Flatten)            (None, 100352)            0         
_________________________________________________________________
dense_6 (Dense)              (None, 1024)              102761472 
_________________________________________________________________
dropout_3 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_7 (Dense)              (None, 11)                11275     
Total params: 126,360,459
Trainable params: 126,307,339
Non-trainable params: 53,120
________________________________________

In [96]:
model3.compile(optimizers.Adam(learning_rate=0.0001, decay=1e-6),loss="categorical_crossentropy",metrics=["accuracy", f1_m])

In [97]:
checkpoint = ModelCheckpoint("model3_weights.h5", monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

In [98]:
#Train the model
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
hist3=model3.fit(train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=valid_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=50,
                    callbacks = callbacks_list
)

Epoch 1/50

Epoch 00001: val_accuracy improved from -inf to 0.23635, saving model to model3_weights.h5




Epoch 2/50

Epoch 00002: val_accuracy did not improve from 0.23635
Epoch 3/50

Epoch 00003: val_accuracy improved from 0.23635 to 0.59842, saving model to model3_weights.h5
Epoch 4/50

Epoch 00004: val_accuracy improved from 0.59842 to 0.83800, saving model to model3_weights.h5
Epoch 5/50

Epoch 00005: val_accuracy improved from 0.83800 to 0.87213, saving model to model3_weights.h5
Epoch 6/50

Epoch 00006: val_accuracy improved from 0.87213 to 0.91200, saving model to model3_weights.h5
Epoch 7/50

Epoch 00007: val_accuracy improved from 0.91200 to 0.92170, saving model to model3_weights.h5
Epoch 8/50

Epoch 00008: val_accuracy improved from 0.92170 to 0.92780, saving model to model3_weights.h5
Epoch 9/50

Epoch 00009: val_accuracy did not improve from 0.92780
Epoch 10/50

Epoch 00010: val_accuracy did not improve from 0.92780
Epoch 11/50

Epoch 00011: val_accuracy did not improve from 0.92780
Epoch 12/50

Epoch 00012: val_accuracy did not improve from 0.92780
Epoch 13/50

Epoch 00013: 

In [78]:
model1.load_weights("./model1_weights.h5")

In [79]:
model2.load_weights("./model2_weights.h5")

In [99]:
model3.load_weights("./model3_weights.h5")

In [80]:
#Generate evaluation metric (validation loss, validation accuracy, validation f1-score)
model1.evaluate_generator(generator=valid_generator,steps=STEP_SIZE_VALID)



[0.2884693741798401, 0.9461206793785095, 0.9464455842971802]

In [81]:
#Generate evaluation metric (validation loss, validation accuracy, validation f1-score)
model2.evaluate_generator(generator=valid_generator,steps=STEP_SIZE_VALID)

[0.3655073046684265, 0.9443247318267822, 0.9443247318267822]

In [100]:
#Generate evaluation metric (validation loss, validation accuracy, validation f1-score)
model3.evaluate_generator(generator=valid_generator,steps=STEP_SIZE_VALID)



[0.3197309374809265, 0.9461206793785095, 0.9461206793785095]

In [107]:
from sklearn.metrics import accuracy_score

models = [model1, model2, model3]

preds = [model.predict(test_generator) for model in models]
preds = np.array(preds)
summed = np.sum(preds, axis=0)

# argmax across classes
ensemble_prediction = np.argmax(summed, axis=1)

In [108]:
#Define the labels prediction based on the index predictions
labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in ensemble_prediction]

In [109]:
#Save the prediction using submission format
filenames=test_generator.filenames
results=pd.DataFrame({"data":filenames,"label":predictions})
results.to_csv("results.csv",mode='w', columns=['label'], index_label='index', index=True)