In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras import Model, Sequential
from keras.layers import Input, GlobalAveragePooling2D, BatchNormalization, Dropout, Dense
from keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.applications import EfficientNetB3, ResNet152V2
from tensorflow.keras.applications.resnet_v2 import preprocess_input

In [None]:
# data paths
# Root directory of the competition data, given relative to the working directory.
data_path = '../input/state-farm-distracted-driver-detection'
# Directory of training images; it is read with flow_from_directory further down.
train_path = os.path.join(data_path, 'imgs/train')
# Directory of test images.
test_path = os.path.join(data_path, 'imgs/test')

In [None]:
# hyperparameters
# Number of images per batch produced by the directory iterators.
BATCH_SIZE = 32
# Size every image is resized to (passed as target_size to flow_from_directory).
IMAGE_SIZE = (224, 224)
# Number of units in the softmax output layer of the classifier.
NUM_CLASSES = 10

In [None]:
# train and validation datasets
# Random augmentation is applied to the training subset only. Validation images
# come from a separate generator without augmentation, so that val_loss (the
# quantity monitored by the checkpoint and early-stopping callbacks) is measured
# on undistorted images and is comparable from epoch to epoch.
# Both generators must use the same validation_split: Keras derives the split
# from the ordered file list of each class directory, so identical fractions
# yield complementary, non-overlapping subsets.
VALIDATION_SPLIT = 0.2

train_gen = ImageDataGenerator(
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    zoom_range=0.4,
    validation_split=VALIDATION_SPLIT
)

# same split fraction, no augmentation
val_gen = ImageDataGenerator(validation_split=VALIDATION_SPLIT)

# plain generator (no augmentation, no split) used for the test images
test_gen = ImageDataGenerator()

train_ds = train_gen.flow_from_directory(
    train_path,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    seed=123,
    subset='training'
)

val_ds = val_gen.flow_from_directory(
    train_path,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    seed=123,
    subset='validation'
)

In [None]:
# define transfer learning model, can change the architecture or underlying model here
def create_model(input_shape=None, num_classes=None):
    """Build an image classifier on top of an ImageNet-pretrained ResNet152V2.

    The network is: input -> resnet_v2 ``preprocess_input`` -> ResNet152V2
    (without its classification head) -> global average pooling -> batch
    normalization -> dropout(0.2) -> softmax dense layer.

    No backbone layer is frozen here, so the whole network is trained.
    Because ``preprocess_input`` is part of the graph, the model expects raw
    (un-rescaled) pixel values as input.

    Args:
        input_shape: ``(height, width, channels)`` of the input images.
            Defaults to ``IMAGE_SIZE`` with 3 channels so the model always
            matches the size the data generators resize to.
        num_classes: number of output classes. Defaults to ``NUM_CLASSES``.

    Returns:
        An uncompiled ``keras.Model``.
    """
    # Resolve defaults at call time so later changes to the notebook's
    # hyperparameter cell are picked up without redefining this function.
    if input_shape is None:
        input_shape = (*IMAGE_SIZE, 3)
    if num_classes is None:
        num_classes = NUM_CLASSES

    input_layer = Input(shape=input_shape)
    proc_layer = preprocess_input(input_layer)

    # include_top=False drops the ImageNet classifier; only the convolutional
    # feature extractor is reused.
    net = ResNet152V2(include_top=False, weights='imagenet')(proc_layer)

    x = GlobalAveragePooling2D()(net)
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)
    output_layer = Dense(units=num_classes, activation='softmax')(x)

    model = Model(inputs=input_layer, outputs=output_layer)
    return model

In [None]:
# build model
model = create_model()

# Categorical cross-entropy matches the softmax output of the classifier;
# accuracy is tracked so it can be plotted after training.
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.optimizers.Adam(learning_rate=1e-4),
)

# print the layer-by-layer overview
model.summary()

In [None]:
# define callbacks
# Checkpoint: write the model to 'model_weights' whenever val_loss improves.
# save_weights_only is left at its default, so the full model is saved.
# NOTE(review): recent Keras releases require this path to end in '.keras'
# (or '.weights.h5' together with save_weights_only=True) - confirm against
# the installed version.
cp_callback = ModelCheckpoint(
    'model_weights',
    save_best_only=True,
    monitor='val_loss',
    mode='min',
    verbose=1
)
# Early stopping: stop once val_loss has not improved for 5 consecutive epochs.
# restore_best_weights=True rolls the in-memory model back to the best epoch;
# without it, the model used for prediction afterwards would carry the weights
# of the last epoch, i.e. `patience` epochs past the best one.
es_callback = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1
)

In [None]:
# train the model
# Runs for at most 50 epochs; the early-stopping callback may end training
# sooner, and the checkpoint callback saves the model as val_loss improves.
# The returned History object holds the per-epoch metrics.
history = model.fit(
    x=train_ds,
    validation_data=val_ds,
    epochs=50,
    callbacks=[es_callback, cp_callback],
)

In [None]:
# plot loss and accuracy
# Two panels side by side: loss on the left, accuracy on the right, each with
# the training and validation curve over the epochs.
fig = plt.figure(figsize=(12,5))
curves = history.history

loss_ax = fig.add_subplot(1,2,1)
loss_ax.plot(curves['loss'])
loss_ax.plot(curves['val_loss'])
loss_ax.set_title('Loss')
loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
loss_ax.legend(['train', 'val'])

acc_ax = fig.add_subplot(1,2,2)
acc_ax.plot(curves['accuracy'])
acc_ax.plot(curves['val_accuracy'])
acc_ax.set_title('Accuracy')
acc_ax.set_xlabel('epoch')
acc_ax.set_ylabel('acc')
acc_ax.legend(['train', 'val'])

In [None]:
# create test dataset
# flow_from_directory expects a <root>/<class>/<files> layout, so it is pointed
# at the parent 'imgs' directory and told to treat 'test' as the only "class".
# A dedicated name is used for that root instead of rebinding test_path, which
# keeps test_path pointing at the test images and keeps this cell re-runnable.
test_root = os.path.join(data_path, 'imgs')

# shuffle=False keeps the batches in the iterator's file order, so predictions
# can be matched to file names afterwards.
# class_mode=None makes the iterator yield images only: the test set has no
# labels, and the default mode would emit a meaningless one-class label array.
test_ds = test_gen.flow_from_directory(
    test_root,
    shuffle=False,
    target_size=IMAGE_SIZE,
    classes=['test'],
    class_mode=None,
    batch_size=BATCH_SIZE
)

In [None]:
# predict on test data
# Runs the trained model over all batches of test_ds and collects the outputs
# into preds, one row per test image.
preds = model.predict(test_ds)

In [None]:
# save predictions to submission file
# Take the file names from the iterator that produced the predictions rather
# than from a separate directory listing: test_ds was created with
# shuffle=False, so test_ds.filenames is in the same order as the rows of preds
# and contains exactly the files that were actually read.
# The entries are relative paths such as 'test/<name>'; only the base name is kept.
test_fns = [os.path.basename(fn) for fn in test_ds.filenames]
assert len(test_fns) == len(preds), (
    f'{len(test_fns)} test files but {len(preds)} prediction rows'
)

# Build the whole frame in one step; filling it row by row with .loc is very
# slow for a large test set and leaves the probability columns as object dtype.
class_cols = [f'c{i}' for i in range(NUM_CLASSES)]
pred_df = pd.DataFrame(preds, columns=class_cols)
pred_df.insert(0, 'img', test_fns)

pred_df.to_csv('submission.csv', index=False)