# Import Libraries

In [None]:
import sklearn
import keras
import sys
import time
import pandas as pd
import numpy as np
import nltk
import tensorflow as tf
import math
import cv2


from numpy import asarray
from numpy import clip

from PIL import Image

from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import LSTM, Activation, Dropout, Flatten, Dense, Bidirectional
from keras.layers.pooling import GlobalAveragePooling2D
from keras.layers import Conv2D, MaxPooling2D, MaxPooling1D, Input, BatchNormalization, concatenate
from keras.layers import Flatten
from keras.layers.embeddings import Embedding
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator

from matplotlib import pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_recall_fscore_support
from sklearn.utils import shuffle

import skimage
from skimage import io
from skimage.transform import resize
from skimage.util import random_noise

from numpy.random import RandomState

# Helper Functions

In [None]:
from nltk.stem import WordNetLemmatizer 
from nltk.corpus import stopwords
import re

# Shared WordNet lemmatizer instance used by preprocess().
lemmatizer = WordNetLemmatizer() 
# English stop-word list read by preprocess().
# NOTE: this rebinds the name `stopwords`, shadowing the
# `nltk.corpus.stopwords` module imported just above; after this line
# `stopwords` is the word list, not the corpus reader.
stopwords = nltk.corpus.stopwords.words('english')

def preprocess(data):
    """Clean a collection of titles before tokenization.

    For every title, runs of digits are removed, the text is split on
    single spaces, tokens found in the module-level ``stopwords`` list are
    dropped and every remaining token is lemmatized with the module-level
    ``lemmatizer``.

    Args:
        data: Iterable of strings (callers pass lower-cased titles).

    Returns:
        list: One cleaned string per input title. Each string starts with a
        single space and every kept token is followed by a space.
    """
    # Testing membership against a list scans it for every token; build the
    # set once per call so each lookup is constant time.
    stop_set = set(stopwords)

    cleaned = []
    for title in data:
        title = re.sub(r'[0-9]+', '', title)
        kept = [lemmatizer.lemmatize(word) + ' '
                for word in title.split(' ')
                if word not in stop_set]
        # The leading space is kept so the output format is unchanged.
        cleaned.append(' ' + ''.join(kept))

    return cleaned

In [None]:
def recall_m(y_true, y_pred):
    """Recall computed with Keras backend ops.

    Values are clipped to [0, 1] and rounded before being summed, so the
    counts are taken over hard 0/1 decisions.

    Returns:
        true positives / (possible positives + epsilon).
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual = K.round(K.clip(y_true, 0, 1))
    # K.epsilon() keeps the division defined when there are no positives.
    return K.sum(hits) / (K.sum(actual) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision computed with Keras backend ops.

    Values are clipped to [0, 1] and rounded before being summed, so the
    counts are taken over hard 0/1 decisions.

    Returns:
        true positives / (predicted positives + epsilon).
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted = K.round(K.clip(y_pred, 0, 1))
    # K.epsilon() keeps the division defined when nothing is predicted positive.
    return K.sum(hits) / (K.sum(predicted) + K.epsilon())


def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of precision_m and recall_m.

    K.epsilon() in the denominator avoids a division by zero when both
    precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio

In [None]:
def getMaximumLen(data):
    """Return the largest number of space-separated pieces in any string.

    Each string is split on a single space character, so empty pieces
    produced by leading, trailing or repeated spaces are counted too.
    Returns 0 when ``data`` is empty.
    """
    piece_counts = (len(text.split(' ')) for text in data)
    return max(piece_counts, default=0)

In [None]:
def normalize_image(pixels):
    """Standardize an image array and clip the result to [0, 1].

    The array is converted to float32, shifted to zero mean and scaled to
    unit standard deviation (statistics taken over the whole array), then
    clipped so that every value lies in [0, 1].

    The previous version wrapped this in a loop over the global
    ``data['Image Path']``. That made the function depend on a global
    defined elsewhere and re-normalized the same array once per dataset
    row; the loop is removed so the image is normalized exactly once.

    Args:
        pixels: NumPy array of pixel values.

    Returns:
        float32 NumPy array with the same shape as ``pixels``.
    """
    pixels = pixels.astype('float32')
    mean, std = pixels.mean(), pixels.std()
    if std == 0:
        # A constant image has no spread; return zeros instead of NaNs
        # from a 0/0 division.
        return np.zeros_like(pixels)
    pixels = (pixels - mean) / std
    return np.clip(pixels, 0, 1.0)

In [None]:
def resize_image(image_path,size):
    """Load an image from disk as RGB and resize it to ``size`` x ``size``.

    Args:
        image_path: Path of the image file to read.
        size: Target width and height in pixels.

    Returns:
        The resized image array in RGB channel order.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of inside cvtColor.
        raise FileNotFoundError(
            "Could not read image: {!r}".format(image_path))
    # OpenCV loads images as BGR; convert to RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (size, size), interpolation=cv2.INTER_AREA)
    return img

In [None]:
def data_augmentation(imagepath, size=74):
    """Build a 3x3 mosaic of augmented variants for every image path.

    Each image is loaded and resized with ``resize_image``, normalized with
    ``normalize_image`` and then expanded into nine tiles: the image
    itself, three 90-degree rotations, a horizontal and a vertical flip, a
    noisy copy, a Gaussian-blurred copy and a centre crop resized back to
    full size. The tiles are laid out row by row in that order.

    Args:
        imagepath: Iterable of image file paths (list, array or pandas
            Series).
        size: Side length in pixels of every tile. Defaults to 74, the
            value that used to be hard-coded.

    Returns:
        list: One mosaic array per input path, three tiles high and three
        tiles wide.
    """
    images = []
    # Iterate over the values directly. Indexing with imagepath[i] is a
    # label lookup on a pandas Series, which fails or returns the wrong row
    # once the index is no longer 0..n-1 (e.g. after a shuffled split).
    for path in imagepath:
        img = normalize_image(resize_image(path, size))

        # "Zoom": cut out the centre and scale it back up to a full tile.
        # `- lx // 4` parses as (-lx) // 4, which floors, so the trailing
        # margin can be one pixel wider than the leading one.
        lx, ly = img.shape[0], img.shape[1]
        cropped = img[lx // 4: - lx // 4, ly // 4: - ly // 4]

        variants = [
            img,                                  # original
            np.rot90(img),                        # 90 degrees
            np.rot90(img, 2),                     # 180 degrees
            np.rot90(img, 3),                     # 270 degrees
            np.fliplr(img),                       # horizontal flip
            np.flipud(img),                       # vertical flip
            random_noise(img),                    # random noise
            cv2.GaussianBlur(img, (11, 11), 0),   # blur
            cv2.resize(cropped, (size, size), interpolation=cv2.INTER_AREA),
        ]

        # Three tiles per row, three rows.
        rows = [np.hstack(variants[k:k + 3]) for k in range(0, 9, 3)]
        images.append(np.vstack(rows))

    return images

# Load and Split Data

In [None]:
# Names given to the four columns read from the header-less CSV files.
columns = ["Image Path", "Title", "Category ID", "Category"]
# Load four columns of the training CSV (no header row, ISO-8859-1 text).
# NOTE(review): pandas documents that the element order of `usecols` is
# ignored, so [1,3,6,5] presumably selects file columns 1, 3, 5, 6 and the
# names are applied in that file order - confirm that column 5 holds the
# category ID and column 6 the category name.
data = pd.read_csv('train.csv', encoding = "ISO-8859-1", header=None,
                   usecols=[1,3,6,5], names=columns)

In [None]:
# Notebook display of the loaded training DataFrame.
data

In [None]:
# Image file names, aligned row-for-row with the titles and labels below.
paths = data['Image Path']

# Lower-case the titles, then strip digits and stop words and lemmatize.
x = preprocess(data['Title'].str.lower())

# Keep the fitted encoder so the category -> integer mapping can be reused
# (e.g. to encode other splits the same way or to decode predictions).
# Previously the encoder was discarded right after fit_transform.
label_encoder = LabelEncoder()
# num_classes is fixed at 30 and has to match the width of the model's
# softmax output layer.
y = to_categorical(label_encoder.fit_transform(data['Category']), num_classes = 30 )

# LSTM Pre-Processing

In [None]:
# Notebook display of the cleaned training titles.
x

In [None]:
# Fit a Keras Tokenizer on the cleaned training titles; the resulting
# vocabulary is read back through t.word_index.
t = Tokenizer()
t.fit_on_texts(x)

In [None]:
# Word -> integer index mapping produced by the fitted tokenizer.
wordIndex = t.word_index

In [None]:
# Notebook display of the word -> index mapping.
wordIndex

In [None]:
# One more than the number of known words; used as the row count of the
# embedding matrix and as the Embedding layer's input dimension.
# NOTE(review): presumably the extra row is for index 0, which the sequences
# use as the padding value - confirm against the Keras Tokenizer docs.
vocab_size = len(wordIndex) + 1

In [None]:
# Notebook display of the vocabulary size.
vocab_size

In [None]:
# Turn every cleaned title into a list of word indices.
encodedX = t.texts_to_sequences(x)
# Common sequence length: the largest space-separated piece count over the
# cleaned titles (also reused when padding the test titles).
maxlen = getMaximumLen(x)
# Pad at the end ('post') so all sequences have length maxlen.
X = pad_sequences(encodedX, maxlen=maxlen, padding='post')

In [None]:
from tqdm import tqdm

# Not referenced anywhere else in the visible code; kept for compatibility.
embedding_vector = {}
# Open the GloVe vectors with an explicit encoding. Without it the platform
# default is used, which can fail on the non-ASCII tokens in the file.
# NOTE(review): assumes the file is UTF-8, as distributed - confirm.
# The handle is consumed by the parsing loop in the following cell.
f = open('glove.6B.100d.txt', encoding='utf-8')

In [None]:
import _pickle

# Parse the GloVe text file into {word: float32 vector}.
# Each line is "<word> <v1> <v2> ...", separated by single spaces.
embeddings_index = {}
try:
    for line in tqdm(f):
        values = line.split(" ")
        embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')
finally:
    # The handle was opened without a context manager; close it here so it
    # is released even if a malformed line raises during parsing.
    f.close()


In [None]:
# Notebook display of the parsed embeddings dictionary.
embeddings_index

In [None]:
# Embedding lookup table: one 100-dimensional row per vocabulary index,
# initialised to zeros and filled in from the GloVe vectors afterwards.
embeddings_matrix = np.zeros((vocab_size, 100))

In [None]:
# Fill each vocabulary row with its pretrained vector; words missing from
# the pretrained index get a random standard-normal vector instead.
for token, row in t.word_index.items():
    pretrained = embeddings_index.get(token)
    embeddings_matrix[row] = (
        np.random.randn(100) if pretrained is None else pretrained
    )

In [None]:
# 80/20 train/validation split. Sequences, labels and image paths are split
# together so the rows stay aligned; random_state fixes the shuffle.
xtrain, xval, ytrain, yval, pathtrain, pathval = train_test_split(X, y, paths, train_size=0.8, random_state=1)

# CNN Pre-Processing

In [None]:
# Load every training image from the images/ folder as a 222x222 RGB array.
imagetrain = [resize_image('images/' + name, 222) for name in pathtrain]

In [None]:
# Stack the list of per-image arrays into a single NumPy array.
imagetrain = np.array(imagetrain)

In [None]:
# Load every validation image from the images/ folder as a 222x222 RGB array.
imageval = [resize_image('images/' + name, 222) for name in pathval]

In [None]:
# Stack the list of per-image arrays into a single NumPy array.
imageval = np.array(imageval)

# Load Pre-trained VGG

In [None]:
# import tensorflow as tf
# config = tf.ConfigProto()
# config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
# config.gpu_options.per_process_gpu_memory_fraction = 0.9
# #config.log_device_placement = True 
# config.intra_op_parallelism_threads=16
# config.inter_op_parallelism_threads=16
# sess = tf.Session(config=config)

# from keras import backend as K
# K.set_session(sess)
# print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

In [None]:
# Side length in pixels of the square input expected by the VGG16 base;
# the same value (222) is hard-coded where the images are loaded.
image_size = 222

In [None]:
from keras import models
from keras import layers
from keras import optimizers
from keras import regularizers

In [None]:
from keras.applications.vgg16 import VGG16

# VGG16 with ImageNet weights and without its top classifier layers
# (include_top=False), taking image_size x image_size RGB input.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(image_size, image_size, 3))

In [None]:
# Freeze all the layers
for layer in base_model.layers:
    layer.trainable = False

# Check the trainable status of the individual layers
for layer in base_model.layers:
    print(layer, layer.trainable)

# Multi-Model Training

In [None]:
import functools

# Top-3 categorical accuracy: Keras' top-k accuracy metric with k fixed to 3.
top3_acc = functools.partial(keras.metrics.top_k_categorical_accuracy, k=3)

# functools.partial objects carry no __name__ of their own, so one is set
# explicitly. NOTE(review): presumably Keras uses it to label the metric in
# logs and history - confirm.
top3_acc.__name__ = 'top3_acc'

In [None]:
# Two-branch classifier: a frozen VGG16 image branch and a bidirectional
# LSTM title branch, concatenated and fed to a 30-way softmax.

# CNN Branch

cnn_input = Input(shape=(image_size, image_size, 3), name='cnn_input')
# Fix: base_model is already a model instance, so it is applied to the
# input tensor directly. The former `base_model()(cnn_input)` called it
# with no arguments first and raised.
vgg = base_model(cnn_input)
vgg = layers.Flatten()(vgg)
vgg = layers.BatchNormalization()(vgg)
vgg = layers.Dense(1024,  kernel_regularizer=regularizers.l2(0.001), activation='relu')(vgg)
vgg = layers.BatchNormalization()(vgg)
vgg = layers.Dropout(0.4)(vgg)
vgg = layers.Dense(512, activation='relu')(vgg)
vgg = layers.BatchNormalization()(vgg)
vgg = layers.Dropout(0.4)(vgg)
vgg = layers.Dense(256,  kernel_regularizer=regularizers.l2(0.001), activation='relu')(vgg)
vgg = layers.Dropout(0.5)(vgg)

# LSTM Branch

lstm_input = Input(shape=(maxlen,), name='lstm_input')
# Fix: the embedding consumes `lstm_input`; the name `lstm` was never defined.
l0 = Embedding(vocab_size, 100, weights=[embeddings_matrix], name='l0')(lstm_input)
l1 = Bidirectional(LSTM(100, return_sequences=True, dropout=0.3, recurrent_dropout=0.3, name='l1'))(l0)
# Fix: `name` is a constructor argument. The former
# `GlobalMaxPool1D()(name='l2')(l1)` passed it to the layer call instead.
l2 = keras.layers.GlobalMaxPool1D(name='l2')(l1)
l3 = Dense(50, activation="relu", name='l3')(l2)

# Fuse both branches along the feature axis and classify into 30 categories.
merge = concatenate([vgg, l3],name='merge',axis=-1)
d1 = Dense(32, activation="relu", name='d1')(merge)
predictions = Dense(30, activation='softmax', name='predictions')(d1)

model = Model(inputs=[cnn_input, lstm_input], outputs=predictions)

model.summary()

In [None]:
# RMSprop with default settings; track accuracy, top-3 accuracy and F1.
optimizer = keras.optimizers.RMSprop()
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy', top3_acc, f1_m],
)

In [None]:
# Train on (image, title) pairs. The checkpoint callback keeps the weights
# with the lowest validation loss in bestmodel.h5.
# NOTE: patience=10 with epochs=10 means EarlyStopping can never trigger in
# this run; lower patience or raise epochs if early stopping is wanted.
history = model.fit([imagetrain, xtrain], ytrain,
                    validation_data=([imageval, xval], yval),
                    epochs=10, batch_size=128,
                    callbacks=[EarlyStopping(monitor='val_loss', patience=10),
                               ModelCheckpoint(filepath='bestmodel.h5', monitor='val_loss', save_best_only=True)]
                    )

# Save the final model before touching the metric history, so a missing
# history key cannot cost the trained model.
model.save("multimodel.h5")

# Depending on the Keras version, the 'accuracy' metric is logged as
# 'val_acc' or 'val_accuracy'; accept either.
_logged = history.history
val_acc = _logged['val_acc'] if 'val_acc' in _logged else _logged['val_accuracy']
val_f1_m = _logged['val_f1_m']

In [None]:
# Training vs. validation loss per epoch.
for series_name in ('loss', 'val_loss'):
    plt.plot(history.history[series_name])
plt.title('Model loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
# Depending on the Keras version, the 'accuracy' metric is logged under
# 'acc' or 'accuracy' (and 'val_' + that key); use whichever is present
# instead of failing with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

# Testing

In [None]:
# Load the header-less test CSV.
# NOTE(review): `columns` holds four names but only three columns are
# selected by usecols, while the following cells read test['Image Path'],
# test['Title'] and test['Category'] - verify that pandas assigns the
# intended names to the intended columns of test.csv.
# NOTE(review): unlike the training CSV no encoding is given here - confirm
# both files share the same encoding.
test = pd.read_csv('test.csv', header=None, usecols=[1,2,3], names=columns)

In [None]:
# Notebook display of the loaded test DataFrame.
test

In [None]:
pathtest = test['Image Path']
# Same text cleaning as the training titles.
xtest = preprocess(test['Title'].str.lower())
# Encode the test labels with the mapping learned from the TRAINING
# categories. Fitting a fresh LabelEncoder on the test set (as before) can
# assign different integers to the same category whenever the two sets do
# not contain exactly the same categories. transform() raises ValueError
# for a category never seen in training.
train_fitted_encoder = LabelEncoder().fit(data['Category'])
ytest = to_categorical(train_fitted_encoder.transform(test['Category']), num_classes=30)

In [None]:
# Load the test images exactly the way the training/validation images were
# loaded. Three fixes:
#   * the folder was misspelled 'iamges/'; the other loaders use 'images/'
#   * images were appended to `imageval` instead of `imagetest`, leaving
#     `imagetest` empty
#   * normalize_image() was applied only here; the training and validation
#     images are fed to the model un-normalized, so test inputs must match
imagetest = []
for i in pathtest:
    img = resize_image('images/'+i,222)
    imagetest.append(img)

In [None]:
# Stack the list of per-image arrays into a single NumPy array.
imagetest = np.array(imagetest)

In [None]:
# Encode the cleaned test titles with the tokenizer fitted on the training
# titles and pad them at the end to the training sequence length.
Xtest = pad_sequences(
    t.texts_to_sequences(xtest),
    maxlen=maxlen,
    padding='post',
)