In [2]:
# Imports
import pyarrow.parquet as pq
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import cv2, os, gc, datetime, json

# os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

import tensorflow as tf
# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all
# up front. Iterating (rather than indexing [0]) keeps this cell working on a
# CPU-only machine, where the device list is empty, and applies the same
# setting to every visible GPU so the devices do not end up with differing
# memory-growth settings.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu_device in gpu_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

# Keras imports
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model, load_model, model_from_json
from tensorflow.keras.models import clone_model
from tensorflow.keras.layers import Dense, Conv2D, Flatten, GlobalAveragePooling2D, MaxPool2D, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
from tensorflow.keras.applications.resnet_v2 import ResNet50V2, ResNet152V2
from tensorflow.keras.applications.resnet import ResNet50, ResNet152
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.nasnet import NASNetLarge, NASNetMobile
from tensorflow.keras.metrics import Accuracy, Precision, Recall
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, TensorBoard, ModelCheckpoint
from tensorflow.keras.optimizers import Adam, SGD, RMSprop, Adadelta, Nadam, Adagrad

In [3]:
# --- Notebook configuration -------------------------------------------------

# Directories holding the .jpg images.
train_path = 'data/images/train/'
test_path = 'data/images/test/'

# HEIGHT / WIDTH are not referenced by the visible cells
# (presumably the raw image dimensions -- TODO confirm).
HEIGHT, WIDTH = 137, 236
# Side length (pixels) images are resized to before being fed to the model.
SIZE = 224
BATCH_SIZE = 128

# The architecture config (.cfg) and the weights (.h5) share one file stem.
_checkpoint_stem = "saved_model/CustomResNet_4_19-01-2020_08:47:03"
MODEL_PATH = f"{_checkpoint_stem}.cfg"
WEIGHTS_PATH = f"{_checkpoint_stem}.h5"

In [4]:
# Load the CSV that drives prediction; later cells read its 'image_id' column
# and write a 'target' column back into this frame.
# NOTE(review): the variable is named "test_csv_path" but points at train.csv
# -- confirm which file is intended before producing a real submission.
test_csv_path = "./data/train.csv"
df = pd.read_csv(test_csv_path)

In [5]:
# Rebuild the network from its JSON architecture config, then restore the
# trained weights from the matching .h5 file.
with open(MODEL_PATH, 'r') as config_file:
    model_config = json.load(config_file)

model = Model.from_config(model_config)
model.load_weights(WEIGHTS_PATH)
print("Cheers! Model Loaded!")

Cheers! Model Loaded!


In [6]:
# Model Summary: print the layer-by-layer architecture of the loaded model
# (layer types, output shapes, parameter counts and connectivity).
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 1) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 224, 224, 32) 320         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 224, 224, 32) 128         conv2d[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu (LeakyReLU)         (None, 224, 224, 32) 0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [28]:
%%time
df['target'] = 1
for index in tqdm(range(0, len(df), 3)):
    filename = df.iloc[index]['image_id']
    path = os.path.join(train_path, f"{filename}.jpg")
    image = cv2.resize(cv2.imread(path, 0), (SIZE, SIZE), cv2.INTER_AREA)/255
    image = np.expand_dims(image, axis=2)
    root, vowel, consonant = model.predict(np.expand_dims(image, axis=0))
    i = index
    df.set_value(i, 'target', np.argmax(consonant))
    df.set_value(i+1, 'target', np.argmax(root))
    df.set_value(i+2, 'target', np.argmax(vowel))

HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


CPU times: user 152 ms, sys: 20.1 ms, total: 173 ms
Wall time: 164 ms


  if __name__ == '__main__':
  # Remove the CWD from sys.path while we load stuff.
  # This is added back by InteractiveShellApp.init_path()


In [14]:
%%time

BATCH_SIZE = 2048

target_preds = list()
im_paths = list()
for index in tqdm(range(0, len(df), 3)):
    filename = df.iloc[index]['image_id']
    path = os.path.join(train_path, f"{filename}.jpg")
    im_paths.append(path)

temp_list = list()
for im_path in tqdm(im_paths):
    image = cv2.resize(cv2.imread(im_path, 0), (SIZE, SIZE), cv2.INTER_AREA)/255
    image = np.expand_dims(image, axis=2)
    temp_list.append(image)
    
    if len(temp_list) == BATCH_SIZE:
        image_batch = np.array(temp_list)
        roots, vowels, consonants = model.predict(image_batch)
        
        for consonant, root, vowel in zip(consonants, roots, vowels):
            target_preds.extend([np.argmax(consonant), np.argmax(root), np.argmax(vowel)])
    
        temp_list = list()

image_batch = np.array(temp_list)
roots, vowels, consonants = model.predict(image_batch)

for consonant, root, vowel in zip(consonants, roots, vowels):
    target_preds.extend([np.argmax(consonant), np.argmax(root), np.argmax(vowel)])
    
df['target'] = target_preds[:-1]

HBox(children=(FloatProgress(value=0.0, max=66947.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=66947.0), HTML(value='')))


CPU times: user 6h 1min 22s, sys: 24min 41s, total: 6h 26min 4s
Wall time: 31min 53s


In [61]:
# Remove the helper columns in place before the frame is written out.
# Not idempotent: running this cell a second time raises KeyError because
# the columns are already gone.
df.drop(columns=['image_id', 'component'], inplace=True)

In [63]:
# Write the remaining columns to submissions.csv in the working directory,
# omitting the DataFrame index.
df.to_csv('submissions.csv', index=False)