In [1]:
import numpy as np
import pandas as pd


In [2]:
def load_df(path):
    """Load the training CSV and pull out its image-id and label columns.

    Parameters
    ----------
    path : str or path-like
        CSV file handed straight to ``pd.read_csv``; it must contain the
        columns ``id`` and ``landmark_id``.

    Returns
    -------
    tuple
        ``(frame, ids, labels)`` -- the full DataFrame, its ``id`` column and
        its ``landmark_id`` column, in that order.
    """
    frame = pd.read_csv(path)
    ids, labels = frame['id'], frame['landmark_id']
    return frame, ids, labels

# Training CSV; a relative path, so it is resolved against the working directory.
path = 'train_200.csv'

# train_df is the full frame, X its 'id' column and y its 'landmark_id' column.
train_df, X, y = load_df(path)

In [3]:
# Count of distinct landmark ids in the training frame; used as the width of
# the softmax output layer and of the one-hot label vectors.
NUM_CLASSES = len(train_df['landmark_id'].unique())
NUM_CLASSES

1066

In [4]:
# Give every landmark id a contiguous class index, numbered in the order
# the ids come back from ``unique()``:
#   {landmark_id: class}
#   {995: 0, 12345: 1, ...}
landmarks = train_df['landmark_id'].unique()
landmark_to_idx = {landmark: position for position, landmark in enumerate(landmarks)}

In [5]:
from sklearn.model_selection import StratifiedShuffleSplit

# Stage 1: stratified hold-out of 22% of the rows (validation + test together).
# n_splits=1 means the splitter yields exactly one (train, held-out) index pair,
# so it is pulled out with next() rather than a loop.
split_rule = StratifiedShuffleSplit(n_splits=1, test_size=0.22, random_state=9)
train_id, test_id = next(split_rule.split(X, y))
X_train, y_train = X.iloc[train_id], y.iloc[train_id]
X_val_test, y_val_test = X.iloc[test_id], y.iloc[test_id]

# Stage 2: stratified 50/50 cut of the held-out rows into validation and test.
split_rule2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=9)
train_id, test_id = next(split_rule2.split(X_val_test, y_val_test))
X_valid, y_valid = X_val_test.iloc[train_id], y_val_test.iloc[train_id]
X_test, y_test = X_val_test.iloc[test_id], y_val_test.iloc[test_id]

In [6]:
# Report the shape of every partition, one "<name> shape:  <shape>" line each.
for label, part in (
    ('X_train', X_train),
    ('y_train', y_train),
    ('X_valid', X_valid),
    ('y_valid', y_valid),
    ('X_test', X_test),
    ('y_test', y_test),
):
    print(label + ' shape: ', part.shape)

X_train shape:  (630286,)
y_train shape:  (630286,)
X_valid shape:  (88887,)
y_valid shape:  (88887,)
X_test shape:  (88887,)
y_test shape:  (88887,)


In [7]:
# Pair every image id with its landmark id: [(image_id, landmark_id), ...].
# zip() iterates the Series directly, so no intermediate lists are needed.
valid_id_list = list(zip(X_valid, y_valid))
train_id_list = list(zip(X_train, y_train))

In [8]:
import os
import random
import shutil
import tarfile
import cv2
import numpy as np
#from keras.utils import Sequence
from tensorflow.python.keras.utils.data_utils import Sequence
#import keras



class DataGen(Sequence):
    """Keras ``Sequence`` that yields randomly sampled batches of landmark images.

    Every batch is an independent ``random.sample`` draw from ``id_list``; the
    batch index passed by Keras is ignored, so batches are random draws rather
    than a fixed partition of the data.  Images are read from
    ``train/<landmark_id>/<image_id>.jpg`` and converted from BGR to RGB; no
    resizing or pixel scaling is applied here.

    NOTE(review): ``np.array`` can only stack the images into one 4-D batch if
    every file has the same height and width -- presumably the files were
    resized offline to the network input size; confirm.
    NOTE(review): sampling uses the module-level ``random`` state.  With
    ``use_multiprocessing=True`` forked workers may start from the same state
    and emit identical batches -- verify against the Keras version in use.

    Parameters
    ----------
    id_list : list of (image_id, landmark_id)
        Pool of samples to draw from.
    landmark_to_idx : dict
        Maps a landmark id to its class index (column of the one-hot label).
    batch_size : int, default 128
        Number of ids drawn per batch.  A batch can come back smaller when
        images are missing/unreadable or when ``id_list`` is shorter.
    verbose : int, default 1
        Stored on the instance; not otherwise used by this class.
    num_classes : int, optional
        Width of the one-hot label matrix.  Defaults to
        ``len(landmark_to_idx)`` so the generator does not depend on a
        notebook-level global.
    steps_per_epoch : int or None, default 10
        Value reported by ``__len__`` (batches per epoch).  ``None`` derives it
        from the data as ``len(id_list) // batch_size`` (at least 1).
    """

    def __init__(self, id_list, landmark_to_idx, batch_size=128, verbose=1,
                 num_classes=None, steps_per_epoch=10):
        self.batch_size = batch_size
        self.id_list = id_list
        self.landmark_to_idx = landmark_to_idx
        self.verbose = verbose
        self.num_classes = len(landmark_to_idx) if num_classes is None else num_classes
        self.steps_per_epoch = steps_per_epoch

    def __getitem__(self, index):
        """Return one batch ``(x, y)``.

        ``x`` is ``np.array`` of the successfully loaded RGB images and ``y`` is
        a float one-hot matrix of shape ``(len(x), num_classes)``.  ``index`` is
        accepted for the ``Sequence`` protocol but not used.
        """
        # random.sample raises ValueError when asked for more items than exist.
        draw = min(self.batch_size, len(self.id_list))
        batch_id_list = random.sample(self.id_list, draw)

        images = []
        label_idx = []
        for ids in batch_id_list:
            img_id, ldmk_id = ids[0], ids[1]
            path = 'train/' + str(ldmk_id) + '/' + img_id + '.jpg'

            # Resolve the label first so an unknown landmark can never leave
            # an image in the batch without a matching label row.
            if ldmk_id not in self.landmark_to_idx:
                continue

            # cv2.imread signals a missing or undecodable file by returning
            # None instead of raising, so test for it explicitly.
            im = cv2.imread(path)
            if im is None or im.size == 0:
                continue

            # Stay best-effort on conversion failures, but only swallow
            # OpenCV's own error type (a bare except would also eat Ctrl-C).
            try:
                im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            except cv2.error:
                continue

            images.append(im)
            label_idx.append(self.landmark_to_idx[ldmk_id])

        x = np.array(images)
        y = np.zeros((len(images), self.num_classes))
        # Vectorised one-hot fill: row i gets a 1 in column label_idx[i].
        y[np.arange(len(label_idx)), np.asarray(label_idx, dtype=np.intp)] = 1.

        return x, y

    def on_epoch_end(self):
        """No per-epoch bookkeeping: every batch is already a fresh random draw."""
        return

    def __len__(self):
        """Number of batches Keras should request per epoch."""
        if self.steps_per_epoch is None:
            return max(1, len(self.id_list) // self.batch_size)
        return self.steps_per_epoch

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [9]:
# One batch generator per partition; both share the same landmark -> class map
# and use DataGen's default batch size.
validation_generator = DataGen(id_list=valid_id_list, landmark_to_idx=landmark_to_idx)
training_generator = DataGen(id_list=train_id_list, landmark_to_idx=landmark_to_idx)

In [10]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Input, Flatten
from tensorflow.keras.optimizers import Adam
# Record the TensorFlow version this notebook is executed with.
print(tf.__version__)

2.0.0-alpha0


In [11]:
from tensorflow.keras.applications.vgg16 import VGG16

# ImageNet-initialised VGG16 without its dense classifier head
# (include_top=False), built for 128x128 inputs with 3 channels.
vgg16 = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(128, 128, 3),
)

In [36]:
# Freeze the first five layers of the VGG16 base; all later layers stay trainable.
for layer in vgg16.layers[:5]:
    layer.trainable = False

# Classification head: global average pooling over the final feature map,
# followed by a softmax over the NUM_CLASSES landmark classes.
x = GlobalAveragePooling2D()(vgg16.output)
predictions = Dense(NUM_CLASSES, activation="softmax")(x)

model = Model(inputs=vgg16.input, outputs=predictions)

# One-hot targets -> categorical cross-entropy; 'adam' uses the optimizer's defaults.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['categorical_accuracy'],
)

model.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 128, 128, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 128, 128, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 128, 128, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 64, 64, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 64, 64, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 64, 64, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 32, 32, 128)       0   

In [37]:
# Train the VGG16-based classifier for 80 epochs; batches come from the
# DataGen sequences, loaded by 8 workers with multiprocessing enabled.
model.fit_generator(
    generator=training_generator,
    validation_data=validation_generator,
    epochs=80,
    workers=8,
    use_multiprocessing=True,
    verbose=1,
)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80


Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80

error: OpenCV(4.1.1) /io/opencv/modules/imgproc/src/color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'


In [12]:
from tensorflow.keras.applications.resnet50 import ResNet50

# ImageNet-initialised ResNet50 without its classifier head
# (include_top=False), built for 128x128 inputs with 3 channels.
resnet = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(128, 128, 3),
)



In [13]:
# Mark the LAST five ResNet50 layers as non-trainable; every earlier layer
# keeps training.
# NOTE(review): the VGG16 cell freezes the first five layers (``layers[:5]``)
# instead -- confirm that freezing the tail of the network is intended here.
for layer in resnet.layers[-5:]:
    layer.trainable = False

# Head: flatten the final feature map, then a softmax over NUM_CLASSES classes.
flat = Flatten()(resnet.output)
out = Dense(NUM_CLASSES, activation='softmax')(flat)

model2 = Model(inputs=resnet.input, outputs=out)
model2.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 128, 128, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 134, 134, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 64, 64, 64)   9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalizationV1) (None, 64, 64, 64)   256         conv1[0][0]                      
______________________________________________________________________________________________

In [14]:
# Adam with a step size of 1e-4 (passed positionally as the first argument).
opt = Adam(0.0001)

# One-hot targets -> categorical cross-entropy, tracked with categorical accuracy.
model2.compile(
    optimizer=opt,
    loss="categorical_crossentropy",
    metrics=['categorical_accuracy'],
)

# Train the ResNet50-based classifier with the same generators and schedule
# as the VGG16 run: 80 epochs, 8 workers, multiprocessing enabled.
model2.fit_generator(
    generator=training_generator,
    validation_data=validation_generator,
    epochs=80,
    workers=8,
    use_multiprocessing=True,
    verbose=1,
)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


<tensorflow.python.keras.callbacks.History at 0x7f9f427c1910>