Some imports...

In [1]:
from PIL import Image
import glob
import numpy as np
import pickle as p

import keras
from keras import layers
from keras.models import Model
from keras.applications.resnet50 import preprocess_input
from keras import applications
from keras import optimizers

Using TensorFlow backend.


Load, preprocess and shuffle data...

In [2]:
# Collect the image paths of both classes. Sorting makes the file order
# independent of the (unspecified) order in which glob returns matches.
files_clean = sorted(glob.glob('Datasets/OkSignLanguage/clean_pics/*.jpg'))
files_garbage = sorted(glob.glob('Datasets/OkSignLanguage/garbage_pics/*.jpg'))


def _load_images(paths):
    '''Read every image in `paths` into one array of shape (len(paths), 224, 224, 3).

    Each file is opened in a `with` block so its handle is released right after
    the pixels are copied, and converted to RGB so grayscale/RGBA files still
    yield three channels. Images are expected to be 224x224 already; any other
    size makes the assignment raise a ValueError.
    '''
    images = np.zeros([len(paths), 224, 224, 3])
    for idx, path in enumerate(paths):
        with Image.open(path) as img:
            images[idx] = np.asarray(img.convert('RGB'))
    return images


img_clean = _load_images(files_clean)
# Sized and filled from the garbage file list itself (previously the clean
# list's length was reused, which breaks when the two folders differ in size).
img_garbage = _load_images(files_garbage)

# Label 1 for clean pictures, 0 for garbage pictures, in the same order as `data`.
targets = np.concatenate((np.ones(len(files_clean)), np.zeros(len(files_garbage))), axis=0)
data = np.concatenate((img_clean, img_garbage), axis=0)

data = preprocess_input(data)

# Shuffle samples and labels with the same permutation so they stay aligned.
rnd_index = np.random.permutation(np.arange(len(targets)))
data = data[rnd_index]
targets = targets[rnd_index]

Defining the network. Here we can also add new architectures...

In [7]:
class BaseModel(Model):
    '''Binary classifier: frozen ImageNet-pretrained ResNet50 base plus a dense head.

    The head consists of one ReLU `Dense` layer per entry of `dim_fc_layer`
    followed by a single sigmoid output unit.
    '''
    def __init__(self, input_shape, dim_fc_layer):
        '''Build the network graph.

        Args:
            input_shape: shape of one input sample, passed both to ResNet50 and
                to the model's input layer.
            dim_fc_layer: sequence with the number of units of each fully
                connected layer placed between the ResNet50 features and the
                output unit (may be empty).
        '''
        resnet = applications.resnet50.ResNet50(
            include_top=False, weights='imagenet', input_shape=input_shape)

        # Freeze the pretrained base so that only the new head is trained.
        for layer in resnet.layers:
            layer.trainable = False

        input_layer = layers.Input(input_shape, name='Input')
        fc_layer_input = resnet(input_layer)

        fc_layer_input = layers.Flatten()(fc_layer_input)

        for i, units in enumerate(dim_fc_layer):
            fc_layer_input = layers.Dense(
                units=units,
                name='FullyConnected' + str(i),
                activation='relu'
                )(fc_layer_input)

        predictions = layers.Dense(
                units=1,
                name='OutputFC',
                activation='sigmoid'
                )(fc_layer_input)

        super().__init__(inputs=[input_layer], outputs=predictions)

    def compile(self, lr=0.001, optimizer=None, loss='binary_crossentropy', **kwargs):
        '''Compile the model; with no arguments uses Adam(lr) and binary cross-entropy.

        Args:
            lr: learning rate of the default Adam optimizer; ignored when an
                explicit `optimizer` is given.
            optimizer: optional optimizer to use instead of the default Adam.
            loss: loss passed to the parent `compile`.
            **kwargs: forwarded unchanged to the parent `compile`
                (e.g. `metrics=['accuracy']`).

        Returns:
            Whatever the parent class's `compile` returns.
        '''
        if optimizer is None:
            optimizer = optimizers.Adam(lr=lr)
        return super().compile(
            optimizer=optimizer,
            loss=loss,
            **kwargs
        )
        
        

Build and compile the model...

In [8]:
# Instantiate the network for the shape of a single sample, with one hidden
# fully connected layer of 1000 units, then compile it with the default
# learning rate.
model = BaseModel(input_shape=data[0].shape, dim_fc_layer=[1000])
model.compile(lr=0.001)

Training the model...

In [10]:
# Per-batch records for later plotting:
# [accuracy, batch targets, thresholded predictions, raw prediction scores]
history = []
N = len(targets)
batch_size = 5

# Walk the data in consecutive, non-overlapping batches. The end index is
# clamped to N so the final (possibly shorter) batch still contains the very
# last sample; the earlier `i == N - 1` check cut the tail one sample short.
for last_index in range(0, N, batch_size):
    i = min(last_index + batch_size, N)  # exclusive end of the current batch
    batch_data = data[last_index:i]
    batch_targets = targets[last_index:i]

    # Score the batch BEFORE fitting on it, so the accuracy is measured on
    # samples the model has not been trained on yet.
    pred_score = model.predict(batch_data)
    preds = np.reshape(pred_score > 0.5, (len(batch_targets)))

    acc = np.sum(preds == batch_targets) / len(batch_targets)
    print('Sample ' + str(last_index) + '-' + str(i-1) + ' | Accuracy: ' + str(acc))
    history.append([acc, batch_targets, preds, pred_score])

    # doing the actual training on the current batch
    model.fit(
        batch_data,
        batch_targets,
        epochs=1,
        batch_size=batch_size
    )

Sample 0-4 | Accuracy: 0.2
(5, 224, 224, 3)
(5, 1)
Epoch 1/1
Sample 5-9 | Accuracy: 0.4
(5, 224, 224, 3)
(5, 1)
Epoch 1/1
Sample 10-14 | Accuracy: 0.8
(5, 224, 224, 3)
(5, 1)
Epoch 1/1
Sample 15-19 | Accuracy: 0.6
(5, 224, 224, 3)
(5, 1)
Epoch 1/1


KeyboardInterrupt: 

In [9]:
# Print the layer-by-layer overview of the model (output shapes and parameter counts).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input (InputLayer)           (None, 224, 224, 3)       0         
_________________________________________________________________
resnet50 (Model)             (None, 1, 1, 2048)        23587712  
_________________________________________________________________
flatten_1 (Flatten)          (None, 2048)              0         
_________________________________________________________________
FullyConnected0 (Dense)      (None, 1000)              2049000   
_________________________________________________________________
OutputFC (Dense)             (None, 1)                 1001      
Total params: 25,637,713
Trainable params: 2,050,001
Non-trainable params: 23,587,712
_________________________________________________________________
