In [20]:
import numpy as np
import os
import pickle
from tqdm import tqdm
import keras
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.layers import Input, Dense, add, LSTM, Embedding, Dropout, Conv2D, MaxPooling2D, BatchNormalization, Flatten
from tensorflow.keras.models import Model, Sequential
import tensorflow as tf

In [21]:
base_dir = r'D:\University Files\Assignments\7th Semester\Machine Learning\Project\VizWiz Dataset'
train_img_dir = r'D:\University Files\Assignments\7th Semester\Machine Learning\Project\VizWiz Dataset\train'
val_img_dir = r'D:\University Files\Assignments\7th Semester\Machine Learning\Project\VizWiz Dataset\val'
test_img_dir = r'D:\University Files\Assignments\7th Semester\Machine Learning\Project\VizWiz Dataset\test'
train_annot_dir = r'D:\University Files\Assignments\7th Semester\Machine Learning\Project\VizWiz Dataset\annotations\train.json'
val_annot_dir = r'D:\University Files\Assignments\7th Semester\Machine Learning\Project\VizWiz Dataset\annotations\val.json'
test_annot_dir = r'D:\University Files\Assignments\7th Semester\Machine Learning\Project\VizWiz Dataset\annotations\test.json'

In [22]:
import json

with open(train_annot_dir, 'r') as file:
    train_dict = json.load(file)

with open(val_annot_dir, 'r') as file:
    val_dict = json.load(file)

with open(test_annot_dir, 'r') as file:
    test_dict = json.load(file)

In [14]:
train_img_mapping = {}

for indiv_dict in train_dict['images']:
    img_name = indiv_dict['file_name']
    img_id = indiv_dict['id']
    train_img_mapping[img_name] = img_id

In [23]:
val_img_mapping = {}

for indiv_dict in val_dict['images']:
    img_name = indiv_dict['file_name']
    img_id = indiv_dict['id']
    val_img_mapping[img_name] = img_id

Initial Convolutional Neural Network

In [24]:
HEIGHT = 224
WIDTH = 224

shape = (HEIGHT, WIDTH, 3)

def define_model(neurons, dense_layers, bn, dropouts):
  model = Sequential()

  for i, nodes in enumerate(neurons):
    if i == 0:
      model.add(
          Conv2D(nodes, (3, 3), input_shape=shape, activation='relu'))
      model.add(MaxPooling2D(pool_size=(2, 2)))
      if bn:
        model.add(BatchNormalization())
    else:
      model.add(Conv2D(nodes, (3, 3), activation='relu'))
      model.add(MaxPooling2D(pool_size=(2, 2)))

  # model.add(Flatten())

  # for i, nodes in enumerate(dense_layers):
  #   model.add(Dense(nodes, activation='relu'))
  #   model.add(Dropout(dropouts[i]))

  # model.add(Dense(1, activation='sigmoid'))
  # model.compile(loss="binary_crossentropy",
  #               optimizer='adam', metrics=["accuracy"])
  return model

In [27]:
model = define_model([32, 64, 128, 128], [256], True, [0.2])
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_17 (Conv2D)          (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d_17 (MaxPoolin  (None, 111, 111, 32)     0         
 g2D)                                                            
                                                                 
 batch_normalization_4 (Batc  (None, 111, 111, 32)     128       
 hNormalization)                                                 
                                                                 
 conv2d_18 (Conv2D)          (None, 109, 109, 64)      18496     
                                                                 
 max_pooling2d_18 (MaxPoolin  (None, 54, 54, 64)       0         
 g2D)                                                            
                                                      

In [None]:
es = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    verbose=1,
    mode='max'
)

cp = tf.keras.callbacks.ModelCheckpoint(
    'best_aug.h5',
    monitor='val_accuracy',
    mode='max',
    verbose=1,
    save_best_only=True
)

history = model.fit(X_train, y_train, validation_data=(
    X_val, y_val), batch_size=132, epochs=30, callbacks=[es, cp], verbose=2)

In [17]:
def get_features(model, split_set, set_dict):
    image_features = {}
    directory = os.path.join(base_dir, split_set)

    for img_name in tqdm(os.listdir(directory)):
        # Load an image
        img_path = os.path.join(directory, img_name)
        img = load_img(img_path, target_size=(HEIGHT, WIDTH))
        # Convert image into numpy pixel values
        img = img_to_array(img)
        # Reshape the data for the model
        img = img.reshape((1, img.shape[0], img.shape[1], img.shape[2]))
        # Preprocess
        img = preprocess_input(img)
        # Extract features
        feature = model.predict(img, verbose=0)
        # Map img_id with its features
        img_id = set_dict[img_name]
        image_features[img_id] = feature
    return image_features

In [18]:
model = define_model([32, 64, 128, 128], [], True, [])
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_8 (Conv2D)           (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 111, 111, 32)     0         
 2D)                                                             
                                                                 
 batch_normalization_2 (Batc  (None, 111, 111, 32)     128       
 hNormalization)                                                 
                                                                 
 conv2d_9 (Conv2D)           (None, 109, 109, 64)      18496     
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 54, 54, 64)       0         
 2D)                                                             
                                                      

In [19]:
train_img_features = get_features(model, 'train', train_img_mapping)
val_img_features = get_features(model, 'val', val_img_mapping)

 98%|█████████▊| 23431/23954 [39:36<00:53,  9.86it/s] 


KeyError: 'VizWiz_train_00023431.jpg'