In [None]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
from google.colab import drive

# Attach Google Drive at the location the dataset paths in this notebook start with.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
# Shared input settings: images per batch and the square size every image is resized to.
batch_size = 32
image_height = image_width = 224

In [None]:
# Generator that multiplies every pixel value by 1/255, mapping 0..255 inputs into 0..1
datagen = ImageDataGenerator(rescale=1 / 255)

In [None]:
#Generating training dataset using the data generator

train_data_dir = "/content/drive/MyDrive/IndoorSceneClassification/data/train/"

# One sub-folder per class under train_data_dir; labels are one-hot ('categorical').
train = datagen.flow_from_directory(
    directory=train_data_dir,
    target_size=(image_height, image_width),
    batch_size=batch_size,
    class_mode='categorical')
print("The number of batches is: ", len(train))

# Index the generator only once: every train[0] access reads and decodes a full
# batch of images from Drive, so fetching it twice just to print two shapes
# doubles the I/O for no benefit.
train_batch_images, train_batch_labels = train[0]
print("Size of each batch is: ", train_batch_images.shape, train_batch_labels.shape)

Found 5359 images belonging to 67 classes.
The number of batches is:  168
Size of each batch is:  (32, 224, 224, 3) (32, 67)


In [None]:
#Generating testing dataset using the data generator

test_data_dir = "/content/drive/MyDrive/IndoorSceneClassification/data/test/"

# NOTE(review): flow_from_directory shuffles by default. That is harmless for
# aggregate metrics, but if predictions are later compared against test.classes
# the order will not line up -- consider shuffle=False in that case (confirm
# against how `test` is used further down).
test = datagen.flow_from_directory(
    directory=test_data_dir,
    target_size=(image_height, image_width),
    batch_size=batch_size,
    class_mode='categorical'
)
print("The number of batches is: ", len(test))

# Index the generator only once: every test[0] access reads and decodes a full
# batch of images from Drive, so fetching it twice just to print two shapes
# doubles the I/O for no benefit.
test_batch_images, test_batch_labels = test[0]
print("Size of each batch is: ", test_batch_images.shape, test_batch_labels.shape)

Found 1340 images belonging to 67 classes.
The number of batches is:  42
Size of each batch is:  (32, 224, 224, 3) (32, 67)


In [None]:
# tf.data datasets for fine tuning.
# They reuse train_data_dir / test_data_dir and the image_height / image_width /
# batch_size settings defined in the earlier cells, so both input pipelines always
# read the same folders with the same image size and batch size instead of
# relying on duplicated literals that can drift apart.

#Generating training data for fine tuning
train_data_fine_tuning = tf.keras.preprocessing.image_dataset_from_directory(
    train_data_dir,
    image_size=(image_height, image_width),
    label_mode='categorical',
    batch_size=batch_size)

#Generating testing data for fine tuning
test_data_fine_tuning = tf.keras.preprocessing.image_dataset_from_directory(
    test_data_dir,
    image_size=(image_height, image_width),
    label_mode='categorical',
    batch_size=batch_size)

Found 5359 files belonging to 67 classes.
Found 1340 files belonging to 67 classes.


In [None]:
# Show the class names of the fine-tuning training dataset as a single string
f"{train_data_fine_tuning.class_names}"

"['airport_inside', 'artstudio', 'auditorium', 'bakery', 'bar', 'bathroom', 'bedroom', 'bookstore', 'bowling', 'buffet', 'casino', 'children_room', 'church_inside', 'classroom', 'cloister', 'closet', 'clothingstore', 'computerroom', 'concert_hall', 'corridor', 'deli', 'dentaloffice', 'dining_room', 'elevator', 'fastfood_restaurant', 'florist', 'gameroom', 'garage', 'greenhouse', 'grocerystore', 'gym', 'hairsalon', 'hospitalroom', 'inside_bus', 'inside_subway', 'jewelleryshop', 'kindergarden', 'kitchen', 'laboratorywet', 'laundromat', 'library', 'livingroom', 'lobby', 'locker_room', 'mall', 'meeting_room', 'movietheater', 'museum', 'nursery', 'office', 'operating_room', 'pantry', 'poolinside', 'prisoncell', 'restaurant', 'restaurant_kitchen', 'shoeshop', 'stairscase', 'studiomusic', 'subway', 'toystore', 'trainstation', 'tv_studio', 'videostore', 'waitingroom', 'warehouse', 'winecellar']"

During testing, the owner of this project found that one file was not in a valid format for processing, so the code below this point locates the bad file's path. The author then removed that file from the folder structure.

In [None]:
#Generating a list of all file names

# Every entry has the form "<class_name>/<file_name>", i.e. a path relative to
# train_data_dir.
full_list_of_images = []
for class_name in train_data_fine_tuning.class_names:
  # os.path.join instead of plain string concatenation, so the folder lookup no
  # longer depends on train_data_dir ending with a trailing slash.
  class_dir = os.path.join(train_data_dir, class_name)
  full_list_of_images.extend(
      class_name + '/' + file_name for file_name in os.listdir(class_dir))

#This list contains a list of all training images
print(len(full_list_of_images))

5359


In [12]:
#Identifying bad files
# NOTE: the scan below is wrapped in a triple-quoted string, so it is NOT executed;
# the cell only evaluates that string and echoes it back as its output.
# To run the scan again, remove the surrounding triple quotes.
# When enabled, it joins each entry of full_list_of_images onto train_data_dir,
# reads the file and tries to decode it with TensorFlow; every path that raises
# an exception is printed together with the error and appended to bad_paths.
"""
bad_paths = []
for image_path in full_list_of_images:
  full_path = os.path.join(train_data_dir, image_path)
  try:
    img_bytes = tf.io.read_file(full_path)
    decoded_img = tf.io.decode_image(img_bytes)
  except Exception as e:
    print(f"Found bad path {full_path}...{e}")
    bad_paths.append(full_path)
"""

'\nbad_paths = []\nfor image_path in full_list_of_images:\n  full_path = os.path.join(train_data_dir, image_path)\n  try:\n    img_bytes = tf.io.read_file(full_path)\n    decoded_img = tf.io.decode_image(img_bytes)\n  except Exception as e:\n    print(f"Found bad path {full_path}...{e}")\n    bad_paths.append(full_path)\n'

In [13]:
#Removing bad file
# NOTE: the code below is wrapped in a triple-quoted string, so it is NOT executed;
# the cell only evaluates that string and echoes it back as its output.
# When enabled (triple quotes removed), it deletes the single hard-coded file with
# os.remove if that file exists, and prints whether it was deleted or not found.
# os.remove is permanent, so double-check the path before re-enabling.
"""
file_path_to_be_deleted = '/content/drive/MyDrive/IndoorSceneClassification/data/train/laundromat/Laundry_Room_bmp.jpg'
if os.path.exists(file_path_to_be_deleted):
  os.remove(file_path_to_be_deleted)
  print(f"File '{file_path_to_be_deleted}' has been deleted.")
else:
    print(f"File '{file_path_to_be_deleted}' does not exist.")
"""

'\nfile_path_to_be_deleted = \'/content/drive/MyDrive/IndoorSceneClassification/data/train/laundromat/Laundry_Room_bmp.jpg\'\nif os.path.exists(file_path_to_be_deleted):\n  os.remove(file_path_to_be_deleted)\n  print(f"File \'{file_path_to_be_deleted}\' has been deleted.")\nelse:\n    print(f"File \'{file_path_to_be_deleted}\' does not exist.")\n'