## Import libraries

In [7]:
import numpy as np
import os
import seaborn as sn
from sklearn.utils import shuffle           
import matplotlib.pyplot as plt             
import cv2                                 
from tqdm import tqdm
import pandas as pd

ModuleNotFoundError: No module named 'numpy'

## Data loading and data preparation

In [None]:
# Scene categories; each one is a sub-folder of data/train and data/test.
class_names = ["buildings", "forest", "glacier", "mountain", "sea", "street"]


def _count_files(root):
    """Return the number of files found anywhere below *root*.

    os.walk yields nothing for a missing directory, so the result is 0 then.
    """
    return sum(len(files) for _, _, files in os.walk(root))


# Per-class file counts, in the same order as class_names.
train_counts = np.array(
    [_count_files("data/train/" + class_name) for class_name in class_names]
)
test_counts = np.array(
    [_count_files("data/test/" + class_name) for class_name in class_names]
)

num_train = np.sum(train_counts)
num_test = np.sum(test_counts)
num_total = num_train + num_test
# Guard against an empty or missing dataset: 0 / 0 would produce NaN and
# round() would then fail with an unrelated-looking error.
test_percentage = round(num_test / num_total * 100) if num_total else 0
print("Train images: " + str(num_train))
print("Test images: " + str(num_test))
print("Test percentage: " + str(test_percentage) + "%")

# Scene categories; a category's position in this list is its integer label.
class_names = ["buildings", "forest", "glacier", "mountain", "sea", "street"]
nb_classes = len(class_names)
# Map each category name to its integer label (0 .. nb_classes - 1).
class_names_label = dict(zip(class_names, range(nb_classes)))

def load_images(folder_path, max_images=1000000, target_size=(150,150)):
    """Load the images of every class sub-folder of *folder_path*.

    Each sub-folder whose name is a key of ``class_names_label`` is read; its
    images are converted from BGR to RGB, resized to *target_size* and labelled
    with the integer stored in ``class_names_label``.

    Args:
        folder_path: Directory containing one sub-folder per class.
        max_images: Total image budget, split evenly across the classes
            (rounded down), so at most ``max_images // n_classes`` images are
            taken from each sub-folder.
        target_size: Size passed to ``cv2.resize``.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays with dtype ``float16`` and
        ``int8`` respectively. Pixel values are not rescaled here.
    """
    print("Loading " + folder_path)
    images = []
    labels = []
    # Split the budget across the known classes instead of a hard-coded 6.
    images_per_folder = int(max_images / len(class_names_label))  # rounds down

    # os.listdir order is arbitrary; sorting makes the loaded subset reproducible.
    for folder in sorted(os.listdir(folder_path)):
        if folder not in class_names_label:
            # Stray entries (hidden files, extra folders) are not classes.
            continue
        label = class_names_label[folder]
        class_dir = os.path.join(folder_path, folder)
        num_images_loaded_this_folder = 0
        # Iterate through each image in this folder
        for file in tqdm(sorted(os.listdir(class_dir))):
            # Check the limit before loading so a zero budget loads nothing.
            if num_images_loaded_this_folder >= images_per_folder:
                break
            # Open the image; cv2.imread returns None (it does not raise) when
            # the file is missing, unreadable or not an image.
            image = cv2.imread(os.path.join(class_dir, file))
            if image is None:
                continue
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, target_size)
            # Append the image and its corresponding label to the output
            images.append(image)
            labels.append(label)
            num_images_loaded_this_folder += 1

    print("Loaded " + folder_path)
    return np.array(images, dtype = 'float16'), np.array(labels, dtype = 'int8')

# Every image is resized to this size; the per-split budgets cap how many are read.
target_size = (75,75)
train_images, train_labels = load_images("data/train", target_size=target_size, max_images=7000)
test_images, test_labels = load_images("data/test", target_size=target_size, max_images=1750)

# Shuffle images and labels together with a fixed seed; the test split keeps its load order.
train_images, train_labels = shuffle(train_images, train_labels, random_state=3)

# Normalization (divide by 255; presumably maps 8-bit pixel values into [0, 1])
train_images, test_images = train_images / 255.0, test_images / 255.0

# Size of each split and the share of the test split in percent.
n_train, n_test = len(train_labels), len(test_labels)
test_percentage = (n_test/(n_train+n_test)) * 100

print("Image size:", target_size)
print("Train images: {}".format(n_train), "Test images: {}".format(n_test), sep="\n")
print("Test percentage: {}%".format(round(test_percentage)))

# Quick sanity check of array shapes and the first few labels.
print("train_images.shape", train_images.shape)
print("train_labels[:10]", train_labels[:10])
print("test_images.shape", test_images.shape)
print("test_labels[:10]", test_labels[:10])

## Multi-class Classification using Convolutional Neural Networks (CNNs)

A pre-trained ResNet50 model is used for multi-class image classification. The ResNet50 model is loaded without its top layer, and its layers are frozen to prevent them from being trained. A new sequential model is created on top of the base model, consisting of a flatten layer, a dense layer with ReLU activation, and a dense layer with softmax activation for the output. The model is compiled with the Adam optimizer and categorical cross-entropy loss. The training and test images are preprocessed by scaling their values between 0 and 1. The model is then trained on the training images and labels, with a batch size of 32, for 10 epochs. The test images and labels are used as validation data. After training, the model is saved for future use.

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from sklearn.utils import shuffle

# Load the ResNet50 model without the top layer. The input shape is taken from
# the loaded images so it always matches the size they were resized to.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=train_images.shape[1:])

# Freeze the base model's layers
base_model.trainable = False

# Create a new model on top of the base model
model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(256, activation='relu'))
# One output unit per scene class, consistent with the one-hot labels below.
model.add(Dense(nb_classes, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Data preprocessing
# NOTE: train_images and test_images were already divided by 255 during data
# preparation. Dividing again here would shrink the inputs to [0, 1/255] (and
# further on every re-run of this cell), so no rescaling is done at this point.

# Train the model; labels are one-hot encoded to match categorical cross-entropy.
model.fit(
    train_images,
    tf.keras.utils.to_categorical(train_labels, num_classes=nb_classes),
    batch_size=32,
    epochs=10,
    # NOTE(review): the test split doubles as validation data, so the reported
    # validation accuracy is not an independent held-out estimate.
    validation_data=(test_images, tf.keras.utils.to_categorical(test_labels, num_classes=nb_classes))
)

# Save the model
model.save('scene_classification_model.h5')