# Problem statement:
- Classify whether an image shows a period property or a modern property in the UK

# Modelling using VGG16
- Total number of images: 491
- Images of period buildings: 241
- Images of modern buildings: 250

Model parameters:
- Dense layer: 128 neurons
- Dropout: 0.5
- Early Stopping: patience=5

Model results:
- Accuracy: 82%

In [None]:
# Mount Google Drive at /content/drive/ (Colab only); the dataset paths used
# later in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
! pip install tensorflow

In [None]:
!pip install pandas
!pip install numpy
!pip install matplotlib

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import os

In [None]:
import pickle

In [None]:

from math import ceil
from PIL import Image

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [None]:
from skimage import io
from skimage import color
from skimage.transform import rescale, resize, downscale_local_mean


In [None]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# List a folder and return the full path of every entry inside it

def load_files(path):
    """Return the paths of all entries found directly inside ``path``.

    Args:
        path: Directory to list, with or without a trailing separator.

    Returns:
        A list of strings, one per directory entry, each joined onto ``path``.
        Entries are sorted by name so repeated runs see the same order
        (``os.listdir`` itself returns entries in arbitrary order).

    Raises:
        OSError: If ``path`` cannot be listed (propagated from ``os.listdir``).
    """
    # os.path.join only inserts a separator when one is missing, so callers
    # that already pass a trailing "/" get exactly the same paths as before.
    return [os.path.join(path, name) for name in sorted(os.listdir(path))]

In [None]:
# Read image files, resize them, and stack them into one numpy ndarray

def load_array(image_files, min_size):
    """Load images, resize each to a square, and stack them into one array.

    Args:
        image_files: Iterable of image file paths passed to ``io.imread``.
        min_size: Side length, in pixels, of the square each image is resized to.

    Returns:
        An ndarray whose first axis indexes the successfully loaded images, in
        input order. An empty 1-D array is returned when nothing was loaded.
        Files that fail to load, or whose resized shape differs from the first
        loaded image, are reported on stdout and skipped, so the result can
        have fewer rows than ``image_files`` has entries.
    """
    # Collect in a list and convert once at the end; growing an ndarray with
    # np.append inside the loop copies every previously loaded image each time.
    loaded = []
    for image_file in image_files:
        try:
            img = io.imread(image_file)
            img_resized = resize(img, (min_size, min_size), anti_aliasing=True)
        except Exception as err:
            # Exception (not a bare except) so Ctrl-C / SystemExit still stop the run.
            print("image error: ", image_file, "-", err)
            continue

        # All rows must share one shape to be stacked (e.g. a greyscale or
        # RGBA file among RGB ones cannot be combined with the others).
        if loaded and img_resized.shape != loaded[0].shape:
            print("image error: ", image_file, "-",
                  f"shape {img_resized.shape} differs from {loaded[0].shape}")
            continue

        loaded.append(img_resized)

    if not loaded:
        return np.array([])
    return np.array(loaded)



In [None]:
# For reproducibility: seed NumPy's global random number generator.
# NOTE(review): only NumPy is seeded here; no TensorFlow/Keras seed is set in
# the visible cells — confirm whether model training needs one as well.
np.random.seed(42)

In [None]:
# Google Drive (Colab) locations of the period-property photos
path = '/content/drive/MyDrive/Capstone'
image_path = f"{path}/images/old_samples/"
image_path2 = f"{path}/images/old_interior/"

# Side length (pixels) that every photo is resized to when it is loaded
min_size = 400

# First batch: old / period buildings
image_files = load_files(image_path)

# Second batch: period-building photos from the old_interior folder
image_files2 = load_files(image_path2)

# Merge both batches into a single list, first batch first
image_files = [*image_files, *image_files2]

print(f"number of image_files = {len(image_files)}")
print(f"min_size = {min_size}")

In [None]:
# Record the path of every period-property image in a one-column DataFrame;
# the column is named "image_link".
df_file_info = pd.DataFrame(image_files)
df_file_info.columns = ["image_link"]
# Print the first five rows as a quick sanity check of the paths
print(f"df_file_info = {df_file_info.head(5)}")

In [None]:
# Load and resize every period-property photo into one array
X_old = load_array(image_files, min_size)

# load_array skips files it cannot read, while the labels and image links are
# built from image_files. Stop here if anything was dropped, instead of
# letting images, labels and links drift out of step.
if X_old.shape[0] != len(image_files):
    raise ValueError(
        f"loaded {X_old.shape[0]} of {len(image_files)} old-building images; "
        "fix or remove the files reported as 'image error' above"
    )

# y value is zero for old buildings (one label per file, as a column vector)
y_old = np.zeros((len(image_files), 1))

print(f"X_old shape = {X_old.shape}")
print(f"y_old shape = {y_old.shape}")

In [None]:
# Label table for the period photos: the zero labels plus, matched on the
# row index, the file path each label belongs to.
df_y_old = pd.DataFrame(y_old, columns=["label"]).assign(
    image_link=df_file_info["image_link"]
)
print(df_y_old.shape)

In [None]:
# Photos of modern buildings

# Google Drive (Colab) locations of the modern-property photos
path = '/content/drive/MyDrive/Capstone'
image_path = f"{path}/images/modern_samples/"
image_path2 = f"{path}/images/modern_exterior/"

# Side length (pixels) that every photo is resized to when it is loaded
min_size = 400

# First batch: modern buildings
image_files = load_files(image_path)

# Second batch: modern-building photos from the modern_exterior folder
image_files2 = load_files(image_path2)

# Merge both batches into a single list, first batch first
image_files = [*image_files, *image_files2]

print(f"number of image_files = {len(image_files)}")
print(f"min_size = {min_size}")

In [None]:
# Record the path of every modern-property image in a one-column DataFrame;
# the column is named "image_link". This rebinds df_file_info, which held the
# period-property paths before this cell.
df_file_info = pd.DataFrame(image_files)
df_file_info.columns = ["image_link"]
# Print the first five rows as a quick sanity check of the paths
print(f"df_file_info = {df_file_info.head(5)}")

In [None]:
# Load and resize every modern-property photo into one array
X_modern = load_array(image_files, min_size)

# load_array skips files it cannot read, while the labels and image links are
# built from image_files. Stop here if anything was dropped, instead of
# letting images, labels and links drift out of step.
if X_modern.shape[0] != len(image_files):
    raise ValueError(
        f"loaded {X_modern.shape[0]} of {len(image_files)} modern-building images; "
        "fix or remove the files reported as 'image error' above"
    )

# y value is one for modern buildings (one label per file, as a column vector)
y_modern = np.ones((len(image_files), 1))

print(f"X_modern shape = {X_modern.shape}")
print(f"y_modern shape = {y_modern.shape}")

In [None]:
# Label table for the modern photos: the one labels plus, matched on the
# row index, the file path each label belongs to.
df_y_modern = pd.DataFrame(y_modern, columns=["label"]).assign(
    image_link=df_file_info["image_link"]
)
print(df_y_modern.shape)

# Full image set: period photos first, modern photos after, along axis 0
X = np.append(
    X_old,
    X_modern,
    axis=0,
)
X.shape

In [None]:
# Stack the period and modern label tables (same order as X) and renumber
# the rows 0..n-1 in the same step.
df_y = pd.concat([df_y_old, df_y_modern], ignore_index=True)

# Keep the row position as an explicit column
df_y["id"] = df_y.index

print(df_y.shape)
df_y.tail(10)

In [None]:
# Split images and their label rows together, stratified on the label column
# so both classes keep the same proportions in each part. No test_size or
# random_state is passed, so the library defaults apply.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    df_y,
    stratify=df_y["label"],
)
print(y_train.shape)

In [None]:
# Baseline accuracy - 50%
# Class counts in the training split; the two classes are close to balanced
# (241 period vs 250 modern images overall), hence the ~50% baseline.
y_train['label'].value_counts()

In [None]:
# Size of the test label table (rows, columns)
y_test.shape

In [None]:
# Ids of the first ten test rows; "id" is the row position in df_y before the split
y_test['id'][:10]

In [None]:
# Display the first training image as a visual sanity check
plt.imshow(X_train[0])

In [None]:
# Standard scaling is skipped: the "_ss" names simply refer to the unscaled
# train and test arrays so the later cells can keep using them.
X_train_ss, X_test_ss = X_train, X_test

In [None]:
# NOTE(review): BATCH_SIZE is not referenced by the model.fit call in this
# notebook, which passes batch_size=32 — confirm which value is intended.
BATCH_SIZE = 64

# Take the network input size from min_size, the side length the images were
# resized to when loaded, so the two cannot drift apart if min_size changes.
IMG_SIZE = (min_size, min_size)

# Create the base model from the pre-trained model VGG16:
# ImageNet weights, without the original fully connected classifier on top.
# The trailing 3 is the channel count — assumes the loaded images are
# 3-channel (RGB); TODO confirm against X.shape.
IMG_SHAPE = IMG_SIZE + (3,)
input_model = VGG16(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')

In [None]:
# Freeze the pre-trained VGG16 base so only the layers added on top are trained
input_model.trainable = False

In [None]:
# New classifier head on top of the VGG16 base:
# flatten -> Dense(128, relu) -> Dropout(0.5) -> Dense(1, sigmoid)
base_features = input_model.layers[-1].output  # output of the base's last layer

flat1 = Flatten()(base_features)
class1 = Dense(128, activation='relu')(flat1)
class2 = Dropout(0.5)(class1)

# A single sigmoid unit gives the probability of label 1 (modern)
output = Dense(1, activation='sigmoid')(class2)

# Full model: the VGG16 inputs through to the new sigmoid output
model = Model(
    inputs=input_model.inputs,
    outputs=output,
)

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only adds a stray "None" line after the table.
model.summary()

In [None]:
# Early stopping: watch the validation loss (lower is better) and stop once
# it has gone 5 epochs without improving; verbose=1 reports when that happens.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
    verbose=1,
)

In [None]:
# Binary classification set-up: cross-entropy loss on the sigmoid output,
# Adam optimizer, accuracy reported during training.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train on the training split for at most 30 epochs in batches of 32; the
# early-stopping callback may end training sooner.
# NOTE(review): the test split doubles as the validation data that drives
# early stopping, so accuracy on it is not measured on unseen data —
# confirm this is intended.
history = model.fit(
    X_train_ss,
    y_train['label'],
    validation_data=(X_test_ss, y_test['label']),
    batch_size=32,
    epochs=30,
    callbacks=[es],
    verbose=1,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30

In [None]:
# Tag identifying this model variant; it names the sub-folder the model is saved to
model_tag = "vgg16_a"

# Save under <path>/models/<model_tag>/. The original call passed `path`,
# which wrote the model into the project root and left model_path unused.
# NOTE(review): some Keras versions require a file name ending in ".keras" or
# ".h5" for model.save — confirm against the installed version.
model_path = path + '/models/' + model_tag + '/'
model.save(model_path)