# UK Property Image Classification
### EDA model 2

---

#### Data:
Loading more images (150 per class, up from 100) to build a second model
Revised data set as of 13 Feb 2021 - 12 changes

#### Model:
Convolutional Neural Network (CNN)

#### Initial model results:
The model validation accuracy is 80%, compared to a base case of 50%. 200 images are used in the initial model training

#### Training model with more images:
Validation accuracy is 78%, similar to the score obtained with the smaller set of images. This is a satisfactory result, given that the additional interior images introduce more confusion between the classes.

### Importing libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
from math import ceil
from PIL import Image

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

from skimage import io
from skimage import color
from skimage.transform import rescale, resize, downscale_local_mean

import pickle

In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D

In [3]:
# For reproducibility: seed NumPy's global random number generator.
# NOTE(review): this does not seed TensorFlow/Keras, so weight initialisation
# and training presumably still vary between runs - confirm.
np.random.seed(42)

In [4]:
# Helper that lists a folder and returns the path of every entry in it

def load_files(path):
    """Return the paths of all entries directly inside ``path``.

    Entries are not filtered: sub-directories and non-image files are
    returned as well.

    Parameters
    ----------
    path : str
        Directory to list. A trailing separator is optional.

    Returns
    -------
    list of str
        ``path`` joined with each entry name, sorted by name so the order
        does not depend on the filesystem.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` (e.g. when ``path`` does not exist).
    """
    # os.path.join inserts the separator only when it is missing, so both
    # "dir" and "dir/" give "dir/name".
    return [os.path.join(path, name) for name in sorted(os.listdir(path))]

In [5]:
# feeding images into numpy ndarray

def load_array(image_files, min_size):
    """Read, resize and stack images into a single array.

    Parameters
    ----------
    image_files : iterable of str
        Paths passed one by one to ``io.imread``.
    min_size : int
        Every image is resized to ``(min_size, min_size)``.

    Returns
    -------
    numpy.ndarray
        The resized images stacked along a new first axis. An empty 1-D
        array is returned when no image could be loaded.

    Notes
    -----
    A file that cannot be read or resized, or whose resized shape differs
    from the first successfully loaded image, is reported with
    ``"image error: "`` and skipped. The result can therefore contain fewer
    rows than ``len(image_files)``.
    """
    images = []
    expected_shape = None
    for file in image_files:
        try:
            img = io.imread(file)
            img_resized = resize(img, (min_size, min_size), anti_aliasing=True)
        except Exception:
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate while unreadable files are still skipped.
            print("image error: ", file)
            continue

        if expected_shape is None:
            expected_shape = img_resized.shape
        elif img_resized.shape != expected_shape:
            # Shapes that differ from the first image cannot be stacked.
            print("image error: ", file)
            continue
        images.append(img_resized)

    if not images:
        return np.array([])
    # Stack once at the end; appending to an array inside the loop copies
    # the whole array on every iteration.
    return np.stack(images)



#### Load the training data - old buildings

In [6]:
# Collect the file paths for the "old / period building" class.

# first folder: the original old-building samples
image_path = "../images/old_samples/"
# second folder: interior shots of old buildings
image_path2 = "../images/old_interior/"

# one combined list, first folder followed by second folder
image_files2 = load_files(image_path2)
image_files = load_files(image_path) + image_files2

# edge length (in pixels) every image is resized to when it is loaded
min_size = 400

print(f"number of image_files = {len(image_files)}")
print(f"min_size = {min_size}")

number of image_files = 251
min_size = 400


In [7]:
# One-column table of the image paths, with a preview of the first five rows
df_file_info = pd.DataFrame(image_files, columns=["image_link"])
print(f"df_file_info = {df_file_info.head(5)}")

df_file_info =                                         image_link
0  ../images/old_samples/photo-14610337-AfvlL7.jpg
1  ../images/old_samples/photo-14613473-oJlAGk.jpg
2  ../images/old_samples/photo-14613600-uzDqQq.jpg
3  ../images/old_samples/photo-14614603-V4bv1O.jpg
4  ../images/old_samples/photo-14615502-8O2X6l.jpg


In [None]:
# Load and resize every old-building image into one array.
X_old = load_array(image_files, min_size)

# load_array skips files it cannot read, so it can return fewer rows than
# there are paths. The labels and image links below are built from
# image_files, so stop here rather than continue with misaligned data.
if X_old.shape[0] != len(image_files):
    raise ValueError(
        f"loaded {X_old.shape[0]} of {len(image_files)} old-building images; "
        "remove or fix the files reported above before continuing"
    )

# y value is zero for old buildings
y_old = np.zeros((len(image_files),1))

print(f"X_old shape = {X_old.shape}")
print(f"y_old shape = {y_old.shape}")



In [None]:
# Label table for the old-building class: the label plus the image path,
# matched on row index.
df_y_old = pd.DataFrame(y_old, columns=["label"]).assign(
    image_link=df_file_info["image_link"]
)
print(df_y_old.shape)

In [None]:
# Collect the file paths for the "modern building" class.

# first folder: the original modern-building samples
image_path = "../images/modern_samples/"
# second folder: a further batch of modern buildings
# NOTE(review): the old-building batch uses an "interior" folder while this
# one is named "exterior" - confirm this is the intended folder.
image_path2 = "../images/modern_exterior/"

# one combined list, first folder followed by second folder
image_files2 = load_files(image_path2)
image_files = load_files(image_path) + image_files2

# edge length (in pixels) every image is resized to when it is loaded
min_size = 400

print(f"number of image_files = {len(image_files)}")
print(f"min_size = {min_size}")

In [None]:
# One-column table of the image paths, with a preview of the first five rows
df_file_info = pd.DataFrame(image_files, columns=["image_link"])
print(f"df_file_info = {df_file_info.head(5)}")

In [None]:
# Load and resize every modern-building image into one array.
X_modern = load_array(image_files, min_size)

# load_array skips files it cannot read, so it can return fewer rows than
# there are paths. The labels and image links below are built from
# image_files, so stop here rather than continue with misaligned data.
if X_modern.shape[0] != len(image_files):
    raise ValueError(
        f"loaded {X_modern.shape[0]} of {len(image_files)} modern-building images; "
        "remove or fix the files reported above before continuing"
    )

# y value is one for modern buildings
y_modern = np.ones((len(image_files),1))

print(f"X_modern shape = {X_modern.shape}")
print(f"y_modern shape = {y_modern.shape}")

In [None]:
# Label table for the modern-building class: the label plus the image path,
# matched on row index.
df_y_modern = pd.DataFrame(y_modern, columns=["label"]).assign(
    image_link=df_file_info["image_link"]
)
print(df_y_modern.shape)

In [None]:
# Stack both classes along the sample axis: old buildings first, then modern
X = np.concatenate((X_old, X_modern), axis=0)
X.shape

In [None]:
# Combined label table in the same order as X (old first, then modern),
# renumbered from 0 so that "id" is the row position in X.
df_y = pd.concat([df_y_old, df_y_modern], ignore_index=True)
df_y["id"] = df_y.index
print(df_y.shape)
df_y.tail(10)

#### Train test split

In [None]:
# Stratified split so both classes keep the same proportion in each part.
# random_state is set explicitly so the split is the same whenever this cell
# is run, regardless of which cells ran before it.
X_train, X_test, y_train, y_test = train_test_split(
    X, df_y, stratify=df_y["label"], random_state=42
)
print(y_train.shape)

In [None]:
# Baseline accuracy - 50%
# Count the training rows per class; the share of the larger class is the
# accuracy of always predicting that class.
y_train['label'].value_counts()

In [None]:
# Size of the held-out label table (rows, columns)
y_test.shape

In [None]:
# First ten "id" values (row positions in the combined table) in the test split
y_test['id'][:10]

In [None]:
# Visual sanity check: show the first training image (before scaling)
plt.imshow(X_train[0])

#### Standard Scaler

In [None]:
# Reshape the training images into a single column of pixel values, the 2-D
# (n_samples, n_features) layout the scaler is fitted on.
# reshape alone does this; the extra flatten() first made a full copy of the
# image data.
# NOTE(review): the result may now share memory with X_train; StandardScaler
# copies its input by default, so X_train should stay unscaled - confirm.
X_train_flat = X_train.reshape(-1, 1)
X_train_flat.shape

In [None]:
# Reshape the test images into a single column of pixel values, the 2-D
# (n_samples, n_features) layout the scaler transforms.
# reshape alone does this; the extra flatten() first made a full copy of the
# image data.
# NOTE(review): the result may now share memory with X_test; StandardScaler
# copies its input by default, so X_test should stay unscaled - confirm.
X_test_flat = X_test.reshape(-1, 1)
X_test_flat.shape

In [None]:
# Learn the scaling from the training pixels only, apply it to both splits,
# then restore the original image shapes.
ss = StandardScaler()
ss.fit(X_train_flat)
X_train_ss = ss.transform(X_train_flat).reshape(X_train.shape)
X_test_ss = ss.transform(X_test_flat).reshape(X_test.shape)

### Second run of model

In [None]:
# Instantiate a CNN as an empty Sequential model; layers are added in the
# following cells.
cnn_model = Sequential()

In [None]:
# First convolutional layer: 6 filters of size 3x3 with ReLU activation,
# taking min_size x min_size images with 3 channels as input.
cnn_model.add(
    Conv2D(
        filters=6,
        kernel_size=3,
        activation='relu',
        input_shape=(min_size, min_size, 3),
    )
)

In [None]:
# Remaining layers, added in order.
for layer in (
    MaxPooling2D(pool_size=(2, 2)),                  # pool over 2x2 regions
    Conv2D(16, kernel_size=3, activation='relu'),    # second convolution, 16 filters
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),                   # densely-connected layer, 128 neurons
    Dense(1, activation='sigmoid'),                  # final layer: one sigmoid neuron
):
    cnn_model.add(layer)

# Compile for binary classification
cnn_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the model
cnn_model.summary()

In [None]:
# Train on the scaled training images, using the scaled test split as
# validation data; the per-epoch metrics are kept in `history`.
history = cnn_model.fit(
    x=X_train_ss,
    y=y_train['label'],
    validation_data=(X_test_ss, y_test['label']),
    epochs=30,
    batch_size=64,
    verbose=1,
)

In [None]:
# Plot training loss against validation ("testing") loss per epoch
losses = history.history
train_loss, test_loss = losses['loss'], losses['val_loss']

plt.figure(figsize=(12, 8))
for curve, caption, shade in (
    (train_loss, 'Training loss', 'navy'),
    (test_loss, 'Testing loss', 'skyblue'),
):
    plt.plot(curve, label=caption, color=shade)
plt.legend();

#### List the misclassified test images

In [None]:
# First ten rows of the test label table, before predictions are added
y_test.head(10)

In [None]:
# Model outputs for the scaled test images. The final layer is a single
# sigmoid unit, so each output is a score between 0 and 1
# (presumably one column per image - the next cell shows the shape).
y_pred = cnn_model.predict(X_test_ss)


In [None]:
# Shape of the prediction array
y_pred.shape

In [None]:
# y_test is a slice of df_y produced by train_test_split; take an explicit
# copy so that adding columns does not raise pandas' SettingWithCopyWarning.
y_test = y_test.copy()

# ravel() turns the model output into a 1-D array, one value per test row
y_test['pred'] = y_pred.round().ravel()     # predicted class: score rounded to 0 or 1
y_test['prob'] = y_pred.round(3).ravel()    # model score, rounded to 3 decimals
y_test.head(10)

In [None]:
# Keep only the test rows whose predicted class differs from the true label
wrong = y_test["label"] != y_test["pred"]
y_test_misclassified = y_test.loc[wrong, ["image_link", "label", "pred", "prob"]]

print(f"total number in y_test: {y_test.shape[0]}")
print(f"misclassified in y_test: {y_test_misclassified.shape[0]}")
y_test_misclassified.head()



In [None]:
# show the first 20 photos misclassified in y_test, two per row
display_no = min(20, y_test_misclassified.shape[0])

if display_no == 0:
    # plt.subplots cannot build a grid with zero rows
    print("no misclassified photos to display")
else:
    n_rows = ceil(display_no / 2)
    # squeeze=False keeps `ax` two-dimensional even when there is one row,
    # so ax[row][col] always works. The height scales with the number of
    # rows (32 for the full 10 rows).
    fig, ax = plt.subplots(n_rows, 2, figsize=(8, 3.2 * n_rows), squeeze=False)
    for j, i in enumerate(y_test_misclassified.index[:display_no]):
        row, col = divmod(j, 2)
        image_link = y_test_misclassified.loc[i, "image_link"]
        ax[row][col].imshow(io.imread(image_link))
        ax[row][col].set_title(
            "label:" + str(y_test_misclassified.loc[i, "label"])
            + ", pred:" + str(y_test_misclassified.loc[i, "pred"])
            + ", prob:" + str(y_test_misclassified.loc[i, "prob"])
        )
        ax[row][col].axis('off')

    # with an odd number of photos the last cell is empty; hide its axes
    for spare in ax.ravel()[display_no:]:
        spare.axis('off')

# number of photos shown
print(display_no)

### Saving the model

In [None]:
# Name of the sub-folder under ../models/ where the scaler and model are saved
model_tag = "model2a"

In [None]:
# save the scaler
scaler_path = '../models/' + model_tag + '/scaler.pkl'

# This cell runs before the model itself is saved, so the target folder may
# not exist yet.
os.makedirs(os.path.dirname(scaler_path), exist_ok=True)

# the with-block closes the file even if pickling fails
with open(scaler_path, 'wb') as scaler_file:
    pickle.dump(ss, scaler_file)

In [None]:
# Write the trained network into the folder for this model tag
path = f"../models/{model_tag}/"

model = cnn_model
model.save(path)
