<a href="https://colab.research.google.com/github/alexgchicote/Real-Estate-Image-Classification/blob/main/SVM_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# !pip install tensorflow-gpu

In [None]:
!pip install hypopt

Collecting hypopt
  Downloading hypopt-1.0.9-py2.py3-none-any.whl (13 kB)
Installing collected packages: hypopt
Successfully installed hypopt-1.0.9


In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Tue Apr 19 17:48:53 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# ensure adequate version of tensorflow
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np
import os

import matplotlib.pyplot as plt
%matplotlib inline

# to make this notebook's output stable across runs
# (this seeds NumPy's global random generator only)
np.random.seed(42)



# report the TensorFlow version in use
print(tf.__version__)

2.8.0


In [None]:
# Mount Google Drive at /content/drive so files stored there can be read and written.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


<h1>Declare Project Helper Functions</h1>

In [None]:
# create filepath for models
def create_model_filepath(name):
  """Return the path of the '.h5' file for the model called `name`.

  The path is built inside the 'models/' folder of the module-level
  `cd_filepath`, which must be defined before this function is called.
  """
  return ''.join((cd_filepath, 'models/', name, '.h5'))

<h1>Data Preprocessing</h1>

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# root folder of the project on the mounted Drive, and one image folder per split
cd_filepath = '/content/drive/MyDrive/code/'
train_data_path = cd_filepath + 'all_images/train'
valid_data_path = cd_filepath + 'all_images/valid'
test_data_path = cd_filepath + 'all_images/test' 

# class names of the dataset
# NOTE(review): not referenced by the other visible cells — presumably the
# classes are inferred from the sub-folder names; confirm they match this list
image_classes = ['bathroom', 'bedroom', 'kitchen', 'livingroom']
# target size every image is resized to when it is loaded
# NOTE(review): the recorded X_train.shape output further down shows 30000
# features per image (= 100*100*3), which does not match 256x256 — confirm
# which size produced the saved outputs
image_shape = (256, 256)

# split name -> (image directory, number of images in the split); the count is
# passed as the batch size so that one batch holds the whole split
meta_data = {'train': (train_data_path, 3783), 'valid': (valid_data_path, 1260), 'test': (test_data_path, 1262)}

# generator created with default settings (no augmentation or rescaling is configured here)
data_generator = ImageDataGenerator()


def get_data(data_set):
  """Load one whole split into memory as a single (images, labels) batch.

  Args:
    data_set: key of `meta_data` — 'train', 'valid' or 'test'.

  Returns:
    The first batch produced by the directory iterator: a tuple (X, y) with
    the images resized to `image_shape` and their integer class labels
    (class_mode='sparse'). Because the batch size is the split's image
    count, this batch contains the whole split.

  Raises:
    KeyError: if `data_set` is not a key of `meta_data`.
  """
  data_path, n_images = meta_data[data_set]

  # keep the test split in directory order; train and valid are shuffled
  shuffle = data_set != 'test'

  itr = data_generator.flow_from_directory(
    data_path,
    target_size=image_shape,
    batch_size=n_images,
    class_mode='sparse',
    shuffle=shuffle)

  # use the built-in next(): it only relies on the iterator protocol, whereas
  # the .next() method is not available on this iterator in newer Keras releases
  return next(itr)

<h3>Generate training, validation and testing batches</h3>

In [None]:
X_train, y_train = get_data('train')

Found 3783 images belonging to 4 classes.


  " Skipping tag %s" % (size, len(data), tag)


In [None]:
X_valid, y_valid = get_data('valid')

Found 1260 images belonging to 4 classes.


In [None]:
X_test, y_test = get_data('test')

Found 1262 images belonging to 4 classes.


  " Skipping tag %s" % (size, len(data), tag)


<h3>Save data as 2d arrays</h3>

In [None]:
from numpy import save

arrays_path = '/content/drive/MyDrive/code/datasets_arrays/'

# write each split's image array to its own file under arrays_path
for file_stem, images in (('X_train_2d', X_train),
                          ('X_valid_2d', X_valid),
                          ('X_test_2d', X_test)):
  save(arrays_path + file_stem, images)

<h3>Save datasets as 1d arrays in CSV files</h3>

In [None]:
# flatten data sample wise
X_train = X_train.reshape(X_train.shape[0], -1)
X_valid = X_valid.reshape(X_valid.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

In [None]:
from numpy import asarray
from numpy import savetxt

arrays_path = '/content/drive/MyDrive/code/datasets_arrays/'

# save datasets to csv file: features then labels of each split, in split order
csv_exports = (
  ('X_train.csv', X_train), ('y_train.csv', y_train),
  ('X_valid.csv', X_valid), ('y_valid.csv', y_valid),
  ('X_test.csv', X_test), ('y_test.csv', y_test),
)
for csv_name, values in csv_exports:
  savetxt(arrays_path + csv_name, values, delimiter=',')

<h3>Load 1d Datasets as Numpy Arrays</h3>

In [None]:
from numpy import loadtxt

arrays_path = '/content/drive/MyDrive/code/datasets_arrays/'

# load datasets as numpy arrays: for every split the features file is read
# first and the labels file second
X_train, y_train = (loadtxt(arrays_path + csv_name, delimiter=',')
                    for csv_name in ('X_train.csv', 'y_train.csv'))

X_valid, y_valid = (loadtxt(arrays_path + csv_name, delimiter=',')
                    for csv_name in ('X_valid.csv', 'y_valid.csv'))

X_test, y_test = (loadtxt(arrays_path + csv_name, delimiter=',')
                  for csv_name in ('X_test.csv', 'y_test.csv'))

In [None]:
X_train.shape

(3783, 30000)

In [None]:
X_train[0]

array([ 63.,  58.,  54., ..., 138., 186., 198.])

<h3>Load labels as Numpy Arrays</h3>

<h2>Scaling the Data</h2>

In [None]:
from sklearn.preprocessing import StandardScaler

# standardise the features with statistics learned from the training split;
# the validation and test splits reuse the already fitted scaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.astype(np.float32))
X_valid_scaled, X_test_scaled = (
  scaler.transform(split.astype(np.float32)) for split in (X_valid, X_test)
)

In [None]:
X_train_scaled[0]

array([-1.6714138 , -1.5356077 , -1.3229553 , ..., -0.17707634,
        0.5775146 ,  0.8044618 ], dtype=float32)

<h2>Dimensionality Reduction</h2>


*   Use PCA to perform dimensionality reduction.
*   Reduce the data from n dimensions to d dimensions, where d is the smallest number of principal components that preserves 95% of the variance.



In [None]:
from sklearn.decomposition import PCA

# transform data to reduced dimensions
# a fractional n_components asks PCA to keep as many components as are needed
# to explain that share (here 95%) of the variance; the projection is fitted
# on the scaled training data and applied to it in the same call
pca = PCA(n_components=0.95)
X_train_red = pca.fit_transform(X_train_scaled)

In [None]:
X_train_red.shape

(3783, 1157)

In [None]:
# project the validation and test splits with the PCA fitted on the training split
X_valid_red, X_test_red = (
  pca.transform(split) for split in (X_valid_scaled, X_test_scaled)
)

In [None]:
from hypopt import GridSearch
from sklearn.svm import SVC

# initialise the Support Vector Classifier
svm_clf = SVC()

# Define the range of values for the hyper-parameters
# (5 values of C x 2 gamma settings x 4 kernels = 40 combinations)
parameter_grid = {'C': [2, 3, 4, 5, 6],
              'gamma': ['scale', 'auto'],
              'kernel': ['poly', 'rbf', 'sigmoid', 'linear']}

grid = GridSearch(svm_clf, parameter_grid)

# train the model on every hyper-parameter combination
# (the validation split is passed alongside the training split — presumably
# hypopt scores each combination on it rather than cross-validating; confirm)
grid.fit(X_train_red, y_train, X_valid_red, y_valid)

100%|██████████| 40/40 [05:53<00:00,  8.85s/it]


SVC(C=5, random_state=0)

In [None]:
grid.model.get_params()

{'C': 5,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': 0,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [None]:
print('Test Score for Optimized Parameters:', grid.score(X_test_red, y_test))

Test Score for Optimized Parameters: 0.48019017432646594


In [None]:
from numpy import save

arrays_path = '/content/drive/MyDrive/code/datasets_arrays/'

# predict the test split with the fitted grid-search object and store the
# predictions in the same folder as the dataset arrays
predictions = grid.predict(X_test_red)
save(arrays_path + 'svm_predictions', predictions)