# nested image classification

## Prediction workbook

This workbook uses a trained model to run predictions on the images held in `data/test_data/`.  


First of all, load the required libraries, and then load the chosen keras model.

In [1]:
%matplotlib inline

import numpy as np
import pandas as pd

# for elementary image manipulation
from scipy import misc

import nested_utilities as nutil
import nested_predict as npred

from keras.preprocessing import image
from keras.models import load_model
from keras.applications.inception_v3 import preprocess_input

import os
import sys
import cv2
from datetime import datetime as dt

import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import seaborn as sns
sns.set()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Define the path to the keras model to use, and load it.
# The path is relative, so the notebook is expected to be run from the
# directory that contains ./models/.

model_to_use = './models/Incv3_nested_run_4_ft_20-0.63.hdf5'
model = load_model(model_to_use)

## Catalogue test images & select interiors

The models were only trained on interior images, but `test_data/` contains all image types. So we need to catalogue the `test_data/` directory, and then filter the resulting dataframe down to interior images only, to match those the model was trained on.

In [3]:
# build a catalogue of the test images
test_catalogue = nutil.build_catalogue('./data/test_data/')


# define list of interior rooms (/scenes) and use to create catalogue
# of interior scenes (only) in test set.
# NOTE it is **IMPORTANT** that this list is in strict alphabetical order,
# so that it matches keras model output labels (0-10)
interior_rooms = ['bathroom',
                  'bedroom',
                  'conservatory',
                  'diningroom',
                  'empty',
                  'entrance',
                  'graphic',
                  'kitchen',
                  'livingroom',
                  'misc_int',
                  'study']

# fail fast if the list is ever edited out of alphabetical order, rather than
# silently mislabelling the prediction columns further down
assert interior_rooms == sorted(interior_rooms), \
    'interior_rooms must be in strict alphabetical order'

# keep interior scenes only. reset_index(drop=True) gives a continuous
# 0..n-1 index (needed for the index-based merge with predictions below)
# without creating a surplus 'index' column that then has to be dropped
interior_test_cat = (test_catalogue
                     .loc[test_catalogue['room'].isin(interior_rooms)]
                     .reset_index(drop=True))

# check the output looks right
print(interior_test_cat.shape)
# info() prints its own report and returns None, so it is not wrapped in print()
interior_test_cat.info()
interior_test_cat.sample(6)


(3852, 4)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3852 entries, 0 to 3851
Data columns (total 4 columns):
id            3852 non-null object
room          3852 non-null object
filename      3852 non-null object
image_path    3852 non-null object
dtypes: object(4)
memory usage: 120.5+ KB
None


Unnamed: 0,id,room,filename,image_path
2846,21e1699682f98ebc18c484259794c196a0c71359,kitchen,21e1699682f98ebc18c484259794c196a0c71359.jpg,./data/test_data/kitchen/21e1699682f98ebc18c48...
1759,009794d8a2fb20d5b8a5f101505dd28869e06f49,empty,009794d8a2fb20d5b8a5f101505dd28869e06f49.jpg,./data/test_data/empty/009794d8a2fb20d5b8a5f10...
92,059f50187336f133fe8531bb1829480a98196ac6,bathroom,059f50187336f133fe8531bb1829480a98196ac6.jpg,./data/test_data/bathroom/059f50187336f133fe85...
2723,08806f7bfc85b8143a2d98ae484c5a5f3259376e,kitchen,08806f7bfc85b8143a2d98ae484c5a5f3259376e.jpg,./data/test_data/kitchen/08806f7bfc85b8143a2d9...
2613,07a85ce1e83da28bcac7e8fea7d90c85e97ff6ef,kitchen,07a85ce1e83da28bcac7e8fea7d90c85e97ff6ef.jpg,./data/test_data/kitchen/07a85ce1e83da28bcac7e...
340,07f7c6b22955fe3b33c2b0d11ebba58c824ede9f,bathroom,07f7c6b22955fe3b33c2b0d11ebba58c824ede9f.jpg,./data/test_data/bathroom/07f7c6b22955fe3b33c2...


In [4]:
# This is a good point to save the interior_test_cat catalogue to disk.
# Comment out the lines below if you do not want to (re)write the file.
# NOTE(review): the output file name has no '.csv' extension; it is kept
# unchanged here so that anything already reading this path keeps working.

# to_csv does not create missing directories, so make sure the target exists
os.makedirs('./data_catalogues', exist_ok=True)
interior_test_cat.to_csv('./data_catalogues/interior_test_cat_10042018')

## Making predictions

Iterate through `interior_test_cat` and call predict on each image in turn. The result for each image is stored as a new row in `pred_array`.

In [5]:
# Run the loaded model over every image listed in interior_test_cat.
# This is the slow step of the notebook: progress is reported every 100 rows.
# NOTE(review): pred_array presumably holds one row of class probabilities
# per catalogue row, in catalogue order -- confirm against npred.predict_catalogue.
pred_array = npred.predict_catalogue(interior_test_cat, model)

Starting prediction for 3852 images
processing row 100
processing row 200
processing row 300
processing row 400
processing row 500
processing row 600
processing row 700
processing row 800
processing row 900
processing row 1000
processing row 1100
processing row 1200
processing row 1300
processing row 1400
processing row 1500
processing row 1600
processing row 1700
processing row 1800
processing row 1900
processing row 2000
processing row 2100
processing row 2200
processing row 2300
processing row 2400
processing row 2500
processing row 2600
processing row 2700
processing row 2800
processing row 2900
processing row 3000
processing row 3100
processing row 3200
processing row 3300
processing row 3400
processing row 3500
processing row 3600
processing row 3700
processing row 3800
Predictions complete. 3852 image predictions made


Now put these predictions into a dataframe, with correctly labelled columns (room labels).

In [6]:
# load the predictions into a dataframe, with correctly named columns
# NOTE Keras model labels (0-10) correspond to an alphabetically ordered list of
# text labels (folder names, in original modelling input)

pred_df = npred.pred_to_df(pred_array, columns=interior_rooms)

# the column order is what ties each probability to its room label, so check it
assert list(pred_df.columns) == interior_rooms, \
    'prediction columns do not match interior_rooms'

# info() prints its own report and returns None, so it is not wrapped in print()
pred_df.info()
pred_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3852 entries, 0 to 3851
Data columns (total 11 columns):
bathroom        3852 non-null float64
bedroom         3852 non-null float64
conservatory    3852 non-null float64
diningroom      3852 non-null float64
empty           3852 non-null float64
entrance        3852 non-null float64
graphic         3852 non-null float64
kitchen         3852 non-null float64
livingroom      3852 non-null float64
misc_int        3852 non-null float64
study           3852 non-null float64
dtypes: float64(11)
memory usage: 331.1 KB
None


Unnamed: 0,bathroom,bedroom,conservatory,diningroom,empty,entrance,graphic,kitchen,livingroom,misc_int,study
0,0.999998,1.372046e-08,7.956749999999999e-19,1.731251e-10,7.169086e-08,5.29026e-11,3.3683620000000003e-22,1e-06,6.347975e-10,1.044628e-09,1.635699e-14
1,0.999782,5.479097e-07,5.68158e-11,9.364485e-07,4.901499e-08,5.003177e-09,1.510149e-13,0.000217,4.72809e-07,5.320324e-08,1.23267e-10
2,0.999678,0.000117513,1.344599e-10,1.279169e-05,8.527583e-06,2.923682e-06,1.169373e-11,0.000103,3.634837e-05,3.945936e-05,1.434315e-06
3,0.999844,5.523511e-05,4.412695e-12,4.067396e-06,3.403577e-06,1.579372e-07,2.984056e-14,8.2e-05,7.788133e-06,2.977383e-06,6.205251e-08
4,0.999998,1.978441e-10,1.99073e-16,1.329193e-09,3.304619e-10,4.323779e-13,3.33029e-21,2e-06,2.351824e-10,6.919116e-12,2.775339e-16


### Extracting top 3 labels and associated probabilities

Run some manipulation on this data, and use the `npred.get_labels_and_probs` function to get the top (highest probability) n (default=3) labels for each image, and their associated probabilities. Store the data in a dataframe.

In [7]:
# top-3 (highest probability) labels per image, plus their probabilities
pred_labels = npred.get_labels_and_probs(pred_df, num_labels=3)

# info() prints its own report and returns None, so it is not wrapped in print()
pred_labels.info()
pred_labels.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3852 entries, 0 to 3851
Data columns (total 6 columns):
label1      3852 non-null object
label2      3852 non-null object
label3      3852 non-null object
label1_P    3852 non-null float64
label2_P    3852 non-null float64
label3_P    3852 non-null float64
dtypes: float64(3), object(3)
memory usage: 180.6+ KB
None


Unnamed: 0,label1,label2,label3,label1_P,label2_P,label3_P
0,bathroom,kitchen,empty,0.999998,1e-06,7.169086e-08
1,bathroom,kitchen,diningroom,0.999782,0.000217,9.364485e-07
2,bathroom,bedroom,kitchen,0.999678,0.000118,0.0001028072
3,bathroom,kitchen,bedroom,0.999844,8.2e-05,5.523511e-05
4,bathroom,kitchen,diningroom,0.999998,2e-06,1.329193e-09


Now merge the `pred_labels` df with the `interior_test_cat` dataframe to create `interior_test_pred`, so we have all the catalogue data for the test images (incl. file path) and the top n (3) labels and probabilities.

In [8]:
# The merge below joins purely on the row index, so the catalogue and the
# predictions must share the same index; a mismatch would otherwise show up
# only as silently NaN-filled label columns (how='left').
assert interior_test_cat.index.equals(pred_labels.index), \
    'interior_test_cat and pred_labels are not index-aligned'

interior_test_pred = pd.merge(interior_test_cat, pred_labels, how='left',
                              left_index=True, right_index=True)

# info() prints its own report and returns None, so it is not wrapped in print()
interior_test_pred.info()
interior_test_pred.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3852 entries, 0 to 3851
Data columns (total 10 columns):
id            3852 non-null object
room          3852 non-null object
filename      3852 non-null object
image_path    3852 non-null object
label1        3852 non-null object
label2        3852 non-null object
label3        3852 non-null object
label1_P      3852 non-null float64
label2_P      3852 non-null float64
label3_P      3852 non-null float64
dtypes: float64(3), object(7)
memory usage: 301.0+ KB
None


Unnamed: 0,id,room,filename,image_path,label1,label2,label3,label1_P,label2_P,label3_P
0,003457d1fb62155b4531aba3e5b39f4d57bed9f9,bathroom,003457d1fb62155b4531aba3e5b39f4d57bed9f9.jpg,./data/test_data/bathroom/003457d1fb62155b4531...,bathroom,kitchen,empty,0.999998,1e-06,7.169086e-08
1,0038cad4553a9922d24a70a4d77abb9d2c1261f9,bathroom,0038cad4553a9922d24a70a4d77abb9d2c1261f9.jpg,./data/test_data/bathroom/0038cad4553a9922d24a...,bathroom,kitchen,diningroom,0.999782,0.000217,9.364485e-07
2,003f5052b875f64f4f5ce99948ae87f35d44b009,bathroom,003f5052b875f64f4f5ce99948ae87f35d44b009.jpg,./data/test_data/bathroom/003f5052b875f64f4f5c...,bathroom,bedroom,kitchen,0.999678,0.000118,0.0001028072
3,009e866e3eb114c3bc8579d780a4c333ddd38da9,bathroom,009e866e3eb114c3bc8579d780a4c333ddd38da9.jpg,./data/test_data/bathroom/009e866e3eb114c3bc85...,bathroom,kitchen,bedroom,0.999844,8.2e-05,5.523511e-05
4,00da56e13e015b383effff3fe8560ae7fa7cb1f9,bathroom,00da56e13e015b383effff3fe8560ae7fa7cb1f9.jpg,./data/test_data/bathroom/00da56e13e015b383eff...,bathroom,kitchen,diningroom,0.999998,2e-06,1.329193e-09


Add a few boolean columns to capture whether labels 1, 2 or 3 are correct (i.e. equal to the true `room` label).

In [9]:
# For each of the three ranked predictions, record whether the predicted
# label equals the true room label.
for predicted_col, flag_col in [('label1', 'label1_correct'),
                                ('label2', 'label2_correct'),
                                ('label3', 'label3_correct')]:
    interior_test_pred[flag_col] = (
        interior_test_pred['room'] == interior_test_pred[predicted_col]
    )

# Presentation order: truth first, then each label next to its probability,
# then the correctness flags, and finally the file bookkeeping columns.
columns = [
    'room',
    'label1', 'label1_P',
    'label2', 'label2_P',
    'label3', 'label3_P',
    'label1_correct', 'label2_correct', 'label3_correct',
    'image_path', 'filename', 'id',
]

interior_test_pred = interior_test_pred[columns]

# take a look
interior_test_pred.head()

Unnamed: 0,room,label1,label1_P,label2,label2_P,label3,label3_P,label1_correct,label2_correct,label3_correct,image_path,filename,id
0,bathroom,bathroom,0.999998,kitchen,1e-06,empty,7.169086e-08,True,False,False,./data/test_data/bathroom/003457d1fb62155b4531...,003457d1fb62155b4531aba3e5b39f4d57bed9f9.jpg,003457d1fb62155b4531aba3e5b39f4d57bed9f9
1,bathroom,bathroom,0.999782,kitchen,0.000217,diningroom,9.364485e-07,True,False,False,./data/test_data/bathroom/0038cad4553a9922d24a...,0038cad4553a9922d24a70a4d77abb9d2c1261f9.jpg,0038cad4553a9922d24a70a4d77abb9d2c1261f9
2,bathroom,bathroom,0.999678,bedroom,0.000118,kitchen,0.0001028072,True,False,False,./data/test_data/bathroom/003f5052b875f64f4f5c...,003f5052b875f64f4f5ce99948ae87f35d44b009.jpg,003f5052b875f64f4f5ce99948ae87f35d44b009
3,bathroom,bathroom,0.999844,kitchen,8.2e-05,bedroom,5.523511e-05,True,False,False,./data/test_data/bathroom/009e866e3eb114c3bc85...,009e866e3eb114c3bc8579d780a4c333ddd38da9.jpg,009e866e3eb114c3bc8579d780a4c333ddd38da9
4,bathroom,bathroom,0.999998,kitchen,2e-06,diningroom,1.329193e-09,True,False,False,./data/test_data/bathroom/00da56e13e015b383eff...,00da56e13e015b383effff3fe8560ae7fa7cb1f9.jpg,00da56e13e015b383effff3fe8560ae7fa7cb1f9


In [10]:

# save the prediction df
# (disabled by default -- uncomment the line below to write the CSV)
#interior_test_pred.to_csv('./data_catalogues/interior_test_pred_100418.csv')