# Classification & Localization of Objects in Images
---

## Project Overview
introduction and problem statement

## Data Exploration
familiarize yourself with the data, helps you to better understand and justify your results

statistics, visualizations

### Dataset structure

<p>
    The dataset 'tiny-imagenet-200' consists of 3 main folders 'train', 'val', 'test'.
</p>

- folder 'train'
  - contains 200 folders, one for each class
  - each class folder has a subfolder 'images' with 500 samples and a file with box coordinates of all samples
  - there are 200 * 500 = 100'000 samples in total
- folder 'val'
  - contains 1 folder 'images' with 10'000 samples and a file holding the annotations to all samples
- folder 'test'
  - contains 1 folder 'images' with 10'000 samples

<p>
There is a file with all WordNet IDs of this dataset 'wnids.txt'.
</p>
<p>
There is a file 'words.txt' linking all <i>known</i> WordNet IDs to their descriptive name(s). It is useful in conjunction with 'wnids.txt' to name the classes.
</p>

<pre>
+-- tiny-imagenet-200
    +--- train (200 folders)
         +--- n01443537
              +--- images (500 files)
                   |--- n01443537_0.JPEG
                   |--- ...
                   |--- n01443537_499.JPEG
              |--- n01443537_boxes.txt
         +--- ...
    +--- val
        |--- val_annotations.txt
        +--- images (10,000 files)
             |--- val_0.JPEG
             |--- ...
             |--- val_9999.JPEG
    +--- test
         +--- images (10,000 files)
              |--- test_0.JPEG
              |--- ...
              |--- test_9999.JPEG
    |--- wnids.txt
    |--- words.txt
</pre>

In [None]:
import os

# Root directory of the Tiny ImageNet dataset (relative to the working directory).
DS_PATH = os.path.join('..', 'datasets', 'tiny-imagenet-200')

# One sub-directory per split, all derived from the root.
DS_TRAIN_PATH, DS_VAL_PATH, DS_TEST_PATH = (
    os.path.join(DS_PATH, split) for split in ('train', 'val', 'test')
)

### Example image

In [None]:
# check out the properties of the images (only one picture)
# display an image
# annotations of train images
import numpy as np
import pandas as pd
from skimage import io
import matplotlib.pyplot as plt
import matplotlib.patches as patches
%matplotlib inline

# Load one training image of class n01443537 as an array.
DS_TRAIN_IMAGE = os.path.join(DS_TRAIN_PATH, 'n01443537', 'images', 'n01443537_0.JPEG')
train_img = io.imread(DS_TRAIN_IMAGE)

# The class folder ships a tab-separated file with the box coordinates of its images.
DS_TRAIN_IMAGE_ANNOT = os.path.join(DS_TRAIN_PATH, 'n01443537', 'n01443537_boxes.txt')
img_annot_df = pd.read_csv(
    DS_TRAIN_IMAGE_ANNOT,
    sep='\t',
    header=None,
    names=['image', 'box_x1', 'box_y1', 'box_x2', 'box_y2'],
)
print('box of image object:')
print(img_annot_df.head(1), end='\n\n')

print('shape of image object:', train_img.shape, end='\n\n')

print('image object:')
fig, axs = plt.subplots(1, 1)
axs.set_ylabel('height (y)')
axs.set_xlabel('width (x)')
axs.imshow(train_img)

# Address the box coordinates by column label: integer keys on a
# string-labelled Series are only treated as positions by deprecated pandas
# behaviour.
# NOTE(review): the first annotation row is assumed to belong to the image
# loaded above - confirm against the boxes file.
box = img_annot_df.iloc[0]
rect = patches.Rectangle(
    (box['box_x1'], box['box_y1']),      # corner given by (x1, y1)
    box['box_x2'] - box['box_x1'],       # width
    box['box_y2'] - box['box_y1'],       # height
    linewidth=2, edgecolor='r', facecolor='none',
)
axs.add_patch(rect)
plt.show()

### wnid file (wnids.txt)
holds all WordNet IDs of the dataset 

In [None]:
# check out the wnid annotations
import numpy as np
import pandas as pd

# Read the list of WordNet IDs (one per line, no header row).
WNID_FILE = os.path.join(DS_PATH, 'wnids.txt')
wnid_df = pd.read_csv(WNID_FILE, header=None, names=['wnid'])

# DataFrame.info() writes its report itself and returns None, so it is called
# directly instead of being wrapped in print() (which would emit a stray "None").
wnid_df.info()
print(wnid_df.describe())
print(wnid_df.sample(3))

### words file (words.txt)
holds all WordNet IDs and their description(s)

In [None]:
import numpy as np
import pandas as pd

# Read the tab-separated mapping of WordNet ID -> description (no header row).
WNID_WORDS_FILE = os.path.join(DS_PATH, 'words.txt')
wnid_words_df = pd.read_csv(WNID_WORDS_FILE, sep='\t', header=None, names=['wnid', 'desc'])

# DataFrame.info() writes its report itself and returns None, so it is called
# directly instead of being wrapped in print() (which would emit a stray "None").
wnid_words_df.info()
print(wnid_words_df.describe())
print(wnid_words_df.sample(3))

In [None]:
# get the description of a wnid
#wnid = wnid_df['wnid'].sample ().values[0]
wnid = 'n01443537'
# Select the 'desc' cells of all rows whose wnid matches and take the first one.
wnid_desc = wnid_words_df.loc[wnid_words_df['wnid'] == wnid, 'desc'].values[0]

print(wnid, wnid_desc)

## Benchmark
some other algorithm / software / result to compare this algorithm against (by using the defined metric)

In [None]:
# classification: random guess
# localization: - tbd (for example the mean of all contestants of this Kaggle competition)

---

In [None]:
# split project here into
# - multiclass classification
# - regression for localization

## Task: Classification

### Metric
The metric used for classification is categorical cross-entropy.

### Data Preprocessing

__train data__
<pre>
+-- dataset\tiny-imagenet-200
    |+-- train (200 directories)
         |+-- n01443537
              |+-- images (500 files)
                   |--- n01443537_0.JPEG
                   |--- ...
                   |--- n01443537_499.JPEG
              |--- n01443537_boxes.txt
         |+-- ...
         |+-- n12267677
</pre>

In [None]:
# read in all images from training dataset
from collections import defaultdict

# get a list of all image class directories
# Map every class (wnid) to the list of paths of its training images.
train_data = defaultdict(list)

# os.listdir returns entries in arbitrary order; sorting keeps the class and
# image order (and everything derived from it) reproducible between runs.
train_path_entries = sorted(os.listdir(DS_TRAIN_PATH))
for class_name in train_path_entries:
    class_images_dir = os.path.join(DS_TRAIN_PATH, class_name, 'images')
    # Skip stray entries (e.g. hidden files) that are not class folders.
    if not os.path.isdir(class_images_dir):
        continue
    train_data[class_name] = [
        os.path.join(class_images_dir, img)
        for img in sorted(os.listdir(class_images_dir))
    ]

class_names = list(train_data)
print('Now there are {} keys (classes) in the dictionary, each holding {} values (image paths).'
      .format(len(class_names), len(train_data[class_names[0]])))
print()
# Show the first and last two image paths of one arbitrary class.
example_class = class_names[42]
print('Example:',
      'key (class) =', example_class,
      ', values (image paths) =',
      train_data[example_class][:2], '...',
      train_data[example_class][-2:])

The CNN expects its input as a 4-D tensor of the form (#, w, h, d), i.e. (samples, width, height, depth), so the images are read in and stacked into one tensor per class.

[!] The dataset mixes BW (grayscale) and RGB images; BW images are converted to RGB while loading.

In [None]:
import numpy as np
from skimage.io import imread
from skimage.color import gray2rgb

def image_path_to_tensor (img_path_list):
    """Load the given image files and stack them into one array.

    Images that ``imread`` returns with fewer than three dimensions (BW
    images without a depth axis) are converted with ``gray2rgb`` first, so
    that every image contributes the same number of axes.

    Parameters
    ----------
    img_path_list : iterable of image file paths

    Returns
    -------
    numpy array whose first axis indexes the images in input order.
    """
    def _load_with_depth(path):
        # read one image and make sure it carries a depth axis
        pixels = imread(path)
        return gray2rgb(pixels) if pixels.ndim < 3 else pixels

    return np.stack([_load_with_depth(path) for path in img_path_list], axis=0)
    
# unit test
# (image_path_to_tensor (train_data[list (train_data.keys())[42]][:5])).shape

In [None]:
# Read all training images class by class and remember the class of every image.
train_data_tensor_list = []
train_data_targets_list = []
n_classes = len(train_data)
for i, (img_class, img_paths) in enumerate(train_data.items(), start=1):
    # progress indicator, rewritten in place on a single line
    print('\rReading images of wnid class {}, ({:4} / {:4})'.format(img_class, i, n_classes), end='', flush=True)
    # read all images of this class into one tensor
    img_class_tensor = image_path_to_tensor(img_paths)
    train_data_tensor_list.append(img_class_tensor)
    # one target entry (the wnid) per image of this class
    train_data_targets_list.append([img_class] * len(img_paths))

# Convert to float32 and normalize [0, 255] -> [0, 1]; the division is done
# in place so no second full-size float array is allocated.
train_data_tensor = np.vstack(train_data_tensor_list).astype('f4')
train_data_tensor /= 255
train_data_targets = np.hstack(train_data_targets_list)

# clean up
del train_data_tensor_list
del train_data_targets_list
del train_data

print('\n')
print('Shape of train data tensor:', train_data_tensor.shape)
print('Shape of train data target vector:', train_data_targets.shape)

__validation data__
<pre>
+-- dataset\tiny-imagenet-200
    |+-- val
        |--- val_annotations.txt
        |+-- images (10,000 files)
             |--- val_0.JPEG
             |--- ...
             |--- val_9999.JPEG
</pre>

In [None]:
from collections import defaultdict
import pandas as pd

# get a list of all image class directories
# Collect the paths of all validation images under a single dictionary key.
val_images_dir = os.path.join(DS_VAL_PATH, 'images')
val_data = defaultdict(list)
val_data['val_imgs'] = [
    os.path.join(val_images_dir, file_name)
    for file_name in os.listdir(val_images_dir)
]

# image annotations (contain wnid class)
DS_VAL_PATH_ANNOTS = os.path.join(DS_VAL_PATH, 'val_annotations.txt')
annot_columns = ['image', 'wnid', 'box_x1', 'box_y1', 'box_x2', 'box_y2']
val_annots_df = pd.read_csv(DS_VAL_PATH_ANNOTS, sep='\t', header=None, names=annot_columns)

print('Now there are {} values (image paths) in validation set.'.format(len(val_data['val_imgs'])))
print()

# Show the first key together with its first and last two image paths.
first_key = next(iter(val_data))
print('Example:',
      'key =', first_key,
      ', values (image paths) =',
      val_data[first_key][:2], '...',
      val_data[first_key][-2:])

In [None]:
# read in all images from validation dataset
# Read all validation images and look up the class (wnid) of each one.
val_data_tensor_list = []
val_data_targets_list = []

# Build the image-name -> wnid mapping once instead of scanning the whole
# annotation frame for every image. drop_duplicates keeps the first row per
# image name, matching a "first match" lookup.
wnid_by_image = (
    val_annots_df.drop_duplicates('image').set_index('image')['wnid'].to_dict()
)

val_img_paths = val_data['val_imgs']
n_val_imgs = len(val_img_paths)
for i, img_path in enumerate(val_img_paths, start=1):
    # progress indicator, rewritten in place on a single line
    print('\rReading validation images, ({:5} / {:5})'.format(i, n_val_imgs), end='', flush=True)
    # read the image as a tensor holding a single sample
    img_class_tensor = image_path_to_tensor([img_path])
    val_data_tensor_list.append(img_class_tensor)
    # the annotation file is keyed by the bare file name;
    # an image without annotation raises KeyError here
    img_name = os.path.basename(img_path)
    val_data_targets_list.append(wnid_by_image[img_name])

# Convert to float32 and normalize [0, 255] -> [0, 1]; the division is done
# in place so no second full-size float array is allocated.
val_data_tensor = np.vstack(val_data_tensor_list).astype('f4')
val_data_tensor /= 255
val_data_targets = np.hstack(val_data_targets_list)

# clean up
del val_data_tensor_list
del val_data_targets_list
del val_data

print('\n')
print('Shape of val data tensor:', val_data_tensor.shape)
print('Shape of val data target vector:', val_data_targets.shape)

In [None]:
# one-hot encode targets
import pandas as pd

# One-hot encode the training targets: one column per distinct wnid.
train_data_targets_onehot = pd.get_dummies(train_data_targets)

# Encode the validation targets against the *training* columns so both
# matrices share the same column set and order. This replaces a row-wise
# `frame.iloc[i][col] = 1` loop: that chained assignment writes into an
# intermediate object and is not guaranteed to modify the frame itself.
val_data_targets_onehot = (
    pd.get_dummies(val_data_targets)
    # align to the training classes; classes absent from validation become all-zero columns
    .reindex(columns=train_data_targets_onehot.columns, fill_value=0)
    # keep the same element type as the training targets
    .astype(train_data_targets_onehot.values.dtype)
)

In [None]:
# make arrays of everything (i.e. get rid of index and column)
# keep the column labels so a target column can be mapped back to its wnid later
targets_names = train_data_targets_onehot.columns.values

# plain arrays of the one-hot frames (index and column labels are dropped)
train_targets, val_targets = (
    onehot_frame.values
    for onehot_frame in (train_data_targets_onehot, val_data_targets_onehot)
)

In [None]:
def _print_array_info(title, arr):
    """Print the type, dtype and shape of *arr* below the heading *title*."""
    print(title)
    print('  ', 'type :', type(arr))
    print('  ', 'dtype:', arr.dtype)
    print('  ', 'shape:', arr.shape)


_print_array_info('train data:', train_data_tensor)
print()
_print_array_info('validation data:', val_data_tensor)
print()
_print_array_info('target data:', train_targets)
# additionally show the first and last three class names of the targets
print('  ', 'names:', targets_names[:3], '...', targets_names[-3:])

In [None]:
# shuffle data
RND_STATE = 42
from sklearn.model_selection import train_test_split

# Shuffle samples and targets with one shared, seeded permutation per set so
# that every sample stays aligned with its target. A plain permutation is used
# because train_test_split rejects test_size=0.0 in current scikit-learn.
_train_perm = np.random.RandomState(RND_STATE).permutation(len(train_data_tensor))
train_data_shuffled = train_data_tensor[_train_perm]
train_targets_shuffled = train_targets[_train_perm]

_val_perm = np.random.RandomState(RND_STATE).permutation(len(val_data_tensor))
val_data_shuffled = val_data_tensor[_val_perm]
val_targets_shuffled = val_targets[_val_perm]

In [None]:
# clean up
# drop the unshuffled arrays in one statement
del train_data_tensor, train_targets, val_data_tensor, val_targets

### Implementation

In [1]:
from keras import layers, models, optimizers

# Network input: 64x64 images with 3 channels; one output unit per class.
input_shape = (64, 64, 3)  # train_data_shuffled.shape[1:]
n_outputs = 200

# input layer
inputs = layers.Input(shape=input_shape, name='input')

# hidden layers: three conv/pool stages with 16, 32 and 64 filters
net = layers.Conv2D(
    filters=16,
    kernel_size=(5, 5),
    strides=(1, 1),
    padding='same',
    activation='elu',
    name='conv_11',
)(inputs)
net = layers.MaxPooling2D(pool_size=(2, 2), name='pool_1')(net)

net = layers.Conv2D(
    filters=32,
    kernel_size=(5, 5),
    strides=(1, 1),
    padding='same',
    activation='elu',
    name='conv_21',
)(net)
net = layers.MaxPooling2D(pool_size=(2, 2), name='pool_2')(net)

net = layers.Conv2D(
    filters=64,
    kernel_size=(5, 5),
    strides=(1, 1),
    padding='same',
    activation='elu',
    name='conv_31',
)(net)
net = layers.MaxPooling2D(pool_size=(2, 2), name='pool_3')(net)

# fully connected head
net = layers.Flatten(name='flat_3')(net)
net = layers.Dense(units=1024, activation='elu', name='fc_4')(net)

# output layer
# softmax (not linear): the model is compiled with 'categorical_crossentropy'
# without from_logits, so the loss expects a probability distribution over
# the classes rather than raw scores.
outputs = layers.Dense(
    units=n_outputs,
    activation='softmax',
    name='output',
)(net)

# create optimizer
opt_sgd = optimizers.SGD(lr=0.001)

# build and compile model
clf = models.Model(inputs=inputs, outputs=outputs, name='imloc.clf')
clf.compile(optimizer=opt_sgd, loss='categorical_crossentropy', metrics=['accuracy'])

Using TensorFlow backend.









In [2]:
# Print the layer-by-layer overview (output shapes and parameter counts) of the model.
clf.summary ()

Model: "imloc.clf"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           (None, 64, 64, 3)         0         
_________________________________________________________________
conv_11 (Conv2D)             (None, 64, 64, 16)        1216      
_________________________________________________________________
pool_1 (MaxPooling2D)        (None, 32, 32, 16)        0         
_________________________________________________________________
conv_21 (Conv2D)             (None, 32, 32, 32)        12832     
_________________________________________________________________
pool_2 (MaxPooling2D)        (None, 16, 16, 32)        0         
_________________________________________________________________
conv_31 (Conv2D)             (None, 16, 16, 64)        51264     
_________________________________________________________________
pool_3 (MaxPooling2D)        (None, 8, 8, 64)          0 

In [None]:
from keras.callbacks import ModelCheckpoint

# Number of passes over the training data.
epochs = 20

### Do NOT modify the code below this line.
# Checkpoint callback: writes the model weights to the given file and, with
# save_best_only=True, only overwrites them when the model improves.
# NOTE(review): no `monitor` is passed, so "best" is judged by the callback's
# default quantity - presumably the validation loss; confirm for the installed Keras.
checkpointer = ModelCheckpoint (
                    filepath='project.ml.imloc.weights.best.hdf5', 
                    verbose=1,
                    save_best_only=True)

# Train on the shuffled training set in batches of 100 and evaluate on the
# shuffled validation set; the returned history object is kept in `history`.
history = clf.fit (train_data_shuffled, train_targets_shuffled,
          validation_data=(val_data_shuffled, val_targets_shuffled),
          epochs=epochs, batch_size=100, callbacks=[checkpointer], verbose=1)

---

## Metric
define the metric to measure the quality of the learning algorithm

## Data Preprocessing

## Implementation
split data, define and train a model

---

## Model Evaluation and Validation
results and justification

---

## Helper stuff

In [None]:
# list dataset directory with number of folders and files
import os

In [None]:
ds_path = os.path.join('dataset', 'tiny-imagenet-200')
print('+--', ds_path)

# Print every direct child of the dataset root and count directories and files.
path_entries = os.listdir(ds_path)
num_dirs = num_files = 0
for entry in path_entries:
    entry_path = os.path.join(ds_path, entry)
    entry_is_dir = os.path.isdir(entry_path)
    entry_is_file = os.path.isfile(entry_path)
    if entry_is_dir:
        print('    |+--', entry)
    if entry_is_file:
        print('    |---', entry)
    # booleans add as 0/1
    num_dirs += entry_is_dir
    num_files += entry_is_file

In [None]:
# Report how many directories and files the preceding listing counted.
print ('directories:', num_dirs, ', files:', num_files)

In [None]:
ds_test_path = os.path.join(ds_path, 'test')
print('+--', ds_test_path)

# Print every direct child of the 'test' folder and count directories and files.
num_dirs = num_files = 0
path_entries = os.listdir(ds_test_path)
for entry in path_entries:
    entry_path = os.path.join(ds_test_path, entry)
    entry_is_dir = os.path.isdir(entry_path)
    entry_is_file = os.path.isfile(entry_path)
    if entry_is_dir:
        print('    |+--', entry)
    if entry_is_file:
        print('    |---', entry)
    # booleans add as 0/1
    num_dirs += entry_is_dir
    num_files += entry_is_file

In [None]:
# Report how many directories and files the preceding listing counted.
print ('directories:', num_dirs, ', files:', num_files)

In [None]:
ds_test_images_path = os.path.join(ds_test_path, 'images')
print('+--', ds_test_images_path)

# Print every direct child of the test 'images' folder and count directories and files.
num_dirs = num_files = 0
path_entries = os.listdir(ds_test_images_path)
for entry in path_entries:
    entry_path = os.path.join(ds_test_images_path, entry)
    entry_is_dir = os.path.isdir(entry_path)
    entry_is_file = os.path.isfile(entry_path)
    if entry_is_dir:
        print('    |+--', entry)
    if entry_is_file:
        print('    |---', entry)
    # booleans add as 0/1
    num_dirs += entry_is_dir
    num_files += entry_is_file

In [None]:
# Report how many directories and files the preceding listing counted.
print ('directories:', num_dirs, ', files:', num_files)

In [None]:
ds_val_path = os.path.join(ds_path, 'val')
print('+--', ds_val_path)

# Print every direct child of the 'val' folder and count directories and files.
num_dirs = num_files = 0
path_entries = os.listdir(ds_val_path)
for entry in path_entries:
    entry_path = os.path.join(ds_val_path, entry)
    entry_is_dir = os.path.isdir(entry_path)
    entry_is_file = os.path.isfile(entry_path)
    if entry_is_dir:
        print('    |+--', entry)
    if entry_is_file:
        print('    |---', entry)
    # booleans add as 0/1
    num_dirs += entry_is_dir
    num_files += entry_is_file

In [None]:
# Report how many directories and files the preceding listing counted.
print ('directories:', num_dirs, ', files:', num_files)

In [None]:
ds_val_images_path = os.path.join(ds_val_path, 'images')
print('+--', ds_val_images_path)

# Print every direct child of the validation 'images' folder and count directories and files.
num_dirs = num_files = 0
path_entries = os.listdir(ds_val_images_path)
for entry in path_entries:
    entry_path = os.path.join(ds_val_images_path, entry)
    entry_is_dir = os.path.isdir(entry_path)
    entry_is_file = os.path.isfile(entry_path)
    if entry_is_dir:
        print('    |+--', entry)
    if entry_is_file:
        print('    |---', entry)
    # booleans add as 0/1
    num_dirs += entry_is_dir
    num_files += entry_is_file

In [None]:
# Report how many directories and files the preceding listing counted.
print ('directories:', num_dirs, ', files:', num_files)

In [None]:
ds_train_path = os.path.join(ds_path, 'train')
print('+--', ds_train_path)

# Print every direct child of the 'train' folder and count directories and files.
num_dirs = num_files = 0
path_entries = os.listdir(ds_train_path)
for entry in path_entries:
    entry_path = os.path.join(ds_train_path, entry)
    entry_is_dir = os.path.isdir(entry_path)
    entry_is_file = os.path.isfile(entry_path)
    if entry_is_dir:
        print('    |+--', entry)
    if entry_is_file:
        print('    |---', entry)
    # booleans add as 0/1
    num_dirs += entry_is_dir
    num_files += entry_is_file

In [None]:
# Report how many directories and files the preceding listing counted.
print ('directories:', num_dirs, ', files:', num_files)

In [None]:
ds_train_wnid_path = os.path.join(ds_train_path, 'n01443537')
print('+--', ds_train_wnid_path)

# Print every direct child of one class folder and count directories and files.
num_dirs = num_files = 0
path_entries = os.listdir(ds_train_wnid_path)
for entry in path_entries:
    entry_path = os.path.join(ds_train_wnid_path, entry)
    entry_is_dir = os.path.isdir(entry_path)
    entry_is_file = os.path.isfile(entry_path)
    if entry_is_dir:
        print('    |+--', entry)
    if entry_is_file:
        print('    |---', entry)
    # booleans add as 0/1
    num_dirs += entry_is_dir
    num_files += entry_is_file

In [None]:
# Report how many directories and files the preceding listing counted.
print ('directories:', num_dirs, ', files:', num_files)

In [None]:
ds_train_wnid_images_path = os.path.join(ds_train_wnid_path, 'images')
print('+--', ds_train_wnid_images_path)

# Print every direct child of one class 'images' folder and count directories and files.
num_dirs = num_files = 0
path_entries = os.listdir(ds_train_wnid_images_path)
for entry in path_entries:
    entry_path = os.path.join(ds_train_wnid_images_path, entry)
    entry_is_dir = os.path.isdir(entry_path)
    entry_is_file = os.path.isfile(entry_path)
    if entry_is_dir:
        print('    |+--', entry)
    if entry_is_file:
        print('    |---', entry)
    # booleans add as 0/1
    num_dirs += entry_is_dir
    num_files += entry_is_file

# summary of the counts gathered above
print('directories:', num_dirs, ', files:', num_files)

In [None]:
# Report the directory and file counts again (the preceding cell already prints the same summary).
print ('directories:', num_dirs, ', files:', num_files)