# Calculating features

In [2]:
from keras.preprocessing import image
#from keras.applications.resnet50 import ResNet50
#from keras.applications.resnet50 import preprocess_input

from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input


import numpy as np
import pandas as pd
from os.path import join
import os

In [3]:
# Names of the regions in the dataset; each region has its own data folder.
regions = [
    'borde_rural',
    'borde_soacha',
    'mixco_1_and_ebenezer',
    'mixco_3',
]

In [4]:
# Region processed in this run.
region = 'borde_soacha'

# Image folders of that region: the training folder holds one sub-folder per
# material, the test folder holds the unlabeled images directly.
region_dir = join('..', '..', 'data', 'data2', region)
train_dir = join(region_dir, 'roofs_train')
test_dir = join(region_dir, 'roofs_test')

# Roof material name -> integer class label.
materials = {
    'concrete_cement': 0,
    'healthy_metal': 1,
    'incomplete': 2,
    'irregular_metal': 3,
    'other': 4,
}

Use a pretrained network to compute features from the images, which are later classified. Import the model from the Keras library and remove the top (classification) layer by setting `include_top=False`. The output of the last layer is pooled globally, so that we get one feature vector of size 1x2048 per image instead of a spatial feature map (5x5x2048 for the 224x224 inputs used below). The pooling method `'max'` was chosen empirically.

In [5]:
# Pretrained InceptionV3 used purely as a feature extractor: the ImageNet
# classification head is left out (include_top=False) and the last feature
# map is max-pooled so that every image yields a single feature vector.
model = InceptionV3(
    include_top=False,
    weights='imagenet',
    pooling='max',
)

# Print the layer overview of the truncated network.
model.summary()














__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, None, 3 0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, None, None, 3 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, None, None, 3 96          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, None, None, 3 0           batch_normalization_1[0][0]      
________________________________________________________________________________________________

## 1. Calculate features on the training data

The preprocessed data is stored such that every region has its own folder, which contains the subfolder defined as `train_dir` above. This folder is in turn subdivided by material. Images carry their `id` as filename.

Traverse all images in their corresponding folders which indicate the material and use `model.predict()` to get the features.

In [6]:
def calculate_features_train(train_dir, target_size=(224, 224), verbose=True):
    """Compute network features for every labeled training image.

    ``train_dir`` is expected to contain one sub-folder per key of the
    module-level ``materials`` dict; every file found below such a folder
    (including nested sub-folders) is treated as an image of that material.
    Feature extraction uses the module-level ``model``, ``image`` and
    ``preprocess_input`` objects.

    Parameters
    ----------
    train_dir : str
        Folder holding the per-material sub-folders.
    target_size : tuple of int, optional
        ``(height, width)`` every image is resized to while loading.
        Defaults to ``(224, 224)``.
    verbose : bool, optional
        If True (default), print one line per processed image.

    Returns
    -------
    pandas.DataFrame
        One row per image with the columns ``id`` (file name up to the first
        dot), ``features`` (flattened 1-D array of the model output) and
        ``label`` (integer class taken from ``materials``). The columns are
        present even when no image was found.
    """
    columns = ['id', 'features', 'label']
    records = []

    # Walk through all images, material by material
    for material, label in materials.items():
        material_fp = join(train_dir, material)
        for root, _dirs, files in os.walk(material_fp):
            for file in files:
                # os.walk also descends into sub-folders, so the path has to
                # be built from `root`; joining with `material_fp` would point
                # to a non-existing file for anything nested deeper.
                img_fp = join(root, file)
                img_id = file.split('.')[0]
                if verbose:
                    print(img_id, "labeled as", material, ":", label)

                # Load the image resized to `target_size`, add the batch
                # axis and apply the network-specific preprocessing
                img = image.load_img(img_fp, target_size=target_size)
                img_data = image.img_to_array(img)
                img_data = np.expand_dims(img_data, axis=0)
                img_data = preprocess_input(img_data)

                # Compute features and flatten them to a 1-D vector
                features = model.predict(img_data)
                features_np = np.array(features).flatten()

                records.append({'id': img_id,
                                'features': features_np,
                                'label': label})

    # Build the frame once from all collected rows; passing `columns` keeps
    # the schema stable when no image was found.
    return pd.DataFrame(records, columns=columns)

The results are stored in a dataframe as shown below and saved to disk as a pickle file.

In [7]:
# Show the training directory that is passed to the feature extraction below.
print(train_dir)

../../data/data2/borde_soacha/roofs_train


In [None]:
# Extract features for all training images of the selected region
# (the network is run once per image and one line is printed per image).
features_train = calculate_features_train(train_dir)

7a20f7ac labeled as concrete_cement : 0
7a38b298 labeled as concrete_cement : 0
7a3a662e labeled as concrete_cement : 0
7a3d7d28 labeled as concrete_cement : 0
7a397142 labeled as concrete_cement : 0
7a382b34 labeled as concrete_cement : 0
7a38f03c labeled as concrete_cement : 0
7a238a3a labeled as concrete_cement : 0
7a2888fa labeled as concrete_cement : 0
7a3709c0 labeled as concrete_cement : 0
7a1d5caa labeled as concrete_cement : 0
7a1d085e labeled as concrete_cement : 0
7a2eb22a labeled as concrete_cement : 0
7a23f1dc labeled as concrete_cement : 0
7a3c5e66 labeled as concrete_cement : 0
7a2ac9b2 labeled as concrete_cement : 0
7a2bc9de labeled as concrete_cement : 0
7a2ad164 labeled as concrete_cement : 0
7a2792a6 labeled as concrete_cement : 0
7a2eea10 labeled as concrete_cement : 0
7a372f54 labeled as concrete_cement : 0
7a2ae12c labeled as concrete_cement : 0
7a3b557a labeled as concrete_cement : 0
7a3b9904 labeled as concrete_cement : 0
7a1d64a2 labeled as concrete_cement : 0


7a1e9192 labeled as concrete_cement : 0
7a2bac6a labeled as concrete_cement : 0
7a449e5a labeled as concrete_cement : 0
7a239a66 labeled as concrete_cement : 0
7a1e7158 labeled as concrete_cement : 0
7a25838a labeled as concrete_cement : 0
7a2bb426 labeled as concrete_cement : 0
7a2abc10 labeled as concrete_cement : 0
7a24b78e labeled as concrete_cement : 0
7a1ca080 labeled as concrete_cement : 0
7a200b44 labeled as concrete_cement : 0
7a21ce8e labeled as concrete_cement : 0
7a2f63be labeled as concrete_cement : 0
7a4072d0 labeled as concrete_cement : 0
7a4097f6 labeled as concrete_cement : 0
7a1c7646 labeled as concrete_cement : 0
7a231956 labeled as concrete_cement : 0
7a2035ce labeled as concrete_cement : 0
7a310d72 labeled as concrete_cement : 0
7a285128 labeled as concrete_cement : 0
7a1cd0a0 labeled as concrete_cement : 0
7a40aa70 labeled as concrete_cement : 0
7a319440 labeled as concrete_cement : 0
7a36181c labeled as concrete_cement : 0
7a44a2ba labeled as concrete_cement : 0


7a2aa810 labeled as healthy_metal : 1
7a38cb48 labeled as healthy_metal : 1
7a365e44 labeled as healthy_metal : 1
7a31ef58 labeled as healthy_metal : 1
7a219f4a labeled as healthy_metal : 1
7a44abfc labeled as healthy_metal : 1
7a41660e labeled as healthy_metal : 1
7a44493c labeled as healthy_metal : 1
7a1fb7c0 labeled as healthy_metal : 1
7a3132e8 labeled as healthy_metal : 1
7a3c13a2 labeled as healthy_metal : 1
7a2cc6ae labeled as healthy_metal : 1
7a1e5d6c labeled as healthy_metal : 1
7a3f4ec8 labeled as healthy_metal : 1
7a1cf40e labeled as healthy_metal : 1
7a2f4564 labeled as healthy_metal : 1
7a354d88 labeled as healthy_metal : 1
7a4256b8 labeled as healthy_metal : 1
7a1ef060 labeled as healthy_metal : 1
7a39ed7a labeled as healthy_metal : 1
7a326186 labeled as healthy_metal : 1
7a34d6be labeled as healthy_metal : 1
7a282a54 labeled as healthy_metal : 1
7a1f7c74 labeled as healthy_metal : 1
7a39eca8 labeled as healthy_metal : 1
7a1e8ab2 labeled as healthy_metal : 1
7a3cd0f8 lab

7a2be7de labeled as healthy_metal : 1
7a3a03c8 labeled as healthy_metal : 1
7a3a149e labeled as healthy_metal : 1
7a396666 labeled as healthy_metal : 1
7a2ad600 labeled as healthy_metal : 1
7a2f6e18 labeled as healthy_metal : 1
7a288b16 labeled as healthy_metal : 1
7a378ac6 labeled as healthy_metal : 1
7a22ebd4 labeled as healthy_metal : 1
7a2632b2 labeled as healthy_metal : 1
7a2de89a labeled as healthy_metal : 1
7a321118 labeled as healthy_metal : 1
7a241040 labeled as healthy_metal : 1
7a3015b6 labeled as healthy_metal : 1
7a34c43a labeled as healthy_metal : 1
7a3de07e labeled as healthy_metal : 1
7a32badc labeled as healthy_metal : 1
7a3226ee labeled as healthy_metal : 1
7a3e764c labeled as healthy_metal : 1
7a3dfeba labeled as healthy_metal : 1
7a273360 labeled as healthy_metal : 1
7a37a93e labeled as healthy_metal : 1
7a3a2d30 labeled as healthy_metal : 1
7a3956b2 labeled as healthy_metal : 1
7a3d622a labeled as healthy_metal : 1
7a2672c2 labeled as healthy_metal : 1
7a3d6158 lab

7a321dc0 labeled as healthy_metal : 1
7a42bbb2 labeled as healthy_metal : 1
7a40f4bc labeled as healthy_metal : 1
7a2e3fd4 labeled as healthy_metal : 1
7a32240a labeled as healthy_metal : 1
7a350a62 labeled as healthy_metal : 1
7a42749a labeled as healthy_metal : 1
7a4281a6 labeled as healthy_metal : 1
7a1eaa92 labeled as healthy_metal : 1
7a242b84 labeled as healthy_metal : 1
7a25c034 labeled as healthy_metal : 1
7a26c90c labeled as healthy_metal : 1
7a3936dc labeled as healthy_metal : 1
7a22dd10 labeled as healthy_metal : 1
7a2dfde4 labeled as healthy_metal : 1
7a3a1886 labeled as healthy_metal : 1
7a2fccbe labeled as healthy_metal : 1
7a26f31e labeled as healthy_metal : 1
7a36ada4 labeled as healthy_metal : 1
7a27e4f4 labeled as healthy_metal : 1
7a229710 labeled as healthy_metal : 1
7a271808 labeled as healthy_metal : 1
7a352cfe labeled as healthy_metal : 1
7a34103a labeled as healthy_metal : 1
7a3fb598 labeled as healthy_metal : 1
7a41fcfe labeled as healthy_metal : 1
7a32f0b0 lab

7a28914c labeled as healthy_metal : 1
7a36ae08 labeled as healthy_metal : 1
7a354036 labeled as healthy_metal : 1
7a307ec0 labeled as healthy_metal : 1
7a336cac labeled as healthy_metal : 1
7a411b7c labeled as healthy_metal : 1
7a228842 labeled as healthy_metal : 1
7a26648a labeled as healthy_metal : 1
7a3106d8 labeled as healthy_metal : 1
7a2fa09a labeled as healthy_metal : 1
7a3db022 labeled as healthy_metal : 1
7a324e58 labeled as healthy_metal : 1
7a36b006 labeled as healthy_metal : 1
7a215d3c labeled as healthy_metal : 1
7a3abe62 labeled as healthy_metal : 1
7a2c903a labeled as healthy_metal : 1
7a3948d4 labeled as healthy_metal : 1
7a25249e labeled as healthy_metal : 1
7a42208a labeled as healthy_metal : 1
7a215c60 labeled as healthy_metal : 1
7a266714 labeled as healthy_metal : 1
7a3a6926 labeled as healthy_metal : 1
7a31d338 labeled as healthy_metal : 1
7a416a28 labeled as healthy_metal : 1
7a33ce18 labeled as healthy_metal : 1
7a34eb5e labeled as healthy_metal : 1
7a23a5ce lab

7a37c8f6 labeled as healthy_metal : 1
7a1f5c58 labeled as healthy_metal : 1
7a279bde labeled as healthy_metal : 1
7a363f54 labeled as healthy_metal : 1
7a23bb2c labeled as healthy_metal : 1
7a360c1e labeled as healthy_metal : 1
7a25951e labeled as healthy_metal : 1
7a1f8516 labeled as healthy_metal : 1
7a334ae2 labeled as healthy_metal : 1
7a217dee labeled as healthy_metal : 1
7a39550e labeled as healthy_metal : 1
7a22963e labeled as healthy_metal : 1
7a413cb0 labeled as healthy_metal : 1
7a32877e labeled as healthy_metal : 1
7a3cb3d4 labeled as healthy_metal : 1
7a28d6fc labeled as healthy_metal : 1
7a1ebc1c labeled as healthy_metal : 1
7a221326 labeled as healthy_metal : 1
7a20d27c labeled as healthy_metal : 1
7a40bfba labeled as healthy_metal : 1
7a2de08e labeled as healthy_metal : 1
7a1d2eba labeled as healthy_metal : 1
7a40d536 labeled as healthy_metal : 1
7a1dba7e labeled as healthy_metal : 1
7a353758 labeled as healthy_metal : 1
7a2d0704 labeled as healthy_metal : 1
7a311416 lab

7a3378e6 labeled as healthy_metal : 1
7a399c94 labeled as healthy_metal : 1
7a415ccc labeled as healthy_metal : 1
7a3b8702 labeled as healthy_metal : 1
7a42bd6a labeled as healthy_metal : 1
7a25d4f2 labeled as healthy_metal : 1
7a424254 labeled as healthy_metal : 1
7a3edbf0 labeled as healthy_metal : 1
7a3e497e labeled as healthy_metal : 1
7a42a50a labeled as healthy_metal : 1
7a41e886 labeled as healthy_metal : 1
7a1e466a labeled as healthy_metal : 1
7a440aee labeled as healthy_metal : 1
7a3f2d4e labeled as healthy_metal : 1
7a221a24 labeled as healthy_metal : 1
7a3e80c4 labeled as healthy_metal : 1
7a41c4b4 labeled as healthy_metal : 1
7a2de30e labeled as healthy_metal : 1
7a3395c4 labeled as healthy_metal : 1
7a1f6004 labeled as healthy_metal : 1
7a29505a labeled as healthy_metal : 1
7a27495e labeled as healthy_metal : 1
7a3e568a labeled as healthy_metal : 1
7a1ee4b2 labeled as healthy_metal : 1
7a25135a labeled as healthy_metal : 1
7a36200a labeled as healthy_metal : 1
7a3d9ff6 lab

7a24ba90 labeled as healthy_metal : 1
7a1d9ac6 labeled as healthy_metal : 1
7a31fa7a labeled as healthy_metal : 1
7a41293c labeled as healthy_metal : 1
7a3ce67e labeled as healthy_metal : 1
7a31e6fc labeled as healthy_metal : 1
7a3df974 labeled as healthy_metal : 1
7a327cde labeled as healthy_metal : 1
7a1e2a2c labeled as healthy_metal : 1
7a3f8c44 labeled as healthy_metal : 1
7a3ebd5a labeled as healthy_metal : 1
7a2d6456 labeled as healthy_metal : 1
7a27ae3a labeled as healthy_metal : 1
7a367546 labeled as healthy_metal : 1
7a3cb668 labeled as healthy_metal : 1
7a290d84 labeled as healthy_metal : 1
7a42794a labeled as healthy_metal : 1
7a37d59e labeled as healthy_metal : 1
7a1de9ea labeled as healthy_metal : 1
7a1e7d42 labeled as healthy_metal : 1
7a222c6c labeled as healthy_metal : 1
7a338b38 labeled as healthy_metal : 1
7a337cf6 labeled as healthy_metal : 1
7a2adccc labeled as healthy_metal : 1
7a208c22 labeled as healthy_metal : 1
7a30c498 labeled as healthy_metal : 1
7a29959c lab

7a395f4a labeled as healthy_metal : 1
7a37964c labeled as healthy_metal : 1
7a37796e labeled as healthy_metal : 1
7a2fd13c labeled as healthy_metal : 1
7a31cf14 labeled as healthy_metal : 1
7a1f5046 labeled as healthy_metal : 1
7a2b817c labeled as healthy_metal : 1
7a37059c labeled as healthy_metal : 1
7a3b2398 labeled as healthy_metal : 1
7a2bd80c labeled as healthy_metal : 1
7a3ca434 labeled as healthy_metal : 1
7a22a4e4 labeled as healthy_metal : 1
7a34f0b8 labeled as healthy_metal : 1
7a2b0f58 labeled as healthy_metal : 1
7a2a306a labeled as healthy_metal : 1
7a312b86 labeled as healthy_metal : 1
7a3cf3bc labeled as healthy_metal : 1
7a2e0f46 labeled as healthy_metal : 1
7a375510 labeled as healthy_metal : 1
7a40242e labeled as healthy_metal : 1
7a1d27ee labeled as healthy_metal : 1
7a262902 labeled as healthy_metal : 1
7a355b34 labeled as healthy_metal : 1
7a3e47da labeled as healthy_metal : 1
7a41ca7c labeled as healthy_metal : 1
7a1ed26a labeled as healthy_metal : 1
7a2476c0 lab

7a385da2 labeled as healthy_metal : 1
7a3e19ea labeled as healthy_metal : 1
7a29fa82 labeled as healthy_metal : 1
7a34bed6 labeled as healthy_metal : 1
7a30dc44 labeled as healthy_metal : 1
7a266e1c labeled as healthy_metal : 1
7a26c24a labeled as healthy_metal : 1
7a2e076c labeled as healthy_metal : 1
7a3e1094 labeled as healthy_metal : 1
7a247cb0 labeled as healthy_metal : 1
7a218f3c labeled as healthy_metal : 1
7a41222a labeled as healthy_metal : 1
7a397ae8 labeled as healthy_metal : 1
7a2cafe8 labeled as healthy_metal : 1
7a32e2d2 labeled as healthy_metal : 1
7a236492 labeled as healthy_metal : 1
7a3d4574 labeled as healthy_metal : 1
7a201710 labeled as healthy_metal : 1
7a1e3526 labeled as healthy_metal : 1
7a390cd4 labeled as healthy_metal : 1
7a2c5fa2 labeled as healthy_metal : 1
7a1ec2ac labeled as healthy_metal : 1
7a30c07e labeled as healthy_metal : 1
7a40c028 labeled as healthy_metal : 1
7a3510fc labeled as healthy_metal : 1
7a326898 labeled as healthy_metal : 1
7a323954 lab

7a2dff92 labeled as healthy_metal : 1
7a3505d0 labeled as healthy_metal : 1
7a3828b4 labeled as healthy_metal : 1
7a42542e labeled as healthy_metal : 1
7a3d42f4 labeled as healthy_metal : 1
7a40b916 labeled as healthy_metal : 1
7a237efa labeled as healthy_metal : 1
7a40e0ee labeled as healthy_metal : 1
7a31b48e labeled as healthy_metal : 1
7a236e9c labeled as healthy_metal : 1
7a2e12a2 labeled as healthy_metal : 1
7a2dd7a6 labeled as healthy_metal : 1
7a301cbe labeled as healthy_metal : 1
7a3b2b7c labeled as healthy_metal : 1
7a36ffde labeled as healthy_metal : 1
7a354aa4 labeled as healthy_metal : 1
7a39fbc6 labeled as healthy_metal : 1
7a376e88 labeled as healthy_metal : 1
7a25d196 labeled as healthy_metal : 1
7a3d6e00 labeled as healthy_metal : 1
7a36422e labeled as healthy_metal : 1
7a2d9c3c labeled as healthy_metal : 1
7a2212c2 labeled as healthy_metal : 1
7a2ec4b8 labeled as healthy_metal : 1
7a2cd914 labeled as healthy_metal : 1
7a1cf6d4 labeled as healthy_metal : 1
7a413a94 lab

7a210a8a labeled as healthy_metal : 1
7a2412c0 labeled as healthy_metal : 1
7a229cc4 labeled as healthy_metal : 1
7a31b84e labeled as healthy_metal : 1
7a3b08f4 labeled as healthy_metal : 1
7a261a20 labeled as healthy_metal : 1
7a25f4dc labeled as healthy_metal : 1
7a1ccf9c labeled as healthy_metal : 1
7a2a642c labeled as healthy_metal : 1
7a25f40a labeled as healthy_metal : 1
7a3830a2 labeled as healthy_metal : 1
7a2325cc labeled as healthy_metal : 1
7a229436 labeled as healthy_metal : 1
7a42993e labeled as healthy_metal : 1
7a282450 labeled as healthy_metal : 1
7a2081aa labeled as healthy_metal : 1
7a3e2b1a labeled as healthy_metal : 1
7a24cb0c labeled as healthy_metal : 1
7a35b70a labeled as healthy_metal : 1
7a3ae9fa labeled as healthy_metal : 1
7a1d6e20 labeled as healthy_metal : 1
7a2c340a labeled as healthy_metal : 1
7a266ab6 labeled as healthy_metal : 1
7a417a4a labeled as healthy_metal : 1
7a23de86 labeled as healthy_metal : 1
7a1ed986 labeled as healthy_metal : 1
7a323670 lab

7a3d8548 labeled as healthy_metal : 1
7a37e4e4 labeled as healthy_metal : 1
7a2fb472 labeled as healthy_metal : 1
7a25a644 labeled as healthy_metal : 1
7a27ab56 labeled as healthy_metal : 1
7a439852 labeled as healthy_metal : 1
7a1d03ea labeled as healthy_metal : 1
7a2f1076 labeled as healthy_metal : 1
7a3c2f54 labeled as healthy_metal : 1
7a31cc30 labeled as healthy_metal : 1
7a41a48e labeled as healthy_metal : 1
7a280e0c labeled as healthy_metal : 1
7a2afe14 labeled as healthy_metal : 1
7a1efee8 labeled as healthy_metal : 1
7a2926ac labeled as healthy_metal : 1
7a2fc2d2 labeled as healthy_metal : 1
7a20c714 labeled as healthy_metal : 1
7a216566 labeled as healthy_metal : 1
7a3044b4 labeled as healthy_metal : 1
7a2e9678 labeled as healthy_metal : 1
7a3d6c02 labeled as healthy_metal : 1
7a3e921c labeled as healthy_metal : 1
7a33ffd2 labeled as healthy_metal : 1
7a206562 labeled as healthy_metal : 1
7a328512 labeled as healthy_metal : 1
7a43fd10 labeled as healthy_metal : 1
7a3db0f4 lab

7a2bc61e labeled as healthy_metal : 1
7a43c566 labeled as healthy_metal : 1
7a440a1c labeled as healthy_metal : 1
7a420db6 labeled as healthy_metal : 1
7a37c4c8 labeled as healthy_metal : 1
7a3afe5e labeled as healthy_metal : 1
7a2e0988 labeled as healthy_metal : 1
7a4425a6 labeled as healthy_metal : 1
7a2b201a labeled as healthy_metal : 1
7a3c944e labeled as healthy_metal : 1
7a281834 labeled as healthy_metal : 1
7a317910 labeled as healthy_metal : 1
7a323a94 labeled as healthy_metal : 1
7a3d3fac labeled as healthy_metal : 1
7a286258 labeled as healthy_metal : 1
7a42ea06 labeled as healthy_metal : 1
7a309e64 labeled as healthy_metal : 1
7a2253c2 labeled as healthy_metal : 1
7a3bc50a labeled as healthy_metal : 1
7a2856a0 labeled as healthy_metal : 1
7a3ff706 labeled as healthy_metal : 1
7a1fdbba labeled as healthy_metal : 1
7a1ca5c6 labeled as healthy_metal : 1
7a3bc0c8 labeled as healthy_metal : 1
7a247878 labeled as healthy_metal : 1
7a1dd5cc labeled as healthy_metal : 1
7a297dd2 lab

7a269a2c labeled as healthy_metal : 1
7a362078 labeled as healthy_metal : 1
7a449f5e labeled as healthy_metal : 1
7a34741c labeled as healthy_metal : 1
7a272cf8 labeled as healthy_metal : 1
7a2a35ec labeled as healthy_metal : 1
7a2ef820 labeled as healthy_metal : 1
7a43c2e6 labeled as healthy_metal : 1
7a292a44 labeled as healthy_metal : 1
7a3e8484 labeled as healthy_metal : 1
7a1e9822 labeled as healthy_metal : 1
7a368630 labeled as healthy_metal : 1
7a2e4efc labeled as healthy_metal : 1
7a3529b6 labeled as healthy_metal : 1
7a1f3d0e labeled as healthy_metal : 1
7a2d1a32 labeled as healthy_metal : 1
7a301c50 labeled as healthy_metal : 1
7a3ea36a labeled as healthy_metal : 1
7a2bfa30 labeled as healthy_metal : 1
7a2a3362 labeled as healthy_metal : 1
7a1d29ba labeled as healthy_metal : 1
7a1e27b6 labeled as healthy_metal : 1
7a4014f2 labeled as healthy_metal : 1
7a25cdc2 labeled as healthy_metal : 1
7a44711e labeled as healthy_metal : 1
7a34a568 labeled as healthy_metal : 1
7a2fbb66 lab

7a2d048e labeled as incomplete : 2
7a3e3920 labeled as incomplete : 2
7a403dc4 labeled as incomplete : 2
7a20af40 labeled as incomplete : 2
7a21b9b2 labeled as incomplete : 2
7a208218 labeled as incomplete : 2
7a25db96 labeled as incomplete : 2
7a315066 labeled as incomplete : 2
7a2e9b00 labeled as incomplete : 2
7a2a09e6 labeled as incomplete : 2
7a1fb248 labeled as incomplete : 2
7a3cfb96 labeled as incomplete : 2
7a39babc labeled as incomplete : 2
7a1c74fc labeled as incomplete : 2
7a2c69e8 labeled as incomplete : 2
7a43ac3e labeled as incomplete : 2
7a3c78a6 labeled as incomplete : 2
7a3c42d2 labeled as incomplete : 2
7a40ea6c labeled as incomplete : 2
7a203e34 labeled as incomplete : 2
7a244c22 labeled as incomplete : 2
7a2ea91a labeled as incomplete : 2
7a3a27ae labeled as incomplete : 2
7a3a9658 labeled as incomplete : 2
7a395aae labeled as incomplete : 2
7a31fc1e labeled as incomplete : 2
7a3a21f0 labeled as incomplete : 2
7a1e5c0e labeled as incomplete : 2
7a2051f8 labeled as 

In [None]:
# Preview the first rows of the training feature table.
features_train.head()

In [None]:
import pickle
from os import makedirs
from os.path import exists  # not needed below anymore; kept in case other cells rely on it

# Folder (relative to the notebook) that receives the pickled feature tables.
pickle_path = join('..', '..', 'pickles')

# exist_ok=True creates the folder when it is missing and does nothing
# otherwise, which avoids the separate check-then-create step.
makedirs(pickle_path, exist_ok=True)

# Save the training features of the selected region.
train_pickle_fp = join(pickle_path, 'inceptionv3_features_' + region + '_train.pkl')
with open(train_pickle_fp, 'wb') as f:
    pickle.dump(features_train, f)

### Visualize the features

Reduce the dimensionality to 2D using tSNE (https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding)

In [None]:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import matplotlib
from sklearn.decomposition import PCA
%matplotlib inline

In [None]:
def plot_tSNE(features, labels=None, number_of_materials=5, random_state=0):
    """Embed feature vectors in 2D with t-SNE and show them as a scatter plot.

    Parameters
    ----------
    features : array-like with a ``shape`` attribute
        Feature matrix with one row per sample.
    labels : array-like, optional
        One integer class label per sample, used to color the points. If
        omitted, all points get label 0.
    number_of_materials : int, optional
        Number of distinct classes; determines the color bins. Defaults to 5.
    random_state : int or None, optional
        Seed passed to t-SNE so that repeated calls give the same embedding.
        Pass None for a different embedding on every call.

    Returns
    -------
    None
        The shape of the embedding is printed and the figure is drawn with
        matplotlib.
    """
    if labels is None:
        labels = np.zeros((features.shape[0]))

    # t-SNE is stochastic; seeding it keeps the figure reproducible
    tsne_features = TSNE(n_components=2,
                         random_state=random_state).fit_transform(features)

    # Build a colormap from all colors of the jet map
    base_cmap = plt.cm.jet
    cmaplist = [base_cmap(i) for i in range(base_cmap.N)]
    cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
        'Custom cmap', cmaplist, base_cmap.N)

    # One color bin per class: label k falls into the bin [k, k + 1)
    bounds = np.linspace(0, number_of_materials, number_of_materials + 1)
    norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N)

    print(tsne_features.shape)

    plt.figure()
    scat = plt.scatter(tsne_features[:, 0], tsne_features[:, 1],
                       c=labels, cmap=cmap, norm=norm)
    plt.xlabel('t-SNE dimension 1')
    plt.ylabel('t-SNE dimension 2')
    cb = plt.colorbar(scat, spacing='proportional', ticks=bounds)
    cb.set_label('label')

Convert features within the dataframe to a single matrix

In [None]:
# Stack the per-image feature vectors row-wise: one row per image,
# one column per feature dimension.
feature_vectors = list(features_train['features'])
feat_matrix = np.stack(feature_vectors, axis=0)

# Class labels in the same row order as `feat_matrix`.
labels = features_train['label'].to_numpy()

In [None]:
# Plot the 2D t-SNE embedding of the training features, colored by label.
plot_tSNE(feat_matrix, labels)

## 2. Calculate features on the test data


This procedure is similar to the one used for the training data, except that we do not know the labels and the folder with the test images is not subdivided by material.

In [None]:
def calculate_features_test(test_dir, target_size=(224, 224), verbose=True):
    """Compute network features for every (unlabeled) test image.

    Every file found below ``test_dir`` (including nested sub-folders) is
    treated as an image. Feature extraction uses the module-level ``model``,
    ``image`` and ``preprocess_input`` objects.

    Parameters
    ----------
    test_dir : str
        Folder holding the test images.
    target_size : tuple of int, optional
        ``(height, width)`` every image is resized to while loading.
        Defaults to ``(224, 224)``.
    verbose : bool, optional
        If True (default), print one line per processed image.

    Returns
    -------
    pandas.DataFrame
        One row per image with the columns ``id`` (file name up to the first
        dot) and ``features`` (flattened 1-D array of the model output). The
        columns are present even when no image was found.
    """
    columns = ['id', 'features']
    records = []

    # Walk through all images
    for root, _dirs, files in os.walk(test_dir):
        for file in files:
            img_fp = join(root, file)
            img_id = file.split('.')[0]
            if verbose:
                print("Calculate features for", img_id)

            # Load the image resized to `target_size`, add the batch axis
            # and apply the network-specific preprocessing
            img = image.load_img(img_fp, target_size=target_size)
            img_data = image.img_to_array(img)
            img_data = np.expand_dims(img_data, axis=0)
            img_data = preprocess_input(img_data)

            # Compute features and flatten them to a 1-D vector
            features = model.predict(img_data)
            features_np = np.array(features).flatten()

            # Collect plain dicts instead of calling DataFrame.append per
            # row: append copies the whole frame each time and no longer
            # exists in pandas 2.0 and later.
            records.append({'id': img_id,
                            'features': features_np})

    return pd.DataFrame(records, columns=columns)

In [None]:
# Extract features for all test images of the selected region.
features_test = calculate_features_test(test_dir)

In [None]:
# Preview the first rows of the test feature table.
features_test.head()

In [None]:
# Save the test features next to the training features.
test_pickle_fp = join(pickle_path, 'inceptionv3_features_' + region + '_test.pkl')
with open(test_pickle_fp, 'wb') as out_file:
    pickle.dump(features_test, out_file)