# Calculating features

In [1]:
from keras.preprocessing import image
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input

import numpy as np
import pandas as pd
from os.path import join
import os

Using TensorFlow backend.


In [2]:
# Region names. NOTE(review): not referenced by any other cell visible in this
# part of the notebook — TODO confirm whether this list is still needed.
regions = [
    'borde_rural',
    'borde_soacha',
    'mixco_1_and_ebenezer',
    'mixco_3',
]

In [3]:
# Region whose roof images are processed in this run.
# NOTE(review): the saved output of `print(train_dir)` further down shows
# 'borde_soacha', so the stored cell outputs may stem from a different region.
region = 'dennery'

# Training and test images live in sibling folders below the region folder.
train_dir, test_dir = (
    join('..', '..', 'data', 'data2', region, split)
    for split in ('roofs_train', 'roofs_test')
)

# Material folder name -> integer class label (labels follow the listing order).
materials = {
    name: label
    for label, name in enumerate(
        ['concrete_cement', 'healthy_metal', 'incomplete', 'irregular_metal', 'other']
    )
}

Use a pretrained network to compute features from the images, which are later classified. Import the model from the Keras library and remove the top layer by setting `include_top=False`. The output of the last layer is pooled so that we get outputs of size 1x2048 instead of 7x7x2048. The pooling method `'max'` was chosen empirically.

In [5]:
# ResNet50 with ImageNet weights, used as a fixed feature extractor:
# include_top=False leaves out the classification layer and pooling='max'
# applies global max pooling to the output of the last convolutional block.
model = ResNet50(
    weights='imagenet',
    include_top=False,
    pooling='max',
)

# Print the layer-by-layer architecture.
model.summary()













Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, None, 3 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, None, None, 3 0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, None, None, 6 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, None, None

## 1. Calculate features on the training data

The preprocessed data is stored such that every region has its own folder, which contains the subfolder defined as `train_dir` above. This folder is in turn subdivided by material. Images carry their `id` as filename.

Traverse all images in their corresponding folders which indicate the material and use `model.predict()` to get the features.

In [6]:
def calculate_features_train(train_dir, verbose=True):
    """Compute ResNet50 features for every labelled training image.

    For each key of the module-level ``materials`` dict, the subfolder of that
    name inside ``train_dir`` is walked and every file found below it is treated
    as an image of that material. Inference uses the module-level ``model``.

    Parameters
    ----------
    train_dir : str
        Folder that contains one subfolder per material.
    verbose : bool, optional
        If True (default), print one line per processed image.

    Returns
    -------
    pandas.DataFrame
        One row per image with the columns ``id`` (file name up to its first
        dot), ``features`` (flattened 1-D array returned by ``model.predict``)
        and ``label`` (integer looked up in ``materials``). The three columns
        are present even when no image was found.
    """
    columns = ['id', 'features', 'label']
    records = []

    # Walk through all images, material by material
    for material, label in materials.items():
        material_fp = join(train_dir, material)
        for root, _dirs, files in os.walk(material_fp):
            for filename in files:
                # Join with `root`, not `material_fp`: os.walk also yields the
                # files of nested subfolders, whose path would otherwise be wrong.
                img_fp = join(root, filename)
                img_id = filename.split('.')[0]
                if verbose:
                    print(img_id, "labeled as", material, ":", label)

                # load_img resizes (it does not pad) to 224x224; afterwards add
                # the batch axis and apply the ResNet50 input preprocessing.
                img = image.load_img(img_fp, target_size=(224, 224))
                img_data = image.img_to_array(img)
                img_data = np.expand_dims(img_data, axis=0)
                img_data = preprocess_input(img_data)

                # Compute features and flatten the (1, n) prediction to 1-D
                features = model.predict(img_data)
                records.append({'id': img_id,
                                'features': np.array(features).flatten(),
                                'label': label})

    # Build the frame once from all records instead of appending row by row.
    return pd.DataFrame(records, columns=columns)

The results are stored in a dataframe as shown below and saved to disk as a pickle file.

In [7]:
# Show which training folder is about to be processed.
print(train_dir)

../../data/data2/borde_soacha/roofs_train


In [8]:
# Run the feature extraction over the whole training folder; this prints one
# line per image, so the output of this cell is long.
features_train = calculate_features_train(train_dir)

7a20f7ac labeled as concrete_cement : 0
7a38b298 labeled as concrete_cement : 0
7a3a662e labeled as concrete_cement : 0
7a3d7d28 labeled as concrete_cement : 0
7a397142 labeled as concrete_cement : 0
7a382b34 labeled as concrete_cement : 0
7a38f03c labeled as concrete_cement : 0
7a238a3a labeled as concrete_cement : 0
7a2888fa labeled as concrete_cement : 0
7a3709c0 labeled as concrete_cement : 0
7a1d5caa labeled as concrete_cement : 0
7a1d085e labeled as concrete_cement : 0
7a2eb22a labeled as concrete_cement : 0
7a23f1dc labeled as concrete_cement : 0
7a3c5e66 labeled as concrete_cement : 0
7a2ac9b2 labeled as concrete_cement : 0
7a2bc9de labeled as concrete_cement : 0
7a2ad164 labeled as concrete_cement : 0
7a2792a6 labeled as concrete_cement : 0
7a2eea10 labeled as concrete_cement : 0
7a372f54 labeled as concrete_cement : 0
7a2ae12c labeled as concrete_cement : 0
7a3b557a labeled as concrete_cement : 0
7a3b9904 labeled as concrete_cement : 0
7a1d64a2 labeled as concrete_cement : 0


7a1e9192 labeled as concrete_cement : 0
7a2bac6a labeled as concrete_cement : 0
7a449e5a labeled as concrete_cement : 0
7a239a66 labeled as concrete_cement : 0
7a1e7158 labeled as concrete_cement : 0
7a25838a labeled as concrete_cement : 0
7a2bb426 labeled as concrete_cement : 0
7a2abc10 labeled as concrete_cement : 0
7a24b78e labeled as concrete_cement : 0
7a1ca080 labeled as concrete_cement : 0
7a200b44 labeled as concrete_cement : 0
7a21ce8e labeled as concrete_cement : 0
7a2f63be labeled as concrete_cement : 0
7a4072d0 labeled as concrete_cement : 0
7a4097f6 labeled as concrete_cement : 0
7a1c7646 labeled as concrete_cement : 0
7a231956 labeled as concrete_cement : 0
7a2035ce labeled as concrete_cement : 0
7a310d72 labeled as concrete_cement : 0
7a285128 labeled as concrete_cement : 0
7a1cd0a0 labeled as concrete_cement : 0
7a40aa70 labeled as concrete_cement : 0
7a319440 labeled as concrete_cement : 0
7a36181c labeled as concrete_cement : 0
7a44a2ba labeled as concrete_cement : 0


7a2aa810 labeled as healthy_metal : 1
7a38cb48 labeled as healthy_metal : 1
7a365e44 labeled as healthy_metal : 1
7a31ef58 labeled as healthy_metal : 1
7a219f4a labeled as healthy_metal : 1
7a44abfc labeled as healthy_metal : 1
7a41660e labeled as healthy_metal : 1
7a44493c labeled as healthy_metal : 1
7a1fb7c0 labeled as healthy_metal : 1
7a3132e8 labeled as healthy_metal : 1
7a3c13a2 labeled as healthy_metal : 1
7a2cc6ae labeled as healthy_metal : 1
7a1e5d6c labeled as healthy_metal : 1
7a3f4ec8 labeled as healthy_metal : 1
7a1cf40e labeled as healthy_metal : 1
7a2f4564 labeled as healthy_metal : 1
7a354d88 labeled as healthy_metal : 1
7a4256b8 labeled as healthy_metal : 1
7a1ef060 labeled as healthy_metal : 1
7a39ed7a labeled as healthy_metal : 1
7a326186 labeled as healthy_metal : 1
7a34d6be labeled as healthy_metal : 1
7a282a54 labeled as healthy_metal : 1
7a1f7c74 labeled as healthy_metal : 1
7a39eca8 labeled as healthy_metal : 1
7a1e8ab2 labeled as healthy_metal : 1
7a3cd0f8 lab

7a2be7de labeled as healthy_metal : 1
7a3a03c8 labeled as healthy_metal : 1
7a3a149e labeled as healthy_metal : 1
7a396666 labeled as healthy_metal : 1
7a2ad600 labeled as healthy_metal : 1
7a2f6e18 labeled as healthy_metal : 1
7a288b16 labeled as healthy_metal : 1
7a378ac6 labeled as healthy_metal : 1
7a22ebd4 labeled as healthy_metal : 1
7a2632b2 labeled as healthy_metal : 1
7a2de89a labeled as healthy_metal : 1
7a321118 labeled as healthy_metal : 1
7a241040 labeled as healthy_metal : 1
7a3015b6 labeled as healthy_metal : 1
7a34c43a labeled as healthy_metal : 1
7a3de07e labeled as healthy_metal : 1
7a32badc labeled as healthy_metal : 1
7a3226ee labeled as healthy_metal : 1
7a3e764c labeled as healthy_metal : 1
7a3dfeba labeled as healthy_metal : 1
7a273360 labeled as healthy_metal : 1
7a37a93e labeled as healthy_metal : 1
7a3a2d30 labeled as healthy_metal : 1
7a3956b2 labeled as healthy_metal : 1
7a3d622a labeled as healthy_metal : 1
7a2672c2 labeled as healthy_metal : 1
7a3d6158 lab

7a321dc0 labeled as healthy_metal : 1
7a42bbb2 labeled as healthy_metal : 1
7a40f4bc labeled as healthy_metal : 1
7a2e3fd4 labeled as healthy_metal : 1
7a32240a labeled as healthy_metal : 1
7a350a62 labeled as healthy_metal : 1
7a42749a labeled as healthy_metal : 1
7a4281a6 labeled as healthy_metal : 1
7a1eaa92 labeled as healthy_metal : 1
7a242b84 labeled as healthy_metal : 1
7a25c034 labeled as healthy_metal : 1
7a26c90c labeled as healthy_metal : 1
7a3936dc labeled as healthy_metal : 1
7a22dd10 labeled as healthy_metal : 1
7a2dfde4 labeled as healthy_metal : 1
7a3a1886 labeled as healthy_metal : 1
7a2fccbe labeled as healthy_metal : 1
7a26f31e labeled as healthy_metal : 1
7a36ada4 labeled as healthy_metal : 1
7a27e4f4 labeled as healthy_metal : 1
7a229710 labeled as healthy_metal : 1
7a271808 labeled as healthy_metal : 1
7a352cfe labeled as healthy_metal : 1
7a34103a labeled as healthy_metal : 1
7a3fb598 labeled as healthy_metal : 1
7a41fcfe labeled as healthy_metal : 1
7a32f0b0 lab

7a28914c labeled as healthy_metal : 1
7a36ae08 labeled as healthy_metal : 1
7a354036 labeled as healthy_metal : 1
7a307ec0 labeled as healthy_metal : 1
7a336cac labeled as healthy_metal : 1
7a411b7c labeled as healthy_metal : 1
7a228842 labeled as healthy_metal : 1
7a26648a labeled as healthy_metal : 1
7a3106d8 labeled as healthy_metal : 1
7a2fa09a labeled as healthy_metal : 1
7a3db022 labeled as healthy_metal : 1
7a324e58 labeled as healthy_metal : 1
7a36b006 labeled as healthy_metal : 1
7a215d3c labeled as healthy_metal : 1
7a3abe62 labeled as healthy_metal : 1
7a2c903a labeled as healthy_metal : 1
7a3948d4 labeled as healthy_metal : 1
7a25249e labeled as healthy_metal : 1
7a42208a labeled as healthy_metal : 1
7a215c60 labeled as healthy_metal : 1
7a266714 labeled as healthy_metal : 1
7a3a6926 labeled as healthy_metal : 1
7a31d338 labeled as healthy_metal : 1
7a416a28 labeled as healthy_metal : 1
7a33ce18 labeled as healthy_metal : 1
7a34eb5e labeled as healthy_metal : 1
7a23a5ce lab

7a37c8f6 labeled as healthy_metal : 1
7a1f5c58 labeled as healthy_metal : 1
7a279bde labeled as healthy_metal : 1
7a363f54 labeled as healthy_metal : 1
7a23bb2c labeled as healthy_metal : 1
7a360c1e labeled as healthy_metal : 1
7a25951e labeled as healthy_metal : 1
7a1f8516 labeled as healthy_metal : 1
7a334ae2 labeled as healthy_metal : 1
7a217dee labeled as healthy_metal : 1
7a39550e labeled as healthy_metal : 1
7a22963e labeled as healthy_metal : 1
7a413cb0 labeled as healthy_metal : 1
7a32877e labeled as healthy_metal : 1
7a3cb3d4 labeled as healthy_metal : 1
7a28d6fc labeled as healthy_metal : 1
7a1ebc1c labeled as healthy_metal : 1
7a221326 labeled as healthy_metal : 1
7a20d27c labeled as healthy_metal : 1
7a40bfba labeled as healthy_metal : 1
7a2de08e labeled as healthy_metal : 1
7a1d2eba labeled as healthy_metal : 1
7a40d536 labeled as healthy_metal : 1
7a1dba7e labeled as healthy_metal : 1
7a353758 labeled as healthy_metal : 1
7a2d0704 labeled as healthy_metal : 1
7a311416 lab

7a3378e6 labeled as healthy_metal : 1
7a399c94 labeled as healthy_metal : 1
7a415ccc labeled as healthy_metal : 1
7a3b8702 labeled as healthy_metal : 1
7a42bd6a labeled as healthy_metal : 1
7a25d4f2 labeled as healthy_metal : 1
7a424254 labeled as healthy_metal : 1
7a3edbf0 labeled as healthy_metal : 1
7a3e497e labeled as healthy_metal : 1
7a42a50a labeled as healthy_metal : 1
7a41e886 labeled as healthy_metal : 1
7a1e466a labeled as healthy_metal : 1
7a440aee labeled as healthy_metal : 1
7a3f2d4e labeled as healthy_metal : 1
7a221a24 labeled as healthy_metal : 1
7a3e80c4 labeled as healthy_metal : 1
7a41c4b4 labeled as healthy_metal : 1
7a2de30e labeled as healthy_metal : 1
7a3395c4 labeled as healthy_metal : 1
7a1f6004 labeled as healthy_metal : 1
7a29505a labeled as healthy_metal : 1
7a27495e labeled as healthy_metal : 1
7a3e568a labeled as healthy_metal : 1
7a1ee4b2 labeled as healthy_metal : 1
7a25135a labeled as healthy_metal : 1
7a36200a labeled as healthy_metal : 1
7a3d9ff6 lab

7a24ba90 labeled as healthy_metal : 1
7a1d9ac6 labeled as healthy_metal : 1
7a31fa7a labeled as healthy_metal : 1
7a41293c labeled as healthy_metal : 1
7a3ce67e labeled as healthy_metal : 1
7a31e6fc labeled as healthy_metal : 1
7a3df974 labeled as healthy_metal : 1
7a327cde labeled as healthy_metal : 1
7a1e2a2c labeled as healthy_metal : 1
7a3f8c44 labeled as healthy_metal : 1
7a3ebd5a labeled as healthy_metal : 1
7a2d6456 labeled as healthy_metal : 1
7a27ae3a labeled as healthy_metal : 1
7a367546 labeled as healthy_metal : 1
7a3cb668 labeled as healthy_metal : 1
7a290d84 labeled as healthy_metal : 1
7a42794a labeled as healthy_metal : 1
7a37d59e labeled as healthy_metal : 1
7a1de9ea labeled as healthy_metal : 1
7a1e7d42 labeled as healthy_metal : 1
7a222c6c labeled as healthy_metal : 1
7a338b38 labeled as healthy_metal : 1
7a337cf6 labeled as healthy_metal : 1
7a2adccc labeled as healthy_metal : 1
7a208c22 labeled as healthy_metal : 1
7a30c498 labeled as healthy_metal : 1
7a29959c lab

7a395f4a labeled as healthy_metal : 1
7a37964c labeled as healthy_metal : 1
7a37796e labeled as healthy_metal : 1
7a2fd13c labeled as healthy_metal : 1
7a31cf14 labeled as healthy_metal : 1
7a1f5046 labeled as healthy_metal : 1
7a2b817c labeled as healthy_metal : 1
7a37059c labeled as healthy_metal : 1
7a3b2398 labeled as healthy_metal : 1
7a2bd80c labeled as healthy_metal : 1
7a3ca434 labeled as healthy_metal : 1
7a22a4e4 labeled as healthy_metal : 1
7a34f0b8 labeled as healthy_metal : 1
7a2b0f58 labeled as healthy_metal : 1
7a2a306a labeled as healthy_metal : 1
7a312b86 labeled as healthy_metal : 1
7a3cf3bc labeled as healthy_metal : 1
7a2e0f46 labeled as healthy_metal : 1
7a375510 labeled as healthy_metal : 1
7a40242e labeled as healthy_metal : 1
7a1d27ee labeled as healthy_metal : 1
7a262902 labeled as healthy_metal : 1
7a355b34 labeled as healthy_metal : 1
7a3e47da labeled as healthy_metal : 1
7a41ca7c labeled as healthy_metal : 1
7a1ed26a labeled as healthy_metal : 1
7a2476c0 lab

7a3e19ea labeled as healthy_metal : 1
7a29fa82 labeled as healthy_metal : 1
7a34bed6 labeled as healthy_metal : 1
7a30dc44 labeled as healthy_metal : 1
7a266e1c labeled as healthy_metal : 1
7a26c24a labeled as healthy_metal : 1
7a2e076c labeled as healthy_metal : 1
7a3e1094 labeled as healthy_metal : 1
7a247cb0 labeled as healthy_metal : 1
7a218f3c labeled as healthy_metal : 1
7a41222a labeled as healthy_metal : 1
7a397ae8 labeled as healthy_metal : 1
7a2cafe8 labeled as healthy_metal : 1
7a32e2d2 labeled as healthy_metal : 1
7a236492 labeled as healthy_metal : 1
7a3d4574 labeled as healthy_metal : 1
7a201710 labeled as healthy_metal : 1
7a1e3526 labeled as healthy_metal : 1
7a390cd4 labeled as healthy_metal : 1
7a2c5fa2 labeled as healthy_metal : 1
7a1ec2ac labeled as healthy_metal : 1
7a30c07e labeled as healthy_metal : 1
7a40c028 labeled as healthy_metal : 1
7a3510fc labeled as healthy_metal : 1
7a326898 labeled as healthy_metal : 1
7a323954 labeled as healthy_metal : 1
7a3db6b2 lab

7a3505d0 labeled as healthy_metal : 1
7a3828b4 labeled as healthy_metal : 1
7a42542e labeled as healthy_metal : 1
7a3d42f4 labeled as healthy_metal : 1
7a40b916 labeled as healthy_metal : 1
7a237efa labeled as healthy_metal : 1
7a40e0ee labeled as healthy_metal : 1
7a31b48e labeled as healthy_metal : 1
7a236e9c labeled as healthy_metal : 1
7a2e12a2 labeled as healthy_metal : 1
7a2dd7a6 labeled as healthy_metal : 1
7a301cbe labeled as healthy_metal : 1
7a3b2b7c labeled as healthy_metal : 1
7a36ffde labeled as healthy_metal : 1
7a354aa4 labeled as healthy_metal : 1
7a39fbc6 labeled as healthy_metal : 1
7a376e88 labeled as healthy_metal : 1
7a25d196 labeled as healthy_metal : 1
7a3d6e00 labeled as healthy_metal : 1
7a36422e labeled as healthy_metal : 1
7a2d9c3c labeled as healthy_metal : 1
7a2212c2 labeled as healthy_metal : 1
7a2ec4b8 labeled as healthy_metal : 1
7a2cd914 labeled as healthy_metal : 1
7a1cf6d4 labeled as healthy_metal : 1
7a413a94 labeled as healthy_metal : 1
7a3d74fe lab

7a2412c0 labeled as healthy_metal : 1
7a229cc4 labeled as healthy_metal : 1
7a31b84e labeled as healthy_metal : 1
7a3b08f4 labeled as healthy_metal : 1
7a261a20 labeled as healthy_metal : 1
7a25f4dc labeled as healthy_metal : 1
7a1ccf9c labeled as healthy_metal : 1
7a2a642c labeled as healthy_metal : 1
7a25f40a labeled as healthy_metal : 1
7a3830a2 labeled as healthy_metal : 1
7a2325cc labeled as healthy_metal : 1
7a229436 labeled as healthy_metal : 1
7a42993e labeled as healthy_metal : 1
7a282450 labeled as healthy_metal : 1
7a2081aa labeled as healthy_metal : 1
7a3e2b1a labeled as healthy_metal : 1
7a24cb0c labeled as healthy_metal : 1
7a35b70a labeled as healthy_metal : 1
7a3ae9fa labeled as healthy_metal : 1
7a1d6e20 labeled as healthy_metal : 1
7a2c340a labeled as healthy_metal : 1
7a266ab6 labeled as healthy_metal : 1
7a417a4a labeled as healthy_metal : 1
7a23de86 labeled as healthy_metal : 1
7a1ed986 labeled as healthy_metal : 1
7a323670 labeled as healthy_metal : 1
7a325e3e lab

7a37e4e4 labeled as healthy_metal : 1
7a2fb472 labeled as healthy_metal : 1
7a25a644 labeled as healthy_metal : 1
7a27ab56 labeled as healthy_metal : 1
7a439852 labeled as healthy_metal : 1
7a1d03ea labeled as healthy_metal : 1
7a2f1076 labeled as healthy_metal : 1
7a3c2f54 labeled as healthy_metal : 1
7a31cc30 labeled as healthy_metal : 1
7a41a48e labeled as healthy_metal : 1
7a280e0c labeled as healthy_metal : 1
7a2afe14 labeled as healthy_metal : 1
7a1efee8 labeled as healthy_metal : 1
7a2926ac labeled as healthy_metal : 1
7a2fc2d2 labeled as healthy_metal : 1
7a20c714 labeled as healthy_metal : 1
7a216566 labeled as healthy_metal : 1
7a3044b4 labeled as healthy_metal : 1
7a2e9678 labeled as healthy_metal : 1
7a3d6c02 labeled as healthy_metal : 1
7a3e921c labeled as healthy_metal : 1
7a33ffd2 labeled as healthy_metal : 1
7a206562 labeled as healthy_metal : 1
7a328512 labeled as healthy_metal : 1
7a43fd10 labeled as healthy_metal : 1
7a3db0f4 labeled as healthy_metal : 1
7a2ca944 lab

7a440a1c labeled as healthy_metal : 1
7a420db6 labeled as healthy_metal : 1
7a37c4c8 labeled as healthy_metal : 1
7a3afe5e labeled as healthy_metal : 1
7a2e0988 labeled as healthy_metal : 1
7a4425a6 labeled as healthy_metal : 1
7a2b201a labeled as healthy_metal : 1
7a3c944e labeled as healthy_metal : 1
7a281834 labeled as healthy_metal : 1
7a317910 labeled as healthy_metal : 1
7a323a94 labeled as healthy_metal : 1
7a3d3fac labeled as healthy_metal : 1
7a286258 labeled as healthy_metal : 1
7a42ea06 labeled as healthy_metal : 1
7a309e64 labeled as healthy_metal : 1
7a2253c2 labeled as healthy_metal : 1
7a3bc50a labeled as healthy_metal : 1
7a2856a0 labeled as healthy_metal : 1
7a3ff706 labeled as healthy_metal : 1
7a1fdbba labeled as healthy_metal : 1
7a1ca5c6 labeled as healthy_metal : 1
7a3bc0c8 labeled as healthy_metal : 1
7a247878 labeled as healthy_metal : 1
7a1dd5cc labeled as healthy_metal : 1
7a297dd2 labeled as healthy_metal : 1
7a3802c6 labeled as healthy_metal : 1
7a3a5a26 lab

7a34741c labeled as healthy_metal : 1
7a272cf8 labeled as healthy_metal : 1
7a2a35ec labeled as healthy_metal : 1
7a2ef820 labeled as healthy_metal : 1
7a43c2e6 labeled as healthy_metal : 1
7a292a44 labeled as healthy_metal : 1
7a3e8484 labeled as healthy_metal : 1
7a1e9822 labeled as healthy_metal : 1
7a368630 labeled as healthy_metal : 1
7a2e4efc labeled as healthy_metal : 1
7a3529b6 labeled as healthy_metal : 1
7a1f3d0e labeled as healthy_metal : 1
7a2d1a32 labeled as healthy_metal : 1
7a301c50 labeled as healthy_metal : 1
7a3ea36a labeled as healthy_metal : 1
7a2bfa30 labeled as healthy_metal : 1
7a2a3362 labeled as healthy_metal : 1
7a1d29ba labeled as healthy_metal : 1
7a1e27b6 labeled as healthy_metal : 1
7a4014f2 labeled as healthy_metal : 1
7a25cdc2 labeled as healthy_metal : 1
7a44711e labeled as healthy_metal : 1
7a34a568 labeled as healthy_metal : 1
7a2fbb66 labeled as healthy_metal : 1
7a41e53e labeled as healthy_metal : 1
7a245dde labeled as healthy_metal : 1
7a28c63a lab

7a20af40 labeled as incomplete : 2
7a21b9b2 labeled as incomplete : 2
7a208218 labeled as incomplete : 2
7a25db96 labeled as incomplete : 2
7a315066 labeled as incomplete : 2
7a2e9b00 labeled as incomplete : 2
7a2a09e6 labeled as incomplete : 2
7a1fb248 labeled as incomplete : 2
7a3cfb96 labeled as incomplete : 2
7a39babc labeled as incomplete : 2
7a1c74fc labeled as incomplete : 2
7a2c69e8 labeled as incomplete : 2
7a43ac3e labeled as incomplete : 2
7a3c78a6 labeled as incomplete : 2
7a3c42d2 labeled as incomplete : 2
7a40ea6c labeled as incomplete : 2
7a203e34 labeled as incomplete : 2
7a244c22 labeled as incomplete : 2
7a2ea91a labeled as incomplete : 2
7a3a27ae labeled as incomplete : 2
7a3a9658 labeled as incomplete : 2
7a395aae labeled as incomplete : 2
7a31fc1e labeled as incomplete : 2
7a3a21f0 labeled as incomplete : 2
7a1e5c0e labeled as incomplete : 2
7a2051f8 labeled as incomplete : 2
7a3d09f6 labeled as incomplete : 2
7a363a72 labeled as incomplete : 2
7a3c8cba labeled as 

7a1fd782 labeled as incomplete : 2
7a3b8b30 labeled as incomplete : 2
7a2baf62 labeled as incomplete : 2
7a28766c labeled as incomplete : 2
7a3c4ce6 labeled as incomplete : 2
7a31a75a labeled as incomplete : 2
7a1fc634 labeled as incomplete : 2
7a39e44c labeled as incomplete : 2
7a29a08c labeled as incomplete : 2
7a355710 labeled as incomplete : 2
7a3315a4 labeled as incomplete : 2
7a1eff60 labeled as incomplete : 2
7a202534 labeled as incomplete : 2
7a3d15a4 labeled as incomplete : 2
7a33c062 labeled as incomplete : 2
7a4262e8 labeled as incomplete : 2
7a32b302 labeled as incomplete : 2
7a2a2f98 labeled as incomplete : 2
7a267178 labeled as incomplete : 2
7a2420da labeled as incomplete : 2
7a231bf4 labeled as incomplete : 2
7a3e1850 labeled as incomplete : 2
7a26f10c labeled as incomplete : 2
7a29d17e labeled as incomplete : 2
7a2e9f24 labeled as incomplete : 2
7a207b9c labeled as incomplete : 2
7a365002 labeled as incomplete : 2
7a23a48e labeled as incomplete : 2
7a2a1c24 labeled as 

7a27d158 labeled as irregular_metal : 3
7a3ec5ac labeled as irregular_metal : 3
7a338994 labeled as irregular_metal : 3
7a20225a labeled as irregular_metal : 3
7a3e93ca labeled as irregular_metal : 3
7a32d274 labeled as irregular_metal : 3
7a20cd22 labeled as irregular_metal : 3
7a3ee848 labeled as irregular_metal : 3
7a3459f0 labeled as irregular_metal : 3
7a2e898a labeled as irregular_metal : 3
7a397d72 labeled as irregular_metal : 3
7a33d570 labeled as irregular_metal : 3
7a39679c labeled as irregular_metal : 3
7a3df636 labeled as irregular_metal : 3
7a2200ca labeled as irregular_metal : 3
7a2b09e0 labeled as irregular_metal : 3
7a2ffe32 labeled as irregular_metal : 3
7a2e758a labeled as irregular_metal : 3
7a2a86fa labeled as irregular_metal : 3
7a21e414 labeled as irregular_metal : 3
7a3da71c labeled as irregular_metal : 3
7a3f83ac labeled as irregular_metal : 3
7a21f97c labeled as irregular_metal : 3
7a2a0cd4 labeled as irregular_metal : 3
7a264fc2 labeled as irregular_metal : 3


7a3b6678 labeled as irregular_metal : 3
7a2afebe labeled as irregular_metal : 3
7a38c2a6 labeled as irregular_metal : 3
7a37fb6e labeled as irregular_metal : 3
7a293340 labeled as irregular_metal : 3
7a37f862 labeled as irregular_metal : 3
7a1fbea0 labeled as irregular_metal : 3
7a2d0006 labeled as irregular_metal : 3
7a302ed4 labeled as irregular_metal : 3
7a39a978 labeled as irregular_metal : 3
7a1f5776 labeled as irregular_metal : 3
7a2695e0 labeled as irregular_metal : 3
7a25257a labeled as irregular_metal : 3
7a244286 labeled as irregular_metal : 3
7a34c35e labeled as irregular_metal : 3
7a22ecb0 labeled as irregular_metal : 3
7a29b612 labeled as irregular_metal : 3
7a27f23c labeled as irregular_metal : 3
7a26071a labeled as irregular_metal : 3
7a3be7ba labeled as irregular_metal : 3
7a3123b6 labeled as irregular_metal : 3
7a3845ce labeled as irregular_metal : 3
7a2b8640 labeled as irregular_metal : 3
7a3852f8 labeled as irregular_metal : 3
7a3fe96e labeled as irregular_metal : 3


7a3bdc34 labeled as irregular_metal : 3
7a3212e4 labeled as irregular_metal : 3
7a36415c labeled as irregular_metal : 3
7a243f5c labeled as irregular_metal : 3
7a2dfca4 labeled as irregular_metal : 3
7a291c66 labeled as irregular_metal : 3
7a3fb160 labeled as irregular_metal : 3
7a23998a labeled as irregular_metal : 3
7a25be22 labeled as irregular_metal : 3
7a28dac6 labeled as irregular_metal : 3
7a1f5f28 labeled as irregular_metal : 3
7a3bafd4 labeled as irregular_metal : 3
7a41f6e6 labeled as irregular_metal : 3
7a295d02 labeled as irregular_metal : 3
7a24bf22 labeled as irregular_metal : 3
7a3f1caa labeled as irregular_metal : 3
7a32e05c labeled as irregular_metal : 3
7a1f2cb0 labeled as irregular_metal : 3
7a291f5e labeled as irregular_metal : 3
7a1fc4e0 labeled as irregular_metal : 3
7a433056 labeled as irregular_metal : 3
7a360bba labeled as irregular_metal : 3
7a2d9386 labeled as irregular_metal : 3
7a33c274 labeled as irregular_metal : 3
7a2243b4 labeled as irregular_metal : 3


7a379db8 labeled as irregular_metal : 3
7a270b88 labeled as irregular_metal : 3
7a334600 labeled as irregular_metal : 3
7a30bfa2 labeled as irregular_metal : 3
7a2057a2 labeled as irregular_metal : 3
7a2f1440 labeled as irregular_metal : 3
7a2e2828 labeled as irregular_metal : 3
7a3d212a labeled as irregular_metal : 3
7a2fc3a4 labeled as irregular_metal : 3
7a1e9b74 labeled as irregular_metal : 3
7a350922 labeled as irregular_metal : 3
7a222cd0 labeled as irregular_metal : 3
7a3c3a8a labeled as irregular_metal : 3
7a422936 labeled as irregular_metal : 3
7a2dc7f2 labeled as irregular_metal : 3
7a3e7f98 labeled as irregular_metal : 3
7a240db6 labeled as irregular_metal : 3
7a381fa4 labeled as irregular_metal : 3
7a3995f0 labeled as irregular_metal : 3
7a2fa3e2 labeled as irregular_metal : 3
7a41cae0 labeled as irregular_metal : 3
7a3e8204 labeled as irregular_metal : 3
7a38a622 labeled as irregular_metal : 3
7a2957ee labeled as irregular_metal : 3
7a3d2260 labeled as irregular_metal : 3


7a2bddfc labeled as irregular_metal : 3
7a311b78 labeled as irregular_metal : 3
7a2c375c labeled as irregular_metal : 3
7a29f49c labeled as irregular_metal : 3
7a3918dc labeled as irregular_metal : 3
7a360e9e labeled as irregular_metal : 3
7a3ec39a labeled as irregular_metal : 3
7a391d6e labeled as irregular_metal : 3
7a3ea57c labeled as irregular_metal : 3
7a3ce3fe labeled as irregular_metal : 3
7a21bc78 labeled as irregular_metal : 3
7a336dec labeled as irregular_metal : 3
7a2b07d8 labeled as irregular_metal : 3
7a1e9408 labeled as irregular_metal : 3
7a37c1da labeled as irregular_metal : 3
7a388afc labeled as irregular_metal : 3
7a30bed0 labeled as irregular_metal : 3
7a3ede70 labeled as irregular_metal : 3
7a3bbd08 labeled as irregular_metal : 3
7a361df8 labeled as irregular_metal : 3
7a35a33c labeled as irregular_metal : 3
7a1d868a labeled as irregular_metal : 3
7a233d50 labeled as irregular_metal : 3
7a3d4fa6 labeled as irregular_metal : 3
7a2bd01e labeled as irregular_metal : 3


7a2cbac4 labeled as irregular_metal : 3
7a3f3000 labeled as irregular_metal : 3
7a426f22 labeled as irregular_metal : 3
7a3172ee labeled as irregular_metal : 3
7a30ed88 labeled as irregular_metal : 3
7a330334 labeled as irregular_metal : 3
7a225bce labeled as irregular_metal : 3
7a3c1046 labeled as irregular_metal : 3
7a26c466 labeled as irregular_metal : 3
7a3bf3ae labeled as irregular_metal : 3
7a1fec22 labeled as irregular_metal : 3
7a245096 labeled as irregular_metal : 3
7a427af8 labeled as irregular_metal : 3
7a340fcc labeled as irregular_metal : 3
7a2ebb30 labeled as irregular_metal : 3
7a3aaba2 labeled as irregular_metal : 3
7a25b120 labeled as irregular_metal : 3
7a2839f4 labeled as irregular_metal : 3
7a2ce378 labeled as irregular_metal : 3
7a4430c8 labeled as irregular_metal : 3
7a2c3f86 labeled as irregular_metal : 3
7a20cff2 labeled as irregular_metal : 3
7a2a9ce4 labeled as irregular_metal : 3
7a2152a6 labeled as irregular_metal : 3
7a28331e labeled as irregular_metal : 3


7a38837c labeled as irregular_metal : 3
7a370d76 labeled as irregular_metal : 3
7a3391b4 labeled as irregular_metal : 3
7a32923c labeled as irregular_metal : 3
7a3eaefa labeled as irregular_metal : 3
7a2715f6 labeled as irregular_metal : 3
7a42c490 labeled as irregular_metal : 3
7a38e3bc labeled as irregular_metal : 3
7a3840f6 labeled as irregular_metal : 3
7a44079c labeled as irregular_metal : 3
7a395900 labeled as irregular_metal : 3
7a1cfa08 labeled as irregular_metal : 3
7a321d52 labeled as irregular_metal : 3
7a1d6af6 labeled as irregular_metal : 3
7a33b59a labeled as irregular_metal : 3
7a33fe2e labeled as irregular_metal : 3
7a4090ee labeled as irregular_metal : 3
7a3c8ed6 labeled as irregular_metal : 3
7a222898 labeled as irregular_metal : 3
7a24fece labeled as irregular_metal : 3
7a3c6582 labeled as irregular_metal : 3
7a3b19f2 labeled as irregular_metal : 3
7a2c22ee labeled as irregular_metal : 3
7a3771e4 labeled as irregular_metal : 3
7a41df12 labeled as irregular_metal : 3


7a22f07a labeled as irregular_metal : 3
7a21fb20 labeled as irregular_metal : 3
7a1d9ec2 labeled as irregular_metal : 3
7a278324 labeled as irregular_metal : 3
7a20363c labeled as irregular_metal : 3
7a2f5522 labeled as irregular_metal : 3
7a2af572 labeled as irregular_metal : 3
7a3d051e labeled as irregular_metal : 3
7a39b3be labeled as irregular_metal : 3
7a1d1b64 labeled as irregular_metal : 3
7a3f833e labeled as irregular_metal : 3
7a3ba64c labeled as irregular_metal : 3
7a25441a labeled as irregular_metal : 3
7a3717bc labeled as irregular_metal : 3
7a3de5c4 labeled as irregular_metal : 3
7a42aa8c labeled as irregular_metal : 3
7a2b43e2 labeled as irregular_metal : 3
7a312c58 labeled as irregular_metal : 3
7a3d3d9a labeled as irregular_metal : 3
7a207246 labeled as irregular_metal : 3
7a2edfca labeled as irregular_metal : 3
7a383034 labeled as irregular_metal : 3
7a3797fa labeled as irregular_metal : 3
7a37709a labeled as irregular_metal : 3
7a21d294 labeled as irregular_metal : 3


7a24da0c labeled as irregular_metal : 3
7a2d3472 labeled as irregular_metal : 3
7a236fdc labeled as irregular_metal : 3
7a35e87e labeled as irregular_metal : 3
7a1e9d7c labeled as irregular_metal : 3
7a224d82 labeled as irregular_metal : 3
7a2e1afe labeled as irregular_metal : 3
7a1cd8a2 labeled as irregular_metal : 3
7a1ec3ec labeled as irregular_metal : 3
7a258894 labeled as irregular_metal : 3
7a3b6966 labeled as irregular_metal : 3
7a4201ea labeled as irregular_metal : 3
7a39e58c labeled as irregular_metal : 3
7a2f3e52 labeled as irregular_metal : 3
7a1e3792 labeled as irregular_metal : 3
7a36fade labeled as irregular_metal : 3
7a28d350 labeled as irregular_metal : 3
7a2c5354 labeled as irregular_metal : 3
7a222406 labeled as irregular_metal : 3
7a1c89ba labeled as irregular_metal : 3
7a30d924 labeled as irregular_metal : 3
7a27e95e labeled as irregular_metal : 3
7a3f64e4 labeled as irregular_metal : 3
7a21f6ac labeled as irregular_metal : 3
7a31b13c labeled as irregular_metal : 3


7a2e8c6e labeled as irregular_metal : 3
7a333f7a labeled as irregular_metal : 3
7a34a2e8 labeled as irregular_metal : 3
7a348920 labeled as irregular_metal : 3
7a2ebd38 labeled as irregular_metal : 3
7a412d60 labeled as irregular_metal : 3
7a43aafe labeled as irregular_metal : 3
7a40d3f6 labeled as irregular_metal : 3
7a2542da labeled as irregular_metal : 3
7a364440 labeled as irregular_metal : 3
7a2560d0 labeled as irregular_metal : 3
7a3e950a labeled as irregular_metal : 3
7a36a4d0 labeled as irregular_metal : 3
7a41257c labeled as irregular_metal : 3
7a30a080 labeled as irregular_metal : 3
7a43f1a8 labeled as irregular_metal : 3
7a29d8f4 labeled as irregular_metal : 3
7a2ae26c labeled as irregular_metal : 3
7a3c009c labeled as irregular_metal : 3
7a24d890 labeled as irregular_metal : 3
7a40ead0 labeled as irregular_metal : 3
7a424b46 labeled as irregular_metal : 3
7a374930 labeled as irregular_metal : 3
7a25e172 labeled as irregular_metal : 3
7a27d022 labeled as irregular_metal : 3


7a1ee80e labeled as irregular_metal : 3
7a41cd6a labeled as irregular_metal : 3
7a21cf60 labeled as irregular_metal : 3
7a2a0432 labeled as irregular_metal : 3
7a2e64c8 labeled as irregular_metal : 3
7a37a790 labeled as irregular_metal : 3
7a3e1b8e labeled as irregular_metal : 3
7a29b400 labeled as irregular_metal : 3
7a3849a2 labeled as irregular_metal : 3
7a36d5e0 labeled as irregular_metal : 3
7a3a824e labeled as irregular_metal : 3
7a35102a labeled as irregular_metal : 3
7a285efc labeled as irregular_metal : 3
7a3b07b4 labeled as irregular_metal : 3
7a20d68c labeled as irregular_metal : 3
7a360944 labeled as other : 4
7a307a06 labeled as other : 4
7a3ec1f6 labeled as other : 4
7a3af35a labeled as other : 4
7a2e2be8 labeled as other : 4
7a220b06 labeled as other : 4
7a3f71c8 labeled as other : 4
7a319b84 labeled as other : 4
7a3e5342 labeled as other : 4
7a209b68 labeled as other : 4
7a1ccef2 labeled as other : 4
7a3f2628 labeled as other : 4
7a34c0e8 labeled as other : 4
7a277d70 l

In [9]:
# Preview the first rows of the feature table (id, features, label).
features_train.head()

Unnamed: 0,id,features,label
0,7a20f7ac,"[4.636075, 2.3204336, 1.4748516, -0.0, -0.0, 1...",0
1,7a38b298,"[2.326549, 3.0096893, 0.1731861, 1.4067602, 0....",0
2,7a3a662e,"[2.7476623, 1.8925416, -0.0, 0.87210643, -0.0,...",0
3,7a3d7d28,"[6.3516073, 3.722457, 0.3522606, -0.0, -0.0, 4...",0
4,7a397142,"[0.38750732, 0.010527623, -0.0, 5.6719933, 7.7...",0


In [10]:
import pickle
from os import makedirs
from os.path import exists

# Directory (relative to this notebook) where the extracted features are stored.
pickle_path = join('..', '..', 'pickles')
# exist_ok=True keeps the cell re-runnable and avoids the check-then-create race
# of a separate exists() test.
makedirs(pickle_path, exist_ok=True)

# Persist the training feature table; the file name encodes the network and region.
train_pickle_fp = join(pickle_path, 'resnet50_features_' + region + '_train.pkl')
with open(train_pickle_fp, 'wb') as f:
    pickle.dump(features_train, f)

### Visualize the features

Reduce the dimensionality of the feature vectors to 2D using t-SNE (https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding) so that they can be drawn as a scatter plot coloured by material label.

In [None]:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import matplotlib
from sklearn.decomposition import PCA
%matplotlib inline

In [None]:
def plot_tSNE(features, labels=None, number_of_materials=5, random_state=None):
    """Embed feature vectors in 2D with t-SNE and show them as a scatter plot.

    Parameters
    ----------
    features : array-like, one row per sample
        Feature matrix that is handed to ``TSNE.fit_transform``.
    labels : array-like with one entry per row of ``features``, optional
        Integer class label of each sample, used to colour the points.
        When omitted, every point gets label 0.
    number_of_materials : int, default 5
        Number of classes; determines how many colour bands the colorbar has.
    random_state : int or None, default None
        Seed forwarded to ``TSNE``. t-SNE is stochastic, so pass an integer to
        get the same embedding on every run.

    Returns
    -------
    None
        The figure is drawn as a side effect.
    """
    features = np.asarray(features)
    if labels is None:
        labels = np.zeros(features.shape[0])

    tsne_features = TSNE(n_components=2, random_state=random_state).fit_transform(features)

    # One colour band per class. The bin edges lie at k - 0.5 so that integer
    # label k falls in the middle of band k and the colorbar ticks can be placed
    # on the labels themselves (0 .. number_of_materials - 1).
    cmap = plt.cm.jet
    class_ids = np.arange(number_of_materials)
    bounds = np.arange(number_of_materials + 1) - 0.5
    norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N)

    fig, ax = plt.subplots()
    scat = ax.scatter(tsne_features[:, 0], tsne_features[:, 1],
                      c=labels, cmap=cmap, norm=norm)
    fig.colorbar(scat, ax=ax, spacing='proportional', ticks=class_ids,
                 label='material label')
    ax.set_xlabel('t-SNE dimension 1')
    ax.set_ylabel('t-SNE dimension 2')
    ax.set_title('t-SNE embedding of the features')

Convert the per-image feature vectors stored in the dataframe into a single matrix with one row per image

In [None]:
# Every entry of the 'features' column is a 1-D vector: place the vectors side
# by side as columns, then transpose so that each image becomes one row.
feat_matrix = np.column_stack(features_train['features'].to_numpy()).T

# Class label of each row, in the same order as the rows of feat_matrix.
labels = features_train['label'].to_numpy()

In [None]:
# Embed the training features in 2D and colour each point by its label.
plot_tSNE(feat_matrix, labels)

## 2. Calculate features on the test data


This procedure is similar to the one used for the training data, except that the labels are unknown and the folder with test images is not subdivided by material.

In [None]:
def calculate_features_test(test_dir, target_size=(224, 224)):
    """Compute a feature vector for every image file found under ``test_dir``.

    The directory tree is walked recursively. Each file is loaded with the
    Keras ``image`` helpers, run through ``preprocess_input`` and passed to the
    module-level ``model``; the flattened prediction is stored as the image's
    feature vector.

    Parameters
    ----------
    test_dir : str
        Root folder containing the test images.
    target_size : tuple of int, default (224, 224)
        Size every image is resized to when it is loaded.

    Returns
    -------
    pandas.DataFrame
        One row per image with the columns ``id`` (file name up to the first
        dot) and ``features`` (1-D numpy array). Empty, but with the same
        columns, when no files are found.
    """
    columns = ['id', 'features']
    # Collect plain records and build the frame once at the end: appending to
    # a DataFrame row by row is quadratic, and DataFrame.append no longer
    # exists in pandas >= 2.0.
    records = []

    # Walk through all images
    for root, dirs, files in os.walk(test_dir):
        for file in files:
            img_fp = join(root, file)
            image_id = file.split('.')[0]
            print("Calculate features for", image_id)

            # Load the image resized to target_size, add the batch axis the
            # network expects, and apply the network-specific preprocessing.
            img = image.load_img(img_fp, target_size=target_size)
            img_data = image.img_to_array(img)
            img_data = np.expand_dims(img_data, axis=0)
            img_data = preprocess_input(img_data)

            # Compute features and flatten them to a single 1-D vector
            features = model.predict(img_data)
            records.append({'id': image_id,
                            'features': np.array(features).flatten()})

    return pd.DataFrame(records, columns=columns)

In [None]:
# Extract features for every image found under test_dir.
features_test = calculate_features_test(test_dir)

In [None]:
# Preview the first rows of the test feature table.
features_test.head()

In [None]:
# Persist the test feature table. The prefix names the network that produced
# the features (ResNet50), consistent with the training pickle written earlier
# in this notebook; the former 'inceptionv3_' prefix was a copy-paste leftover.
with open(join(pickle_path, 'resnet50_features_' + region + '_test.pkl'), 'wb') as f:
    pickle.dump(features_test, f)