# Calculating features

In [2]:
from keras.preprocessing import image
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input

import numpy as np
import pandas as pd
from os.path import join
import os

Using TensorFlow backend.


In [3]:
# Names of all regions for which image data is available
regions = [
    'borde_rural',
    'borde_soacha',
    'mixco_1_and_ebenezer',
    'mixco_3',
]

In [None]:
# Region whose images are processed in this run
region = 'mixco_3'

# Roof material name -> integer class label
materials = {
    name: index
    for index, name in enumerate(
        ['concrete_cement', 'healthy_metal', 'incomplete', 'irregular_metal', 'other']
    )
}

# Image folders of the selected region, relative to this notebook
test_dir = join('..', '..', 'data', region, 'roofs_test')
train_dir = join('..', '..', 'data', region, 'roofs_train')

Use a pretrained network to compute features from the images, which are later classified. Import the model from the Keras library and remove the top layer by setting `include_top=False`. The output of the last layer is pooled so that we get outputs of size 1x2048 instead of 7x7x2048. The pooling method `'max'` was chosen empirically.

In [None]:
# ResNet50 with ImageNet weights, without the top layer and with max pooling
# applied to the last layer's output; used purely as a feature extractor.
model = ResNet50(
    include_top=False,
    pooling='max',
    weights='imagenet',
)
model.summary()

## 1. Calculate features on the training data

The preprocessed data is stored such that every region has its own folder, which contains a subfolder defined as `train_dir` above. This folder is in turn subdivided by material. Images carry their `id` as filename.

Traverse all images in their corresponding folders which indicate the material and use `model.predict()` to get the features.

In [None]:
def calculate_features_train(train_dir):
    """Compute ResNet50 features for all labelled training images.

    ``train_dir`` is expected to contain one sub-folder per key of the global
    ``materials`` dict. Every file found below such a folder is loaded as an
    image of that material and passed through the global ``model``.

    Parameters
    ----------
    train_dir : str
        Directory that holds the per-material image folders.

    Returns
    -------
    pandas.DataFrame
        One row per image with the columns ``id`` (file name up to the first
        dot), ``features`` (flattened 1-D array from ``model.predict``) and
        ``label`` (integer taken from ``materials``). The columns are present
        even if no image was found.
    """
    columns = ['id', 'features', 'label']

    # Collect plain records and build the dataframe once at the end;
    # growing a dataframe row by row is quadratic.
    records = []

    # Walk through all images, one material folder at a time
    for material, label in materials.items():
        material_fp = join(train_dir, material)
        for root, _dirs, files in os.walk(material_fp):
            for file in files:
                # Join with `root`, not `material_fp`: os.walk also yields the
                # files of nested sub-folders, which live below `root`.
                img_fp = join(root, file)
                img_id = file.split('.')[0]
                print(img_id, "labeled as", material, ":", label)

                # Resize to the 224x224 input size, add a batch axis, preprocess
                img = image.load_img(img_fp, target_size=(224, 224))
                img_data = image.img_to_array(img)
                img_data = np.expand_dims(img_data, axis=0)
                img_data = preprocess_input(img_data)

                # Compute features and flatten them to a 1-D vector
                resnet50_feature = model.predict(img_data)
                resnet50_feature_np = np.array(resnet50_feature).flatten()

                records.append({'id': img_id,
                                'features': resnet50_feature_np,
                                'label': label})

    # Passing `columns` keeps the expected schema for an empty result
    return pd.DataFrame(records, columns=columns)

The results are stored in a dataframe as shown below and saved to disk as a pickle file.

In [7]:
# Extract the features of all training images below `train_dir` (prints one line per image)
features_train = calculate_features_train(train_dir)

7a1c6d7c labeled as concrete_cement : 0
7a1d078c labeled as concrete_cement : 0
7a1d2ff0 labeled as concrete_cement : 0
7a1d570a labeled as concrete_cement : 0
7a1d6042 labeled as concrete_cement : 0
7a1d8e1e labeled as concrete_cement : 0
7a1d99fe labeled as concrete_cement : 0
7a1d9b34 labeled as concrete_cement : 0
7a1e26e4 labeled as concrete_cement : 0
7a1e2f04 labeled as concrete_cement : 0
7a1e385a labeled as concrete_cement : 0
7a1e90c0 labeled as concrete_cement : 0
7a1e9750 labeled as concrete_cement : 0
7a1eb73a labeled as concrete_cement : 0
7a1ebf6e labeled as concrete_cement : 0
7a1ec87e labeled as concrete_cement : 0
7a1ecbd0 labeled as concrete_cement : 0
7a1f49fc labeled as concrete_cement : 0
7a1f63b0 labeled as concrete_cement : 0
7a1f76d4 labeled as concrete_cement : 0
7a1f9362 labeled as concrete_cement : 0
7a1fdd4a labeled as concrete_cement : 0
7a1fe93e labeled as concrete_cement : 0
7a214bda labeled as concrete_cement : 0
7a21585a labeled as concrete_cement : 0


7a41e048 labeled as concrete_cement : 0
7a42454c labeled as concrete_cement : 0
7a424f06 labeled as concrete_cement : 0
7a427fee labeled as concrete_cement : 0
7a42c206 labeled as concrete_cement : 0
7a42c936 labeled as concrete_cement : 0
7a43a87e labeled as concrete_cement : 0
7a43b06c labeled as concrete_cement : 0
7a43cf34 labeled as concrete_cement : 0
7a4413fe labeled as concrete_cement : 0
7a441c96 labeled as concrete_cement : 0
7a448690 labeled as concrete_cement : 0
7a1c4d74 labeled as healthy_metal : 1
7a1c5274 labeled as healthy_metal : 1
7a1c5daa labeled as healthy_metal : 1
7a1c6296 labeled as healthy_metal : 1
7a1c701a labeled as healthy_metal : 1
7a1c7484 labeled as healthy_metal : 1
7a1c7f38 labeled as healthy_metal : 1
7a1c887a labeled as healthy_metal : 1
7a1c88e8 labeled as healthy_metal : 1
7a1c964e labeled as healthy_metal : 1
7a1c9842 labeled as healthy_metal : 1
7a1c99a0 labeled as healthy_metal : 1
7a1ca3b4 labeled as healthy_metal : 1
7a1ca422 labeled as health

7a1fd840 labeled as healthy_metal : 1
7a1fdec6 labeled as healthy_metal : 1
7a1fe0f6 labeled as healthy_metal : 1
7a1fe36c labeled as healthy_metal : 1
7a1fe7fe labeled as healthy_metal : 1
7a1fe86c labeled as healthy_metal : 1
7a1fea7e labeled as healthy_metal : 1
7a1febb4 labeled as healthy_metal : 1
7a1ff320 labeled as healthy_metal : 1
7a200126 labeled as healthy_metal : 1
7a20066c labeled as healthy_metal : 1
7a2008d8 labeled as healthy_metal : 1
7a200e1e labeled as healthy_metal : 1
7a2010e4 labeled as healthy_metal : 1
7a2011b6 labeled as healthy_metal : 1
7a201ab2 labeled as healthy_metal : 1
7a201f1c labeled as healthy_metal : 1
7a2022c8 labeled as healthy_metal : 1
7a20266a labeled as healthy_metal : 1
7a202732 labeled as healthy_metal : 1
7a202868 labeled as healthy_metal : 1
7a202b9c labeled as healthy_metal : 1
7a203088 labeled as healthy_metal : 1
7a203970 labeled as healthy_metal : 1
7a203c36 labeled as healthy_metal : 1
7a203dd0 labeled as healthy_metal : 1
7a20410e lab

7a238742 labeled as healthy_metal : 1
7a238db4 labeled as healthy_metal : 1
7a238fda labeled as healthy_metal : 1
7a23925a labeled as healthy_metal : 1
7a239552 labeled as healthy_metal : 1
7a239e94 labeled as healthy_metal : 1
7a239fd4 labeled as healthy_metal : 1
7a23a11e labeled as healthy_metal : 1
7a23a25e labeled as healthy_metal : 1
7a23a2cc labeled as healthy_metal : 1
7a23a4fc labeled as healthy_metal : 1
7a23b032 labeled as healthy_metal : 1
7a23b7da labeled as healthy_metal : 1
7a23d1f2 labeled as healthy_metal : 1
7a23e0a2 labeled as healthy_metal : 1
7a23e2aa labeled as healthy_metal : 1
7a23e4bc labeled as healthy_metal : 1
7a23ee08 labeled as healthy_metal : 1
7a23eec6 labeled as healthy_metal : 1
7a23f10a labeled as healthy_metal : 1
7a23f34e labeled as healthy_metal : 1
7a2403ac labeled as healthy_metal : 1
7a24069a labeled as healthy_metal : 1
7a240848 labeled as healthy_metal : 1
7a2408b6 labeled as healthy_metal : 1
7a240ba4 labeled as healthy_metal : 1
7a240f00 lab

7a276e34 labeled as healthy_metal : 1
7a276ea2 labeled as healthy_metal : 1
7a277186 labeled as healthy_metal : 1
7a27733e labeled as healthy_metal : 1
7a277a5a labeled as healthy_metal : 1
7a277ac8 labeled as healthy_metal : 1
7a2781da labeled as healthy_metal : 1
7a278540 labeled as healthy_metal : 1
7a2785ae labeled as healthy_metal : 1
7a27896e labeled as healthy_metal : 1
7a278bf8 labeled as healthy_metal : 1
7a278d9c labeled as healthy_metal : 1
7a278e0a labeled as healthy_metal : 1
7a2791d4 labeled as healthy_metal : 1
7a279d8c labeled as healthy_metal : 1
7a27a5e8 labeled as healthy_metal : 1
7a27a656 labeled as healthy_metal : 1
7a27af20 labeled as healthy_metal : 1
7a27b3f8 labeled as healthy_metal : 1
7a27b4a2 labeled as healthy_metal : 1
7a27b6a0 labeled as healthy_metal : 1
7a27b754 labeled as healthy_metal : 1
7a27b876 labeled as healthy_metal : 1
7a27b8e4 labeled as healthy_metal : 1
7a27bbc8 labeled as healthy_metal : 1
7a27bd08 labeled as healthy_metal : 1
7a27c104 lab

7a2b62aa labeled as healthy_metal : 1
7a2b6340 labeled as healthy_metal : 1
7a2b63e0 labeled as healthy_metal : 1
7a2b748e labeled as healthy_metal : 1
7a2b7506 labeled as healthy_metal : 1
7a2b7ed4 labeled as healthy_metal : 1
7a2b7f4c labeled as healthy_metal : 1
7a2b8d5c labeled as healthy_metal : 1
7a2b8fe6 labeled as healthy_metal : 1
7a2b94aa labeled as healthy_metal : 1
7a2b9f7c labeled as healthy_metal : 1
7a2ba3c8 labeled as healthy_metal : 1
7a2ba5e4 labeled as healthy_metal : 1
7a2bacd8 labeled as healthy_metal : 1
7a2bb0d4 labeled as healthy_metal : 1
7a2bb6a6 labeled as healthy_metal : 1
7a2bbcf0 labeled as healthy_metal : 1
7a2bc0b0 labeled as healthy_metal : 1
7a2bc18c labeled as healthy_metal : 1
7a2bc47a labeled as healthy_metal : 1
7a2bdb5e labeled as healthy_metal : 1
7a2be568 labeled as healthy_metal : 1
7a2befb8 labeled as healthy_metal : 1
7a2bfbca labeled as healthy_metal : 1
7a2c0188 labeled as healthy_metal : 1
7a2c0674 labeled as healthy_metal : 1
7a2c0e8a lab

7a2edb06 labeled as healthy_metal : 1
7a2edc3c labeled as healthy_metal : 1
7a2ee038 labeled as healthy_metal : 1
7a2ee7a4 labeled as healthy_metal : 1
7a2eebb4 labeled as healthy_metal : 1
7a2eecfe labeled as healthy_metal : 1
7a2eeea2 labeled as healthy_metal : 1
7a2efb04 labeled as healthy_metal : 1
7a2efb68 labeled as healthy_metal : 1
7a2efd7a labeled as healthy_metal : 1
7a2efdde labeled as healthy_metal : 1
7a2f01f8 labeled as healthy_metal : 1
7a2f0a86 labeled as healthy_metal : 1
7a2f0c3e labeled as healthy_metal : 1
7a2f1008 labeled as healthy_metal : 1
7a2f1364 labeled as healthy_metal : 1
7a2f16c0 labeled as healthy_metal : 1
7a2f1724 labeled as healthy_metal : 1
7a2f1a76 labeled as healthy_metal : 1
7a2f1b48 labeled as healthy_metal : 1
7a2f2034 labeled as healthy_metal : 1
7a2f22b4 labeled as healthy_metal : 1
7a2f2732 labeled as healthy_metal : 1
7a2f29a8 labeled as healthy_metal : 1
7a2f2d54 labeled as healthy_metal : 1
7a2f310a labeled as healthy_metal : 1
7a2f3178 lab

7a32ac86 labeled as healthy_metal : 1
7a32adbc labeled as healthy_metal : 1
7a32ae2a labeled as healthy_metal : 1
7a32ae98 labeled as healthy_metal : 1
7a32af60 labeled as healthy_metal : 1
7a32b3de labeled as healthy_metal : 1
7a32b50a labeled as healthy_metal : 1
7a32c0b8 labeled as healthy_metal : 1
7a32c6ee labeled as healthy_metal : 1
7a32c7c0 labeled as healthy_metal : 1
7a32c9c8 labeled as healthy_metal : 1
7a32ca36 labeled as healthy_metal : 1
7a32d134 labeled as healthy_metal : 1
7a32d2e2 labeled as healthy_metal : 1
7a32df8a labeled as healthy_metal : 1
7a32e200 labeled as healthy_metal : 1
7a32e26e labeled as healthy_metal : 1
7a32e94e labeled as healthy_metal : 1
7a32e9bc labeled as healthy_metal : 1
7a32ef0c labeled as healthy_metal : 1
7a32ef70 labeled as healthy_metal : 1
7a32faf6 labeled as healthy_metal : 1
7a32fbc8 labeled as healthy_metal : 1
7a32ff1a labeled as healthy_metal : 1
7a330262 labeled as healthy_metal : 1
7a3306e0 labeled as healthy_metal : 1
7a330b04 lab

7a35efc2 labeled as healthy_metal : 1
7a35f09e labeled as healthy_metal : 1
7a35f18e labeled as healthy_metal : 1
7a35f2ec labeled as healthy_metal : 1
7a35fce2 labeled as healthy_metal : 1
7a35fdbe labeled as healthy_metal : 1
7a35fe5e labeled as healthy_metal : 1
7a35ff3a labeled as healthy_metal : 1
7a3602aa labeled as healthy_metal : 1
7a360b56 labeled as healthy_metal : 1
7a360c96 labeled as healthy_metal : 1
7a360dcc labeled as healthy_metal : 1
7a361394 labeled as healthy_metal : 1
7a36214a labeled as healthy_metal : 1
7a36249c labeled as healthy_metal : 1
7a36256e labeled as healthy_metal : 1
7a362640 labeled as healthy_metal : 1
7a362fbe labeled as healthy_metal : 1
7a3630fe labeled as healthy_metal : 1
7a363658 labeled as healthy_metal : 1
7a3639aa labeled as healthy_metal : 1
7a363b44 labeled as healthy_metal : 1
7a363fb8 labeled as healthy_metal : 1
7a3641ca labeled as healthy_metal : 1
7a36429c labeled as healthy_metal : 1
7a3644ae labeled as healthy_metal : 1
7a364576 lab

7a39b0da labeled as healthy_metal : 1
7a39b49a labeled as healthy_metal : 1
7a39b56c labeled as healthy_metal : 1
7a39b97c labeled as healthy_metal : 1
7a39c2aa labeled as healthy_metal : 1
7a39c5f2 labeled as healthy_metal : 1
7a39c660 labeled as healthy_metal : 1
7a39cae8 labeled as healthy_metal : 1
7a39d04c labeled as healthy_metal : 1
7a39d128 labeled as healthy_metal : 1
7a39d90c labeled as healthy_metal : 1
7a39db82 labeled as healthy_metal : 1
7a39e3de labeled as healthy_metal : 1
7a39e73a labeled as healthy_metal : 1
7a39ec44 labeled as healthy_metal : 1
7a39ef28 labeled as healthy_metal : 1
7a39f432 labeled as healthy_metal : 1
7a39f78e labeled as healthy_metal : 1
7a39f860 labeled as healthy_metal : 1
7a39faea labeled as healthy_metal : 1
7a3a00d0 labeled as healthy_metal : 1
7a3a0576 labeled as healthy_metal : 1
7a3a0dbe labeled as healthy_metal : 1
7a3a1048 labeled as healthy_metal : 1
7a3a19c6 labeled as healthy_metal : 1
7a3a1a34 labeled as healthy_metal : 1
7a3a1aa2 lab

7a3d0230 labeled as healthy_metal : 1
7a3d069a labeled as healthy_metal : 1
7a3d0a64 labeled as healthy_metal : 1
7a3d0e2e labeled as healthy_metal : 1
7a3d0e92 labeled as healthy_metal : 1
7a3d11ee labeled as healthy_metal : 1
7a3d1536 labeled as healthy_metal : 1
7a3d195a labeled as healthy_metal : 1
7a3d1cac labeled as healthy_metal : 1
7a3d1de2 labeled as healthy_metal : 1
7a3d2684 labeled as healthy_metal : 1
7a3d3002 labeled as healthy_metal : 1
7a3d355c labeled as healthy_metal : 1
7a3d35ca labeled as healthy_metal : 1
7a3d38a4 labeled as healthy_metal : 1
7a3d3ab6 labeled as healthy_metal : 1
7a3d3cc8 labeled as healthy_metal : 1
7a3d4150 labeled as healthy_metal : 1
7a3d4358 labeled as healthy_metal : 1
7a3d4d30 labeled as healthy_metal : 1
7a3d5078 labeled as healthy_metal : 1
7a3d52ee labeled as healthy_metal : 1
7a3d58ca labeled as healthy_metal : 1
7a3d592e labeled as healthy_metal : 1
7a3d5a00 labeled as healthy_metal : 1
7a3d636a labeled as healthy_metal : 1
7a3d68b0 lab

7a406bc8 labeled as healthy_metal : 1
7a406c36 labeled as healthy_metal : 1
7a406eac labeled as healthy_metal : 1
7a407190 labeled as healthy_metal : 1
7a4075be labeled as healthy_metal : 1
7a40762c labeled as healthy_metal : 1
7a4079f6 labeled as healthy_metal : 1
7a407c08 labeled as healthy_metal : 1
7a4081da labeled as healthy_metal : 1
7a4083f6 labeled as healthy_metal : 1
7a4084c8 labeled as healthy_metal : 1
7a408536 labeled as healthy_metal : 1
7a408900 labeled as healthy_metal : 1
7a408cc0 labeled as healthy_metal : 1
7a409364 labeled as healthy_metal : 1
7a409ae4 labeled as healthy_metal : 1
7a409cf6 labeled as healthy_metal : 1
7a409f6c labeled as healthy_metal : 1
7a40a750 labeled as healthy_metal : 1
7a40a7be labeled as healthy_metal : 1
7a40a930 labeled as healthy_metal : 1
7a40a99e labeled as healthy_metal : 1
7a40b132 labeled as healthy_metal : 1
7a40b484 labeled as healthy_metal : 1
7a40bc04 labeled as healthy_metal : 1
7a40bc68 labeled as healthy_metal : 1
7a40bf4c lab

7a44765a labeled as healthy_metal : 1
7a447970 labeled as healthy_metal : 1
7a447a4c labeled as healthy_metal : 1
7a4487da labeled as healthy_metal : 1
7a449536 labeled as healthy_metal : 1
7a449842 labeled as healthy_metal : 1
7a44a170 labeled as healthy_metal : 1
7a44a472 labeled as healthy_metal : 1
7a44a544 labeled as healthy_metal : 1
7a44b70a labeled as healthy_metal : 1
7a44b854 labeled as healthy_metal : 1
7a44bfd4 labeled as healthy_metal : 1
7a44c416 labeled as healthy_metal : 1
7a44c4f2 labeled as healthy_metal : 1
7a44c768 labeled as healthy_metal : 1
7a44c8a8 labeled as healthy_metal : 1
7a1c6c50 labeled as incomplete : 2
7a1c82f8 labeled as incomplete : 2
7a1cde6a labeled as incomplete : 2
7a1d39dc labeled as incomplete : 2
7a1d4e4a labeled as incomplete : 2
7a1d9666 labeled as incomplete : 2
7a1dbdbc labeled as incomplete : 2
7a1dc47e labeled as incomplete : 2
7a1dd28e labeled as incomplete : 2
7a1ddc70 labeled as incomplete : 2
7a1e2dce labeled as incomplete : 2
7a1e331

7a40299c labeled as incomplete : 2
7a403d56 labeled as incomplete : 2
7a4043fa labeled as incomplete : 2
7a405458 labeled as incomplete : 2
7a406498 labeled as incomplete : 2
7a409576 labeled as incomplete : 2
7a40baba labeled as incomplete : 2
7a411334 labeled as incomplete : 2
7a411758 labeled as incomplete : 2
7a411a32 labeled as incomplete : 2
7a4147f0 labeled as incomplete : 2
7a416fe6 labeled as incomplete : 2
7a41d45e labeled as incomplete : 2
7a41dea4 labeled as incomplete : 2
7a4200aa labeled as incomplete : 2
7a4213a6 labeled as incomplete : 2
7a42ade8 labeled as incomplete : 2
7a42d5de labeled as incomplete : 2
7a43287c labeled as incomplete : 2
7a432af2 labeled as incomplete : 2
7a432b60 labeled as incomplete : 2
7a439e10 labeled as incomplete : 2
7a43a3ec labeled as incomplete : 2
7a43d830 labeled as incomplete : 2
7a43ff86 labeled as incomplete : 2
7a444180 labeled as incomplete : 2
7a445abc labeled as incomplete : 2
7a4470b0 labeled as incomplete : 2
7a1c4f86 labeled as 

7a212db2 labeled as irregular_metal : 3
7a213104 labeled as irregular_metal : 3
7a213366 labeled as irregular_metal : 3
7a2140d6 labeled as irregular_metal : 3
7a21621e labeled as irregular_metal : 3
7a2169e4 labeled as irregular_metal : 3
7a216bec labeled as irregular_metal : 3
7a216d90 labeled as irregular_metal : 3
7a21761e labeled as irregular_metal : 3
7a217894 labeled as irregular_metal : 3
7a21812c labeled as irregular_metal : 3
7a2187f8 labeled as irregular_metal : 3
7a219068 labeled as irregular_metal : 3
7a2190d6 labeled as irregular_metal : 3
7a219338 labeled as irregular_metal : 3
7a21981a labeled as irregular_metal : 3
7a219d4c labeled as irregular_metal : 3
7a21ab48 labeled as irregular_metal : 3
7a21b8ea labeled as irregular_metal : 3
7a21bee4 labeled as irregular_metal : 3
7a21bfac labeled as irregular_metal : 3
7a21c952 labeled as irregular_metal : 3
7a21ca88 labeled as irregular_metal : 3
7a21cc22 labeled as irregular_metal : 3
7a21cc90 labeled as irregular_metal : 3


7a2775b4 labeled as irregular_metal : 3
7a277b36 labeled as irregular_metal : 3
7a277bcc labeled as irregular_metal : 3
7a278392 labeled as irregular_metal : 3
7a279530 labeled as irregular_metal : 3
7a27981e labeled as irregular_metal : 3
7a27a21e labeled as irregular_metal : 3
7a27ba24 labeled as irregular_metal : 3
7a27c0a0 labeled as irregular_metal : 3
7a27c974 labeled as irregular_metal : 3
7a27ceec labeled as irregular_metal : 3
7a27d72a labeled as irregular_metal : 3
7a27d7fc labeled as irregular_metal : 3
7a27e774 labeled as irregular_metal : 3
7a27ee40 labeled as irregular_metal : 3
7a27f2f0 labeled as irregular_metal : 3
7a27f3a4 labeled as irregular_metal : 3
7a27f5d4 labeled as irregular_metal : 3
7a27f96c labeled as irregular_metal : 3
7a27ff0c labeled as irregular_metal : 3
7a2801aa labeled as irregular_metal : 3
7a281546 labeled as irregular_metal : 3
7a2817c6 labeled as irregular_metal : 3
7a281dca labeled as irregular_metal : 3
7a282234 labeled as irregular_metal : 3


7a2df3ee labeled as irregular_metal : 3
7a2df452 labeled as irregular_metal : 3
7a2dfd12 labeled as irregular_metal : 3
7a2dfeb6 labeled as irregular_metal : 3
7a2e02ee labeled as irregular_metal : 3
7a2e03c0 labeled as irregular_metal : 3
7a2e1450 labeled as irregular_metal : 3
7a2e1518 labeled as irregular_metal : 3
7a2e22b0 labeled as irregular_metal : 3
7a2e238c labeled as irregular_metal : 3
7a2e2a9e labeled as irregular_metal : 3
7a2e2b7a labeled as irregular_metal : 3
7a2e2d28 labeled as irregular_metal : 3
7a2e2ed6 labeled as irregular_metal : 3
7a2e3cf0 labeled as irregular_metal : 3
7a2e46fa labeled as irregular_metal : 3
7a2e4768 labeled as irregular_metal : 3
7a2e4c0e labeled as irregular_metal : 3
7a2e5640 labeled as irregular_metal : 3
7a2e7152 labeled as irregular_metal : 3
7a2e7440 labeled as irregular_metal : 3
7a2e7670 labeled as irregular_metal : 3
7a2e7f30 labeled as irregular_metal : 3
7a2e83b8 labeled as irregular_metal : 3
7a2e9a9c labeled as irregular_metal : 3


7a32f254 labeled as irregular_metal : 3
7a32f8ee labeled as irregular_metal : 3
7a32fdda labeled as irregular_metal : 3
7a32ff7e labeled as irregular_metal : 3
7a330190 labeled as irregular_metal : 3
7a33088e labeled as irregular_metal : 3
7a3308f2 labeled as irregular_metal : 3
7a330960 labeled as irregular_metal : 3
7a330f14 labeled as irregular_metal : 3
7a331e46 labeled as irregular_metal : 3
7a3325bc labeled as irregular_metal : 3
7a33283c labeled as irregular_metal : 3
7a333476 labeled as irregular_metal : 3
7a33375a labeled as irregular_metal : 3
7a334e2a labeled as irregular_metal : 3
7a33576c labeled as irregular_metal : 3
7a3360cc labeled as irregular_metal : 3
7a336414 labeled as irregular_metal : 3
7a3388c2 labeled as irregular_metal : 3
7a3392ea labeled as irregular_metal : 3
7a339970 labeled as irregular_metal : 3
7a339bdc labeled as irregular_metal : 3
7a33a3fc labeled as irregular_metal : 3
7a33a460 labeled as irregular_metal : 3
7a33a820 labeled as irregular_metal : 3


7a380bc2 labeled as irregular_metal : 3
7a381b58 labeled as irregular_metal : 3
7a381dec labeled as irregular_metal : 3
7a384556 labeled as irregular_metal : 3
7a384bc8 labeled as irregular_metal : 3
7a38574e labeled as irregular_metal : 3
7a38582a labeled as irregular_metal : 3
7a3861f8 labeled as irregular_metal : 3
7a38715c labeled as irregular_metal : 3
7a38767a labeled as irregular_metal : 3
7a387c56 labeled as irregular_metal : 3
7a387e72 labeled as irregular_metal : 3
7a387eea labeled as irregular_metal : 3
7a38830e labeled as irregular_metal : 3
7a388660 labeled as irregular_metal : 3
7a389876 labeled as irregular_metal : 3
7a3898ee labeled as irregular_metal : 3
7a38a334 labeled as irregular_metal : 3
7a38a5b4 labeled as irregular_metal : 3
7a38a7c6 labeled as irregular_metal : 3
7a38b1c6 labeled as irregular_metal : 3
7a38ba7c labeled as irregular_metal : 3
7a38c0ee labeled as irregular_metal : 3
7a38cd5a labeled as irregular_metal : 3
7a38d25a labeled as irregular_metal : 3


7a3d3278 labeled as irregular_metal : 3
7a3d40e2 labeled as irregular_metal : 3
7a3d556e labeled as irregular_metal : 3
7a3d571c labeled as irregular_metal : 3
7a3d5c08 labeled as irregular_metal : 3
7a3d5e74 labeled as irregular_metal : 3
7a3d5ee2 labeled as irregular_metal : 3
7a3d6504 labeled as irregular_metal : 3
7a3d691e labeled as irregular_metal : 3
7a3d797c labeled as irregular_metal : 3
7a3d7ab2 labeled as irregular_metal : 3
7a3d819c labeled as irregular_metal : 3
7a3d95ba labeled as irregular_metal : 3
7a3da8c0 labeled as irregular_metal : 3
7a3da92e labeled as irregular_metal : 3
7a3dac6c labeled as irregular_metal : 3
7a3dafb4 labeled as irregular_metal : 3
7a3db856 labeled as irregular_metal : 3
7a3dbd42 labeled as irregular_metal : 3
7a3dc224 labeled as irregular_metal : 3
7a3dc8b4 labeled as irregular_metal : 3
7a3dd1ba labeled as irregular_metal : 3
7a3dde12 labeled as irregular_metal : 3
7a3ddee4 labeled as irregular_metal : 3
7a3df492 labeled as irregular_metal : 3


7a427c9c labeled as irregular_metal : 3
7a428494 labeled as irregular_metal : 3
7a429358 labeled as irregular_metal : 3
7a429434 labeled as irregular_metal : 3
7a4296b4 labeled as irregular_metal : 3
7a42a064 labeled as irregular_metal : 3
7a42a578 labeled as irregular_metal : 3
7a42a6c2 labeled as irregular_metal : 3
7a42af96 labeled as irregular_metal : 3
7a42b63a labeled as irregular_metal : 3
7a42b716 labeled as irregular_metal : 3
7a42bc20 labeled as irregular_metal : 3
7a42c198 labeled as irregular_metal : 3
7a42c2d8 labeled as irregular_metal : 3
7a42c5d0 labeled as irregular_metal : 3
7a42ceb8 labeled as irregular_metal : 3
7a42d214 labeled as irregular_metal : 3
7a42d570 labeled as irregular_metal : 3
7a42de3a labeled as irregular_metal : 3
7a42df0c labeled as irregular_metal : 3
7a42e0c4 labeled as irregular_metal : 3
7a42e772 labeled as irregular_metal : 3
7a42e98e labeled as irregular_metal : 3
7a42f0be labeled as irregular_metal : 3
7a42f2d0 labeled as irregular_metal : 3


In [8]:
# Preview the first rows of the training feature table
features_train.head()

Unnamed: 0,id,features,label
0,7a1c6d7c,"[0.8933228, 9.503732, 0.72881216, 3.7250175, 0...",0
1,7a1d078c,"[0.0, 2.642926, 0.5762399, 0.0, 3.139349, 1.07...",0
2,7a1d2ff0,"[1.8013268, 3.5478988, 2.0254405, 0.0, 0.78705...",0
3,7a1d570a,"[4.6264086, 3.7589073, 8.721215, 0.4236287, 0....",0
4,7a1d6042,"[1.9871483, 3.8774838, 6.796729, 0.10309696, 3...",0


In [9]:
import pickle
from os import makedirs
from os.path import exists

# Folder that collects the pickled feature tables
pickle_path = join('..', '..', 'pickles')
# exist_ok=True replaces the separate exists() check: no race between check
# and creation, and the cell can be re-run safely.
makedirs(pickle_path, exist_ok=True)

# Save the training features of the selected region
with open(join(pickle_path, 'resnet50_features_' + region + '_train.pkl' ), 'wb') as f:
    pickle.dump(features_train, f)

### Visualize the features

Reduce the dimensionality to 2D using tSNE (https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding)

In [10]:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import matplotlib
from sklearn.decomposition import PCA
%matplotlib inline

In [11]:
def plot_tSNE(features, labels=None, number_of_materials=5, random_state=None):
    """Scatter-plot a 2-D t-SNE embedding of ``features``, coloured by label.

    Parameters
    ----------
    features : array-like
        Passed unchanged to ``TSNE.fit_transform``; ``features.shape[0]`` is
        used as the number of samples when ``labels`` is omitted.
    labels : array-like, optional
        One value per sample used to colour the points. If ``None``, all
        samples get the label 0.
    number_of_materials : int, optional
        Number of discrete colour bins; the bin edges are ``0 .. number_of_materials``.
    random_state : optional
        Forwarded to ``TSNE``. t-SNE is stochastic, so pass a fixed value to
        get the same embedding on every run. The default ``None`` leaves the
        embedding unseeded, as before.

    Returns
    -------
    None
        The figure is drawn with matplotlib; the embedding's shape is printed.
    """
    if labels is None:
        labels = np.zeros((features.shape[0]))

    # Reduce the features to two dimensions
    tsne_features = TSNE(n_components=2, random_state=random_state).fit_transform(features)

    # Rebuild the jet colormap from its individual colours
    cmap = plt.cm.jet
    cmaplist = [cmap(i) for i in range(cmap.N)]
    cmap = cmap.from_list('Custom cmap', cmaplist, cmap.N)

    # One colour bin per material: edges at 0, 1, ..., number_of_materials
    bounds = np.linspace(0, number_of_materials, number_of_materials + 1)
    norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N)

    print(tsne_features.shape)

    plt.figure()
    scat = plt.scatter(tsne_features[:, 0], tsne_features[:, 1], c=labels, cmap=cmap, norm=norm)
    plt.colorbar(scat, spacing='proportional', ticks=bounds)

Convert features within the dataframe to a single matrix

In [12]:
# Stack the per-image feature vectors row-wise: one row per image
feat_matrix = np.vstack(features_train['features'].tolist())

# Integer material label of every row
labels = features_train['label'].to_numpy()

In [None]:
# Plot the 2-D t-SNE embedding of the training features, coloured by material label
plot_tSNE(feat_matrix, labels)

## 2. Calculate features on the test data


This procedure is similar to the one for the training data, except that we do not know the labels and the folder with test images is not subdivided by material.

In [21]:
def calculate_features_test(test_dir):
    """Compute ResNet50 features for all (unlabelled) test images.

    Every file found below ``test_dir`` is loaded as an image and passed
    through the global ``model``.

    Parameters
    ----------
    test_dir : str
        Directory that holds the test images.

    Returns
    -------
    pandas.DataFrame
        One row per image with the columns ``id`` (file name up to the first
        dot) and ``features`` (flattened 1-D array from ``model.predict``).
        The columns are present even if no image was found.
    """
    columns = ['id', 'features']

    # Collect plain records and build the dataframe once at the end.
    # DataFrame.append copied the whole frame on every call and no longer
    # exists in pandas 2.x.
    records = []

    # Walk through all images
    for root, _dirs, files in os.walk(test_dir):
        for file in files:
            img_fp = join(root, file)
            img_id = file.split('.')[0]
            print("Calculate features for", img_id)

            # Resize to the 224x224 input size, add a batch axis, preprocess
            img = image.load_img(img_fp, target_size=(224, 224))
            img_data = image.img_to_array(img)
            img_data = np.expand_dims(img_data, axis=0)
            img_data = preprocess_input(img_data)

            # Compute features and flatten them to a 1-D vector
            resnet50_feature = model.predict(img_data)
            resnet50_feature_np = np.array(resnet50_feature).flatten()

            records.append({'id': img_id,
                            'features': resnet50_feature_np})

    # Passing `columns` keeps the expected schema for an empty result
    return pd.DataFrame(records, columns=columns)

In [22]:
# Extract the features of all test images below `test_dir` (prints one line per image)
features_test = calculate_features_test(test_dir)

Calculate features for 7a44e1d0
Calculate features for 7a44e4c8
Calculate features for 7a44e7b6
Calculate features for 7a44e96e
Calculate features for 7a44e9dc
Calculate features for 7a44ecde
Calculate features for 7a44f4c2
Calculate features for 7a44f5f8
Calculate features for 7a44f968
Calculate features for 7a44fbd4
Calculate features for 7a44fe4a
Calculate features for 7a4506b0
Calculate features for 7a450f8e
Calculate features for 7a4516aa
Calculate features for 7a4522ee
Calculate features for 7a452708
Calculate features for 7a45311c
Calculate features for 7a4545f8
Calculate features for 7a454de6
Calculate features for 7a455be2
Calculate features for 7a455f3e
Calculate features for 7a455fac
Calculate features for 7a456452
Calculate features for 7a4572a8
Calculate features for 7a45813a
Calculate features for 7a458c98
Calculate features for 7a4591ca
Calculate features for 7a459ce2
Calculate features for 7a459fee
Calculate features for 7a45a78c
Calculate features for 7a45a7fa
Calculat



Calculate features for 7a4af85e
Calculate features for 7a4afa70
Calculate features for 7a4afe80
Calculate features for 7a4b063c
Calculate features for 7a4b0f38
Calculate features for 7a4b1226
Calculate features for 7a4b149c
Calculate features for 7a4b15d2
Calculate features for 7a4b2630
Calculate features for 7a4b2702
Calculate features for 7a4b2ab8
Calculate features for 7a4b3152
Calculate features for 7a4b35bc
Calculate features for 7a4b3e86
Calculate features for 7a4b4174
Calculate features for 7a4b46c4
Calculate features for 7a4b493a
Calculate features for 7a4b5448
Calculate features for 7a4b55ec
Calculate features for 7a4b56be
Calculate features for 7a4b61ea
Calculate features for 7a4b6bb8
Calculate features for 7a4b7bc6
Calculate features for 7a4b7cfc
Calculate features for 7a4b8396
Calculate features for 7a4b83fa
Calculate features for 7a4b89f4
Calculate features for 7a4b8c60
Calculate features for 7a4b9142
Calculate features for 7a4b976e
Calculate features for 7a4b9a48
Calculat

Calculate features for 7a503e4a
Calculate features for 7a504534
Calculate features for 7a504a20
Calculate features for 7a504ffc
Calculate features for 7a5055c4
Calculate features for 7a5056fa
Calculate features for 7a5058a8
Calculate features for 7a505f88
Calculate features for 7a5062c6
Calculate features for 7a50646a
Calculate features for 7a507e1e
Calculate features for 7a507e8c
Calculate features for 7a50815c
Calculate features for 7a509264
Calculate features for 7a50b47e
Calculate features for 7a50b6f4
Calculate features for 7a50c540
Calculate features for 7a50c748
Calculate features for 7a50d24c
Calculate features for 7a50e156
Calculate features for 7a50e980
Calculate features for 7a50f222
Calculate features for 7a50f510
Calculate features for 7a50f984
Calculate features for 7a50fa56
Calculate features for 7a50ff9c
Calculate features for 7a5112f2
Calculate features for 7a5115d6
Calculate features for 7a51224c
Calculate features for 7a513070
Calculate features for 7a5139c6
Calculat

In [23]:
# Preview the first rows of the test feature table
features_test.head()

Unnamed: 0,id,features
0,7a44e1d0,"[6.2197113, 2.110661, 1.13708, 0.0, 0.0, 0.504..."
1,7a44e4c8,"[1.2535996, 0.0, 0.0, 0.15350294, 0.49344027, ..."
2,7a44e7b6,"[3.1035824, 3.8894126, 2.2028537, 0.0, 0.0, 2...."
3,7a44e96e,"[3.6370928, 4.50314, 2.9926593, 1.0457133, 1.0..."
4,7a44e9dc,"[0.5092753, 4.6801777, 1.3032107, 0.0, 0.0, 0...."


In [24]:
# Save the test features of the selected region into the pickle folder
with open(
    join(pickle_path, 'resnet50_features_' + region + '_test.pkl'),
    mode='wb',
) as f:
    pickle.dump(features_test, f)