# Calculating features

In [1]:
from keras.preprocessing import image
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input

import numpy as np
import pandas as pd
from os.path import join
import os

Using TensorFlow backend.


In [2]:
# Names of the regions that have a data folder; `region` below selects one of them.
regions = ['borde_rural', 'borde_soacha', 'mixco_1_and_ebenezer', 'mixco_3']

In [16]:
# Region whose images are processed in this run.
region = 'mixco_1_and_ebenezer'

# Image folders of that region, relative to this notebook.
train_dir = os.path.join('..', '..', 'data', region, 'roofs_train')
test_dir = os.path.join('..', '..', 'data', region, 'roofs_test')

# Material name -> integer class label, numbered in the order listed here.
materials = {
    name: label
    for label, name in enumerate([
        'healthy_metal',
        'irregular_metal',
        'concrete_cement',
        'incomplete',
        'other',
    ])
}

Use a pretrained network to compute features from the images; these features are classified later. Import the model from the Keras library and remove the top (classification) layer by setting `include_top=False`. The output of the last layer is pooled so that we get feature vectors of size 1x2048 instead of 7x7x2048. The pooling method `'max'` was chosen empirically.

In [17]:
# ResNet50 with ImageNet weights and without its classification head
# (include_top=False); pooling='max' applies global max pooling to the last
# feature map so that every image yields a single feature vector.
model = ResNet50(weights='imagenet', include_top=False, pooling='max')
# Print the layer overview of the loaded network.
model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, None, None, 3 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, None, None, 3 0           input_2[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, None, None, 6 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, None, None, 6 256         conv1[0][0]                      
___________________________________________________________________________________________

## 1. Calculate features on the training data

The preprocessed data is stored such that every region has its own folder, which contains the subfolder defined as `train_dir` above. This folder is in turn subdivided by material. Each image carries its `id` as its filename.

Traverse all images in their corresponding folders which indicate the material and use `model.predict()` to get the features.

In [18]:
def calculate_features_train(train_dir):
    """Compute ResNet50 features for all labelled training images.

    For every key of the notebook-level ``materials`` dict the folder
    ``train_dir/<material>`` is walked recursively; each file found there is
    loaded as an image, passed through the notebook-level ``model`` and
    labelled with ``materials[material]``. Material folders that do not exist
    are skipped silently because ``os.walk`` yields nothing for them.

    Parameters
    ----------
    train_dir : str
        Folder that contains one subfolder per material.

    Returns
    -------
    pandas.DataFrame
        One row per image with the columns ``id`` (file name up to the first
        dot), ``features`` (flattened 1-D NumPy array of the model output)
        and ``label`` (integer taken from ``materials``). The frame is empty,
        but still has these columns, when no image is found.
    """
    columns = ['id', 'features', 'label']

    # Rows are collected in a plain list and turned into a DataFrame once at
    # the end: DataFrame.append copied the whole frame on every call and no
    # longer exists in pandas >= 2.0.
    records = []

    # Walk through all images
    for material, label in materials.items():
        material_fp = join(train_dir, material)
        for root, dirs, files in os.walk(material_fp):
            for file in files:
                # `root` is the folder os.walk is currently visiting, so this
                # path is also valid for images in nested subfolders.
                img_fp = join(root, file)
                image_id = file.split('.')[0]
                print(image_id, "labeled as", material, ":", label)

                # Resize to the 224x224 input size, add the batch axis and
                # apply the ResNet50-specific preprocessing
                img = image.load_img(img_fp, target_size=(224, 224))
                img_data = image.img_to_array(img)
                img_data = np.expand_dims(img_data, axis=0)
                img_data = preprocess_input(img_data)

                # Compute features
                resnet50_feature = model.predict(img_data)
                resnet50_feature_np = np.array(resnet50_feature).flatten()

                records.append({'id': image_id,
                                'features': resnet50_feature_np,
                                'label': label})

    return pd.DataFrame(records, columns=columns)

The results are stored in a dataframe as shown below and saved to disk as a pickle file.

In [None]:
# Run the feature extraction on the training images of the selected region;
# the function prints one line per processed image.
features_train = calculate_features_train(train_dir)

7a1c5eb8 labeled as healthy_metal : 0
7a1c6ebc labeled as healthy_metal : 0
7a1c8a28 labeled as healthy_metal : 0
7a1ca274 labeled as healthy_metal : 0
7a1cb232 labeled as healthy_metal : 0
7a1cb2be labeled as healthy_metal : 0
7a1ccba0 labeled as healthy_metal : 0
7a1cce84 labeled as healthy_metal : 0
7a1cd456 labeled as healthy_metal : 0
7a1cd7da labeled as healthy_metal : 0
7a1cdb90 labeled as healthy_metal : 0
7a1cdfa0 labeled as healthy_metal : 0
7a1ce40a labeled as healthy_metal : 0
7a1cfdaa labeled as healthy_metal : 0
7a1d017e labeled as healthy_metal : 0
7a1d0584 labeled as healthy_metal : 0
7a1d139e labeled as healthy_metal : 0
7a1d159c labeled as healthy_metal : 0
7a1d24f6 labeled as healthy_metal : 0
7a1d3716 labeled as healthy_metal : 0
7a1d3d2e labeled as healthy_metal : 0
7a1d4b20 labeled as healthy_metal : 0
7a1d4d82 labeled as healthy_metal : 0
7a1d5c3c labeled as healthy_metal : 0
7a1d60a6 labeled as healthy_metal : 0
7a1d6894 labeled as healthy_metal : 0
7a1d6cf4 lab

7a27d78e labeled as healthy_metal : 0
7a27dd06 labeled as healthy_metal : 0
7a27dd74 labeled as healthy_metal : 0
7a27e418 labeled as healthy_metal : 0
7a27e710 labeled as healthy_metal : 0
7a27e7e2 labeled as healthy_metal : 0
7a27f048 labeled as healthy_metal : 0
7a27fe62 labeled as healthy_metal : 0
7a280fba labeled as healthy_metal : 0
7a281032 labeled as healthy_metal : 0
7a2812c6 labeled as healthy_metal : 0
7a28207c labeled as healthy_metal : 0
7a282374 labeled as healthy_metal : 0
7a28338c labeled as healthy_metal : 0
7a283d50 labeled as healthy_metal : 0
7a2843a4 labeled as healthy_metal : 0
7a284dcc labeled as healthy_metal : 0
7a285632 labeled as healthy_metal : 0
7a285ce0 labeled as healthy_metal : 0
7a2862bc labeled as healthy_metal : 0
7a2863fc labeled as healthy_metal : 0
7a28674e labeled as healthy_metal : 0
7a2867bc labeled as healthy_metal : 0
7a286d98 labeled as healthy_metal : 0
7a287590 labeled as healthy_metal : 0
7a28a682 labeled as healthy_metal : 0
7a28a7d6 lab

7a31b2ea labeled as healthy_metal : 0
7a31be84 labeled as healthy_metal : 0
7a31c320 labeled as healthy_metal : 0
7a31c5fa labeled as healthy_metal : 0
7a31ca14 labeled as healthy_metal : 0
7a31e13e labeled as healthy_metal : 0
7a31e422 labeled as healthy_metal : 0
7a31e7d8 labeled as healthy_metal : 0
7a31f0fc labeled as healthy_metal : 0
7a31f93a labeled as healthy_metal : 0
7a31fb4c labeled as healthy_metal : 0
7a31fc82 labeled as healthy_metal : 0
7a3200b0 labeled as healthy_metal : 0
7a3206e6 labeled as healthy_metal : 0
7a320754 labeled as healthy_metal : 0
7a32268a labeled as healthy_metal : 0
7a323396 labeled as healthy_metal : 0
7a323d32 labeled as healthy_metal : 0
7a324a34 labeled as healthy_metal : 0
7a324cb4 labeled as healthy_metal : 0
7a324f34 labeled as healthy_metal : 0
7a324f98 labeled as healthy_metal : 0
7a32565a labeled as healthy_metal : 0
7a325bc8 labeled as healthy_metal : 0
7a327216 labeled as healthy_metal : 0
7a327856 labeled as healthy_metal : 0
7a32ac22 lab

7a3b1484 labeled as healthy_metal : 0
7a3b1556 labeled as healthy_metal : 0
7a3b15c4 labeled as healthy_metal : 0
7a3b2686 labeled as healthy_metal : 0
7a3b386a labeled as healthy_metal : 0
7a3b4e7c labeled as healthy_metal : 0
7a3b55e8 labeled as healthy_metal : 0
7a3b5c82 labeled as healthy_metal : 0
7a3b6538 labeled as healthy_metal : 0
7a3b7e38 labeled as healthy_metal : 0
7a3ba502 labeled as healthy_metal : 0
7a3ba570 labeled as healthy_metal : 0
7a3bc5e6 labeled as healthy_metal : 0
7a3bd0cc labeled as healthy_metal : 0
7a3bd27a labeled as healthy_metal : 0
7a3bd450 labeled as healthy_metal : 0
7a3be0bc labeled as healthy_metal : 0
7a3be6e8 labeled as healthy_metal : 0
7a3c0812 labeled as healthy_metal : 0
7a3c1410 labeled as healthy_metal : 0
7a3c1474 labeled as healthy_metal : 0
7a3c14e2 labeled as healthy_metal : 0
7a3c2144 labeled as healthy_metal : 0
7a3c2ef0 labeled as healthy_metal : 0
7a3c48b8 labeled as healthy_metal : 0
7a3c4d54 labeled as healthy_metal : 0
7a3c4dc2 lab

7a1c6976 labeled as irregular_metal : 1
7a1c6f98 labeled as irregular_metal : 1
7a1c76dc labeled as irregular_metal : 1
7a1c8a96 labeled as irregular_metal : 1
7a1ca4f4 labeled as irregular_metal : 1
7a1cb584 labeled as irregular_metal : 1
7a1cc768 labeled as irregular_metal : 1
7a1cd24e labeled as irregular_metal : 1
7a1cd320 labeled as irregular_metal : 1
7a1cdbf4 labeled as irregular_metal : 1
7a1cea72 labeled as irregular_metal : 1
7a1ceb44 labeled as irregular_metal : 1
7a1cf2e2 labeled as irregular_metal : 1
7a1cf346 labeled as irregular_metal : 1
7a1cfd46 labeled as irregular_metal : 1
7a1cff80 labeled as irregular_metal : 1
7a1d01ec labeled as irregular_metal : 1
7a1d0318 labeled as irregular_metal : 1
7a1d0656 labeled as irregular_metal : 1
7a1d1bc8 labeled as irregular_metal : 1
7a1d1dc6 labeled as irregular_metal : 1
7a1d1f60 labeled as irregular_metal : 1
7a1d235c labeled as irregular_metal : 1
7a1d242e labeled as irregular_metal : 1
7a1d2488 labeled as irregular_metal : 1


7a22481e labeled as irregular_metal : 1
7a224e36 labeled as irregular_metal : 1
7a2263e4 labeled as irregular_metal : 1
7a226c90 labeled as irregular_metal : 1
7a227316 labeled as irregular_metal : 1
7a227474 labeled as irregular_metal : 1
7a2276b8 labeled as irregular_metal : 1
7a227910 labeled as irregular_metal : 1
7a22819e labeled as irregular_metal : 1
7a228252 labeled as irregular_metal : 1
7a228518 labeled as irregular_metal : 1
7a2287d4 labeled as irregular_metal : 1
7a2288a6 labeled as irregular_metal : 1
7a228978 labeled as irregular_metal : 1
7a22997c labeled as irregular_metal : 1
7a229a58 labeled as irregular_metal : 1
7a229dfa labeled as irregular_metal : 1
7a22a19c labeled as irregular_metal : 1
7a22a26e labeled as irregular_metal : 1
7a22a548 labeled as irregular_metal : 1
7a22a94e labeled as irregular_metal : 1
7a22ab4c labeled as irregular_metal : 1
7a22b510 labeled as irregular_metal : 1
7a22b916 labeled as irregular_metal : 1
7a22bfb0 labeled as irregular_metal : 1


7a282ac2 labeled as irregular_metal : 1
7a282f4a labeled as irregular_metal : 1
7a28308a labeled as irregular_metal : 1
7a28345e labeled as irregular_metal : 1
7a283990 labeled as irregular_metal : 1
7a283ce2 labeled as irregular_metal : 1
7a284412 labeled as irregular_metal : 1
7a284d5e labeled as irregular_metal : 1
7a285272 labeled as irregular_metal : 1
7a2852e0 labeled as irregular_metal : 1
7a285f6a labeled as irregular_metal : 1
7a288332 labeled as irregular_metal : 1
7a2884fe labeled as irregular_metal : 1
7a2891c4 labeled as irregular_metal : 1
7a289318 labeled as irregular_metal : 1
7a28aad8 labeled as irregular_metal : 1
7a28b7bc labeled as irregular_metal : 1
7a28b910 labeled as irregular_metal : 1
7a28bb0e labeled as irregular_metal : 1
7a28bd2a labeled as irregular_metal : 1
7a28bf0a labeled as irregular_metal : 1
7a28bfe6 labeled as irregular_metal : 1
7a28cc3e labeled as irregular_metal : 1
7a28e084 labeled as irregular_metal : 1
7a28f89e labeled as irregular_metal : 1


7a2dbe38 labeled as irregular_metal : 1
7a2dc19e labeled as irregular_metal : 1
7a2dc270 labeled as irregular_metal : 1
7a2dc55e labeled as irregular_metal : 1
7a2dc996 labeled as irregular_metal : 1
7a2dcb4e labeled as irregular_metal : 1
7a2ddc56 labeled as irregular_metal : 1
7a2ded40 labeled as irregular_metal : 1
7a2e0064 labeled as irregular_metal : 1
7a2e0cd0 labeled as irregular_metal : 1
7a2e1086 labeled as irregular_metal : 1
7a2e15ea labeled as irregular_metal : 1
7a2e2170 labeled as irregular_metal : 1
7a2e25a8 labeled as irregular_metal : 1
7a2e26e8 labeled as irregular_metal : 1
7a2e2c56 labeled as irregular_metal : 1
7a2e31ba labeled as irregular_metal : 1
7a2e32fa labeled as irregular_metal : 1
7a2e33d6 labeled as irregular_metal : 1
7a2e616c labeled as irregular_metal : 1
7a2e62ac labeled as irregular_metal : 1
7a2e6748 labeled as irregular_metal : 1
7a2e67b6 labeled as irregular_metal : 1
7a2e6892 labeled as irregular_metal : 1
7a2e6e00 labeled as irregular_metal : 1


7a329cf0 labeled as irregular_metal : 1
7a32a3ee labeled as irregular_metal : 1
7a32b1d6 labeled as irregular_metal : 1
7a32b73a labeled as irregular_metal : 1
7a32bea6 labeled as irregular_metal : 1
7a32cf2c labeled as irregular_metal : 1
7a32d972 labeled as irregular_metal : 1
7a32db7a labeled as irregular_metal : 1
7a32dcb0 labeled as irregular_metal : 1
7a32dde6 labeled as irregular_metal : 1
7a32e548 labeled as irregular_metal : 1
7a32e87c labeled as irregular_metal : 1
7a32ea20 labeled as irregular_metal : 1
7a32f182 labeled as irregular_metal : 1
7a32f60a labeled as irregular_metal : 1
7a32fd08 labeled as irregular_metal : 1
7a33012c labeled as irregular_metal : 1
7a33046a labeled as irregular_metal : 1
7a3309ce labeled as irregular_metal : 1
7a330dde labeled as irregular_metal : 1
7a33139c labeled as irregular_metal : 1
7a332350 labeled as irregular_metal : 1
7a332cc4 labeled as irregular_metal : 1
7a332d28 labeled as irregular_metal : 1
7a332d8c labeled as irregular_metal : 1


7a37c2a2 labeled as irregular_metal : 1
7a37c45a labeled as irregular_metal : 1
7a37cb6c labeled as irregular_metal : 1
7a37cdf6 labeled as irregular_metal : 1
7a37d40e labeled as irregular_metal : 1
7a37d4d6 labeled as irregular_metal : 1
7a37eb38 labeled as irregular_metal : 1
7a37fd94 labeled as irregular_metal : 1
7a380a82 labeled as irregular_metal : 1
7a381428 labeled as irregular_metal : 1
7a381496 labeled as irregular_metal : 1
7a3818ce labeled as irregular_metal : 1
7a381d7e labeled as irregular_metal : 1
7a381e5a labeled as irregular_metal : 1
7a3822c4 labeled as irregular_metal : 1
7a3824e0 labeled as irregular_metal : 1
7a38262a labeled as irregular_metal : 1
7a382ac6 labeled as irregular_metal : 1
7a3834c6 labeled as irregular_metal : 1
7a384d08 labeled as irregular_metal : 1
7a38506e labeled as irregular_metal : 1
7a3859e2 labeled as irregular_metal : 1
7a385bfe labeled as irregular_metal : 1
7a38618a labeled as irregular_metal : 1
7a386266 labeled as irregular_metal : 1


7a3cb87a labeled as irregular_metal : 1
7a3cba96 labeled as irregular_metal : 1
7a3cbb68 labeled as irregular_metal : 1
7a3cbd16 labeled as irregular_metal : 1
7a3cc27a labeled as irregular_metal : 1
7a3cc34c labeled as irregular_metal : 1
7a3cc55e labeled as irregular_metal : 1
7a3cc63a labeled as irregular_metal : 1
7a3cc9f0 labeled as irregular_metal : 1
7a3ccd9c labeled as irregular_metal : 1
7a3cce78 labeled as irregular_metal : 1
7a3ccfb8 labeled as irregular_metal : 1
7a3cd3e6 labeled as irregular_metal : 1
7a3cd4b8 labeled as irregular_metal : 1
7a3ce390 labeled as irregular_metal : 1
7a3ce5a2 labeled as irregular_metal : 1
7a3cea34 labeled as irregular_metal : 1
7a3cea98 labeled as irregular_metal : 1
7a3ceffc labeled as irregular_metal : 1
7a3cf844 labeled as irregular_metal : 1
7a3cfa56 labeled as irregular_metal : 1
7a3cfe7a labeled as irregular_metal : 1
7a3d1464 labeled as irregular_metal : 1
7a3d181a labeled as irregular_metal : 1
7a3d1e50 labeled as irregular_metal : 1


7a41d0a8 labeled as irregular_metal : 1
7a41d382 labeled as irregular_metal : 1
7a41d594 labeled as irregular_metal : 1
7a41da8a labeled as irregular_metal : 1
7a41dddc labeled as irregular_metal : 1
7a41e46c labeled as irregular_metal : 1
7a41eeb2 labeled as irregular_metal : 1
7a41fb50 labeled as irregular_metal : 1
7a42017c labeled as irregular_metal : 1
7a420546 labeled as irregular_metal : 1
7a4210b8 labeled as irregular_metal : 1
7a4212d4 labeled as irregular_metal : 1
7a4214e6 labeled as irregular_metal : 1
7a421626 labeled as irregular_metal : 1
7a42231e labeled as irregular_metal : 1
7a42461e labeled as irregular_metal : 1
7a424fe2 labeled as irregular_metal : 1
7a4253c0 labeled as irregular_metal : 1
7a425dde labeled as irregular_metal : 1
7a4261a8 labeled as irregular_metal : 1
7a42706c labeled as irregular_metal : 1
7a42713e labeled as irregular_metal : 1
7a4275e4 labeled as irregular_metal : 1
7a427878 labeled as irregular_metal : 1
7a427e4a labeled as irregular_metal : 1


7a221f24 labeled as concrete_cement : 2
7a222050 labeled as concrete_cement : 2
7a22296a labeled as concrete_cement : 2
7a222e10 labeled as concrete_cement : 2
7a2260b0 labeled as concrete_cement : 2
7a22624a labeled as concrete_cement : 2
7a226592 labeled as concrete_cement : 2
7a22774e labeled as concrete_cement : 2
7a2279ba labeled as concrete_cement : 2
7a22a5ac labeled as concrete_cement : 2
7a22c226 labeled as concrete_cement : 2
7a22e012 labeled as concrete_cement : 2
7a22ea1c labeled as concrete_cement : 2
7a22ea8a labeled as concrete_cement : 2
7a22f80e labeled as concrete_cement : 2
7a22fc46 labeled as concrete_cement : 2
7a230a88 labeled as concrete_cement : 2
7a230bc8 labeled as concrete_cement : 2
7a2311c2 labeled as concrete_cement : 2
7a2318f2 labeled as concrete_cement : 2
7a232f0e labeled as concrete_cement : 2
7a233abc labeled as concrete_cement : 2
7a234bba labeled as concrete_cement : 2
7a23518c labeled as concrete_cement : 2
7a23830a labeled as concrete_cement : 2


7a2de6d8 labeled as concrete_cement : 2
7a2df4c0 labeled as concrete_cement : 2
7a2df8ee labeled as concrete_cement : 2
7a2e09f6 labeled as concrete_cement : 2
7a2e4b32 labeled as concrete_cement : 2
7a2e5456 labeled as concrete_cement : 2
7a2e7a30 labeled as concrete_cement : 2
7a2e80d4 labeled as concrete_cement : 2
7a2e8f5c labeled as concrete_cement : 2
7a2e924a labeled as concrete_cement : 2
7a2e9326 labeled as concrete_cement : 2
7a2ec44a labeled as concrete_cement : 2
7a2ec58a labeled as concrete_cement : 2
7a2ec79c labeled as concrete_cement : 2
7a2ed3a4 labeled as concrete_cement : 2
7a2ed4da labeled as concrete_cement : 2
7a2eda98 labeled as concrete_cement : 2
7a2ee178 labeled as concrete_cement : 2
7a2ee52e labeled as concrete_cement : 2
7a2ee664 labeled as concrete_cement : 2
7a2eef74 labeled as concrete_cement : 2
7a2ef32a labeled as concrete_cement : 2
7a2ef9ce labeled as concrete_cement : 2
7a2f1d50 labeled as concrete_cement : 2
7a2f2804 labeled as concrete_cement : 2


7a375934 labeled as concrete_cement : 2
7a375a06 labeled as concrete_cement : 2
7a376e1a labeled as concrete_cement : 2
7a376fc8 labeled as concrete_cement : 2
7a377be4 labeled as concrete_cement : 2
7a378850 labeled as concrete_cement : 2
7a378986 labeled as concrete_cement : 2
7a379570 labeled as concrete_cement : 2
7a37978c labeled as concrete_cement : 2
7a379ce6 labeled as concrete_cement : 2
7a37a146 labeled as concrete_cement : 2
7a37a8d0 labeled as concrete_cement : 2
7a37aa10 labeled as concrete_cement : 2
7a37b230 labeled as concrete_cement : 2
7a37b3de labeled as concrete_cement : 2
7a37c888 labeled as concrete_cement : 2
7a37e8a4 labeled as concrete_cement : 2
7a37f9b6 labeled as concrete_cement : 2
7a37fa24 labeled as concrete_cement : 2
7a380a14 labeled as concrete_cement : 2
7a381784 labeled as concrete_cement : 2
7a3829f4 labeled as concrete_cement : 2
7a3837be labeled as concrete_cement : 2
7a383bec labeled as concrete_cement : 2
7a384236 labeled as concrete_cement : 2


7a40b420 labeled as concrete_cement : 2
7a40bb96 labeled as concrete_cement : 2
7a40bcd6 labeled as concrete_cement : 2
7a40c51e labeled as concrete_cement : 2
7a40d964 labeled as concrete_cement : 2
7a40edbe labeled as concrete_cement : 2
7a40f002 labeled as concrete_cement : 2
7a40ff66 labeled as concrete_cement : 2
7a4103ee labeled as concrete_cement : 2
7a410ec0 labeled as concrete_cement : 2
7a41112c labeled as concrete_cement : 2
7a4111fe labeled as concrete_cement : 2
7a411b0e labeled as concrete_cement : 2
7a413116 labeled as concrete_cement : 2
7a414070 labeled as concrete_cement : 2
7a4148c2 labeled as concrete_cement : 2
7a414d4a labeled as concrete_cement : 2
7a414db8 labeled as concrete_cement : 2
7a41510a labeled as concrete_cement : 2
7a415b1e labeled as concrete_cement : 2
7a416a8c labeled as concrete_cement : 2
7a416e42 labeled as concrete_cement : 2
7a417270 labeled as concrete_cement : 2
7a417694 labeled as concrete_cement : 2
7a417edc labeled as concrete_cement : 2


In [None]:
import pickle
from os import makedirs
from os.path import exists  # no longer used in this cell; import kept in case other cells rely on it

# Folder (relative to this notebook) that collects the pickled feature tables
pickle_path = join('..', '..', 'pickles')
# exist_ok=True makes the cell safe to re-run and avoids the separate
# "check, then create" step
makedirs(pickle_path, exist_ok=True)

# Store the training features of the selected region
with open(join(pickle_path, 'resnet50_features_' + region + '_train.pkl' ), 'wb') as f:
    pickle.dump(features_train, f)

### Visualize the features

Reduce the dimensionality of the features to 2D using t-SNE (https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding) so that they can be inspected visually.

In [None]:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import matplotlib
from sklearn.decomposition import PCA
%matplotlib inline

In [None]:
def plot_tSNE(features, labels=None, number_of_materials=5, random_state=None):
    """Embed the features in 2-D with t-SNE and show them as a scatter plot.

    Parameters
    ----------
    features : numpy.ndarray
        Feature matrix handed to ``TSNE.fit_transform``; presumably of shape
        (n_samples, n_features) with one row per image.
    labels : array-like, optional
        One numeric class label per sample, used to colour the points. If
        omitted, every sample is assigned label 0.
    number_of_materials : int, optional
        Number of classes; the colour scale is split into this many bins
        with boundaries at 0, 1, ..., ``number_of_materials``.
    random_state : int or None, optional
        Seed forwarded to ``TSNE``. t-SNE is stochastic, so pass an integer
        to obtain the same embedding on every run. The default ``None``
        keeps the previous, unseeded behaviour.
    """
    if labels is None:
        labels = np.zeros((features.shape[0]))

    tsne_features = TSNE(n_components=2, random_state=random_state).fit_transform(features)

    # Build a colormap from all colours of the jet map
    base_cmap = plt.cm.jet
    cmaplist = [base_cmap(i) for i in range(base_cmap.N)]
    cmap = base_cmap.from_list('Custom cmap', cmaplist, base_cmap.N)

    # Define the bins (one per class) and normalize
    bounds = np.linspace(0, number_of_materials, number_of_materials + 1)
    norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N)

    print(tsne_features.shape)

    # Plot the embedded features, coloured by label
    plt.figure()
    scat = plt.scatter(tsne_features[:, 0], tsne_features[:, 1], c=labels, cmap=cmap, norm=norm)
    cb = plt.colorbar(scat, spacing='proportional', ticks=bounds)
    cb.set_label('label')
    plt.xlabel('t-SNE dimension 1')
    plt.ylabel('t-SNE dimension 2')
    plt.title('t-SNE embedding of the features')

Convert features within the dataframe to a single matrix

In [None]:
# Stack the per-image feature vectors row-wise: one row per image,
# one column per feature.
feat_matrix = np.vstack(features_train['features'].tolist())

# Labels in the same row order as the feature matrix
labels = features_train['label'].to_numpy()

In [None]:
# Show the 2-D embedding of the training features, coloured by material label.
plot_tSNE(feat_matrix, labels)

## 2. Calculate features on the test data


This procedure is similar to the one for the training data, except that we do not know the labels and the folder with the test images is not subdivided by material.

In [None]:
def calculate_features_test(test_dir):
    """Compute ResNet50 features for all (unlabelled) test images.

    ``test_dir`` is walked recursively; each file found is loaded as an
    image and passed through the notebook-level ``model``.

    Parameters
    ----------
    test_dir : str
        Folder that contains the test images.

    Returns
    -------
    pandas.DataFrame
        One row per image with the columns ``id`` (file name up to the first
        dot) and ``features`` (flattened 1-D NumPy array of the model
        output). The frame is empty, but still has these columns, when no
        image is found.
    """
    columns = ['id', 'features']

    # Rows are collected in a plain list and turned into a DataFrame once at
    # the end: DataFrame.append copied the whole frame on every call and no
    # longer exists in pandas >= 2.0.
    records = []

    # Walk through all images
    for root, dirs, files in os.walk(test_dir):
        for file in files:
            img_fp = join(root, file)
            image_id = file.split('.')[0]
            print("Predict features for", image_id)

            # Resize to the 224x224 input size, add the batch axis and apply
            # the ResNet50-specific preprocessing
            img = image.load_img(img_fp, target_size=(224, 224))
            img_data = image.img_to_array(img)
            img_data = np.expand_dims(img_data, axis=0)
            img_data = preprocess_input(img_data)

            # Compute features
            resnet50_feature = model.predict(img_data)
            resnet50_feature_np = np.array(resnet50_feature).flatten()

            records.append({'id': image_id,
                            'features': resnet50_feature_np})

    return pd.DataFrame(records, columns=columns)

In [None]:
# Run the feature extraction on the test images of the selected region;
# the function prints one line per processed image.
features_test = calculate_features_test(test_dir)

In [None]:
# Preview the first rows of the test feature table.
features_test.head()

In [None]:
# Store the test features of the selected region. Relies on `pickle` and
# `pickle_path` from the cell that saves the training features.
with open(join(pickle_path, 'resnet50_features_' + region + '_test.pkl' ), 'wb') as f:
    pickle.dump(features_test, f)