# Tutorial 02: Making predictions

In this notebook we learn how to make predictions using components explained in previous tutorials.

In [25]:
from math import ceil
from time import time
import pandas as pd
import logging
from functools import reduce

from damage.models import CNN
from damage.data import DataStream, load_experiment_results

First, we will load the features generated on the first notebook and the experiment results generated when validating the models

In [14]:
features = pd.read_pickle('../logs/features/test.p')
features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,damage_num,destroyed,raster_date,latitude,longitude,location_index,image
city,patch_id,date,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
daraa,960-3520,2013-09-07,0.0,0,2017-02-07,32.642095,36.073268,444970884350,"[[[90, 73, 74, 33, 20, 16], [90, 77, 74, 16, 4..."
daraa,1600-3520,2013-09-07,0.0,0,2017-02-07,32.642095,36.076701,444970884359,"[[[107, 97, 90, 82, 61, 49], [107, 97, 90, 90,..."
daraa,1600-4160,2013-09-07,0.0,0,2017-02-07,32.638662,36.076701,444958290923,"[[[123, 121, 123, 99, 89, 82], [132, 125, 123,..."
daraa,2240-3520,2013-09-07,0.0,0,2017-02-07,32.642095,36.080134,444970884368,"[[[123, 121, 107, 255, 227, 206], [115, 117, 1..."
daraa,2240-4160,2013-09-07,0.0,0,2017-02-07,32.638662,36.080134,444958290932,"[[[189, 178, 173, 255, 178, 165], [156, 150, 1..."


In [9]:
EXPERIMENTS_PATH = '../logs/experiments/'
experiment_results = load_experiment_results(EXPERIMENTS_PATH)
experiment_results.head()

Unnamed: 0,accuracy,false_negatives,false_positives,features,id,loss,model,negatives,patch_size,positives,...,val_accuracy,val_false_negatives,val_false_positives,val_loss,val_negatives,val_positives,val_precision,val_recall,val_true_negatives,val_true_positives
0,"[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[147.66667, 147.66667, 147.66667, 147.66667, 1...",,1557911509,"[0.025914643131062856, 0.053637569527989544, 0...",ABCMeta,"[147.66667, 147.66667, 147.66667, 147.66667, 1...",320.0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",...,"[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[111.0, 111.0, 111.0, 111.0, 111.0, 111.0, 111...","[7.666618824005127, 7.666618824005127, 7.66661...","[111.0, 111.0, 111.0, 111.0, 111.0, 111.0, 111...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[111.0, 111.0, 111.0, 111.0, 111.0, 111.0, 111...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,[0.5],[0.0],[41.666668],test_df.p,1558465204,[0.0],<class 'damage.models.cnn.CNN'>,[41.666668],,[0.0],...,,,,,,,,,,
2,[0.5],[0.15436241],[46.744965],,1557867008,[1.2168138407993876],ABCMeta,[46.744965],64.0,[0.15436241],...,[0.5],[0.0],[47.0],[0.6935411691665649],[47.0],[0.0],[0.0],[0.0],[47.0],[0.0]
3,"[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, ...","[0.4, 0.6, 0.4, 0.6, 0.4, 0.6, 0.4, 0.6, 0.4, ...","[112.8, 112.6, 112.8, 105.8, 112.8, 112.6, 112...",,1557911004,"[0.04893791731512799, 0.05580751121360085, 0.0...",ABCMeta,"[112.8, 112.6, 112.8, 105.8, 112.8, 112.6, 112...",320.0,"[0.4, 0.6, 0.4, 0.6, 0.4, 0.6, 0.4, 0.6, 0.4, ...",...,"[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[110.0, 110.0, 110.0, 110.0, 110.0, 110.0, 110...","[7.667031764984131, 7.667031764984131, 7.66703...","[110.0, 110.0, 110.0, 110.0, 110.0, 110.0, 110...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.009009009, 0.009009009, 0.009009009, 0.0090...","[0.9999999, 0.9999999, 0.9999999, 0.9999999, 0...","[110.0, 110.0, 110.0, 110.0, 110.0, 110.0, 110...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ..."


Now we choose the results according to some logic (e.g. best results, last experiment...). In this case, we will just take the last experiment, which we can find using the experiment id column (timestamp of generation).

In [12]:
Model = CNN
experiment_results_single_model = experiment_results.loc[experiment_results['model'] == str(Model)]
space = experiment_results_single_model.loc[experiment_results_single_model['id'].idxmax(), 'space']
space

{'learning_rate': 0.35564803062231287,
 'batch_size': 43,
 'convolutional_layers': [{'kernel_size': [11, 11],
   'pool_size': [7, 7],
   'filters': 203},
  {'kernel_size': [7, 7], 'pool_size': [6, 6], 'filters': 16},
  {'kernel_size': [14, 14], 'pool_size': [2, 2], 'filters': 111}],
 'epochs': 1,
 'class_weight': {0: 0.0, 1: 1.0}}

Now we adjust the class weight.

In [15]:
space['class_weight'] = {
    0: min(features['destroyed'].mean(), 0.1),
    1: max(1 - features['destroyed'].mean(), 0.9),
}

In [33]:
data_stream = DataStream(batch_size=space['batch_size'], train_proportion=0.6)
train_index_generator, test_index_generator = data_stream.split_by_patch_id(features['image'])
train_generator = data_stream.get_data_generator_from_index([features['image'], features['destroyed']],
                                                            train_index_generator)
test_indices = list(test_index_generator)
test_generator = data_stream.get_data_generator_from_index([features['image']], test_indices)

num_batches = ceil(len(features) / space['batch_size'])
model = Model(**space)
model.fit_generator(train_generator,
                    steps_per_epoch=num_batches,
                    validation_steps=1,
                    **space)

predictions = model.predict_generator(test_generator, steps=len(test_indices))
predictions = pd.DataFrame({
    'prediction': predictions[:, 1],
}, index=reduce(lambda l, r: l.union(r), test_indices))



W0523 12:42:11.420438 140735601300352 training_generator.py:244] Your dataset iterator ran out of data; interrupting training. Make sure that your iterator can generate at least `steps_per_epoch * epochs` batches (in this case, 4 batches). You may need touse the repeat() function when building your dataset.


In [34]:
predictions.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,prediction
city,patch_id,date,Unnamed: 3_level_1
daraa,10560-6720,2013-09-07,1.0
daraa,12480-6080,2013-09-07,1.0
daraa,1600-4160,2013-09-07,1.0
daraa,2240-4160,2013-09-07,1.0
daraa,2880-4160,2013-09-07,1.0


In [35]:
RESULTS_PATH = '../logs/predictions'
file_name = '{}/prediction_test.p'.format(RESULTS_PATH)
predictions.to_pickle(file_name)
print('Stored predictions on file: {}'.format(file_name))

Stored predictions on file: ../logs/predictions/prediction_test.p
