In [19]:
from os.path import join

import boto3
from botocore.exceptions import ClientError

from rastervision.common.settings import results_path
from rastervision.common.utils import _makedirs


# Runs that make up "ensemble 5" (see the ensembling cells further down).
# Each entry is a run name relative to the results directory: it is joined
# onto the local results path and also used to build the S3 key
# 'results/<run_name>/<file_name>' when downloading.
rob_run_names = [
    'tagging/7_11_17/dense_ensemble2/0',
    'tagging/7_11_17/dense_ensemble2/1',
    'tagging/7_11_17/dense_ensemble2/2',
    'tagging/7_11_17/dense_ensemble2/3',
    'tagging/7_11_17/dense_ensemble2/4',
    'tagging/7_5_17/ensemble/0',
    'tagging/7_5_17/ensemble/1',
    'tagging/7_5_17/ensemble/2',
    'tagging/7_5_17/ensemble/3',
    'tagging/7_5_17/ensemble/4',
    'tagging/7_5_17/ensemble/avg',
    'tagging/6_30_17/rerun_best-test_aug',
    'tagging/7_3_17/baseline-branch-tiffdrop',
    'tagging/7_3_17/dual_resnet-stage4-moretrain',
    'tagging/7_3_17/dual_resnet-stage5-moretrain',
    'tagging/7_7_17/tif_ensemble_0/0',
    'tagging/7_7_17/tif_ensemble_0/1',
    'tagging/7_7_17/tif_ensemble_0/2',
    'tagging/7_7_17/tif_ensemble_0/3',
    'tagging/7_7_17/jpg_ensemble_0/0',
    'tagging/7_7_17/jpg_ensemble_0/1',
    'tagging/7_7_17/jpg_ensemble_0/2',
    'tagging/7_7_17/jpg_ensemble_0/3',
    'tagging/7_7_17/baseline_cyclic_1',
    'tagging/7_7_17/baseline_cyclic_1200steps',
    'tagging/7_7_17/baseline_cyclic_2',
    'tagging/7_6_17/baseline_cyclic',
    'tagging/7_2_17/IRRGtiff_v5actual_dense121_3x10epochs_0',
    'tagging/6_30_17/dense121_dropout_1',
    'tagging/6_30_17/dense121_dropout_2',
    'tagging/6_30_17/dense121_dropout_3',
    'tagging/6_29_17/dense121_3x10epochs_0',
    'tagging/6_29_17/IRRGBtiff_v5_dense121_3x10epochs_0',
    'tagging/6_28_17/baseline_testrot_0'
]

# The four lists below regroup run names by category (judging by the variable
# names: model family / input type -- TODO confirm). They are not an exact
# regrouping of rob_run_names:
#   * 'tagging/7_5_17/ensemble/avg' and 'tagging/6_30_17/rerun_best-test_aug'
#     are in rob_run_names but in none of these groups;
#   * 'tagging/7_10_17/cyclic_2400steps' and 'tagging/7_2_17/dual_resnet_30epoch'
#     appear here but not in rob_run_names.

# The 7_5_17 ensemble (0-4), 7_11_17 dense_ensemble2 (0-4) and 7_7_17
# jpg_ensemble_0 (0-3) runs.
best_densenet_run_names = [
    'tagging/7_5_17/ensemble/0',
    'tagging/7_5_17/ensemble/1',
    'tagging/7_5_17/ensemble/2',
    'tagging/7_5_17/ensemble/3',
    'tagging/7_5_17/ensemble/4',
    'tagging/7_11_17/dense_ensemble2/0',
    'tagging/7_11_17/dense_ensemble2/1',
    'tagging/7_11_17/dense_ensemble2/2',
    'tagging/7_11_17/dense_ensemble2/3',
    'tagging/7_11_17/dense_ensemble2/4',
    'tagging/7_7_17/jpg_ensemble_0/0',
    'tagging/7_7_17/jpg_ensemble_0/1',
    'tagging/7_7_17/jpg_ensemble_0/2',
    'tagging/7_7_17/jpg_ensemble_0/3'
]

# The remaining dense121 runs from 6_29_17 and 6_30_17.
other_densenet_run_names = [
    'tagging/6_30_17/dense121_dropout_1',
    'tagging/6_30_17/dense121_dropout_2',
    'tagging/6_30_17/dense121_dropout_3',
    'tagging/6_29_17/dense121_3x10epochs_0'
]

# Baseline / cyclic runs; the last entry is not part of rob_run_names.
resnet_run_names = [
    'tagging/7_7_17/baseline_cyclic_1',
    'tagging/7_7_17/baseline_cyclic_1200steps',
    'tagging/7_7_17/baseline_cyclic_2',
    'tagging/7_6_17/baseline_cyclic',
    'tagging/6_28_17/baseline_testrot_0',
    'tagging/7_10_17/cyclic_2400steps'
]

# Runs presumably trained on TIFF input (per the variable and run names);
# 'tagging/7_2_17/dual_resnet_30epoch' is not part of rob_run_names.
tiff_run_names = [
    'tagging/7_3_17/baseline-branch-tiffdrop',
    'tagging/7_3_17/dual_resnet-stage4-moretrain',
    'tagging/7_3_17/dual_resnet-stage5-moretrain',
    'tagging/7_2_17/dual_resnet_30epoch',
    'tagging/7_7_17/tif_ensemble_0/0',
    'tagging/7_7_17/tif_ensemble_0/1',
    'tagging/7_7_17/tif_ensemble_0/2',
    'tagging/7_7_17/tif_ensemble_0/3',
    'tagging/7_2_17/IRRGtiff_v5actual_dense121_3x10epochs_0',
    'tagging/6_29_17/IRRGBtiff_v5_dense121_3x10epochs_0'
]

# Every run that belongs to one of the category lists above.
grouped_run_names = best_densenet_run_names + other_densenet_run_names + resnet_run_names + tiff_run_names

# Union of the grouped runs and the ensemble-5 runs, without duplicates.
# This previously read `set(all_run_names)`, i.e. it referenced itself before
# being defined and only worked with leftover kernel state. Sorting makes the
# order (and therefore `all_run_names[0]`) the same on every kernel restart.
all_run_names = sorted(set(grouped_run_names) | set(rob_run_names))

# File names stored under each run's results directory, and the S3 bucket
# they are downloaded from.
test_pred_fn = 'test_preds.csv'
scores_fn = 'scores.json'
s3_bucket = 'raster-vision'


def s3_download(run_name, file_name, new_file_name=None):
    """Download one result file of a run from S3 into the local results tree.

    The object 'results/<run_name>/<file_name>' is fetched from the bucket
    named by `s3_bucket` and written to
    '/opt/data/results/<run_name>/<new_file_name>'.

    Args:
        run_name: run name, e.g. 'tagging/7_5_17/ensemble/0'.
        file_name: name of the file inside the run's S3 results prefix.
        new_file_name: local file name to save as; defaults to `file_name`.

    NOTE(review): the local root is hard-coded here while later cells read
    from `results_path`; presumably both point at the same directory --
    confirm.
    """
    local_name = file_name if new_file_name is None else new_file_name
    s3_key = 'results/{}/{}'.format(run_name, file_name)
    local_path = join('/opt/data/results/', run_name, local_name)
    bucket = boto3.resource('s3').Bucket(s3_bucket)
    bucket.download_file(s3_key, local_path)
    
# Fetch the test predictions and the scores file of every run from S3.
for run_name in all_run_names:
    _makedirs(join('/opt/data/results/', run_name))
    try:
        s3_download(run_name, test_pred_fn)
    except ClientError:
        # Some runs store their predictions as 'test_predictions.csv';
        # download that instead and save it under the common local name.
        # Only S3 client errors trigger the fallback, so interrupts and
        # programming errors are no longer silently swallowed.
        s3_download(run_name, 'test_predictions.csv', test_pred_fn)

    s3_download(run_name, scores_fn)



In [45]:
import json

# Map each run name to its F2 score read from the run's scores.json.
run_f2 = {}
for run_name in all_run_names:
    run_path = join(results_path, run_name)
    scores_path = join(run_path, 'scores.json')
    with open(scores_path, 'r') as scores_file:
        scores = json.load(scores_file)

    # The score is stored under 'f2' in some files and 'f2_samples' in others.
    if 'f2' in scores:
        f2 = scores['f2']
    elif 'f2_samples' in scores:
        f2 = scores['f2_samples']
    else:
        # Without this branch the previous run's f2 would be silently reused
        # (or a NameError raised on the first run).
        raise KeyError(
            "neither 'f2' nor 'f2_samples' found in {}".format(scores_path))
    run_f2[run_name] = f2

In [56]:
# F2 score a run must reach to stay in the "decent" ensemble.
f2_threshold = 0.92

# (run_name, f2) pairs for the ensemble-5 runs, in rob_run_names order.
rob_f2_list = [(run_name, run_f2[run_name]) for run_name in rob_run_names]

# Split on the threshold. `>=` keeps a run scoring exactly 0.92 in the decent
# group (only runs below 0.92 are meant to be dropped); previously such a run
# landed in neither list.
decent_f2_list = [pair for pair in rob_f2_list if pair[1] >= f2_threshold]
bad_f2_list = [pair for pair in rob_f2_list if pair[1] < f2_threshold]
decent_run_names = [pair[0] for pair in decent_f2_list]

print(len(rob_run_names))
print(len(rob_f2_list))
print(len(decent_f2_list))
print(len(bad_f2_list))
print(bad_f2_list)


34
34
27
7
[('tagging/7_7_17/tif_ensemble_0/0', 0.9153), ('tagging/7_7_17/tif_ensemble_0/1', 0.91378), ('tagging/7_7_17/tif_ensemble_0/2', 0.91531), ('tagging/7_7_17/tif_ensemble_0/3', 0.9159), ('tagging/7_6_17/baseline_cyclic', 0.90561), ('tagging/6_30_17/dense121_dropout_2', 0.91174), ('tagging/6_30_17/dense121_dropout_3', 0.89206)]


In [28]:
import numpy as np
import math

from rastervision.tagging.data.planet_kaggle import TagStore

# Take the file indices found in the first run's prediction file as the
# reference list; make_test_preds requests every run's tags for these indices.
run_name = all_run_names[0]
run_path = join(results_path, run_name)
test_pred_path = join(run_path, test_pred_fn)
reference_tag_store = TagStore(test_pred_path)
test_file_inds = list(reference_tag_store.file_ind_to_tags.keys())

def make_test_preds(run_names, out_path):
    """Combine the test predictions of several runs by vote and save them.

    For each run, the tag array for `test_file_inds` is loaded from the run's
    prediction file. The arrays are stacked along a new third axis and summed
    over it; an entry of the result is true when that sum is at least
    ceil(number of runs / 2), so with an even number of runs a tie counts as
    true. A run listed more than once is counted once per occurrence.

    Args:
        run_names: run names whose predictions are combined.
        out_path: path the combined tags are saved to via `TagStore.save`.

    Assumes each tag array is 2-D with one row per entry of `test_file_inds`
    and 0/1 (or boolean) values -- TODO confirm against TagStore.
    """
    per_run_preds = [
        TagStore(join(results_path, name, test_pred_fn)).get_tag_array(test_file_inds)
        for name in run_names
    ]

    # One slice per run along the last axis.
    stacked_preds = np.stack(per_run_preds, axis=2)
    run_count = stacked_preds.shape[2]
    vote_counts = stacked_preds.sum(axis=2)
    majority_preds = vote_counts >= math.ceil(run_count / 2)

    ensemble_store = TagStore()
    for row, tags in enumerate(majority_preds):
        ensemble_store.add_tags(test_file_inds[row], tags)

    ensemble_store.save(out_path)

In [29]:
# ensemble 5 is made of rob_run_names
# 0.93217 LB

# ensemble 5 minus models with < 0.92 val f2
# 0.93201 LB
out_path = '/opt/data/results/jupyter_out/'
_makedirs(out_path)
decent_preds_path = join(out_path, 'decent_preds.csv')
make_test_preds(decent_run_names, decent_preds_path)

In [58]:
# ensemble 5 plus 5 new densenets
# 0.93202 LB
# NOTE(review): every entry of new_dn_run_names is already present in
# rob_run_names as defined in this notebook, so the concatenation below lists
# those five runs twice and make_test_preds counts each of them twice.
# Confirm whether that double weighting is intended.
out_path = '/opt/data/results/jupyter_out/'
new_dn_run_names = [
    'tagging/7_11_17/dense_ensemble2/0',
    'tagging/7_11_17/dense_ensemble2/1',
    'tagging/7_11_17/dense_ensemble2/2',
    'tagging/7_11_17/dense_ensemble2/3',
    'tagging/7_11_17/dense_ensemble2/4'
]
all_preds_run_names = rob_run_names + new_dn_run_names
all_preds_path = join(out_path, 'all_preds.csv')
make_test_preds(all_preds_run_names, all_preds_path)

In [59]:
# ensemble 5 minus 5 densenets
# 0.93221 LB
# NOTE(review): the list below has six entries (ensemble/0-4 plus
# ensemble/avg), not five -- confirm which set the score above refers to.
second_densenet_run_names = [
    'tagging/7_5_17/ensemble/0',
    'tagging/7_5_17/ensemble/1',
    'tagging/7_5_17/ensemble/2',
    'tagging/7_5_17/ensemble/3',
    'tagging/7_5_17/ensemble/4',
    'tagging/7_5_17/ensemble/avg'
]

# Everything in rob_run_names except the runs listed above.
minus_second_dn_run_names = list(set(rob_run_names) - set(second_densenet_run_names))
minus_second_dn_path = join(out_path, 'minus_second_dn.csv')
make_test_preds(minus_second_dn_run_names, minus_second_dn_path)