In [48]:
from __future__ import division
import os
import pandas as pd

## Load data

In [49]:
expected_tags = {}
all_tags = set()
with open("./rawInput/train-augmented.csv", "r") as train_labels:
    train_labels.readline()
    for line in train_labels:
        img, tags = line.rstrip("\n").split(",")
        expected_tags[img] = tags.split(" ")
        all_tags.update(train_tags[img])
print len(expected_tags)

323832


In [50]:
predicted_tags = {}
with open("./predict/train-predict.csv", "r") as predict_labels:
    predict_labels.readline()
    for line in predict_labels:
        img, tags = line.rstrip("\n").split(",")
        predicted_tags[img] = tags.split(" ")
print len(predicted_tags)

323832


In [51]:
training_set = set([])
validation_set = set([])
with open("./train/training-files.csv", "r") as training_files:
    original_training_set = set(training_files.readline().strip().split(","))
    for f in os.listdir("./rawInput/train-jpg"):
        filename = f.split(".")[0]
        s = training_set if filename in original_training_set else validation_set
        for i in range(8):
            s.add("{name}--{i}".format(name=filename, i=i))

print "original images:", len(os.listdir("./rawInput/train-jpg"))
print "total images:", len(os.listdir("./rawInput/train-jpg-augmented"))
print "training set:", len(training_set)
print "validation set:", len(validation_set)

original images: 40479
total images: 323832
training set: 291448
validation set: 32384


## Read predictions

### Per label score

In [61]:
# First value is number of correctly predicted
# Second is number of not detected (false negative)
# Third is number of inaccurately predicted (false positive)
tags_score = {t: { "tp":0, "fn": 0, "fp": 0} for t in all_tags}


def scores(tp, fp, fn):
    p = tp / (tp + fp)
    r = tp / (tp + fn)
    if p == 0 or r == 0:
        f2 = 0
    else:
        b = 2
        f2 = (1 + b**2) * p * r / (b**2*p + r)
    return p, r, f2
    
    
for img in validation_set:
    true_tags = train_tags[img]
    predicted_tags = pred_tags[img]
    for t in true_tags:
        if t in predicted_tags:
            tags_score[t]["tp"] += 1
        else:
            tags_score[t]["fn"] += 1
    for t in predicted_tags:
        if t not in true_tags:
            tags_score[t]["fp"] += 1

data = []
for tag in all_tags:
    s = tags_score[tag]
    p, r, f2 = scores(s["tp"], s["fp"], s["fn"])
    data.append([s["tp"], s["fp"], s["fn"], p, r, f2])

columns = ["tp", "fp", "fn", "validation precision", "validation recall", "f2"]
df = pd.DataFrame(data=data, index=all_tags, columns=columns)
df

Unnamed: 0,tp,fp,fn,validation precision,validation recall,f2
slash_burn,17,57,159,0.22973,0.096591,0.109254
clear,21679,1016,985,0.955232,0.956539,0.956277
blooming,36,182,124,0.165138,0.225,0.20979
primary,29188,804,596,0.973193,0.979989,0.978622
cloudy,1427,401,405,0.780635,0.77893,0.77927
conventional_mine,74,35,46,0.678899,0.616667,0.628183
water,4187,1080,1621,0.79495,0.720902,0.734587
haze,1430,716,826,0.666356,0.633865,0.640107
cultivation,1847,1287,2017,0.589343,0.478002,0.496772
partly_cloudy,5069,646,563,0.886964,0.900036,0.897391


## F2 score

In [62]:
LABELS = ['clear', 'cloudy', 'haze', 'partly_cloudy', 'agriculture', 'artisinal_mine', 'bare_ground', 'blooming', 'blow_down', 'conventional_mine', 'cultivation', 'habitation', 'primary', 'road', 'selective_logging', 'slash_burn', 'water']

In [63]:
import numpy as np
from sklearn.metrics import fbeta_score
def F2Score(predicted, actual):
    # see https://www.kaggle.com/c/planet-understanding-the-amazon-from-space#evaluation
    predicted = set(predicted)
    actual = set(actual)
    tp = len(predicted & actual)
    tn = len(LABELS) - len(predicted | actual)
    fp = len(predicted) - tp
    fn = (len(LABELS) - len(predicted)) - tn
    return scores(tp, fp, fn)[2]

def scoreSet(data_set):
    f2, n = 0, 0
    for img in data_set:
        real_tags = train_tags[img]
        predicted_tags = pred_tags[img]
        f2 += F2Score(real_tags, predicted_tags)
        n += 1
    return f2 / n

In [64]:
print "F2 score for training set", scoreSet(training_set)
print "F2 score validation set", scoreSet(validation_set)

F2 score for training set 0.994026038266
F2 score validation set 0.894884424695
