In [18]:
import preprocessor
import models
import numpy as np
from keras.models import model_from_json

from tqdm import tqdm

Using TensorFlow backend.


In [4]:
data = preprocessor.read_data('../train.csv')[35000:]

In [9]:
file_names = list(map(lambda x: x.name, data))
print(file_names.__len__())

5479


In [19]:
x_test = []
y_test = []

for item in tqdm(data):
    x_test.append(models.AnnotatedRecord.small_image(item))
    y_test.append(models.AnnotatedRecord.some_hot(item))

x_data = np.array(x_test, np.float16) / 255.
y_data = np.array(y_test, np.uint8)

print(x_data.shape)
print(y_data.shape)

print("*** Loading Model ***")
# load json and create model
json_file = open('model.json', 'r')
model_json = json_file.read()
json_file.close()
model = model_from_json(model_json)

model.load_weights("model.h5")

print("*** Compiling Model ***")
model.compile(loss='binary_crossentropy',
              # We NEED binary here, since categorical_crossentropy l1 norms the output before calculating loss.
              optimizer='adam',
              metrics=['accuracy'])

print("*** Generating Predictions ***")
#scores = model.evaluate(x_train, y_train, verbose=0)
#print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

predictions = model.predict(x_data)

100%|██████████| 5479/5479 [00:00<00:00, 7635.35it/s]


(5479, 32, 32, 3)
(5479, 17)
*** Loading Model ***
*** Compiling Model ***
*** Generating Predictions ***


In [109]:
thresholds = [0.2] * len(models.inv_mapping)

predictions_labels = []
for prediction in predictions:
    labels = [models.inv_mapping[i] for i, value in enumerate(prediction) if value > thresholds[i]]
    predictions_labels.append(labels)
    
print(*predictions_labels[:10], sep='\n')

['primary', 'agriculture', 'water', 'cultivation', 'road', 'clear']
['primary', 'agriculture', 'water', 'cultivation', 'habitation', 'road', 'clear']
['primary', 'agriculture', 'water', 'cultivation', 'habitation', 'road', 'clear']
['primary', 'agriculture', 'water', 'cultivation', 'clear']
['primary', 'clear']
['primary', 'clear']
['primary', 'agriculture', 'water', 'cultivation', 'habitation', 'road', 'clear', 'partly_cloudy']
['primary', 'agriculture', 'water', 'partly_cloudy']
['primary', 'agriculture', 'water', 'road', 'partly_cloudy']
['primary', 'agriculture', 'water', 'habitation', 'road', 'clear', 'partly_cloudy']


In [110]:
y_labels = []
for record in y_data:
    labels = [models.inv_mapping[i] for i, value in enumerate(record) if value == 1]
    y_labels.append(labels)
print(*y_labels[:10], sep='\n')

['primary', 'cultivation', 'haze', 'slash_burn']
['primary', 'agriculture', 'road', 'clear']
['primary', 'agriculture', 'water', 'cultivation', 'clear']
['primary', 'road', 'clear', 'selective_logging']
['primary', 'clear']
['primary', 'clear']
['primary', 'water', 'partly_cloudy']
['primary', 'partly_cloudy']
['primary', 'partly_cloudy']
['primary', 'agriculture', 'cultivation', 'habitation', 'road', 'partly_cloudy']


In [29]:
predict_data = []
for prediction in predictions:
    predict_data.append(list(map(lambda x: int(x > .20), prediction)))
    
print(*predict_data[:5], sep='\n')
print(*y_data[:5], sep='\n')

[1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0]
[1 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0]
[1 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0]
[1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 0]
[1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]


In [30]:
total_num = y_data.__len__()
print("Total number of items: {}".format(total_num))

Total number of items: 5479


In [108]:
index = 0

print("\nFeature: ", models.inv_mapping[index], "\n")

total_features = sum([x[index]==1 for x in y_data])
print("Total: ", total_features)

true_positives = sum([x[index]==1 and y[index]==1 for x, y in zip(y_data, predict_data)])
print("True Positives: ", true_positives)

false_negatives = sum([x[index]==1 and y[index]==0 for x, y in zip(y_data, predict_data)])
print("False Negatives: ", false_negatives)

false_positives = sum([x[index]==0 and y[index]==1 for x, y in zip(y_data, predict_data)])
print("False Positives: ", false_positives)

accuracy = true_positives / (true_positives + false_positives)
print("Accuracy ", accuracy)

recall = true_positives / (true_positives + false_negatives)
print("Recall ", recall)


Feature:  primary 

Total:  5100
True Positives:  5096
False Negatives:  4
False Positives:  318
Accuracy  0.941263391208
Recall  0.999215686275
