In [10]:
# Imports and settings
import numpy as np
import pickle

from collections import Counter
from itertools import chain

from osgeo import gdal, ogr

from sklearn import metrics

from utils import (
    print_cm, test_class_name_to_train_label, train_label_to_test_class_name, 
    extract_test_mask, add_label_from_reference_name,
    separate_files_by_date, CLASS_NAME_TO_INT, INT_TO_CLASS_NAME)

from landsat8 import write_geotiff

# Settings
# Paths of the pickled inputs used below (presumably array-like masks:
# `mask_lot` is compared against lot ids, `verification_pixels` is indexed
# through `.nonzero()` further down).
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced locally by this project.
mask_by_lot = 'mask_lote_id.pickle'
verification_pixels_path = 'mask_lote_by_class.pickle'


with open(mask_by_lot, 'rb') as flot:
    mask_lot = pickle.load(flot)

with open(verification_pixels_path, 'rb') as fverification_pixels:
    verification_pixels = pickle.load(fverification_pixels)

# We need projection and GeoTransform
data_date = "150321"
raster_data_path = "real_data/%s/img/L8_229_82_%s.tif" % (data_date, data_date)

print("Reading the input: %s" % raster_data_path)
# gdal.Open signals failure in one of two ways: it raises RuntimeError when
# GDAL exceptions are enabled, otherwise it silently returns None. The
# RuntimeError is left to propagate (the previous handler called an undefined
# `report_and_exit`), and the None case is turned into an explicit error so
# the failure is reported here instead of as an AttributeError below.
raster_dataset = gdal.Open(raster_data_path, gdal.GA_ReadOnly)
if raster_dataset is None:
    raise RuntimeError("Unable to open raster: %s" % raster_data_path)
geo_transform = raster_dataset.GetGeoTransform()
proj = raster_dataset.GetProjectionRef()
# ##################################

homogenize_lot_data_path = 'homogenize_lot_%s.pickle' % (data_date)
classified_data_path = 'classifier_predicted_%s.pickle' % data_date

# The classifier output pickle is a mapping; only its 'classification' entry
# is used from here on.
with open(classified_data_path, 'rb') as fclassified:
    classified_data = pickle.load(fclassified)
    classified_data = classified_data['classification']

Reading the input: real_data/150321/img/L8_229_82_150321.tif


In [11]:
def get_most_repited_value(array):
    """Return the element that occurs most often in ``array``.

    Parameters
    ----------
    array : iterable of hashable values
        The values to count.

    Returns
    -------
    The value with the highest occurrence count. When several values share
    the highest count, the one placed last by a stable ascending sort of the
    counter keys is returned (i.e. the tied value inserted last into the
    counter, on Python versions where dicts keep insertion order).

    Raises
    ------
    IndexError
        If ``array`` is empty.
    """
    frequency = Counter(array)
    candidates = list(frequency)
    # Stable ascending sort by count: the winner ends up in the last slot.
    candidates.sort(key=frequency.__getitem__)
    return candidates[-1]

In [12]:
lot_ids = np.unique(mask_lot)

# Homogenize each lot: every pixel belonging to a lot id is overwritten with
# the value predicted most often inside that lot.
# Note: `classified_data` is modified in place, and every distinct value of
# `mask_lot` is treated as a lot.
for lot_id in lot_ids:
    lot_pixels = mask_lot == lot_id
    classified_data[lot_pixels] = get_most_repited_value(classified_data[lot_pixels])

In [13]:
# Persist the homogenized classification twice: as a pickle for later reuse
# and as a GeoTIFF (written with the geotransform and projection read from the
# source raster) for visual verification.
output_fname = "homogenize_by_lote_id_%s.tiff" % data_date

with open(homogenize_lot_data_path, 'wb') as fhomogenize:
    pickle.dump(classified_data, fhomogenize)

write_geotiff(output_fname, classified_data, geo_transform, proj)
print("Verification image created: %s" % output_fname)

Verification image created: homogenize_by_lote_id_150321.tiff


In [14]:
# Positions of the pixels that carry a non-zero verification value.
for_verification = np.nonzero(verification_pixels)

# Reference values and (homogenized) predictions at exactly those positions.
verification_labels = verification_pixels[for_verification]
predicted_labels = classified_data[for_verification]

In [15]:
# Confusion matrix of the verification pixels: reference values vs. predictions.
cm = metrics.confusion_matrix(y_true=verification_labels,
                              y_pred=predicted_labels)

In [16]:
# confusion_matrix and classification_report order their classes by the sorted
# union of the labels present in the reference data AND in the predictions.
# Build the display names from that same union; using only the predicted
# labels would misalign (or miscount) the names whenever a class appears in the
# verification pixels but is never predicted.
all_labels = np.union1d(verification_labels, predicted_labels)
# Each name is shortened to its first three characters for the printed table.
classes_labels = [train_label_to_test_class_name(str(int(c)))[:3] for c in all_labels]
print_cm(cm, classes_labels)


   	MAI	MAN	 PN	RAS	SOJ	SOR	
MAI	6223	1270	471	1550	17019	272	
MAN	688	2231	781	  0	5422	  0	
 PN	  0	487	216	384	235	  0	
RAS	  0	  0	  0	  0	  0	  0	
SOJ	15910	6237	1072	2555	67846	1141	
SOR	769	  0	319	  0	2017	158	


In [17]:
# Fraction of verification pixels whose prediction equals the reference value.
accuracy = metrics.accuracy_score(verification_labels, predicted_labels)
print("Classification accuracy: %f" % accuracy)

Classification accuracy: 0.566809


In [18]:
# Per-class precision / recall / f1 table, labelled with the short class names.
report = metrics.classification_report(
    verification_labels, predicted_labels, target_names=classes_labels)
print("Classification report:\n%s" % report)

Classification report:
             precision    recall  f1-score   support

        MAI       0.26      0.23      0.25     26805
        MAN       0.22      0.24      0.23      9122
         PN       0.08      0.16      0.10      1322
        RAS       0.00      0.00      0.00         0
        SOJ       0.73      0.72      0.72     94761
        SOR       0.10      0.05      0.07      3263

avg / total       0.58      0.57      0.57    135273



  'recall', 'true', average, warn_for)
