In [1]:
import numpy as np
import os
import pickle

from matplotlib import pyplot as plt
from osgeo import gdal
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

from landsat8 import vectors_to_raster, write_geotiff

gdal.UseExceptions()

In [2]:
data_date = "150321"
raster_data_path = "real_data/%s/img/L8_229_82_%s.tif" % (data_date, data_date)
train_data_path = "real_data/%s/train/" % data_date

print("Reading the input: %s" % raster_data_path)
try:
    raster_dataset = gdal.Open(raster_data_path, gdal.GA_ReadOnly)
except RuntimeError as e:
    report_and_exit(str(e))

geo_transform = raster_dataset.GetGeoTransform()
proj = raster_dataset.GetProjectionRef()
bands_data = []
for b in range(1, raster_dataset.RasterCount + 1):
    band = raster_dataset.GetRasterBand(b)
    bands_data.append(band.ReadAsArray())

bands_data = np.dstack(bands_data)
rows, cols, n_bands = bands_data.shape
# A sample is a vector with all the bands data. Each pixel (independent of its position) is a
# sample.
n_samples = rows * cols

Reading the input: real_data/150321/img/L8_229_82_150321.tif


# Training

In [3]:
print("Process the training data")
vector_files_sufix = "%s.shp" % data_date
try:
    files = [f for f in os.listdir(train_data_path) 
             if f.endswith(vector_files_sufix) and not f.startswith('ROI')]
    shapefiles = [os.path.join(train_data_path, f) for f in files if f.endswith('.shp')]
except OSError.FileNotFoundError as e:
    report_and_exit(str(e))

Process the training data


In [4]:
training = vectors_to_raster(shapefiles, rows, cols, geo_transform, proj)

In [5]:
training['reference']

{}

In [6]:
labeled_pixels = training['raster']
is_train = np.nonzero(labeled_pixels)
training_labels = labeled_pixels[is_train]
training_samples = bands_data[is_train]

In [7]:
write_geotiff("training_labels_%s.tiff"%data_date, labeled_pixels, geo_transform, proj)

In [8]:
classifier = RandomForestClassifier(n_jobs=7)
print("Train the classifier: %s" % str(classifier))
classifier.fit(training_samples, training_labels)

Train the classifier: RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=7,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=7,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [9]:
reference = {os.path.basename(f).split('_')[0]: v for f, v in training['reference'].items()}
reference

{'ALFA': 1,
 'MAIZ': 2,
 'MN': 3,
 'MONTE': 4,
 'PN': 5,
 'RASTROJO': 6,
 'SOJA': 7,
 'SORGO': 8}

In [10]:
print("Saving trained classifier and the classes reference...")
results = {
    'classifier': classifier,
    'reference': reference
}
with open('classifier_trained_%s.pickle' % data_date, 'wb') as classifier_file:    
    pickle.dump(results, classifier_file)

Saving trained classifier and the classes reference...


# Results

Created a pickle file: **classifier_trained.pickle**. Unpickle to get a trained RandomForestClassifier.