# CNN evaluation and plots

This script compares the prediction results to the ground truth (test data) and prints out a confusion matrix and a classification report.
It also plots the CNN results.

#### Labels
* Multiclass classification raster: 1 - forest, 2 - fields, 3 - water, 4 - urban, 0 - everything else.

In [None]:
import os
import matplotlib.pyplot as plt
import matplotlib.colors
import numpy as np
import rasterio
import rasterio.mask
from rasterio.plot import show
from rasterio.windows import from_bounds
from sklearn.metrics import classification_report, confusion_matrix
%matplotlib inline

In [None]:
### File paths.
# Folders
# NOTE: os.environ.get returns None when USER is not set; os.path.join below
# then fails with a TypeError, so make sure USER is defined in the environment.
user = os.environ.get('USER')
# All exercise folders live under the user's own directory in the project scratch area.
base_folder = os.path.join('/scratch/project_2002044', user, '2022/GeoML')
dataFolder = os.path.join(base_folder,'data')
cnn_folder = os.path.join(base_folder, '08_cnn_segmentation') 

# Data file, only for plotting
image_file = os.path.join(dataFolder, 'image.tif')

# Set labels image and prediction image paths.
# Binary (forest) case: label raster and the CNN prediction raster.
test_image_path_binary = os.path.join(dataFolder, 'labels_forest.tif')
predicted_image_output_path_binary = os.path.join(cnn_folder, 'CNN_2.tif')

# Multiclass case: label raster and the CNN prediction raster.
test_image_path_multiclass = os.path.join(dataFolder, 'labels_multiclass.tif')
predicted_image_output_path_multiclass = os.path.join(cnn_folder, 'CNN_5.tif')

Function to reclassify the model's raw output into binary classes (0 or 1) using a prediction threshold.

In [None]:
def get_binary_results(prediction_data, prediction_treshold):
    """Reclassify raw model output into a binary 0/1 integer array.

    Values greater than or equal to ``prediction_treshold`` become 1 and all
    other values (including NaN) become 0. The input array is not modified.

    The comparison is evaluated once against the original values. Doing the
    reclassification in two in-place passes would re-test the freshly written
    1s against the threshold and zero them again whenever the threshold is
    above 1 (for example 8-bit predictions with a threshold of 128).

    Args:
        prediction_data: Array-like of raw prediction values.
        prediction_treshold: Cut-off value; values at or above it map to 1.

    Returns:
        Array of dtype ``int`` with the same shape as ``prediction_data``.
    """
    raw_values = np.asanyarray(prediction_data)
    binary_prediction_data = (raw_values >= prediction_treshold).astype('int')
    print('Prediction_treshold: ', prediction_treshold)
    return binary_prediction_data

Function to calculate the confusion matrix and classification report based on predicted and test data. Before running this on test data, one should run it on validation data to select the `prediction_treshold` or to decide whether the model needs retraining.

In [None]:
def estimateModel(predicted_image_output_path, test_image_path, no_of_classes, prediction_treshold=0.5):
    """Print a confusion matrix and a classification report for a prediction raster.

    Both rasters are read only inside a shared rectangle, flattened to 1-D and
    compared with scikit-learn. Nothing is returned; the results are printed.

    Args:
        predicted_image_output_path: Path of the raster holding the model predictions.
        test_image_path: Path of the raster holding the ground-truth labels.
        no_of_classes: Number of classes. When it is 2, the predictions are
            first reclassified with ``get_binary_results``.
        prediction_treshold: Threshold handed to ``get_binary_results``; only
            used when ``no_of_classes`` is 2.
    """
    with rasterio.open(predicted_image_output_path, 'r') as pred_src, \
            rasterio.open(test_image_path, 'r') as truth_src:
        pred_bounds = pred_src.bounds
        truth_bounds = truth_src.bounds

        # Rectangle shared by the two images. Because of tiling the prediction
        # image is slightly smaller than the original clip.
        west = max(pred_bounds.left, truth_bounds.left)
        east = min(pred_bounds.right, truth_bounds.right)
        north = min(pred_bounds.top, truth_bounds.top)
        # NOTE(review): the southern edge is a fixed distance below the top
        # rather than derived from the datasets' bottom bounds - presumably
        # 512 pixel rows at 20 m resolution; confirm against the data.
        south = north - 512 * 20

        ring = [
            [west, south],
            [west, north],
            [east, north],
            [east, south],
            [west, south],
        ]
        common_bbox = [{"type": "Polygon", "coordinates": [ring]}]

        # Read only the shared rectangle; the returned transforms are not needed.
        predicted, _ = rasterio.mask.mask(pred_src, common_bbox, crop=True)
        truth, _ = rasterio.mask.mask(truth_src, common_bbox, crop=True)

    # scikit-learn expects flat 1-D arrays.
    flat_pred = predicted.reshape(-1)
    flat_true = truth.reshape(-1)

    # Binary model output is raw, so reclassify it with the threshold first.
    if no_of_classes == 2:
        flat_pred = get_binary_results(flat_pred, prediction_treshold)

    print('Confusion Matrix')
    print(confusion_matrix(flat_true, flat_pred))
    print('Classification Report')
    print(classification_report(flat_true, flat_pred, zero_division=0))
            
            

Estimate binary classification.

For binary classification, a threshold must be given for dividing the pixels between the two classes. Try to find a good value by looking at the raw prediction map, or just try different values.

In [None]:
# Cut-off used to split the raw forest predictions into the two classes.
prediction_treshold = 0.35
estimateModel(
    predicted_image_output_path_binary,
    test_image_path_binary,
    no_of_classes=2,
    prediction_treshold=prediction_treshold,
)

Estimate multi-class classification.
TO-CHANGE. Uncomment next cell, after multiclass files are available. 

In [None]:
# estimateModel(predicted_image_output_path_multiclass, test_image_path_multiclass, 5)

## Plot the results

In [None]:
# Bounding box used for plotting. It covers a familiar area: the same bbox
# as in the previous exercises.
minx, miny = 240500, 6775500  # lower-left corner
maxx, maxy = 253500, 6788500  # upper-right corner

In [None]:
### Help function to normalize band values and enhance contrast. Just like what QGIS does automatically
def normalize(array):
    """Stretch values linearly so the 2nd percentile maps to 0 and the 98th to 1.

    Values outside the 2-98 percentile range are not clipped, so they end up
    below 0 or above 1.

    Args:
        array: Array-like of band values.

    Returns:
        Float array of the same shape as ``array``. When the 2nd and 98th
        percentiles are equal (for example a constant band) an all-zero array
        is returned instead of dividing by zero.
    """
    min_percent = 2   # Low percentile
    max_percent = 98  # High percentile
    lo, hi = np.percentile(array, (min_percent, max_percent))
    if hi == lo:
        # No spread to stretch: avoid 0/0 (NaN) and x/0 (inf) in the output.
        return np.zeros(np.shape(array), dtype=float)
    return (array - lo) / (hi - lo)

In [None]:
### Build a 3 x 2 grid of panels (6 images)
fig, ax = plt.subplots(nrows=3, ncols=2, figsize=(10, 15))
cmap_binary = matplotlib.colors.LinearSegmentedColormap.from_list("", ["white", "green"])
cmap_multiclass = matplotlib.colors.LinearSegmentedColormap.from_list(
    "", ["white", "green", "orange", "blue", "violet"]
)

# Panel [0, 0]: forest CNN prediction, raw model output
with rasterio.open(predicted_image_output_path_binary) as prediction_binary_dataset:
    plot_window = from_bounds(minx, miny, maxx, maxy, prediction_binary_dataset.transform)
    prediction_data_binary = prediction_binary_dataset.read(window=plot_window)
show(prediction_data_binary, ax=ax[0, 0], cmap='gray', title='Forest CNN prediction, raw')

# Panel [0, 1]: forest CNN prediction, reclassified with the chosen threshold
prediction_data_binary_reclassified = get_binary_results(prediction_data_binary, prediction_treshold)
show(prediction_data_binary_reclassified, ax=ax[0, 1], cmap=cmap_binary, title='Forest CNN prediction, reclassified')

# Panel [1, 0]: Sentinel image as a contrast-stretched composite of three bands
with rasterio.open(image_file) as image_dataset:
    plot_window = from_bounds(minx, miny, maxx, maxy, image_dataset.transform)
    image_data = image_dataset.read(window=plot_window)
nir, red, green = image_data[7], image_data[3], image_data[1]
nirn, redn, greenn = (normalize(band) for band in (nir, red, green))
stacked = np.stack((nirn, redn, greenn))
show(stacked, ax=ax[1, 0], title='Sentinel image')

# Panel [1, 1]: forest labels
with rasterio.open(test_image_path_binary) as binary_labels_dataset:
    plot_window = from_bounds(minx, miny, maxx, maxy, binary_labels_dataset.transform)
    binary_labels_data = binary_labels_dataset.read(window=plot_window)
show(binary_labels_data, ax=ax[1, 1], cmap=cmap_binary, title='Forest labels')

# TO-CHANGE. Uncomment the last lines, after multiclass files are available. Shortcut: Ctrl + Numpad /
# Multiclass labels
# with rasterio.open(test_image_path_multiclass) as multiclass_labels_dataset:
#    multiclass_labels_data = multiclass_labels_dataset.read(window=from_bounds(minx, miny, maxx, maxy, multiclass_labels_dataset.transform)) 
#    show(multiclass_labels_data, ax=ax[2, 0], cmap=cmap_multiclass, title='Multiclass labels')

# # Multiclass CNN prediction
# with rasterio.open(predicted_image_output_path_multiclass) as prediction_multiclass_dataset:
#    multiclass_prediction_data = prediction_multiclass_dataset.read(window=from_bounds(minx, miny, maxx, maxy, prediction_multiclass_dataset.transform))  
#    show(multiclass_prediction_data, ax=ax[2, 1], cmap=cmap_multiclass, title='Multiclass CNN prediction')