### The aim of this assignment is to predict water extent from Sentinel-2 imagery using a Random Forest classifier.

In [None]:
import numpy as np
import pandas as pd
import rasterio as rio
import matplotlib.pyplot as plt
import os

In [None]:
# Download the data from the data_RF folder, then point this path at that folder.
data_dir = r"C:\the\file\location\on\your\system"

# The cells below open the rasters with relative paths ("./..."), so the data
# folder has to be the current working directory.
os.chdir(data_dir)

In [None]:
# Confirm the current working directory (displayed as the cell output).
os.getcwd()

In [None]:
# Note: this cell is for reference only; there is nothing to run. Skip to the next cell.

## Dataset contents:

# M_test: Test Sen2 water mask (validation)
# I_test: Test Sen2 image
# M_train: Training water mask
# I_train: Training Sen2 image

In [None]:
# Preparing M_train (training water mask) for machine learning

# Open the raster in a context manager so the file handle is always released
# (the bare `ds.close` used previously never actually called close()).
with rio.open(r"./M_train.tif") as ds:
    arr = ds.read()  # rasterio returns the data as (band, height, width)

# Machine-learning libraries expect one row per sample, so move the band axis
# to the last position: (band, height, width) -> (height, width, band).
arr = np.moveaxis(arr, 0, -1)

# Keep the raster dimensions (as integers) so flattened arrays can be turned
# back into images later.
nrow_train, ncol_train = arr.shape[0], arr.shape[1]

# Collapse the two spatial axes into one: (height * width, band).
M_train = np.reshape(arr, (nrow_train * ncol_train, arr.shape[2]))

# Last expression of the cell, so the resulting shape is displayed.
M_train.shape

In [None]:
# Preparing I_train (training Sentinel-2 image) for machine learning

# Open the raster in a context manager so this cell's own file handle is
# always released.
with rio.open(r"./I_train.tif") as ds_Itrain:
    arr_Itrain = ds_Itrain.read()  # rasterio returns (band, height, width)

# The I_train raster is slightly larger than M_train, so trim the excess:
# drop the last 2 rows (axis 1) and the last 3 columns (axis 2).
Itrain_reshaped = arr_Itrain[:, :-2, :-3]

# Machine-learning libraries expect one row per sample, so move the band axis
# to the last position: (band, height, width) -> (height, width, band).
# Itrain_reshaped is the array used for the rest of the analysis.
Itrain_reshaped = np.moveaxis(Itrain_reshaped, 0, -1)

# Take the dimensions from this cell's own array (as integers) rather than
# from a variable left over from an earlier cell.
nrow_train, ncol_train = Itrain_reshaped.shape[0], Itrain_reshaped.shape[1]

# Collapse the two spatial axes into one: (height * width, band).
I_train = np.reshape(Itrain_reshaped, (nrow_train * ncol_train, Itrain_reshaped.shape[2]))

# The trimming above is only correct if the image now has exactly one row of
# features per mask pixel; fail early with a clear message otherwise.
assert I_train.shape[0] == M_train.shape[0], (
    "I_train and M_train must contain the same number of pixels after trimming"
)

# Last expression of the cell, so the resulting shape is displayed.
I_train.shape

In [None]:
# Preparing I_test (test Sentinel-2 image) for machine learning

# Open the raster in a context manager so the file handle is always released
# (the bare `ds.close` used previously never actually called close()).
with rio.open(r"./I_test.tif") as ds:
    arr = ds.read()  # rasterio returns the data as (band, height, width)

# Machine-learning libraries expect one row per sample, so move the band axis
# to the last position: (band, height, width) -> (height, width, band).
arr = np.moveaxis(arr, 0, -1)

# Keep the raster dimensions (as integers) so flattened arrays can be turned
# back into images later.
nrow_test, ncol_test = arr.shape[0], arr.shape[1]

# Collapse the two spatial axes into one: (height * width, band).
I_test = np.reshape(arr, (nrow_test * ncol_test, arr.shape[2]))
print(I_test.shape)

In [None]:
# Preparing M_test (validation water mask) for machine learning

# Open the raster in a context manager so the file handle is always released
# (the bare `ds.close` used previously never actually called close()).
with rio.open(r"./M_test.tif") as ds:
    arr = ds.read()  # rasterio returns the data as (band, height, width)

# NaN values represent boundary pixels with no value. Replace each NaN with 0
# and leave every other value unchanged.
arr = np.where(np.isnan(arr), 0, arr)

# Machine-learning libraries expect one row per sample, so move the band axis
# to the last position: (band, height, width) -> (height, width, band).
arr = np.moveaxis(arr, 0, -1)

# Keep the raster dimensions (as integers); later cells use them to reshape
# the flattened arrays back into images.
nrow_test, ncol_test = arr.shape[0], arr.shape[1]

# Collapse the two spatial axes into one: (height * width, band).
M_test = np.reshape(arr, (nrow_test * ncol_test, arr.shape[2]))
print(M_test.shape)

In [None]:
# Building the random forest predictor using the sklearn library
from sklearn.ensemble import RandomForestClassifier

# random_state fixes the seed so repeated runs build the same forest;
# n_jobs=-1 uses all available CPU cores; verbose=3 prints training progress.
rfc = RandomForestClassifier(n_estimators=100, random_state=42, verbose=3, n_jobs=-1)

# M_train is a 2-D (n_pixels, n_bands) array. For a single-band mask,
# scikit-learn expects a 1-D label vector and otherwise emits a
# DataConversionWarning, so flatten it in that case.
y_train = M_train.ravel() if M_train.shape[1] == 1 else M_train
rfc.fit(I_train, y_train)

In [None]:
# Apply the random forest trained above to the unseen image (I_test);
# the result, Y_pred, is the predicted mask with one label per pixel row.
Y_pred = rfc.predict(X=I_test)

In [None]:
# Compare the generated Y_pred mask with the manually created M_test mask
# by printing a per-class classification report.

from sklearn.metrics import classification_report

report = classification_report(M_test, Y_pred)
print(report)


# Result noted by the author: high F value for the prediction (0.87).

In [None]:
# Display the shape of the prediction to confirm it matches the input.
np.shape(Y_pred)

In [None]:
# Turn the flattened per-pixel arrays back into image-shaped arrays for plotting.
Y_pred_reshaped = Y_pred.reshape(nrow_test, ncol_test)
M_test_reshaped = M_test.reshape(nrow_test, ncol_test)
# -1 lets NumPy infer the number of bands instead of hard-coding 4, so the
# cell keeps working if the image has a different band count.
I_test_reshaped = I_test.reshape(nrow_test, ncol_test, -1)
print(Y_pred_reshaped.shape, I_test_reshaped.shape)


In [None]:
# Visualize the test image, the ground-truth mask and the predicted mask side by side.

fig, axes = plt.subplots(nrows=1, ncols=4, sharex=True, sharey=True, figsize=(15,7))
ax1, ax2, ax3, ax4 = axes.flatten()

# One (axis, title, image) entry per panel.
# NOTE(review): the last band is labelled "NIR" on the assumption that it is
# the near-infrared band (the title previously read "NRI") - confirm band order.
panels = [
    (ax1, "RGB", I_test_reshaped[:, :, :3]),
    (ax2, "NIR", I_test_reshaped[:, :, -1]),
    (ax3, "Ground Truth", M_test_reshaped[:, :]),
    (ax4, "Predicted", Y_pred_reshaped[:, :]),
]

for ax, title, image in panels:
    ax.set_title(title, fontweight='bold', fontsize='16')
    ax.imshow(image)

# Render the figure explicitly so the cell does not echo an AxesImage repr.
plt.show()