# Random Forest Classification (RFC) - vegetation 
* SACFOR and Reflectance
* aided by: 
https://www.datacamp.com/tutorial/random-forests-classifier-python

In [213]:
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio as rio
from rasterio import sample

# Modeling
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay

# Tree Visualisation
from sklearn.tree import export_graphviz
from IPython.display import Image
import graphviz

ModuleNotFoundError: No module named 'graphviz'

### Load Training Data

In [192]:
# load the sacfor shape file
sacfor = gpd.read_file('/Volumes/Mere/UMB/Sidescan_surveys/gooseberries_sacfor_raster_samples/gooseberries_sacfor_raster_samples.shp')
sacfor['SACFOR_veg'] = sacfor['SACFOR_veg'].replace('s', 'S')

# map the sacfor scales to integer scale
sacfor['SACFOR_veg target'] = sacfor['SACFOR_veg'].map({'S':5,'A':4,'C':3,'F':2,'O':1,'R':0})


# Load the hyperspectral data
img_file = '/Volumes/Mere/UMB/Headwall/merged/rasterio_1m_smoothed_deglinted_clean.tif'


### Sort Training Data
* Elevation threshold == > -4 m
* Sample associated spectra from the headwall raster

In [193]:
# Load the hyperspectral data from the .tif file
with rio.open(img_file) as src:
    arr = src.read()
    meta = src.meta
    
    # sort out deep elevation
    sacfor = sacfor[sacfor['Elev(m)']>=-3.5]
    
    # convert the headwall crs
    sacfor = sacfor.to_crs(src.crs)

    # Extract the X and Y coordinates of the points into separate columns
    sacfor['X'] = sacfor.geometry.x
    sacfor['Y'] = sacfor.geometry.y
    
    temp = [sacfor['geometry'].x, sacfor['geometry'].y]
    coords = list(map(list, zip(*temp)))    
    
    # Extract the headwall R spectra and insert numpy array into sacfor dataframe 
    sacfor['spectra'] = [sample for sample in src.sample(coords)]
    #sacfor['spectra'] = sacfor.apply(lambda row: row['spectra'].reshape(-1,1),axis=1)
    sacfor['spectra_sum'] = sacfor.apply(lambda row: row['spectra'].sum(), axis=1)


### RFC - Setup
* SACFOR rating is the target (y) variable
* Headwall R spectra is the feature (x) variable

In [209]:
# Create dataframe with only features (X) and target (Y)

training_data = pd.DataFrame()

training_data['sacfor_y'] =  sacfor['SACFOR_veg target']
training_data['headwall_X'] = sacfor['spectra_sum']
training_data = training_data.dropna()

X = training_data.drop('sacfor_y',axis=1)
y = training_data['sacfor_y']

### RFC - Training

In [210]:
# Split the data into training and testing sets

ts = 0.2
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=ts)#, random_state=42)


### RFC - Fitting and Evaluating

In [211]:
# Create a random forest classifier and fit the training data
rf = RandomForestClassifier()#n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = rf.predict(X_test)

# Evaluate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion matrix:\n", cm)

Accuracy: 0.56
Confusion matrix:
 [[ 1  1  0  0  0]
 [ 0  0  0  0  1]
 [ 0  0  1  0  1]
 [ 1  2  1  1  0]
 [ 0  1  2  1 11]]


### RFC - Visualizing the Results

In [212]:
# Export the first three decision trees from the forest

for i in range(3):
    tree = rf.estimators_[i]
    dot_data = export_graphviz(tree,
                               feature_names=X_train.columns,  
                               filled=True,  
                               max_depth=2, 
                               impurity=False, 
                               proportion=True)
    graph = graphviz.Source(dot_data)
    display(graph)

NameError: name 'graphviz' is not defined