In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, classification_report
import scikitplot as skplt

import numpy as np
import matplotlib.pyplot as plt
from joblib import dump

### Load Data

In [None]:
# Read the ground-truth array from disk.
data = np.load('gt_data.npy')

# Row 2 is used as the label vector; rows 0 and 1 are paired up along a new
# axis 1 to form the feature matrix.
# NOTE(review): assumes each row of `data` is 1-D (one value per pixel), which
# makes X of shape (n_samples, 2) -- confirm against how gt_data.npy is built.
y = data[2]
X = np.stack([data[0], data[1]], axis=1)

In [None]:
# Tally the samples per class: label 0 is reported as land, label 1 as water.
land = np.count_nonzero(y == 0)
water = np.count_nonzero(y == 1)

print('Total Pixels:', len(y))
print('Land: ', land)
print('Water: ', water)

### Split Train/Test data

In [None]:
# 70% Train, 30% Test
# The shuffle seed is pinned so the split -- and therefore every metric
# computed from it further down -- is identical after Restart Kernel -> Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

print('Training: ', y_train.shape)
print('Test: ', y_test.shape)

### Fit & Predict

In [None]:
# Small forest: 4 trees, each limited to depth 4, trained on all available
# cores (n_jobs=-1). random_state pins the bootstrap and feature sampling so
# the fitted model (and the file dumped from it) is reproducible across runs.
clf = RandomForestClassifier(
    n_estimators=4,
    max_depth=4,
    n_jobs=-1,
    random_state=42,
)

In [None]:
# Train on the training portion only; the fitted estimator is the cell output.
clf.fit(X=X_train, y=y_train)

In [None]:
# Predicted labels for the held-out test samples.
pred = clf.predict(X=X_test)

### Results

In [None]:
# Cross-tabulate true vs. predicted labels on the test set.
cm = confusion_matrix(y_true=y_test, y_pred=pred)

# Draw the table with readable class names.
# NOTE(review): the label order assumes classes sort as 0 -> land, 1 -> water.
disp = ConfusionMatrixDisplay(cm, display_labels=['land', 'water'])
disp.plot()

plt.show()

In [None]:
# Fraction of test samples whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=pred)

In [None]:
# Per-class text report for the test-set predictions.
print(
    classification_report(
        y_true=y_test,
        y_pred=pred,
        target_names=['land', 'water'],
    )
)

In [None]:
# Persist the fitted classifier to disk so it can be reloaded without retraining.
dump(value=clf, filename='RandomForest.joblib')