# Traditional methods

In this study we evaluate the following traditional methods:
- Random Forest
- Support Vector Machine
- Gradient Boosting Classifier

In [None]:
import copy
import os
import pickle
import random
import shutil
import sys
import time
import warnings
from datetime import datetime, timezone, timedelta

import numpy as np
import tensorflow as tf
from PIL import Image as im
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import f1_score, precision_score,recall_score, cohen_kappa_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

## Initialize the process

In [None]:
# Root folders for the input samples, trained models, predictions, and logs
input_data = './samples/'
model_path = './models/'
prediction_path = './predicts/'
log_path = './logs/'

# Study area to process: 'covington' or 'rowancreek'
location = 'covington'

# Classifier to run: "SVM", "RF", or "GBC"
classifier = "SVM"

# Model file prefix without extension: <model_path><location>_<classifier>
save_model_path = model_path + '_'.join([location, classifier])

## Train the classifiers

In [None]:
def classifier_train(train_X, train_Y, save_model_path, classifier):
    """
    Train the requested classifier on the given samples and pickle it to disk.

    Args:
        train_X (array-like): Training data features.
        train_Y (array-like): Training data labels.
        save_model_path (str): Output path without extension; the model is
            written to ``save_model_path + '.pkl'``.
        classifier (str): Classifier type: "SVM", "RF", or "GBC".

    Returns:
        model: Trained classifier model.

    Raises:
        ValueError: If ``classifier`` is not one of the supported names.
    """
    # Reject unknown names up front; otherwise ``model`` would be unbound at
    # the fit call below and fail with a confusing error.
    if classifier not in ("SVM", "RF", "GBC"):
        raise ValueError(
            'Unsupported classifier %r; expected "SVM", "RF", or "GBC".' % (classifier,)
        )

    start_time = time.time()

    print("_"*30)
    if classifier == "SVM":
        print('[INFO] Training a Support Vector Machine model.')
        model = SVC(kernel='linear')
    elif classifier == "RF":
        print('[INFO] Training a Random Forest model.')
        # 'sqrt' is what the former 'auto' setting meant for classifiers;
        # 'auto' itself is rejected by recent scikit-learn releases.
        model = RandomForestClassifier(n_estimators=15, criterion='entropy', max_features='sqrt',
                                       max_depth=8, min_samples_split=3, min_samples_leaf=18, random_state=42)
    else:
        print("[INFO] Training a Gradient Boosting model.")
        model = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)

    model.fit(train_X, train_Y)

    # The reported time covers model construction and fitting, not the save.
    end_time = time.time()

    filename = save_model_path + '.pkl'
    # Create the target folder on first use so the dump does not fail.
    out_dir = os.path.dirname(filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(filename, 'wb') as file:
        pickle.dump(model, file)

    print("_"*30)
    print('[INFO] Training complete.')
    print('training time: %.4f s' % (end_time - start_time))

    return model


## Prepare training data and labels

In [None]:
# Load training data and labels from files
X_train = np.load(input_data + location + '/train_data.npy').astype(np.float32)
Y_train = np.load(input_data + location + '/train_label.npy').astype(np.float32)

# Collapse the first three axes so that every pixel becomes one sample row and
# the last axis is kept as the feature columns. The result must stay 2-D: the
# classifiers are fitted on (samples, features) rows and the test data is
# reshaped the same way, so it must not be flattened to 1-D.
X_train_reshaped = X_train.reshape(X_train.shape[0] * X_train.shape[1] * X_train.shape[2], X_train.shape[3])
print('Reshaped training data shape:', X_train_reshaped.shape)

# Reshape and convert training labels to integers
Y_train_reshaped = Y_train.reshape(Y_train.shape[0] * Y_train.shape[1] * Y_train.shape[2], Y_train.shape[3]).astype('int')

# Find indices of pixels labelled 1 (streams)
stream_idx = np.where(Y_train_reshaped == 1)

# NOTE(review): this is the range 0..(number of stream pixels - 1), not the
# row positions held in stream_idx, so the sample below is drawn from the
# first rows of the data (both classes) rather than from stream pixels only.
# Sampling stream pixels alone would give a single-class training set, so the
# intended sampling scheme needs to be confirmed before changing this.
stream_px_all = np.arange(len(stream_idx[0]))

# Select a random subset of rows; cap the size because sampling without
# replacement fails when fewer than n candidates are available
n = 10000
n = min(n, len(stream_px_all))
stream_pxs_idx = np.random.choice(stream_px_all, size=n, replace=False)

In [None]:
# Fit the classifier selected by `classifier` on the sampled rows and pickle it.
# The result is stored as model_SVM even when `classifier` is "RF" or "GBC".
model_SVM = classifier_train(X_train_reshaped[stream_pxs_idx], Y_train_reshaped[stream_pxs_idx], save_model_path, classifier)

## Use the trained models to predict the bottom half


In [None]:
# Read the bottom-half test array for the selected location as float32
test_data_file = input_data+location+'/bottom_half_test_data.npy'
X_test = np.load(test_data_file).astype(np.float32)

# Merge the first three axes into one so each row holds the values of the last axis
n_test_rows = X_test.shape[0]*X_test.shape[1]*X_test.shape[2]
X_test_reshaped = X_test.reshape(n_test_rows, X_test.shape[3])
X_test_reshaped.shape

In [None]:
def test(test_X, save_model_path, classifier, tile_shape=(224, 224)):
    """
    Run the saved model on the test data and save the predictions as tiles.

    The predictions are written to
    ``'./predicts/' + location + '_' + classifier + '_predict.npy'`` where
    ``location`` is read from the module-level variable of that name.

    Args:
        test_X (array-like): Test data features, either as a 4-D array whose
            last axis holds the features, or already reshaped to 2-D rows of
            features.
        save_model_path (str): Model path without extension, as passed to
            ``classifier_train`` (which writes ``save_model_path + '.pkl'``).
        classifier (str): Classifier type: "SVM", "RF", or "GBC".
        tile_shape (tuple): (height, width) used to fold the predictions back
            into tiles when ``test_X`` is 2-D. The default follows the
            224-pixel tile size hard-coded in ``evaluate_prediction_result``;
            TODO confirm it matches the actual tile size of the test data.

    Returns:
        str: Path of the saved prediction file.

    Raises:
        ValueError: If ``test_X`` is neither 2-D nor 4-D.
    """
    prediction_path = './predicts/'

    # Training writes the model to save_model_path + '.pkl'. Earlier versions
    # of this function looked for save_model_path + classifier + '.pkl', so
    # fall back to that name only when it is the one that exists.
    pkl_filename = save_model_path + '.pkl'
    legacy_filename = save_model_path + classifier + '.pkl'
    if not os.path.exists(pkl_filename) and os.path.exists(legacy_filename):
        pkl_filename = legacy_filename

    # NOTE: unpickling runs arbitrary code; only load model files you created.
    with open(pkl_filename, 'rb') as file:
        model = pickle.load(file)

    # The model needs 2-D rows of features, while the saved output must be
    # 4-D tiles with a trailing axis of length 1.
    test_X = np.asarray(test_X)
    if test_X.ndim == 4:
        features = test_X.reshape(-1, test_X.shape[3])
        output_shape = test_X.shape[:3] + (1,)
    elif test_X.ndim == 2:
        features = test_X
        output_shape = (-1, tile_shape[0], tile_shape[1], 1)
    else:
        raise ValueError(
            'test_X must be a 2-D or 4-D array, got %d dimension(s).' % test_X.ndim
        )

    # Predict using the trained model
    pred = model.predict(features)

    # Reshape the prediction results back into tiles
    predict_reshaped = pred.reshape(output_shape)

    # Save the prediction results, creating the folder on first use
    os.makedirs(prediction_path, exist_ok=True)
    save_path = prediction_path + location + '_' + classifier + '_predict.npy'
    np.save(save_path, predict_reshaped)

    print("Prediction Results saved:", save_path)

    return save_path


In [None]:
# Predict on the reshaped test set; the return value is the path of the saved prediction array.
prediction_npy_path = test(X_test_reshaped,save_model_path, classifier)

# Evaluate the prediction results

In [None]:
# Prediction array for the current location/classifier pair
pred_SVM_npy = ''.join([prediction_path, location, '_', classifier, '_predict.npy'])
# Text file path, presumably meant for the evaluation summary
text_path = ''.join([prediction_path, 'prediction_results.txt'])
# Mask and label arrays for the bottom-half test area
mask_npy = ''.join([input_data, location, '/bottom_half_test_mask.npy'])
label_npy = ''.join([input_data, location, '/bottom_half_test_label.npy'])

In [None]:
def evaluate_prediction_result(location, pred_npy, mask_npy, label_npy, model_path, txt_path=None):
    """
    Stitch the predicted tiles into one map and score it against the labels.

    Each tile is cropped by a 30-pixel border on every side, the crops are
    joined row by row into a single map, and per-class F1, precision, and
    recall plus Cohen's kappa are computed over the pixels where the mask
    equals 1. The results are printed and, optionally, appended to a text
    file.

    Args:
        location (str): Location identifier.
        pred_npy (str): Path to the prediction numpy file.
        mask_npy (str): Path to the mask numpy file.
        label_npy (str): Path to the label numpy file.
        model_path (str): Path to the model used for prediction (only shown
            in the report).
        txt_path (str, optional): If given, the report is appended to this
            text file. Defaults to None (print only).

    Raises:
        ValueError: If the label array is too small to hold at least one tile.
    """
    prediction_npy = np.load(pred_npy)
    prediction_mask_npy = np.load(mask_npy)
    prediction_label_npy = np.load(label_npy)

    dim = prediction_label_npy.shape
    buf = 30
    # Side length that remains of a 224-pixel tile after cropping the border.
    tile_core = 224 - buf * 2
    numr = dim[0] // tile_core
    numc = dim[1] // tile_core

    # NOTE(review): for 'covington' only int(numc/2) - 1 tiles are used per row
    # and at most 20 rows are stitched; presumably this mirrors how the tiles
    # were generated for that area -- confirm against the tiling code.
    is_covington = location == 'covington'
    numc_con = int(numc / 2) - 1 if is_covington else numc
    if is_covington:
        numr = min(numr, 20)
    if numr <= 0 or numc_con <= 0:
        raise ValueError(
            'Label array of shape %s is too small to stitch any tiles.' % (dim,)
        )

    # Tiles are stored row-major, so walk them in order and build each map row
    # with a single concatenate instead of growing an array pairwise.
    tile_rows = []
    count = 0
    for _ in range(numr):
        row_tiles = [prediction_npy[count + j][buf:-buf, buf:-buf] for j in range(numc_con)]
        count += numc_con
        tile_rows.append(np.concatenate(row_tiles, axis=1))

    # Drop the trailing axis of the stitched tiles.
    prediction_map = np.concatenate(tile_rows, axis=0)[:, :, 0]

    # Crop mask and labels to the stitched map and keep only the mask == 1 pixels
    height, width = prediction_map.shape
    mask = prediction_mask_npy[:height, :width]
    lr, lc = np.where(mask == 1)
    groundtruthlist = prediction_label_npy[:height, :width][lr, lc]
    predictionlist = np.logical_and(prediction_map, mask)[lr, lc]

    # Calculate evaluation metrics; restricting `labels` to one class makes the
    # micro average the score of that class alone.
    f1_nonstream = f1_score(groundtruthlist, predictionlist, labels=[0], average='micro')
    f1_stream = f1_score(groundtruthlist, predictionlist, labels=[1], average='micro')
    precision_nonstream = precision_score(groundtruthlist, predictionlist, labels=[0], average='micro')
    precision_stream = precision_score(groundtruthlist, predictionlist, labels=[1], average='micro')
    recall_nonstream = recall_score(groundtruthlist, predictionlist, labels=[0], average='micro')
    recall_stream = recall_score(groundtruthlist, predictionlist, labels=[1], average='micro')
    cohen_kappa = cohen_kappa_score(groundtruthlist, predictionlist)

    # Timestamp with a fixed UTC-06:00 offset ('utc' is only the zone's name).
    run_at = str(datetime.now(timezone(timedelta(hours=-6), 'utc')))
    report = [
        ' '.join(('Model path:', str(model_path), '  Run at:', run_at)),
        'F1 score of Nonstream: ' + str(f1_nonstream),
        'F1 score of Stream: ' + str(f1_stream),
        'Precision of Nonstream: ' + str(precision_nonstream),
        'Precision of Stream: ' + str(precision_stream),
        'Recall of Nonstream: ' + str(recall_nonstream),
        'Recall of Stream: ' + str(recall_stream),
        'Cohen Kappa: ' + str(cohen_kappa),
    ]

    # Print and, when requested, save evaluation results
    for line in report:
        print(line)
    if txt_path is not None:
        # Append so that successive runs accumulate in the same file.
        with open(txt_path, 'a') as txt_file:
            txt_file.write('\n'.join(report) + '\n\n')


In [None]:
# Report the model file that classifier_train wrote (save_model_path + '.pkl');
# save_model_path already ends with the classifier name, so it is not appended again.
evaluate_prediction_result(location, prediction_npy_path, mask_npy, label_npy, save_model_path + '.pkl')