In [59]:
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import balanced_accuracy_score

from matplotlib.patches import Rectangle
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import sys
import numpy as np
import json
import time
import os
# User imports
from Misc import read_binary

In [60]:
def get_run_logdir(root_logdir):
    """Return a timestamped run directory path below ``root_logdir``.

    The final path component has the form ``run__YYYY_MM_DD-HH_MM_SS`` (local
    time at the moment of the call). The directory itself is NOT created.
    """
    # Function-scope import keeps the helper usable when its cell is run alone.
    from time import strftime

    stamp = strftime("run__%Y_%m_%d-%H_%M_%S")
    return os.path.join(root_logdir, stamp)

In [61]:
# Data locations, all relative to the project root.
root = "data/full/"

# Move to the project root only when we are not already there. An
# unconditional relative chdir is not idempotent: every re-run of this cell
# would climb another five directories and break all the relative paths below.
if not os.path.isdir(root):
    os.chdir("../../../../../")

# `root` already ends with a slash, so nothing is prepended to the sub-paths.
# train_root_path is used as f'{train_root_path}/...' (no trailing slash);
# the two *_root values are concatenated directly with a file name, so they
# keep their trailing slash.
train_root_path = f"{root}prepared/train"
reference_data_root = f"{root}data_bcgw/"
raw_data_root = f"{root}data_img/"

In [62]:

# Class name -> reference raster file name. Dict order matters: it defines
# the order of `keys` and therefore of `classes` below.
target_all = {
    "conifer" : "CONIFER.bin",
    "ccut" : "CCUTBL.bin",
    "water": "WATER.bin",
    "broadleaf" : "BROADLEAF.bin",
    "shrub" : "SHRUB.bin",
    "mixed" : "MIXED.bin",
    "herb" : "HERB.bin",
    "exposed" : "EXPOSED.bin",
    "river" : "Rivers.bin",
    # "road" : "ROADS.bin",
    # "vri" : "vri_s3_objid2.tif_proj.bin",
}

# Target names in dict order, and the full label list with the
# "unlabelled" entry in front of them.
keys = list(target_all)
classes = ["unlabelled", *keys]





In [63]:
# Create ./outs/RandomForestBinary/KFold (any missing parents included) and a
# fresh timestamped run directory inside it.
kfold_root = os.path.join(os.curdir, 'outs', 'RandomForestBinary', 'KFold')
# exist_ok=True replaces the separate exists()/mkdir() pairs and removes the
# window between the check and the creation.
os.makedirs(kfold_root, exist_ok=True)

outdir = get_run_logdir(kfold_root)
# Plain mkdir on purpose: a run directory that already exists (two runs in the
# same second) should fail loudly rather than be silently shared.
os.mkdir(outdir)

In [64]:
# Feature matrix for the whole image, read from the prepared training folder.
# The training cell slices it as X[start:stop, :] in blocks of `fold_length`
# rows (presumably one row per pixel, one column per band — TODO confirm).
X = np.load(f'{train_root_path}/full-img.npy')

In [68]:
# The scene is 4835 lines by 3402 samples; it is cut into 5 horizontal bands
# of equal height, one per fold.
lines_per_fold = 4835 // 5
samples_per_line = 3402

# Shape of one fold when drawn as an image, and its size as a flat pixel count.
sub_img_shape = (lines_per_fold, samples_per_line)
fold_length = lines_per_fold * samples_per_line

In [69]:
# Number of trees per training chunk: the training cell creates each forest
# with n_estimators=n_est and raises n_estimators by n_est after every fit.
n_est = 250

In [None]:
# Train one binary random forest per target class with a 5-fold hold-out.
# The flattened image is cut into 5 consecutive blocks of `fold_length` pixels;
# each block is the test fold once while the forest is grown incrementally
# (warm start) on the other four. For every (target, fold) this writes:
#   <outdir>/fold_<k>/<target>.npy               positive-class confidence
#   <outdir>/fold_<k>/<target>_test_predvsref    reference vs confidence figure
#   <outdir>/fold_<k>/<target>_results.txt       accuracy / balanced accuracy
#
# NOTE(review): neither train_test_split nor the forest is seeded, so results
# differ from run to run.
n_folds = 5

for target, reference_file in target_all.items():
    reference_path = f'{reference_data_root}{reference_file}'
    if not os.path.exists(reference_path):
        # Report the skip: the confidence-map cell later loads a
        # `<target>.npy` for every target from every fold directory.
        print(f"Skipping {target}: reference file not found: {reference_path}")
        continue

    cols, rows, bands, y = read_binary(reference_path, to_string=False)

    # Encode the reference as booleans. The largest value in the raster is
    # compared/combined with every pixel; broadcasting the scalar gives the
    # same result as materialising a full-size array filled with that value.
    max_value = np.max(y)
    if target == 'water':
        # water: a pixel is positive when it differs from the largest value
        y = np.not_equal(y, max_value)
    else:
        # everything else: positive when the pixel (and the largest value) is non-zero
        y = np.logical_and(y, max_value)

    # ---------------------------------------------------------------------
    # KFold training
    # ---------------------------------------------------------------------
    for test_idx in range(n_folds):
        print(test_idx)
        path = os.path.join(outdir, f"fold_{test_idx}")
        os.makedirs(path, exist_ok=True)

        params = {
            'n_estimators': n_est,
            'max_depth': 6,
            'max_features': 0.2,
            'verbose': 0,
            'n_jobs': -1,
            # 'bootstrap': False,
            # NOTE(review): oob_score_ is never read in this cell — confirm it is still wanted.
            'oob_score': True,
            'warm_start': True
        }
        clf = RandomForestClassifier(**params)

        # Only 'fit' is filled in; kept for interactive inspection.
        processing_time = {'fit': [], 'predict': []}

        test_rows = slice(test_idx * fold_length, (test_idx + 1) * fold_length)
        X_test = X[test_rows, :]
        y_test = y[test_rows]

        for train_idx in range(n_folds):
            if train_idx == test_idx:
                continue
            train_rows = slice(train_idx * fold_length, (train_idx + 1) * fold_length)
            # A random 90% of the chunk is used for fitting; the remaining
            # 10% is not used anywhere, so it is discarded immediately.
            X_train, _, y_train, _ = train_test_split(
                X[train_rows, :], y[train_rows], train_size=0.9)
            print("Train IDX ", train_idx)
            start_fit = time.time()
            clf.fit(X_train, y_train)
            end_fit = time.time()
            # Warm start: raise the target size so the next fit() adds
            # another n_est trees trained on the next chunk.
            clf.n_estimators = clf.n_estimators + n_est
            processing_time['fit'].append(round(end_fit - start_fit, 2))

        # NOTE(review): column 1 is taken as the positive class; this assumes
        # both classes occur in the training chunks — TODO confirm for rare targets.
        test_pred_confidence = clf.predict_proba(X_test)
        np.save(f'{path}/{target}', test_pred_confidence[:, 1])
        # The forest's predict() is the arg-max of predict_proba(); reuse the
        # probabilities instead of running the whole forest a second time.
        test_pred_class = clf.classes_[np.argmax(test_pred_confidence, axis=1)]

        fig, axes = plt.subplots(2, 1, sharey=True, figsize=(30, 15))
        fig.suptitle(f"{target} Test Reference vs Prediction")
        axes[0].imshow(y_test.reshape(sub_img_shape), cmap='gray', vmin=0, vmax=1)
        axes[0].set_title('Ground Reference')
        axes[1].imshow(test_pred_confidence[:, 1].reshape(sub_img_shape),
                       cmap='gray', vmin=0, vmax=1)
        axes[1].set_title('Prediction Confidence')
        fig.savefig(f'{path}/{target}_test_predvsref')
        plt.close(fig)

        score = str(accuracy_score(y_test, test_pred_class))
        bal_score = str(balanced_accuracy_score(y_test, test_pred_class))
        print(test_idx)
        print(score)
        print(bal_score)
        # Separate name for the file handle so it does not shadow the figure.
        with open(path + f"/{target}_results.txt", "w") as results_file:
            results_file.write("Accuracy Test: " + score)
            results_file.write("\nBalanace Accuracy Test: " + bal_score)


	samples 3402 lines 4835 bands 1
+r data/full/data_bcgw/CONIFER.bin
0
Train IDX  1
Train IDX  2
Train IDX  3


In [None]:
import pandas as pd

In [52]:
# NOTE(review): this rebinds `outdir` (until here the timestamped run directory
# created above) to a hard-coded absolute path on one machine. The cells below
# read the per-fold `<target>.npy` files from it and save the class maps into
# it. Consider making this location configurable.
outdir = "/home/brad/Projects/fuel-for-fire/outs/RandomForestBinary/KFold/confidencemap_5Fold"

In [None]:
# Build one class map per fold: for every pixel, pick the target whose binary
# forest reported the highest confidence. Labels are 1-based in target_all
# order, so they line up with `classes[1:]`.
# NOTE(review): arg-max always selects some target, so label 0 ("unlabelled")
# is never produced, even where every confidence is 0.
dfs = []

for fold_idx in range(5):
    fold_dir = os.path.join(outdir, f"fold_{fold_idx}")
    # One column per target, stacked directly in numpy (no DataFrame detour).
    confidences = np.column_stack(
        [np.load(f'{fold_dir}/{target}.npy') for target in target_all]
    )
    dfs.append(np.argmax(confidences, axis=1) + 1)

In [None]:
# Draw and save the class map of every fold.
# Colorbar labels come from `classes` (label value == index in `classes`);
# the trailing empty label covers the last boundary.
tick_labels = classes + [""]

for fold_idx, class_map in enumerate(dfs):
    print(np.unique(class_map, return_counts=True))

    fig, ax = plt.subplots(figsize=(30, 15))
    image = ax.imshow(class_map.reshape(sub_img_shape), cmap='cubehelix', vmin=0, vmax=10)

    cbar = fig.colorbar(image, ax=ax,
                        orientation='vertical',
                        boundaries=range(11), shrink=.95, extend='max',
                        extendrect=True, drawedges=True, spacing='uniform')
    # Pin one tick per label before naming them; labels set without fixed tick
    # positions are attached to whatever ticks the locator happened to choose.
    cbar.set_ticks(np.arange(len(tick_labels)))
    cbar.set_ticklabels(tick_labels)

    fig.savefig(f'{outdir}/{fold_idx}_confidence_map')
    plt.show()
    # Release the (large) figure before the next iteration.
    plt.close(fig)

In [None]:

""" Relies on prep_data to read the base image in"""


def main():
    """Entry point kept for the ``__main__`` guard at the end of the file.

    Intentionally does nothing. The body used to consist solely of a
    commented-out multi-class K-fold experiment, which left the function
    without any statement and made this cell fail to parse. The per-target
    binary training now runs in the notebook cells above; the old experiment
    remains available in version control.

    Returns:
        None
    """
    return None


def build_vis(prediction, y, shape):
    """Build a per-pixel visualization of the binary confusion outcome.

    Each pixel gets a 3-component value depending on its outcome, where
    ``i`` is the pixel's position in the flattened input:

        true positive   -> [0, i, 0]
        false positive  -> [i, 0, 0]
        false negative  -> [i, i // 2, 0]
        true negative   -> [0, 0, i]

    NOTE(review): the components scale with the pixel index, exactly as the
    original loop did; this looks like a stand-in for fixed colours — confirm.

    Args:
        prediction: sequence of predicted labels; a value counts as positive
            when its integer part is non-zero.
        y: sequence of reference labels, interpreted by truthiness.
        shape: target shape for the result; must hold ``len(y) * 3`` values.

    Returns:
        Float array of the requested shape. If ``prediction`` is shorter than
        ``y`` the unmatched trailing pixels stay zero.

    Raises:
        ValueError: if a prediction cannot be converted to an integer, or if
            ``shape`` does not match. Errors are no longer silently swallowed.
    """
    visualization = np.zeros((len(y), 3))

    # Only pixels present in both inputs are classified (zip semantics).
    n = min(len(prediction), len(y))
    predicted = np.asarray(prediction)[:n].astype(int) != 0
    actual = np.asarray(y)[:n].astype(bool)
    idx = np.arange(n)

    rows = visualization[:n]  # view: writes go straight into `visualization`

    true_pos = predicted & actual
    false_pos = predicted & ~actual
    false_neg = ~predicted & actual
    true_neg = ~predicted & ~actual

    rows[true_pos, 1] = idx[true_pos]
    rows[false_pos, 0] = idx[false_pos]
    rows[false_neg, 0] = idx[false_neg]
    rows[false_neg, 1] = idx[false_neg] // 2
    rows[true_neg, 2] = idx[true_neg]

    return visualization.reshape(shape)


def encode_one_hot(target, xs, xl, array=True):
    """Encode the reference rasters named in ``target`` as one dense label array.

    Each reference image is reduced to a binary mask automatically (see the
    loop below) and the masks are merged into a single 1-D array of length
    ``xl * xs``: 0 where no class matched, ``i + 1`` where the i-th entry of
    ``target`` (in dict order) is present. Despite the name, the result is a
    label array, not a one-hot matrix.

    Caveat: the encoding is naive. Conflicting pixel labels are not resolved
    intelligently: a pixel that belongs to several classes ends up with the
    class that comes latest in the dictionary.

    Args:
        target: mapping of class name -> reference file name; files are
            resolved against the module-level ``reference_data_root``.
        xs: expected number of samples of every reference image.
        xl: expected number of lines of every reference image.
        array: accepted for backward compatibility and ignored; the function
            has always returned the dense label array regardless of its value.

    Returns:
        Float array of length ``xl * xs`` with the class labels.

    Raises:
        AssertionError: if a reference image does not match ``xs`` / ``xl``.
    """
    result = np.zeros((xl * xs))

    for idx, (key, filename) in enumerate(target.items()):
        s, l, b, tmp = read_binary(os.path.join(reference_data_root, filename))

        # The reference must have the same dimensions as the raw image.
        assert int(s) == int(xs), f"{filename}: expected {xs} samples, got {s}"
        assert int(l) == int(xl), f"{filename}: expected {xl} lines, got {l}"

        # The largest value in the raster is treated as the file's "false"
        # value. Broadcasting the scalar is equivalent to comparing against a
        # full-size array filled with it, without allocating that array.
        false_value = np.max(tmp)

        if key == 'water':
            arr = np.not_equal(tmp, false_value)
        else:
            arr = np.logical_and(tmp, false_value)

        # arr is True where the class exists; later classes overwrite earlier ones.
        result[arr] = idx + 1

    return result

def visRGB(s, l, b,X):
    """Build an ``(l, s, 3)`` composite from a band-major image.

    Use this if you want to show the raw image in the output.

    ``X`` is viewed as ``(b, s * l)``. Bands 3, 2 and 1 (in that order) fill
    output channels 0, 1 and 2, and each channel is passed through
    ``rescale``.

    NOTE(review): ``rescale`` is neither defined nor imported in the visible
    part of this file — confirm where it is expected to come from.
    """
    bands = X.reshape(b, s * l)
    rgb = np.zeros((l, s, 3))

    # Channel k takes band 3 - k; the channels are independent of each other,
    # so each one is filled and rescaled in the same pass.
    for channel, band in enumerate((3, 2, 1)):
        rgb[:, :, channel] = bands[band, :].reshape((l, s))
        rgb[:, :, channel] = rescale(rgb[:, :, channel])

    return rgb




# Script-style entry point: call main() when this code runs as the top-level module.
if __name__ == "__main__":

   main()
