# Floodplain Predict

**Timm Nawrocki**  
Alaska Center for Conservation Science  
2019-04-20

In [1]:
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------------
# Floodplain Predict
# Author: Timm Nawrocki, Alaska Center for Conservation Science
# Created on: 2019-04-20
# Usage: Must be executed as a Jupyter Notebook in an Anaconda 3 installation.
# Description: "Floodplain Predict" applies the trained classifier to data in regular point grid format stored in csv files to create a prediction representing the distribution of floodplains.
# ---------------------------------------------------------------------------

## 1. Initialize Environment

In [2]:
# Import packages
import datetime
import matplotlib.pyplot as plot
import numpy as np
import pandas as pd
import os
import seaborn as sns
import time
# Import the random forest classifier and joblib (model persistence) from Scikit Learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.externals import joblib

# Root of the project output data; every input and output below resolves beneath it
drive = 'E:/'
root_directory = os.path.join(
    drive,
    'ACCS_Work/Projects/VegetationEcology/BristolBay_Vegetation/Project_GIS/Data_Output'
)

# Inputs: folder of prediction grid tables, the trained classifier, and the threshold text file
subwatershed_path, floodplain_classifier, threshold_file = (
    os.path.join(root_directory, relative_path)
    for relative_path in (
        'prediction_tables',
        'model_floodplain/classifier_floodplain.joblib',
        'model_floodplain/threshold.txt',
    )
)

# Output: folder that receives the predicted tables
output_path = os.path.join(root_directory, 'model_floodplain/output_tables')

In [3]:
# Columns passed to the classifier as predictors, listed in the order used by the model
classifier_features = [
    # Covariates without a month prefix
    'compoundTopographic', 'elevation', 'exposure', 'heatLoad', 'moisture',
    'roughness', 'slope', 'summerWarmth', 'surfaceArea', 'surfaceRelief',
    # X05May_*
    'X05May_ndsi', 'X05May_ndvi', 'X05May_ndwi',
    # X06June_*
    'X06June_4_red', 'X06June_ndsi', 'X06June_ndvi', 'X06June_ndwi',
    # X07July_*
    'X07July_4_red', 'X07July_6_redEdge2', 'X07July_7_redEdge3', 'X07July_8_nearInfrared',
    'X07July_11_shortInfrared1', 'X07July_12_shortInfrared2',
    'X07July_nbr', 'X07July_ndmi', 'X07July_ndvi', 'X07July_ndwi',
    # X08August_*
    'X08August_ndvi',
    # X09September_*
    'X09September_1_ultraBlue', 'X09September_4_red', 'X09September_11_shortInfrared1',
    'X09September_ndvi', 'X09September_ndwi',
    # X10October_*
    'X10October_2_blue', 'X10October_3_green', 'X10October_4_red',
    'X10October_5_redEdge1', 'X10October_6_redEdge2',
    'X10October_11_shortInfrared1', 'X10October_12_shortInfrared2',
    'X10October_nbr', 'X10October_ndmi', 'X10October_ndsi', 'X10October_ndvi', 'X10October_ndwi',
]
# Coordinate columns carried through to the output tables
coordinates = ['POINT_X', 'POINT_Y']
# Column holding the predicted value
predict = ['presence']
# Columns written to each output table: coordinates followed by the prediction
output_variables = [*coordinates, *predict]

## 2. Prepare data and model

In [4]:
# Define a function to read threshold values from text file
def readThreshold(inFile):
    """Read a threshold value from the first line of a text file.

    Only the first line of the file is used; any further lines are ignored.

    Parameters
    ----------
    inFile : str
        Path to the text file whose first line holds the threshold.

    Returns
    -------
    float
        The first line of the file converted to a float.

    Raises
    ------
    ValueError
        If the file is empty, the first line is blank, or the first line
        cannot be converted to a float.
    """
    # The context manager closes the file even if reading raises
    with open(inFile, "r") as threshold_reader:
        first_line = threshold_reader.readline()
    # Fail with an explicit message instead of an opaque error when no value is present
    if not first_line.strip():
        raise ValueError('Threshold file {0} does not start with a numeric value.'.format(inFile))
    outThreshold = float(first_line)
    return outThreshold

In [None]:
# Load the trained classifier from the joblib file defined in the setup cell
# NOTE(review): joblib files are pickle-based, so only load classifier files from a trusted source
# NOTE(review): `sklearn.externals.joblib` is unavailable in newer scikit-learn releases; confirm the environment or switch to `import joblib`
classifier = joblib.load(floodplain_classifier)
# Read the threshold from the first line of the threshold text file and store it as a float
# NOTE(review): `threshold` is not referenced by the other cells visible here; presumably it is applied in a later step — confirm
threshold = readThreshold(threshold_file)

In [None]:
# Define list of prediction grids
# os.listdir returns entries in arbitrary order, so the list is sorted to give a reproducible processing order
# The extension is matched with its dot so that names merely ending in the letters 'csv' are not picked up
grid_list = sorted(
    os.path.join(subwatershed_path, file_name)
    for file_name in os.listdir(subwatershed_path)
    if file_name.endswith('.csv')
)
print('{0} grid tiles will be processed...'.format(len(grid_list)))

53 grid tiles will be processed...


In [None]:
# Predict every input grid that does not yet have an output table
# Create the output folder up front so a long prediction is not lost to a missing directory at export
os.makedirs(output_path, exist_ok=True)
grid_total = len(grid_list)
for grid_number, grid in enumerate(grid_list, start=1):
    # Define the output csv file (same file name as the input grid, in the output folder)
    output_csv = os.path.join(output_path, os.path.split(grid)[1])
    # Skip grids that already have an output table so that an interrupted run can be resumed
    if os.path.isfile(output_csv):
        continue
    # Start timing function execution
    start = time.time()
    print('Predicting grid {0} out of {1}...'.format(grid_number, grid_total))
    # Load the input data
    input_data = pd.read_csv(grid)
    # Define the X data as floats without modifying the loaded table
    X_data = input_data[classifier_features].astype(float)
    # Predict the class probabilities
    classification = classifier.predict_proba(X_data)
    # Label the probability columns and align them to the input rows by sharing the input index
    # NOTE(review): assumes column 0 is absence and column 1 is presence (order of classifier.classes_) — confirm
    probabilities = pd.DataFrame(classification, index=input_data.index)
    probabilities = probabilities.rename(columns={0: 'absence', 1: 'presence'})
    # Concatenate predicted values to input data and keep only the output columns
    output_data = pd.concat([input_data, probabilities], axis=1)[output_variables]
    # Export prediction to a temporary file first, then move it into place, so that an
    # interrupted export cannot leave a partial csv that the skip check above would accept
    temporary_csv = output_csv + '.tmp'
    output_data.to_csv(temporary_csv, header=True, index=False, sep=',', encoding='utf-8')
    os.replace(temporary_csv, output_csv)
    # End timing
    end = time.time()
    elapsed = int(end - start)
    success_time = datetime.datetime.now()
    # Report process success
    out_process = 'Succeeded at {0} (Elapsed time: {1})'.format(success_time.strftime("%Y-%m-%d %H:%M"),
                                                                datetime.timedelta(seconds=elapsed))
    print(out_process)
    print('----------')

Predicting grid 1 out of 53...
Succeeded at 2019-04-20 17:23 (Elapsed time: 0:09:21)
----------
Predicting grid 2 out of 53...
