# Abundance Predict

**Written by Timm Nawrocki**

*Last updated Monday March 29, 2021.*

In [1]:
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------------
# Predict Vegetation Abundance
# Author: Timm Nawrocki, Alaska Center for Conservation Science
# Created on: 2021-03-29
# Usage: Must be executed as a Jupyter Notebook in an Anaconda 3 installation.
# Description: "Predict Vegetation Abundance" applies the trained classifier to data in regular point grid format stored in
# csv files and rescales the predicted presence probability around the classification threshold to create a selection
# value from -1 (avoidance) to 1 (selection) for each grid point.
# ---------------------------------------------------------------------------

In [2]:
# Import os
import os

In [3]:
# Set root directory (absolute path to the project's data output folder)
root_folder = 'N:/ACCS_Work/Projects/WildlifeEcology/Moose_SouthwestAlaska/Data/Data_Output'
# Define the map group; this name is used as the subfolder under both 'model_results' and 'predicted_tables'
# NOTE(review): 'Calf_RF' presumably denotes calf status plus a random forest model -- confirm
map_group = 'Calf_RF'

In [4]:
# Folder holding the extracted grid csv files that serve as prediction inputs
grid_folder = os.path.join(root_folder, 'extracted_grids')
# Folder holding the model results for the selected map group
model_folder = os.path.join(root_folder, 'model_results', map_group)
# Folder that receives the predicted tables for the selected map group
prediction_folder = os.path.join(root_folder, 'predicted_tables', map_group)

In [5]:
# Make output directory if it does not already exist
# makedirs also creates any missing parent folder (e.g. 'predicted_tables'), which os.mkdir would fail on,
# and exist_ok=True makes the cell safe to re-run.
os.makedirs(prediction_folder, exist_ok=True)

In [6]:
# Define variable sets
# Predictor columns passed to the classifier, in the order listed here
predictor_all = [
    'elevation_mean',
    'roughness_mean',
    'forest_edge_mean',
    'tundra_edge_mean',
    'alnus_mean',
    'betshr_mean',
    'dectre_mean',
    'erivag_mean',
    'picgla_mean',
    'picmar_mean',
    'salshr_mean',
    'wetsed_mean',
]
# Single-item lists naming the columns used for coordinates and model outputs
coordinates = ['x', 'y']
absence = ['absence']
presence = ['presence']
prediction = ['prediction']
selection = ['selection']
# Columns written to the exported tables: coordinates followed by the selection value
output_columns = [*coordinates, *selection]

In [7]:
# Import packages for file manipulation, data manipulation, and plotting
import glob
import numpy as np
import pandas as pd
# Import Random Forest implementation
from sklearn.ensemble import RandomForestClassifier
# Import joblib
import joblib
# Import timing packages
import time
import datetime

In [8]:
# Define a function to read threshold values from text file
def readThreshold(inFile):
    """Read a threshold value from the first line of a text file.

    Parameters
    ----------
    inFile : str or path-like
        Path to a text file whose first line holds a number.

    Returns
    -------
    float
        The first line of the file converted to float; any further lines are ignored.

    Raises
    ------
    ValueError
        If the file is empty or the first line cannot be parsed as a float.
    """
    # The context manager closes the file even if reading fails
    with open(inFile, "r") as threshold_reader:
        first_line = threshold_reader.readline()
    if first_line.strip() == '':
        raise ValueError(f'Threshold file {inFile} does not contain a value on its first line.')
    outThreshold = float(first_line)
    return outThreshold

In [9]:
# Create a function to composite model results
def compositeSelection(input_data, presence, threshold, output):
    """Rescale a predicted presence column around a threshold to values from -1 (avoidance) to 1 (selection).

    Values equal to the threshold map to 0. Values above the threshold are divided by the distance from the
    threshold to the column maximum, and values below it by the distance from the column minimum to the
    threshold. Rows whose presence value is missing receive NaN.

    Parameters
    ----------
    input_data : pandas.DataFrame
        Data frame containing the presence column. The output column is added to this same object.
    presence : list of str
        List whose first item is the name of the presence column.
    threshold : float
        Value of the presence column that separates avoidance from selection.
    output : list of str
        List whose first item is the name of the column that receives the rescaled values.

    Returns
    -------
    pandas.DataFrame
        The same data frame passed as input_data, with the output column added.
    """
    presence_column = input_data[presence[0]]
    # Determine positive and negative ranges
    positive_range = presence_column.max() - threshold
    negative_range = threshold - presence_column.min()
    # Work on a plain float array so the rescaling is vectorized instead of evaluated once per row
    values = presence_column.to_numpy(dtype=float)
    above = values > threshold
    below = values < threshold
    # Start from NaN so that rows with a missing presence value stay missing
    scaled = np.full(values.shape, np.nan)
    scaled[values == threshold] = 0.0
    # Each division only touches rows on its own side of the threshold, so a zero range is never used
    scaled[above] = (values[above] - threshold) / positive_range
    scaled[below] = (values[below] - threshold) / negative_range
    # Assigning an array is positional, so the result is independent of the data frame index
    input_data[output[0]] = scaled
    # Return the data frame with composited results
    return input_data

In [10]:
# Load the trained classifier stored in the model folder
classifier_file = os.path.join(model_folder, 'classifier.joblib')
classifier = joblib.load(classifier_file)

In [11]:
# Read the threshold value stored alongside the model and keep it as a float
threshold_file = os.path.join(model_folder, 'threshold.txt')
threshold = readThreshold(threshold_file)

In [12]:
# Create a list of input files for the prediction step
# The working directory is set to the grid folder so that glob returns bare file names
os.chdir(grid_folder)
# glob returns matches in arbitrary order; sort so the grid numbering is reproducible between runs
grid_files = sorted(glob.glob('*.csv'))
grid_length = len(grid_files)

In [None]:
# Loop through the prediction function for all input files
def _report_completion(label, start_time, indent=''):
    """Print the completion time and whole seconds elapsed since start_time, followed by a separator line."""
    elapsed = int(time.time() - start_time)
    success_time = datetime.datetime.now()
    print(f'{indent}{label} at {success_time.strftime("%Y-%m-%d %H:%M")} (Elapsed time: {datetime.timedelta(seconds=elapsed)})')
    print(f'{indent}----------')

# Map the column names found in the grid files onto the predictor names listed in predictor_all
predictor_names = {'elevation': 'elevation_mean',
                   'roughness': 'roughness_mean',
                   'forest_edge': 'forest_edge_mean',
                   'tundra_edge': 'tundra_edge_mean',
                   'alnus': 'alnus_mean',
                   'betshr': 'betshr_mean',
                   'dectre': 'dectre_mean',
                   'erivag': 'erivag_mean',
                   'picgla': 'picgla_mean',
                   'picmar': 'picmar_mean',
                   'salshr': 'salshr_mean',
                   'wetsed': 'wetsed_mean'}

for count, grid in enumerate(grid_files, start=1):
    # Define the output csv file
    output_csv = os.path.join(prediction_folder, grid)

    # Skip grids whose output table already exists so that an interrupted run can be resumed
    if os.path.exists(output_csv):
        print(f'Grid {count} of {grid_length} already predicted.')
        print('----------')
        continue

    total_start = time.time()
    print(f'Predicting grid {count} of {grid_length}...')

    # Load the input data and prepare the predictor columns
    print('\tLoading grid data into memory...')
    iteration_start = time.time()
    input_csv = os.path.join(grid_folder, grid)
    input_data = pd.read_csv(input_csv)
    input_data = input_data.rename(columns=predictor_names)
    # Drop every row that has a missing value in any column
    input_data = input_data.dropna(axis=0, how='any')
    input_data[predictor_all] = input_data[predictor_all].astype(float)
    # Define the X data (already float after the cast above)
    X_data = input_data[predictor_all]
    _report_completion('Completed', iteration_start, indent='\t')

    # Predict the classifier
    print('\tClassifying presence-absence...')
    iteration_start = time.time()
    classification = classifier.predict_proba(X_data)
    # Concatenate predicted values to input data frame
    # NOTE(review): assumes probability column 0 is absence and column 1 is presence -- confirm against classifier.classes_
    input_data = input_data.assign(absence=classification[:, 0],
                                   presence=classification[:, 1])
    _report_completion('Completed', iteration_start, indent='\t')

    # Convert to selection
    print('\tExporting results...')
    iteration_start = time.time()
    input_data = compositeSelection(input_data, presence, threshold, selection)
    output_data = input_data[output_columns]
    # Write to a temporary file first and move it into place once complete; otherwise an interrupted export
    # leaves a truncated csv that the existence check above would treat as already predicted.
    temporary_csv = output_csv + '.tmp'
    output_data.to_csv(temporary_csv, header=True, index=False, sep=',', encoding='utf-8')
    os.replace(temporary_csv, output_csv)
    _report_completion('Completed', iteration_start, indent='\t')

    _report_completion('Iteration completed', total_start)

Predicting grid 1 of 744...
	Loading grid data into memory...
	Completed at 2021-03-29 21:28 (Elapsed time: 0:00:01)
	----------
	Classifying presence-absence...
	Completed at 2021-03-29 21:28 (Elapsed time: 0:00:24)
	----------
	Exporting results...
	Completed at 2021-03-29 21:29 (Elapsed time: 0:00:14)
	----------
Iteration completed at 2021-03-29 21:29 (Elapsed time: 0:00:41)
----------
Predicting grid 2 of 744...
	Loading grid data into memory...
	Completed at 2021-03-29 21:29 (Elapsed time: 0:00:01)
	----------
	Classifying presence-absence...
	Completed at 2021-03-29 21:29 (Elapsed time: 0:00:26)
	----------
	Exporting results...
	Completed at 2021-03-29 21:29 (Elapsed time: 0:00:15)
	----------
Iteration completed at 2021-03-29 21:29 (Elapsed time: 0:00:43)
----------
Predicting grid 3 of 744...
	Loading grid data into memory...
	Completed at 2021-03-29 21:29 (Elapsed time: 0:00:01)
	----------
	Classifying presence-absence...
	Completed at 2021-03-29 21:30 (Elapsed time: 0:00:2