## ACCURACY ASSESSMENT

- Accuracy assessment will be performed via confusion matrix
- Through analysis of true negative, true positive, false negative, and false positive values, a ranking of models will be created
- Models are all Huff Models, but with different alpha values (1.5, 1.75, and 2)

In [42]:
# Library for data manipulation and analysis
import pandas as pd

# Library for working with ArcGIS tools and functionalities
import arcpy
import arcgis

# Libraries for working with PostgreSQL
import psycopg2
from psycopg2 import sql

# Library for handling JSON data
import json

# Library for making HTTP requests and interacting with web services
import requests

# Library for interacting with the operating system, managing file paths, and executing system commands
import os

# Library for handling warnings generated during code execution
import warnings

# Library for displaying images in Jupyter Notebooks
from IPython.display import Image

# Library for generating random numbers
import random

# Library for handling zip files and working with IO streams
import zipfile
import io

# Library for working with dates
from datetime import date

import urllib.request

In [43]:
# Path to the project file geodatabase.
# Set the BMSB_FILE_GDB environment variable to run the notebook against a geodatabase
# stored somewhere else; when it is not set, the original local path below is used.
file_gdb = os.environ.get(
    'BMSB_FILE_GDB',
    r"F:\1. UMN MGIS\1. Semesters\4th Semester\1. ArcGIS II\4. Project\ArcPro Files\ArcII_Final Project\ArcII_Final Project.gdb"
)

In [44]:
# Field layout expected in the simulation tables: city name followed by the four
# confusion matrix counters. (Inside confusion_matrix the parameter of the same name
# shadows this module-level list.)
sim_fields = ['City','TP','TN','FP','FN']

# Compare every simulation run against the BMSB "ground" data, city by city
def confusion_matrix(bmsb_data, sim_data, bmsb_fields, sim_fields, n_sims = 100):
    """Tally TP / TN / FP / FN per city across all simulation runs.

    Parameters
    ----------
    bmsb_data : table or feature class holding the ground-truth BMSB records.
    sim_data : table or feature class holding one row per city with the simulation results.
    bmsb_fields : field names read from ``bmsb_data``; the first one must be the city name.
    sim_fields : field names in ``sim_data``, in the order city, TP, TN, FP, FN.
        The list is not modified.
    n_sims : number of simulation fields, named ``BMSB_Sim0`` .. ``BMSB_Sim<n_sims - 1>``.

    For each city row, a simulation value of 1 counts as predicted presence and 0 as
    predicted absence; any other value (for example a null) is not counted. Cities are
    matched to the ground-truth data by lower-cased name. The counts are written back
    to the row and printed. Returns None.
    """

    print('Creating confusion matrix...')

    # Lower-cased names of cities with BMSB presence in the ground truth data.
    # A set gives constant-time lookups; rows without a city name are skipped.
    bmsb_presence_cities = set()
    with arcpy.da.SearchCursor(in_table = bmsb_data, field_names = bmsb_fields) as s_cursor:
        for truth_row in s_cursor:
            if truth_row[0] is not None:
                bmsb_presence_cities.add(truth_row[0].lower())

    # Build the cursor field list on a copy so the caller's list is left untouched
    # (appending to it in place would duplicate the simulation fields on a second call).
    first_sim = len(sim_fields)
    cursor_fields = list(sim_fields) + ['BMSB_Sim' + str(i) for i in range(n_sims)]

    # Update confusion matrix fields
    with arcpy.da.UpdateCursor(in_table = sim_data, field_names = cursor_fields) as u_cursor:
        for row in u_cursor:

            city = row[0]
            # A row without a city name cannot match any ground-truth city
            on_ground = city is not None and city.lower() in bmsb_presence_cities

            sim_values = row[first_sim:first_sim + n_sims]
            predicted_present = sum(1 for value in sim_values if value == 1)
            predicted_absent = sum(1 for value in sim_values if value == 0)

            # Ground presence + sim presence: TP | ground presence + no sim presence: FN
            # No ground presence + sim presence: FP | no ground presence + no sim presence: TN
            if on_ground:
                TP, TN, FP, FN = predicted_present, 0, 0, predicted_absent
            else:
                TP, TN, FP, FN = 0, predicted_absent, predicted_present, 0

            # Add values to confusion matrix
            row[1] = TP
            row[2] = TN
            row[3] = FP
            row[4] = FN

            print(f'{row[0]}: TP - {row[1]} | TN - {row[2]} | FP - {row[3]} | FN - {row[4]}')

            u_cursor.updateRow(row)

    print('Done')

#### Calling the Confusion Matrix with different alpha values

In [45]:
# Confusion matrix for alpha = 1.5
# NOTE(review): sim_data is a bare dataset/layer name here, while the alpha = 1.75 and
# alpha = 2 cells pass a full path under file_gdb. Presumably it resolves through the
# active map or the current workspace - confirm it points at the intended feature class.
confusion_matrix(
    bmsb_data = os.path.join(file_gdb,'BMSBDataClean_Filtered'), 
    sim_data = 'MN_Cities_15', 
    bmsb_fields = ['City'], 
    sim_fields = ['City','TP','TN','FP','FN']
)

Creating confusion matrix...
Ada: TP - 0 | TN - 99 | FP - 1 | FN - 0
Adams: TP - 0 | TN - 99 | FP - 1 | FN - 0
Adrian: TP - 0 | TN - 100 | FP - 0 | FN - 0
Afton: TP - 0 | TN - 52 | FP - 48 | FN - 0
Aitkin: TP - 0 | TN - 93 | FP - 7 | FN - 0
Akeley: TP - 0 | TN - 98 | FP - 2 | FN - 0
Albany: TP - 0 | TN - 92 | FP - 8 | FN - 0
Albert Lea: TP - 0 | TN - 63 | FP - 37 | FN - 0
Alberta: TP - 0 | TN - 100 | FP - 0 | FN - 0
Albertville: TP - 0 | TN - 24 | FP - 76 | FN - 0
Alden: TP - 0 | TN - 100 | FP - 0 | FN - 0
Aldrich: TP - 0 | TN - 100 | FP - 0 | FN - 0
Alexandria: TP - 0 | TN - 82 | FP - 18 | FN - 0
Alpha: TP - 0 | TN - 99 | FP - 1 | FN - 0
Altura: TP - 0 | TN - 98 | FP - 2 | FN - 0
Alvarado: TP - 0 | TN - 100 | FP - 0 | FN - 0
Amboy: TP - 0 | TN - 99 | FP - 1 | FN - 0
Andover: TP - 0 | TN - 0 | FP - 100 | FN - 0
Annandale: TP - 0 | TN - 83 | FP - 17 | FN - 0
Anoka: TP - 0 | TN - 0 | FP - 100 | FN - 0
Apple Valley: TP - 0 | TN - 0 | FP - 100 | FN - 0
Appleton: TP - 0 | TN - 99 | FP - 1 |

Crystal: TP - 0 | TN - 0 | FP - 100 | FN - 0
Currie: TP - 0 | TN - 100 | FP - 0 | FN - 0
Cuyuna: TP - 0 | TN - 99 | FP - 1 | FN - 0
Cyrus: TP - 0 | TN - 100 | FP - 0 | FN - 0
Dakota: TP - 0 | TN - 100 | FP - 0 | FN - 0
Dalton: TP - 0 | TN - 100 | FP - 0 | FN - 0
Danube: TP - 0 | TN - 99 | FP - 1 | FN - 0
Danvers: TP - 0 | TN - 100 | FP - 0 | FN - 0
Darfur: TP - 0 | TN - 100 | FP - 0 | FN - 0
Darwin: TP - 0 | TN - 100 | FP - 0 | FN - 0
Dassel: TP - 0 | TN - 93 | FP - 7 | FN - 0
Dawson: TP - 0 | TN - 95 | FP - 5 | FN - 0
Dayton: TP - 0 | TN - 5 | FP - 95 | FN - 0
De Graff: TP - 0 | TN - 100 | FP - 0 | FN - 0
Deephaven: TP - 0 | TN - 17 | FP - 83 | FN - 0
Deer Creek: TP - 0 | TN - 100 | FP - 0 | FN - 0
Deer River: TP - 0 | TN - 100 | FP - 0 | FN - 0
Deerwood: TP - 0 | TN - 99 | FP - 1 | FN - 0
Delano: TP - 0 | TN - 36 | FP - 64 | FN - 0
Delavan: TP - 0 | TN - 100 | FP - 0 | FN - 0
Delhi: TP - 0 | TN - 100 | FP - 0 | FN - 0
Dellwood: TP - 22 | TN - 0 | FP - 0 | FN - 78
Denham: TP - 0 | TN 

Herman: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hermantown: TP - 0 | TN - 75 | FP - 25 | FN - 0
Heron Lake: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hewitt: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hibbing: TP - 0 | TN - 83 | FP - 17 | FN - 0
Hill City: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hillman: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hills: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hilltop: TP - 0 | TN - 29 | FP - 71 | FN - 0
Hinckley: TP - 0 | TN - 97 | FP - 3 | FN - 0
Hitterdal: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hoffman: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hokah: TP - 0 | TN - 99 | FP - 1 | FN - 0
Holdingford: TP - 0 | TN - 100 | FP - 0 | FN - 0
Holland: TP - 0 | TN - 99 | FP - 1 | FN - 0
Hollandale: TP - 0 | TN - 98 | FP - 2 | FN - 0
Holloway: TP - 0 | TN - 100 | FP - 0 | FN - 0
Holt: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hopkins: TP - 0 | TN - 0 | FP - 100 | FN - 0
Houston: TP - 0 | TN - 99 | FP - 1 | FN - 0
Howard Lake: TP - 0 | TN - 90 | FP - 10 | FN - 0
Hoyt Lakes: TP - 0 | TN - 99 | FP - 1 | FN - 0

Monticello: TP - 0 | TN - 22 | FP - 78 | FN - 0
Montrose: TP - 0 | TN - 66 | FP - 34 | FN - 0
Moorhead: TP - 0 | TN - 75 | FP - 25 | FN - 0
Moose Lake: TP - 0 | TN - 92 | FP - 8 | FN - 0
Mora: TP - 0 | TN - 89 | FP - 11 | FN - 0
Morgan: TP - 0 | TN - 99 | FP - 1 | FN - 0
Morris: TP - 0 | TN - 95 | FP - 5 | FN - 0
Morristown: TP - 0 | TN - 97 | FP - 3 | FN - 0
Morton: TP - 0 | TN - 98 | FP - 2 | FN - 0
Motley: TP - 0 | TN - 100 | FP - 0 | FN - 0
Mound: TP - 0 | TN - 15 | FP - 85 | FN - 0
Mounds View: TP - 0 | TN - 0 | FP - 100 | FN - 0
Mountain Iron: TP - 0 | TN - 99 | FP - 1 | FN - 0
Mountain Lake: TP - 0 | TN - 96 | FP - 4 | FN - 0
Murdock: TP - 0 | TN - 100 | FP - 0 | FN - 0
Myrtle: TP - 0 | TN - 100 | FP - 0 | FN - 0
Nashua: TP - 0 | TN - 100 | FP - 0 | FN - 0
Nashwauk: TP - 0 | TN - 100 | FP - 0 | FN - 0
Nassau: TP - 0 | TN - 100 | FP - 0 | FN - 0
Nelson: TP - 0 | TN - 100 | FP - 0 | FN - 0
Nerstrand: TP - 0 | TN - 98 | FP - 2 | FN - 0
Nevis: TP - 0 | TN - 99 | FP - 1 | FN - 0
New 

Seaforth: TP - 0 | TN - 100 | FP - 0 | FN - 0
Sebeka: TP - 0 | TN - 99 | FP - 1 | FN - 0
Sedan: TP - 0 | TN - 100 | FP - 0 | FN - 0
Shafer: TP - 0 | TN - 88 | FP - 12 | FN - 0
Shakopee: TP - 0 | TN - 0 | FP - 100 | FN - 0
Shelly: TP - 0 | TN - 100 | FP - 0 | FN - 0
Sherburn: TP - 0 | TN - 98 | FP - 2 | FN - 0
Shevlin: TP - 0 | TN - 100 | FP - 0 | FN - 0
Shoreview: TP - 100 | TN - 0 | FP - 0 | FN - 0
Shorewood: TP - 0 | TN - 12 | FP - 88 | FN - 0
Silver Bay: TP - 0 | TN - 98 | FP - 2 | FN - 0
Silver Lake: TP - 0 | TN - 95 | FP - 5 | FN - 0
Skyline: TP - 0 | TN - 100 | FP - 0 | FN - 0
Slayton: TP - 0 | TN - 98 | FP - 2 | FN - 0
Sleepy Eye: TP - 0 | TN - 93 | FP - 7 | FN - 0
Sobieski: TP - 0 | TN - 99 | FP - 1 | FN - 0
Solway: TP - 0 | TN - 100 | FP - 0 | FN - 0
South Haven: TP - 0 | TN - 100 | FP - 0 | FN - 0
South Saint Paul: TP - 0 | TN - 0 | FP - 100 | FN - 0
Spicer: TP - 0 | TN - 97 | FP - 3 | FN - 0
Spring Grove: TP - 0 | TN - 99 | FP - 1 | FN - 0
Spring Hill: TP - 0 | TN - 99 | FP 

In [46]:
# Confusion matrix for alpha = 1.75
# Ground truth comes from BMSBDataClean_Filtered; the counts are written to the
# TP / TN / FP / FN fields of MN_Cities_175.
confusion_matrix(
    bmsb_data = os.path.join(file_gdb,'BMSBDataClean_Filtered'), 
    sim_data = os.path.join(file_gdb,'MN_Cities_175'), 
    bmsb_fields = ['City'], 
    sim_fields = ['City','TP','TN','FP','FN']
)

Creating confusion matrix...
Ada: TP - 0 | TN - 100 | FP - 0 | FN - 0
Adams: TP - 0 | TN - 99 | FP - 1 | FN - 0
Adrian: TP - 0 | TN - 99 | FP - 1 | FN - 0
Afton: TP - 0 | TN - 59 | FP - 41 | FN - 0
Aitkin: TP - 0 | TN - 97 | FP - 3 | FN - 0
Akeley: TP - 0 | TN - 100 | FP - 0 | FN - 0
Albany: TP - 0 | TN - 96 | FP - 4 | FN - 0
Albert Lea: TP - 0 | TN - 81 | FP - 19 | FN - 0
Alberta: TP - 0 | TN - 100 | FP - 0 | FN - 0
Albertville: TP - 0 | TN - 32 | FP - 68 | FN - 0
Alden: TP - 0 | TN - 99 | FP - 1 | FN - 0
Aldrich: TP - 0 | TN - 100 | FP - 0 | FN - 0
Alexandria: TP - 0 | TN - 94 | FP - 6 | FN - 0
Alpha: TP - 0 | TN - 100 | FP - 0 | FN - 0
Altura: TP - 0 | TN - 100 | FP - 0 | FN - 0
Alvarado: TP - 0 | TN - 100 | FP - 0 | FN - 0
Amboy: TP - 0 | TN - 99 | FP - 1 | FN - 0
Andover: TP - 0 | TN - 0 | FP - 100 | FN - 0
Annandale: TP - 0 | TN - 92 | FP - 8 | FN - 0
Anoka: TP - 0 | TN - 1 | FP - 99 | FN - 0
Apple Valley: TP - 0 | TN - 0 | FP - 100 | FN - 0
Appleton: TP - 0 | TN - 98 | FP - 2 | 

Crystal: TP - 0 | TN - 0 | FP - 100 | FN - 0
Currie: TP - 0 | TN - 100 | FP - 0 | FN - 0
Cuyuna: TP - 0 | TN - 99 | FP - 1 | FN - 0
Cyrus: TP - 0 | TN - 100 | FP - 0 | FN - 0
Dakota: TP - 0 | TN - 100 | FP - 0 | FN - 0
Dalton: TP - 0 | TN - 100 | FP - 0 | FN - 0
Danube: TP - 0 | TN - 99 | FP - 1 | FN - 0
Danvers: TP - 0 | TN - 100 | FP - 0 | FN - 0
Darfur: TP - 0 | TN - 100 | FP - 0 | FN - 0
Darwin: TP - 0 | TN - 100 | FP - 0 | FN - 0
Dassel: TP - 0 | TN - 94 | FP - 6 | FN - 0
Dawson: TP - 0 | TN - 97 | FP - 3 | FN - 0
Dayton: TP - 0 | TN - 16 | FP - 84 | FN - 0
De Graff: TP - 0 | TN - 100 | FP - 0 | FN - 0
Deephaven: TP - 0 | TN - 25 | FP - 75 | FN - 0
Deer Creek: TP - 0 | TN - 100 | FP - 0 | FN - 0
Deer River: TP - 0 | TN - 98 | FP - 2 | FN - 0
Deerwood: TP - 0 | TN - 98 | FP - 2 | FN - 0
Delano: TP - 0 | TN - 42 | FP - 58 | FN - 0
Delavan: TP - 0 | TN - 99 | FP - 1 | FN - 0
Delhi: TP - 0 | TN - 100 | FP - 0 | FN - 0
Dellwood: TP - 33 | TN - 0 | FP - 0 | FN - 67
Denham: TP - 0 | TN -

Herman: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hermantown: TP - 0 | TN - 88 | FP - 12 | FN - 0
Heron Lake: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hewitt: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hibbing: TP - 0 | TN - 95 | FP - 5 | FN - 0
Hill City: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hillman: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hills: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hilltop: TP - 0 | TN - 24 | FP - 76 | FN - 0
Hinckley: TP - 0 | TN - 98 | FP - 2 | FN - 0
Hitterdal: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hoffman: TP - 0 | TN - 99 | FP - 1 | FN - 0
Hokah: TP - 0 | TN - 99 | FP - 1 | FN - 0
Holdingford: TP - 0 | TN - 100 | FP - 0 | FN - 0
Holland: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hollandale: TP - 0 | TN - 99 | FP - 1 | FN - 0
Holloway: TP - 0 | TN - 99 | FP - 1 | FN - 0
Holt: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hopkins: TP - 0 | TN - 0 | FP - 100 | FN - 0
Houston: TP - 0 | TN - 98 | FP - 2 | FN - 0
Howard Lake: TP - 0 | TN - 88 | FP - 12 | FN - 0
Hoyt Lakes: TP - 0 | TN - 100 | FP - 0 | FN - 0


Monticello: TP - 0 | TN - 39 | FP - 61 | FN - 0
Montrose: TP - 0 | TN - 75 | FP - 25 | FN - 0
Moorhead: TP - 0 | TN - 85 | FP - 15 | FN - 0
Moose Lake: TP - 0 | TN - 97 | FP - 3 | FN - 0
Mora: TP - 0 | TN - 91 | FP - 9 | FN - 0
Morgan: TP - 0 | TN - 100 | FP - 0 | FN - 0
Morris: TP - 0 | TN - 98 | FP - 2 | FN - 0
Morristown: TP - 0 | TN - 99 | FP - 1 | FN - 0
Morton: TP - 0 | TN - 100 | FP - 0 | FN - 0
Motley: TP - 0 | TN - 100 | FP - 0 | FN - 0
Mound: TP - 0 | TN - 10 | FP - 90 | FN - 0
Mounds View: TP - 0 | TN - 0 | FP - 100 | FN - 0
Mountain Iron: TP - 0 | TN - 98 | FP - 2 | FN - 0
Mountain Lake: TP - 0 | TN - 99 | FP - 1 | FN - 0
Murdock: TP - 0 | TN - 100 | FP - 0 | FN - 0
Myrtle: TP - 0 | TN - 100 | FP - 0 | FN - 0
Nashua: TP - 0 | TN - 100 | FP - 0 | FN - 0
Nashwauk: TP - 0 | TN - 100 | FP - 0 | FN - 0
Nassau: TP - 0 | TN - 100 | FP - 0 | FN - 0
Nelson: TP - 0 | TN - 100 | FP - 0 | FN - 0
Nerstrand: TP - 0 | TN - 95 | FP - 5 | FN - 0
Nevis: TP - 0 | TN - 99 | FP - 1 | FN - 0
New

Seaforth: TP - 0 | TN - 100 | FP - 0 | FN - 0
Sebeka: TP - 0 | TN - 100 | FP - 0 | FN - 0
Sedan: TP - 0 | TN - 100 | FP - 0 | FN - 0
Shafer: TP - 0 | TN - 97 | FP - 3 | FN - 0
Shakopee: TP - 0 | TN - 0 | FP - 100 | FN - 0
Shelly: TP - 0 | TN - 100 | FP - 0 | FN - 0
Sherburn: TP - 0 | TN - 99 | FP - 1 | FN - 0
Shevlin: TP - 0 | TN - 100 | FP - 0 | FN - 0
Shoreview: TP - 100 | TN - 0 | FP - 0 | FN - 0
Shorewood: TP - 0 | TN - 11 | FP - 89 | FN - 0
Silver Bay: TP - 0 | TN - 100 | FP - 0 | FN - 0
Silver Lake: TP - 0 | TN - 96 | FP - 4 | FN - 0
Skyline: TP - 0 | TN - 99 | FP - 1 | FN - 0
Slayton: TP - 0 | TN - 99 | FP - 1 | FN - 0
Sleepy Eye: TP - 0 | TN - 93 | FP - 7 | FN - 0
Sobieski: TP - 0 | TN - 100 | FP - 0 | FN - 0
Solway: TP - 0 | TN - 100 | FP - 0 | FN - 0
South Haven: TP - 0 | TN - 100 | FP - 0 | FN - 0
South Saint Paul: TP - 0 | TN - 0 | FP - 100 | FN - 0
Spicer: TP - 0 | TN - 97 | FP - 3 | FN - 0
Spring Grove: TP - 0 | TN - 99 | FP - 1 | FN - 0
Spring Hill: TP - 0 | TN - 100 | F

In [67]:
# Confusion matrix for alpha = 2
# Ground truth comes from BMSBDataClean_Filtered; the counts are written to the
# TP / TN / FP / FN fields of MN_Cities_2.
confusion_matrix(
    bmsb_data = os.path.join(file_gdb,'BMSBDataClean_Filtered'), 
    sim_data = os.path.join(file_gdb,'MN_Cities_2'), 
    bmsb_fields = ['City'], 
    sim_fields = ['City','TP','TN','FP','FN']
)

Creating confusion matrix...
Ada: TP - 0 | TN - 99 | FP - 1 | FN - 0
Adams: TP - 0 | TN - 100 | FP - 0 | FN - 0
Adrian: TP - 0 | TN - 100 | FP - 0 | FN - 0
Afton: TP - 0 | TN - 61 | FP - 39 | FN - 0
Aitkin: TP - 0 | TN - 96 | FP - 4 | FN - 0
Akeley: TP - 0 | TN - 99 | FP - 1 | FN - 0
Albany: TP - 0 | TN - 96 | FP - 4 | FN - 0
Albert Lea: TP - 0 | TN - 82 | FP - 18 | FN - 0
Alberta: TP - 0 | TN - 100 | FP - 0 | FN - 0
Albertville: TP - 0 | TN - 26 | FP - 74 | FN - 0
Alden: TP - 0 | TN - 100 | FP - 0 | FN - 0
Aldrich: TP - 0 | TN - 100 | FP - 0 | FN - 0
Alexandria: TP - 0 | TN - 93 | FP - 7 | FN - 0
Alpha: TP - 0 | TN - 100 | FP - 0 | FN - 0
Altura: TP - 0 | TN - 100 | FP - 0 | FN - 0
Alvarado: TP - 0 | TN - 100 | FP - 0 | FN - 0
Amboy: TP - 0 | TN - 99 | FP - 1 | FN - 0
Andover: TP - 0 | TN - 0 | FP - 100 | FN - 0
Annandale: TP - 0 | TN - 86 | FP - 14 | FN - 0
Anoka: TP - 0 | TN - 0 | FP - 100 | FN - 0
Apple Valley: TP - 0 | TN - 0 | FP - 100 | FN - 0
Appleton: TP - 0 | TN - 99 | FP - 1

Crystal: TP - 0 | TN - 0 | FP - 100 | FN - 0
Currie: TP - 0 | TN - 100 | FP - 0 | FN - 0
Cuyuna: TP - 0 | TN - 99 | FP - 1 | FN - 0
Cyrus: TP - 0 | TN - 100 | FP - 0 | FN - 0
Dakota: TP - 0 | TN - 100 | FP - 0 | FN - 0
Dalton: TP - 0 | TN - 100 | FP - 0 | FN - 0
Danube: TP - 0 | TN - 100 | FP - 0 | FN - 0
Danvers: TP - 0 | TN - 100 | FP - 0 | FN - 0
Darfur: TP - 0 | TN - 100 | FP - 0 | FN - 0
Darwin: TP - 0 | TN - 100 | FP - 0 | FN - 0
Dassel: TP - 0 | TN - 99 | FP - 1 | FN - 0
Dawson: TP - 0 | TN - 98 | FP - 2 | FN - 0
Dayton: TP - 0 | TN - 8 | FP - 92 | FN - 0
De Graff: TP - 0 | TN - 100 | FP - 0 | FN - 0
Deephaven: TP - 0 | TN - 23 | FP - 77 | FN - 0
Deer Creek: TP - 0 | TN - 100 | FP - 0 | FN - 0
Deer River: TP - 0 | TN - 100 | FP - 0 | FN - 0
Deerwood: TP - 0 | TN - 100 | FP - 0 | FN - 0
Delano: TP - 0 | TN - 54 | FP - 46 | FN - 0
Delavan: TP - 0 | TN - 100 | FP - 0 | FN - 0
Delhi: TP - 0 | TN - 100 | FP - 0 | FN - 0
Dellwood: TP - 28 | TN - 0 | FP - 0 | FN - 72
Denham: TP - 0 | T

Herman: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hermantown: TP - 0 | TN - 76 | FP - 24 | FN - 0
Heron Lake: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hewitt: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hibbing: TP - 0 | TN - 94 | FP - 6 | FN - 0
Hill City: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hillman: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hills: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hilltop: TP - 0 | TN - 18 | FP - 82 | FN - 0
Hinckley: TP - 0 | TN - 94 | FP - 6 | FN - 0
Hitterdal: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hoffman: TP - 0 | TN - 99 | FP - 1 | FN - 0
Hokah: TP - 0 | TN - 100 | FP - 0 | FN - 0
Holdingford: TP - 0 | TN - 99 | FP - 1 | FN - 0
Holland: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hollandale: TP - 0 | TN - 100 | FP - 0 | FN - 0
Holloway: TP - 0 | TN - 100 | FP - 0 | FN - 0
Holt: TP - 0 | TN - 100 | FP - 0 | FN - 0
Hopkins: TP - 0 | TN - 0 | FP - 100 | FN - 0
Houston: TP - 0 | TN - 99 | FP - 1 | FN - 0
Howard Lake: TP - 0 | TN - 88 | FP - 12 | FN - 0
Hoyt Lakes: TP - 0 | TN - 98 | FP - 2 | FN - 0

Monticello: TP - 0 | TN - 37 | FP - 63 | FN - 0
Montrose: TP - 0 | TN - 78 | FP - 22 | FN - 0
Moorhead: TP - 0 | TN - 90 | FP - 10 | FN - 0
Moose Lake: TP - 0 | TN - 99 | FP - 1 | FN - 0
Mora: TP - 0 | TN - 93 | FP - 7 | FN - 0
Morgan: TP - 0 | TN - 96 | FP - 4 | FN - 0
Morris: TP - 0 | TN - 97 | FP - 3 | FN - 0
Morristown: TP - 0 | TN - 98 | FP - 2 | FN - 0
Morton: TP - 0 | TN - 98 | FP - 2 | FN - 0
Motley: TP - 0 | TN - 99 | FP - 1 | FN - 0
Mound: TP - 0 | TN - 20 | FP - 80 | FN - 0
Mounds View: TP - 0 | TN - 0 | FP - 100 | FN - 0
Mountain Iron: TP - 0 | TN - 100 | FP - 0 | FN - 0
Mountain Lake: TP - 0 | TN - 98 | FP - 2 | FN - 0
Murdock: TP - 0 | TN - 100 | FP - 0 | FN - 0
Myrtle: TP - 0 | TN - 100 | FP - 0 | FN - 0
Nashua: TP - 0 | TN - 100 | FP - 0 | FN - 0
Nashwauk: TP - 0 | TN - 99 | FP - 1 | FN - 0
Nassau: TP - 0 | TN - 100 | FP - 0 | FN - 0
Nelson: TP - 0 | TN - 100 | FP - 0 | FN - 0
Nerstrand: TP - 0 | TN - 100 | FP - 0 | FN - 0
Nevis: TP - 0 | TN - 99 | FP - 1 | FN - 0
New A

Seaforth: TP - 0 | TN - 100 | FP - 0 | FN - 0
Sebeka: TP - 0 | TN - 100 | FP - 0 | FN - 0
Sedan: TP - 0 | TN - 100 | FP - 0 | FN - 0
Shafer: TP - 0 | TN - 96 | FP - 4 | FN - 0
Shakopee: TP - 0 | TN - 0 | FP - 100 | FN - 0
Shelly: TP - 0 | TN - 100 | FP - 0 | FN - 0
Sherburn: TP - 0 | TN - 100 | FP - 0 | FN - 0
Shevlin: TP - 0 | TN - 100 | FP - 0 | FN - 0
Shoreview: TP - 100 | TN - 0 | FP - 0 | FN - 0
Shorewood: TP - 0 | TN - 12 | FP - 88 | FN - 0
Silver Bay: TP - 0 | TN - 99 | FP - 1 | FN - 0
Silver Lake: TP - 0 | TN - 99 | FP - 1 | FN - 0
Skyline: TP - 0 | TN - 99 | FP - 1 | FN - 0
Slayton: TP - 0 | TN - 98 | FP - 2 | FN - 0
Sleepy Eye: TP - 0 | TN - 94 | FP - 6 | FN - 0
Sobieski: TP - 0 | TN - 100 | FP - 0 | FN - 0
Solway: TP - 0 | TN - 100 | FP - 0 | FN - 0
South Haven: TP - 0 | TN - 98 | FP - 2 | FN - 0
South Saint Paul: TP - 0 | TN - 0 | FP - 100 | FN - 0
Spicer: TP - 0 | TN - 99 | FP - 1 | FN - 0
Spring Grove: TP - 0 | TN - 100 | FP - 0 | FN - 0
Spring Hill: TP - 0 | TN - 100 | F

In [62]:
# Calculating the presence of BMSB based on the counts of TP and FP

def presence(in_table, TP_field, FP_field, presence_field, n_sims = 100):
    """Write the fraction of simulations that predicted BMSB presence for each row.

    Parameters
    ----------
    in_table : table or feature class to update.
    TP_field, FP_field : names of the true positive and false positive count fields.
    presence_field : name of the field that receives the result.
    n_sims : number of simulations the counts were tallied over (the denominator).

    The value written is ``(TP + FP) / n_sims``. When one of the two counts is 0 this
    is the same as dividing the other count by ``n_sims``; a row where both are
    non-zero now gets a value as well instead of falling through to null.
    Rows with a null count get a null result. Returns None.
    """

    print('Calculating presence...')

    # Function evaluated by CalculateField for every row
    code_block = '\n'.join([
        'def presence(TP, FP):',
        '    if TP is None or FP is None:',
        '        return None',
        f'    return (TP + FP) / {n_sims!r}',
    ])

    arcpy.management.CalculateField(
        in_table = in_table,
        field = presence_field,
        expression = f'presence(!{TP_field}!,!{FP_field}!)',
        expression_type = 'PYTHON3',
        code_block = code_block,
        # Only used by CalculateField when presence_field does not exist yet
        field_type = 'DOUBLE'
    )

    print('Done')

In [63]:
# Presence fraction for the alpha = 1.5 model
# NOTE(review): bare dataset name (no file_gdb path) - presumably resolved through the
# active map or current workspace; confirm it is the intended feature class.
presence(
    in_table = 'MN_Cities_15',
    TP_field = 'TP',
    FP_field = 'FP',
    presence_field = 'Presence'
)

Calculating presence...
Done


In [64]:
# Presence fraction for the alpha = 1.75 model
presence(
    in_table = os.path.join(file_gdb,'MN_Cities_175'),
    TP_field = 'TP',
    FP_field = 'FP',
    presence_field = 'Presence'
)

Calculating presence...
Done


In [68]:
# Presence fraction for the alpha = 2 model
presence(
    in_table = os.path.join(file_gdb,'MN_Cities_2'),
    TP_field = 'TP',
    FP_field = 'FP',
    presence_field = 'Presence'
)

Calculating presence...
Done


## FIND BEST MODEL

- In this case, the confusion matrix accuracy and RMSE values will be used to find the best model

In [60]:
def accuracy(in_table, TP_field, TN_field, FP_field, FN_field, accuracy_field):
    """Write the confusion matrix accuracy, (TP + TN) / (TP + TN + FP + FN), for each row.

    Parameters
    ----------
    in_table : table or feature class to update.
    TP_field, TN_field, FP_field, FN_field : names of the four count fields.
    accuracy_field : name of the field that receives the result.

    Rows where any count is null, or where all four counts are 0, get a null
    accuracy instead of raising inside the field calculation. Returns None.
    """

    print('Calculating accuracy...')

    # Function evaluated by CalculateField for every row
    code_block = '\n'.join([
        'def accuracy(TP, TN, FP, FN):',
        '    counts = (TP, TN, FP, FN)',
        '    if any(count is None for count in counts):',
        '        return None',
        '    total = sum(counts)',
        '    if total == 0:',
        '        return None',
        '    return (TP + TN) / total',
    ])

    arcpy.management.CalculateField(
        in_table = in_table,
        field = accuracy_field,
        expression = f'accuracy(!{TP_field}!,!{TN_field}!,!{FP_field}!,!{FN_field}!)',
        expression_type = 'PYTHON3',
        code_block = code_block,
        # Only used by CalculateField when accuracy_field does not exist yet
        field_type = 'DOUBLE'
    )

    print('Done')

In [53]:
# Accuracy for the alpha = 1.5 model
# NOTE(review): bare dataset name (no file_gdb path) - presumably resolved through the
# active map or current workspace; confirm it is the intended feature class.
accuracy(
    in_table = 'MN_Cities_15',
    TP_field = 'TP',
    TN_field = 'TN',
    FP_field = 'FP',
    FN_field = 'FN',
    accuracy_field = 'Accuracy'
)

Calculating accuracy...
Done


In [54]:
# Accuracy for the alpha = 1.75 model
accuracy(
    in_table = os.path.join(file_gdb,'MN_Cities_175'),
    TP_field = 'TP',
    TN_field = 'TN',
    FP_field = 'FP',
    FN_field = 'FN',
    accuracy_field = 'Accuracy'
)

Calculating accuracy...
Done


In [69]:
# Accuracy for the alpha = 2 model
accuracy(
    in_table = os.path.join(file_gdb,'MN_Cities_2'),
    TP_field = 'TP',
    TN_field = 'TN',
    FP_field = 'FP',
    FN_field = 'FN',
    accuracy_field = 'Accuracy'
)

Calculating accuracy...
Done


###  Determine which model is better!!

* Mean Accuracy Value indicates the average accuracy of the model in predicting the presence or absence of BMSB, averaged over all cities. Higher is better.
* Mean True Positive Value represents the average number of true positive predictions per city (out of 100 simulations), i.e., correct predictions of BMSB presence. Higher is better.
* Mean False Positive Value represents the average number of false positive predictions per city (out of 100 simulations), i.e., predictions of BMSB presence where it has not been observed. Lower is better.

In [70]:
def confusion_matrix_accuracy(in_table, accuracy_field, TP_field, FP_field):
    """Summarise a model's confusion matrix with mean statistics and print them.

    Parameters
    ----------
    in_table : table or feature class holding the per-city confusion matrix fields.
    accuracy_field, TP_field, FP_field : names of the fields to average.

    A statistics table named ``in_table + '_stats'`` is created with the MEAN of each
    of the three fields; its ``MEAN_<field>`` values are then read back and printed.
    Returns None.
    """

    print('Creating confusion matrix accuracy statistics table...')

    stats_table = in_table + '_stats'
    summarised_fields = [accuracy_field, TP_field, FP_field]

    # One MEAN statistic per field: accuracy, true positives, false positives
    arcpy.analysis.Statistics(
        in_table = in_table,
        out_table = stats_table,
        statistics_fields = [[field_name, 'MEAN'] for field_name in summarised_fields]
    )

    # Read the means back from the statistics table
    mean_fields = ['MEAN_' + field_name for field_name in summarised_fields]
    with arcpy.da.SearchCursor(in_table = stats_table, field_names = mean_fields) as cursor:
        for mean_accuracy, mean_tp, mean_fp in cursor:
            print(f'Mean accuracy value: {mean_accuracy} | Mean true positive value: {mean_tp} | Mean false positive value: {mean_fp}')

    print('Done')

In [71]:
# Mean accuracy, true positives and false positives for the alpha = 1.5 model
# NOTE(review): bare dataset name (no file_gdb path) - presumably resolved through the
# active map or current workspace; confirm it is the intended feature class.
confusion_matrix_accuracy(
    in_table = 'MN_Cities_15',
    accuracy_field = 'Accuracy',
    TP_field = 'TP',
    FP_field = 'FP'
)

Creating confusion matrix accuracy statistics table...
Mean accuracy value: 0.8643676814988317 | Mean true positive value: 3.858313817330211 | Mean false positive value: 12.386416861826698
Done


In [72]:
# Mean accuracy, true positives and false positives for the alpha = 1.75 model
confusion_matrix_accuracy(
    in_table = os.path.join(file_gdb,'MN_Cities_175'),
    accuracy_field = 'Accuracy',
    TP_field = 'TP',
    FP_field = 'FP'
)

Creating confusion matrix accuracy statistics table...
Mean accuracy value: 0.8729976580796283 | Mean true positive value: 3.6451990632318503 | Mean false positive value: 11.310304449648712
Done


In [73]:
# Mean accuracy, true positives and false positives for the alpha = 2 model
confusion_matrix_accuracy(
    in_table = os.path.join(file_gdb,'MN_Cities_2'),
    accuracy_field = 'Accuracy',
    TP_field = 'TP',
    FP_field = 'FP'
)

Creating confusion matrix accuracy statistics table...
Mean accuracy value: 0.8735831381733039 | Mean true positive value: 3.649882903981265 | Mean false positive value: 11.256440281030445
Done


## RANKING ASSESSMENT

- The Huff Model with an alpha value of 2 is the most accurate according to the accuracy assessment (mean accuracy ≈ 0.8736, versus ≈ 0.8730 for alpha = 1.75 and ≈ 0.8644 for alpha = 1.5)
    - Ranking will be done with this model
- Areas that already have traps nearby are a lower priority than areas without traps
- False positives are the most important confusion matrix value here
    - BMSB are likely present in these areas but have not yet been identified

In [105]:
def priority_set_up(target_features, join_features, search_radius = '5 Miles'):
    """Add a nearby-trap count ("Traps") and an empty "Priority" field to the city features.

    Parameters
    ----------
    target_features : city feature class (or layer) to update; must have a 'City' field.
    join_features : BMSB point features counted around each city.
    search_radius : distance passed to the spatial join; defaults to the
        '5 Miles' value this function previously hard-coded.

    Steps: spatially join ``join_features`` to the cities within ``search_radius``,
    rename the resulting 'Join_Count' field to 'Traps', join 'Traps' back onto
    ``target_features`` by 'City', delete the scratch feature class, then add a
    SHORT 'Priority' field. Returns None.
    """

    # Name the scratch output after the target dataset so each model gets its own.
    # (The previous slice, target_features[-1:0], always produced an empty suffix.)
    dataset_name = os.path.splitext(os.path.basename(target_features))[0]
    near_bmsb = os.path.join(file_gdb,'Cities_Near_BMSB_' + dataset_name)

    try:
        # Count the BMSB features found within search_radius of each city
        arcpy.analysis.SpatialJoin(
            target_features = target_features,
            join_features = join_features,
            out_feature_class = near_bmsb,
            match_option = 'WITHIN_A_DISTANCE',
            search_radius = search_radius
        )

        # Change "Join_Count" field to "Traps" to indicate the number of traps close to the city
        arcpy.management.AlterField(
            in_table = near_bmsb,
            field = 'Join_Count',
            new_field_name = 'Traps',
            new_field_alias = 'Traps'
        )

        # Join "Traps" field to original feature class
        arcpy.management.JoinField(
            in_data = target_features,
            in_field = 'City',
            join_table = near_bmsb,
            join_field = 'City',
            fields = 'Traps'
        )
    finally:
        # The scratch feature class is no longer needed, whether or not the steps above succeeded
        if arcpy.Exists(near_bmsb):
            arcpy.management.Delete(
                in_data = near_bmsb
            )

    # Add the priority field (no rank field is added here)
    arcpy.management.AddField(
        in_table = target_features,
        field_name = 'Priority',
        field_type = 'SHORT'
    )

In [106]:
def rank_and_priority(in_table, FP_field, traps_field, rank_field, priority_field, zero_trap_divisor = 0.1):
    """Compute a rank index per city and number the cities in descending rank order.

    Parameters
    ----------
    in_table : city table or feature class holding the false positive and trap counts.
    FP_field : name of the false positive count field.
    traps_field : name of the nearby-trap count field.
    rank_field : name of the field that receives the rank index.
    priority_field : name of the field that receives the priority number.
    zero_trap_divisor : divisor used in place of the trap count when a city has no
        traps (0 or null); defaults to the 0.1 value this function previously hard-coded.

    The rank index is ``FP / traps`` (more false positives raise it, more traps lower
    it). ``in_table`` is then sorted by rank, descending, into ``in_table + '_ranked'``
    and that copy's priority field is filled with 1, 2, 3, ... in row order.
    Rows with a null false positive count get a null rank. Returns None.
    """

    print('Calculating rank index...')

    # Function evaluated by CalculateField for every row
    rank_code = '\n'.join([
        'def rank_index(traps, FP):',
        '    if FP is None:',
        '        return None',
        '    if not traps:',
        f'        return FP / {zero_trap_divisor!r}',
        '    return FP / traps',
    ])

    # Calculate rank through number of false positives and number of traps
    arcpy.management.CalculateField(
        in_table = in_table,
        field = rank_field,
        expression = f'rank_index(!{traps_field}!,!{FP_field}!)',
        expression_type = 'PYTHON3',
        code_block = rank_code,
        # Only used by CalculateField when rank_field does not exist yet; a numeric
        # field keeps the sort below numeric rather than alphabetical
        field_type = 'DOUBLE'
    )

    print('Ranking done')

    print('Determining city priority...')

    ranked_table = in_table + '_ranked'

    # Sort by rank
    arcpy.management.Sort(
        in_dataset = in_table,
        out_dataset = ranked_table,
        sort_field = [[rank_field,'Descending']]
    )

    # Set priorities for adding BMSB traps for cities: 1 for the first row of the sorted copy
    with arcpy.da.UpdateCursor(in_table = ranked_table, field_names = [priority_field]) as cursor:
        for priority, row in enumerate(cursor, start = 1):
            row[0] = priority
            cursor.updateRow(row)

    print('Priority determination complete - see "' + in_table + '_ranked"')

In [116]:
# Model 3 is preferred: alpha value of 2
priority_set_up(
    target_features = os.path.join(file_gdb,'MN_Cities_2'),
    join_features = os.path.join(file_gdb,'BMSB_sightings')
)

In [117]:
# BMSB trap priority ranking of MN cities, Huff Model with alpha = 2
rank_and_priority(
    in_table = os.path.join(file_gdb,'MN_Cities_2'),
    FP_field = 'FP',
    traps_field = 'Traps',
    rank_field = 'Rank',
    priority_field = 'Priority'
)

Calculating rank index...
Ranking done
Determining city priority...
Priority determination complete - see "F:\1. UMN MGIS\1. Semesters\4th Semester\1. ArcGIS II\4. Project\ArcPro Files\ArcII_Final Project\ArcII_Final Project.gdb\MN_Cities_2_ranked"


In [94]:
# Trap counts and priority field for the Huff Model with alpha = 1.5
# NOTE(review): bare dataset name (no file_gdb path) - presumably resolved through the
# active map or current workspace; confirm it is the intended feature class.
priority_set_up(
    target_features = 'MN_Cities_15',
    join_features = os.path.join(file_gdb,'BMSB_sightings')
)

In [95]:
# BMSB trap priority ranking of MN cities, Huff Model with alpha = 1.5
# NOTE(review): bare dataset name (no file_gdb path) - presumably resolved through the
# active map or current workspace; confirm it is the intended feature class.
rank_and_priority(
    in_table = 'MN_Cities_15',
    FP_field = 'FP',
    traps_field = 'Traps',
    rank_field = 'Rank',
    priority_field = 'Priority'
)

Calculating rank index...
Ranking done
Determining city priority...
Priority determination complete - see "MN_Cities_15_ranked"


In [96]:
# Trap counts and priority field for the Huff Model with alpha = 1.75
priority_set_up(
    target_features = os.path.join(file_gdb,'MN_Cities_175'),
    join_features = os.path.join(file_gdb,'BMSB_sightings')
)

In [97]:
# BMSB trap priority ranking of MN cities, Huff Model with alpha = 1.75
rank_and_priority(
    in_table = os.path.join(file_gdb,'MN_Cities_175'),
    FP_field = 'FP',
    traps_field = 'Traps',
    rank_field = 'Rank',
    priority_field = 'Priority'
)

Calculating rank index...
Ranking done
Determining city priority...
Priority determination complete - see "F:\1. UMN MGIS\1. Semesters\4th Semester\1. ArcGIS II\4. Project\ArcPro Files\ArcII_Final Project\ArcII_Final Project.gdb\MN_Cities_175_ranked"
