#### Import packages and change arcpy defaults

In [1]:
from pyinaturalist import *
import pandas as pd
import arcpy

arcpy.env.overwriteOutput = True

ModuleNotFoundError: No module named 'pyinaturalist'

#### Import observations from inat API
Change all four parameters each time you run it for a different species; a few examples are included below. 
The code parameter is the first two letters of each word in the plant's scientific name.
Find the iNaturalist taxon id on wikipedia or https://www.inaturalist.org/taxa/47126-Plantae


In [135]:
# name = 'blackoak'
# code = 'QUKE'
# species = 'Quercus kelloggii'
# taxon = 49919

# name = 'soaproot'
# code = 'CHPO'
# species = 'Chlorogalum pomeridianum'
# taxon = 47597

# name = 'blackberry'
# code = 'RUUR'
# species = 'Rubus ursinus'
# taxon = 53445

# Specify criteria for observations
# https://pyinaturalist.readthedocs.io/en/latest/modules/pyinaturalist.v1.observations.html#pyinaturalist.v1.observations.get_observations
# my area = is roughly the AMLT stewardship area, change the four coordinate bounds to bound your study area
response = get_observations(taxon_id=taxon, swlat = 36.077540, swlng = -122.605514, nelat = 37.381013, nelng = -120.292403, page='all', per_page=200, quality_grade='research')
# response

This request is larger than recommended for API usage. For bulk requests, consider using the iNat export tool instead: https://www.inaturalist.org/observations/export


MemoryError: 

#### View the results in a pandas dataframe

In [124]:
# Select results from JSON Data, and normalize them
# This flattens the nested iNat data structure and converts to pandas dataframe
data = response['results']
df = pd.json_normalize(data)

# Split the location data into latitude and Longitude columns
# this is the format of location col [38.8418469038, -123.0608224869]
df['lat'] = df['location'].apply(lambda x:x[0])
df['lon'] = df['location'].apply(lambda x:x[1])

# Subset dataframe to include only the fields of interest
df = df[['lat','lon','quality_grade','time_observed_at','scientific_name','common_name','positional_accuracy','public_positional_accuracy']]

# Show the first 8 lines of the dataframe below cell
df.head(8)

Unnamed: 0,lat,lon,quality_grade,time_observed_at,species_guess,positional_accuracy,public_positional_accuracy
0,37.181496,-121.845535,research,2009-12-31T14:27:00-08:00,Big Berry Manzanita,,
1,37.164092,-122.063336,research,,Bonny Doon manzanita,,28447.0
2,37.111142,-122.126218,research,2011-05-08T09:45:49-07:00,Santa Cruz Manzanita,,28447.0
3,37.003917,-122.137798,research,2011-05-08T09:52:17-07:00,Bonny Doon manzanita,,28447.0
4,37.055492,-122.143636,research,2011-05-08T10:05:05-07:00,Brittleleaf Manzanita,,
5,37.055561,-122.143207,research,2011-05-08T10:06:39-07:00,glossyleaf manzanita,,
6,37.147297,-121.77626,research,2011-09-03T10:19:18-07:00,Big Berry Manzanita,5.0,5.0
7,36.665211,-121.735924,research,,toro manzanita,,28505.0


#### Define a function to convert the dataframe into a feature class for use in ArcGIS Pro

In [125]:
# This is a user-defined function to push dataframe into arcgis feature class
def addData(fc, occurrences):
    arcpy.management.DeleteRows(fc)
    cursor = arcpy.da.InsertCursor(fc, ["SHAPE@XY","lat","lon","quality_grade", "time_observed_at","species_guess","positional_accuracy","public_positional_accuracy"])

    for index, row in occurrences.iterrows():
        xy = (row['lon'],row['lat'])
        cursor.insertRow([xy,row['lat'], row['lon'], row['quality_grade'], row['time_observed_at'], row['species_guess'], row['positional_accuracy'],row['public_positional_accuracy']])
    del cursor

#### Create a feature class with the correct fields to hold the iNat data

In [126]:
# Define your workspace and geodatabase
workspace = "D:/1_AMLT/2_Ethnobotanical_Data_Pro/iNaturalist/"

# If file geodatabase already exists, name of file geodatabase
filegdbname = "iNat.gdb"
filegdb = "%s%s" % (workspace, filegdbname)

# # if file geodatabase does not exist, creates file geodatabase
# if (arcpy.Exists(filegdb)) == False:
#     arcpy.management.CreateFileGDB(workspace, filegdbname[:-4], "CURRENT")

# We are creating a feature class within the geodatabase to add the iNat data to
fc = name # ie "blackoak"
fcPath = "%s/%s" % (filegdb, fc)

# If feature class does not exist, create feature class
if (arcpy.Exists(fcPath)) == False:
    arcpy.management.CreateFeatureclass(filegdb, fc, "POINT", None, "DISABLED", "DISABLED", 'GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]];-400 -400 1000000000;-100000 10000;-100000 10000;8.98315284119521E-09;0.001;0.001;IsHighPrecision', '', 0, 0, 0, '')

    # Fields to create in feature class
    fields = ("lat DOUBLE # # # #;" +
        "lon DOUBLE # # # #;" +
        "quality_grade TEXT # 255 # #;" +
        "time_observed_at TEXT # 255 # #;" +
        "species_guess TEXT # 255 # #;" +
        "positional_accuracy DOUBLE # # # #;" +
        "public_positional_accuracy DOUBLE # # # #")

    arcpy.management.AddFields(name, fields) 


#### Add iNat data from the dataframe to the newly created feature class

In [127]:
addData(fcPath, df)

ARCT


### Processing iNat data for use in SAHM

#### Project and clip iNat data

In [128]:
print(code, name)
# Project to my SDM projection and clip to study area
output = "D:/1_AMLT/1_SDM/SDM_Pro/SDM.gdb/iNat_" + code
area = "D:/1_AMLT/1_SDM/SDM_Pro/SDM.gdb/BoundingBox" 
outsr = arcpy.Describe(area).spatialReference

arcpy.management.Project(fcPath, output, outsr)

# # Clip to study area boundary if necessary
# inat = output + "_SDM"
# arcpy.analysis.Clip(output, area, inat)

# print number of records/points
result = arcpy.GetCount_management(output)
count = int(result.getOutput(0))
print('Total features in iNat database = ', count)

ARCT
Total features in iNat database =  3552


#### Get any points from my ethnobotanical data
Note: this will not work on your computer as it is a confidential dataset.
You may adapt this block to any private/local location datasets that you may have. 

In [129]:
print(code, name)
# this layer has to be in the active map
# select from the layer by species name
ethnobot = "All_EthnobotData_Merged_012521"

search = "Species LIKE '%" + species + "%'"
print(search)

arcpy.management.SelectLayerByAttribute(ethnobot, "NEW_SELECTION", search)

# export data
gdb = "D:/1_AMLT/1_SDM/SDM_Pro/SDM.gdb/"
out_name = "My_" + code + "_forSAHM"

print('Creating ' + out_name)
arcpy.conversion.FeatureClassToFeatureClass(ethnobot, gdb, out_name)

# clear selection
arcpy.management.SelectLayerByAttribute(ethnobot, "CLEAR_SELECTION")

# print number of records/points
result = arcpy.GetCount_management(out_name)
count = int(result.getOutput(0))
print('Total features in my database = ', count)

ARCT
Species LIKE '%Arctostaphylos%'
Creating My_ARCT_forSAHM
Total features in ethnobot database =  7


#### Merge iNat and personal data
And print the total number of location points

In [130]:
print(code)
# merge the two into one in SDM.gdb, saved as 'All_****_forSAHM'
inat = "D:/1_AMLT/1_SDM/SDM_Pro/SDM.gdb/iNat_" + code
ethno = "D:/1_AMLT/1_SDM/SDM_Pro/SDM.gdb/My_" + code + "_forSAHM"
allpoints = "D:/1_AMLT/1_SDM/SDM_Pro/SDM.gdb/All_" + code + "_forSAHM"

print('Merging inat and ethnobot db into one output')
arcpy.management.Merge([inat, ethno], allpoints)

# print number of records/points
result = arcpy.GetCount_management(allpoints)
count = int(result.getOutput(0))
print('Total features in combined layer = ', count)
# this fc is ready to use, already in WGS84 (best for SAHM)

ARCT
Merging inat and ethnobot db into one output
Total features in combined layer =  3559


#### Add a field called 'code' and set to 1
This is necessary for location input data in SAHM

In [131]:
print(code)
# allpoints = "D:/1_AMLT/1_SDM/SDM_Pro/SDM.gdb/All_" + code + "_forSAHM"
# add field named as species code
arcpy.AddField_management(allpoints, code, "LONG")
# set to one
arcpy.management.CalculateField(allpoints, code, 1)
print('Field added')

ARCT
Field added


### Remove location points that fall within the same analysis cell

#### Convert point data to raster
Processing environment = template raster for projection, cell size, snapping, and extent. 
Use code field as value, cell assignment rule = max

In [132]:
print(code)
# Convert these points to a raster
template = r"D:\1_AMLT\1_SDM\SAHM\CovariatesFinal\bio_1_temp.tif"
pointras = r"D:\1_AMLT\1_SDM\SDM_Pro\SDM.gdb\All_" + code + "_forSAHM_Raster"

with arcpy.EnvManager(snapRaster=template, cellSize=template):
    arcpy.conversion.PointToRaster(allpoints, code, pointras, "MAXIMUM","NONE", template, "BUILD")

print('Raster created')

ARCT
Raster created


#### Convert that raster to points
Using centroids, print number of remaining points

In [133]:
print(code)
# convert raster to points (centroids)
finalpoints = r"D:\1_AMLT\1_SDM\SDM_Pro\SDM.gdb\All_" + code + "_forSAHM_FromRaster"

arcpy.conversion.RasterToPoint(pointras, finalpoints)
print('Points from raster created')

# print number of records/points
result = arcpy.GetCount_management(finalpoints)
count = int(result.getOutput(0))
print('Total features in final point layer = ', count)

ARCT
Points from raster created
Total features in final point layer =  1447


### Add final fields needed in the locations csv file

#### Add fields for lat as y, long as x in WGS84, responseBinary with 1s, species code column with 1s, export as csv

In [2]:
print(code)
sppLocation = r"D:\1_AMLT\1_SDM\SAHM\SpeciesLocations"
csvfile = "All_" + code + "_forSAHM_FromRaster.csv"

# add field named as species code
arcpy.AddField_management(finalpoints, code, "LONG")
# set to one
arcpy.management.CalculateField(finalpoints, code, 1)

# add field named responseBinary
arcpy.AddField_management(finalpoints, 'resposeBinary', "LONG")
# set to one
arcpy.management.CalculateField(finalpoints, 'resposeBinary', 1)

# add fields x and y
arcpy.AddField_management(finalpoints, 'x', "DOUBLE")
arcpy.AddField_management(finalpoints, 'y', "DOUBLE")
# calculate lat and long coordinate values in WGS84
arcpy.management.CalculateGeometryAttributes(finalpoints, "x POINT_X;y POINT_Y", '', '', 'GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]', "DD")

print('Fields added')

# deleting three unnecessary fields
arcpy.DeleteField_management(finalpoints, ["grid_code", "pointid", "bio_1_temp_1"])

# Export the table to a csv file for input into SAHM
arcpy.TableToTable_conversion(finalpoints, sppLocation, csvfile)
print('Exported to csv')

PLAN
Fields added
Exported to csv
