# Use pygbif and Open Plant Hardiness Zones (OPHZ) to add hardiness zones to GBIF data

In [1]:
# imports
from pygbif import occurrences as occ
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [2]:
# Lifts the pandas display caps so DataFrames print without row/column truncation
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

# Query the GBIF API (this is where you can edit your query)

In [3]:
# Fetches occurrence records from the GBIF API into a dataframe, then keeps only
# the columns needed downstream.
#
# NOTE: pygbif names its filters in camelCase (hasCoordinate, hasGeospatialIssue).
# The previous `has_coordinate` spelling is not a recognised filter, so the
# coordinate filter was never applied; `return_type` is not a search() option
# either (search() already returns a dict whose 'results' key holds the records).
#
# GBIF serves at most GBIF_PAGE_SIZE records per request, so the query is paged
# with `offset` until MAX_RECORDS are collected or GBIF reports the end.
GBIF_PAGE_SIZE = 300
MAX_RECORDS = 10000

gbif_records = []
while len(gbif_records) < MAX_RECORDS:
    gbif_page = occ.search(
        country='US',
        hasCoordinate=True,
        hasGeospatialIssue=False,
        limit=min(GBIF_PAGE_SIZE, MAX_RECORDS - len(gbif_records)),
        offset=len(gbif_records),
    )
    page_results = gbif_page.get('results', [])
    gbif_records.extend(page_results)
    # Stop on an empty page or when GBIF says there is nothing left to fetch
    if not page_results or gbif_page.get('endOfRecords', True):
        break

occdf_full = pd.DataFrame(gbif_records)

occdf = occdf_full[["datasetKey", "decimalLatitude", "decimalLongitude", "scientificName", "taxonKey"]]

### Combine OPHZ geometries for each state into one index

In [4]:
import geopandas as gpd
import os

# Folder that holds the OPHZ GeoJSON files
geojson_dir = './ophz/geojson/'

# Names of every GeoJSON file found in that folder
file_list = [name for name in os.listdir(geojson_dir) if name.endswith('.geojson')]

# Read each file into its own GeoDataFrame
gdf_list = [gpd.read_file(os.path.join(geojson_dir, name)) for name in file_list]

# Stack the per-file GeoDataFrames into a single GeoDataFrame
gdf = pd.concat(gdf_list)

# Spatial index over the combined geometries, used for fast candidate lookups
sindex = gdf.sindex

### Combine OPHZ with GBIF data

In [5]:
# Matches every GBIF occurrence to the OPHZ polygon(s) that contain it and
# builds `ophz_gbif_df`: the occurrence columns plus the polygon's TEMP and ZONE.
import warnings

from shapely.geometry import Point

# Initialize an empty list to store the matching features
matching_features = []

# Suppresses warnings (e.g. from shapely) only while matching, instead of
# silencing every warning for the rest of the session.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')

    # Loops through each occurrence in the occurrence dataframe
    occurrence_coords = zip(occdf["decimalLongitude"], occdf["decimalLatitude"])
    for i, (lon, lat) in enumerate(occurrence_coords):
        # An occurrence without coordinates cannot be placed in any polygon
        if pd.isna(lon) or pd.isna(lat):
            continue
        # Built once per occurrence rather than once per candidate polygon
        point = Point(lon, lat)
        # The spatial index returns positional indices of candidate polygons;
        # the exact containment test below confirms each candidate
        for j in sindex.intersection((lon, lat)):
            # Checks if the occurrence is within the polygon
            if gdf.iloc[j].geometry.contains(point):
                # Combine occurrence entry with matching features using pandas.concat
                matching_features.append(pd.concat([occdf.iloc[i], gdf.iloc[j][["TEMP", "ZONE"]]]))

# Passing the columns explicitly keeps the expected schema even when nothing matched
ophz_gbif_df = pd.DataFrame(matching_features, columns=[*occdf.columns, "TEMP", "ZONE"])

# Saves the data to a CSV file (includes datasetKey for easy reupload to GBIF)

In [6]:
# ophz_gbif_df.to_csv('ophz_gbif.csv')

## Map everything using Folium

In [7]:
# Draws the results on an interactive map centred on the US, with TEMP as the
# marker colour. Each occurrence is placed using its latitude and longitude.
import html

import matplotlib.pyplot as plt
import matplotlib
import folium
from folium.plugins import FastMarkerCluster


# Maps TEMP to colours on the RdYlGn scale: -40 is red, 0 is yellow, +40 is green.
# plt.get_cmap is used because plt.cm.get_cmap was deprecated in matplotlib 3.7
# and removed in 3.9.
cmap = plt.get_cmap('RdYlGn')
norm = plt.Normalize(vmin=-40, vmax=40)

folium_map = folium.Map(location=[38, -97],
                        zoom_start=4.4)

# Adds all occurrences as one clustered marker layer, plus a layer control
FastMarkerCluster(data=list(zip(ophz_gbif_df['decimalLatitude'].values, ophz_gbif_df['decimalLongitude'].values))).add_to(folium_map)
folium.LayerControl().add_to(folium_map)

for _, row in ophz_gbif_df.iterrows():
    # Generates the popup message that is shown on click. The values are
    # HTML-escaped because the popup string is rendered as HTML and the text
    # comes from external data; only the <br> separators are markup.
    popup_text = "{}<br> Zone: {}<br> Temp: {}".format(
        html.escape(str(row["scientificName"])),
        html.escape(str(row["ZONE"])),
        html.escape(str(row["TEMP"])),
    )
    # Adds a marker to the map, coloured by the row's TEMP value
    folium.CircleMarker(location=(row['decimalLatitude'],
                                  row['decimalLongitude']),
                        radius=3,
                        color=matplotlib.colors.to_hex(cmap(norm(row['TEMP']))),
                        popup=popup_text).add_to(folium_map)

# Saves the map to map.html (open that file in a browser to view it)
folium_map.save("map.html")