<a href="https://colab.research.google.com/github/haberkornm/EPA-Ecoregions-Point-in-Polygon/blob/main/get_ecoregions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Mounting google drive at /content/drive so the shapefile and csv stored there can be loaded
#(google.colab is only available when running inside a Colab session)
from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#Geopandas must be installed everytime a new colab session is opened. If this one doesn't work try the install below
pip install geopandas

In [None]:
#If the above didn't work for installing geopandas try this one.
!pip install git+git://github.com/geopandas/geopandas.git

# Making Dataframe for Function

In [4]:
#Packages required for making dataframe of polygons from shapefile
import geopandas as gpd  #Used for extracting shape properties and point in polygon within function
import pandas as pd  #used for saving and running polygon dataframe

#This is how the csv dataframe was created from the shapefile for the below function.
#Shape file found at https://www.epa.gov/eco-research/ecoregions-north-america
#zip file for level 3 ecoregions https://gaftp.epa.gov/EPADataCommons/ORD/Ecoregions/cec_na/NA_CEC_Eco_Level3.zip

#Save the unzipped folder downloaded from the above link to your google drive or computer.
#The entire folder as it is must be saved for the shp file to work below.

#the path is the location where the shp file is found in your google drive or computer
path = "/content/drive/MyDrive/Reverse geocoding/NA_CEC_Eco_Level3/NA_CEC_Eco_Level3.shp"
NA3 = gpd.read_file(path) #Read the shapefile into a geodataframe
#Reproject the whole geodataframe (not just its geometry column) so that the polygons
#and the frame's CRS metadata are both switched to lat long (EPSG:4326)
NA3 = NA3.to_crs(epsg=4326)

#Saves NA3 dataframe to your computer or drive
NA3.to_csv("/content/drive/MyDrive/Reverse geocoding/ecoregions3.csv")

The NA3 dataframe was created and saved ahead of time here because loading the prepared csv allows the function below to run significantly faster.

# Reverse Geolocation Function

In [5]:
#Packages required for function
import geopandas as gpd  #Used for extracting shape properties and point in polygon within function
from shapely.geometry import Point #Used for making data type point for polygons
import pandas as pd  #used for running polygon dataframe
from shapely import wkt

#This dataframe was created from the EPA North American Ecoregions Level 3 shapefile as shown above.
NA3 = pd.read_csv('/content/drive/MyDrive/Reverse geocoding/ecoregions3.csv') #Load Pandas dataframe
NA3['geometry'] = NA3['geometry'].apply(wkt.loads) #Parse the geometry text back into shapely geometries
#Convert pandas dataframe to geopandas dataframe. A csv cannot carry a CRS, so it is declared
#here: the polygons were reprojected to lat long (EPSG:4326) before they were saved.
NA3 = gpd.GeoDataFrame(NA3, geometry='geometry', crs='EPSG:4326')

#Create function to obtain epa ecoregions 1, 2, and 3
#Uses single set of lat and long for output
def get_eco(lat, long):
  """Look up the EPA ecoregion keys for a single coordinate pair.

  Args:
    lat: latitude of the location.
    long: longitude of the location.

  Returns:
    A list [level 1 key, level 2 key, level 3 key] taken from the first
    polygon in NA3 that contains the point, or None when the point is not
    inside any polygon.
  """
  point = Point(long, lat)  #Point is built as (long, lat), i.e. x then y
  #Walk the four columns side by side (by position) instead of looking every
  #value up by index label on each pass through the loop.
  rows = zip(NA3["geometry"], NA3['NA_L1KEY'], NA3['NA_L2KEY'], NA3['NA_L3KEY'])
  for polygon, level_1, level_2, level_3 in rows:
    if point.within(polygon):
      return [level_1, level_2, level_3]
  return None  #Point is outside every ecoregion polygon

#Create function that uses get_eco for entire dataframe.
def get_ecoregions(lat, long, df):  #df is dataframe function will be used on
  """Add Level_1, Level_2 and Level_3 ecoregion columns to df in place.

  Args:
    lat: latitudes, one per row of df and in the same order as its rows.
    long: longitudes, one per row of df and in the same order as its rows.
    df: dataframe that receives the three new columns.

  Returns:
    None. df is modified in place; rows whose point is not inside any
    ecoregion polygon get None in all three columns.

  Raises:
    ValueError: if lat/long do not supply exactly one pair per row of df.
  """
  columns = ['Level_1', 'Level_2', 'Level_3']
  ecos = []
  #Pair the coordinates by position so a dataframe whose index is not
  #0..n-1 (for example after filtering or sorting) is handled correctly.
  for point_lat, point_long in zip(lat, long):
    eco = get_eco(point_lat, point_long)
    #get_eco returns None for a point outside every polygon; keep the row
    #but leave its three levels empty instead of failing the assignment.
    ecos.append([None, None, None] if eco is None else eco)
  if len(ecos) != len(df):
    raise ValueError("lat and long must contain one value for every row of df")
  #Build the result on df's own index, then copy it over column by column
  #(this also works when df has no rows).
  levels = pd.DataFrame(ecos, columns=columns, index=df.index)
  for column in columns:
    df[column] = levels[column]

# Testing Function

In [6]:
#Ten test locations used to build a dataframe for testing the function.
#Each entry is [longitude, latitude, label].
#NOTE(review): the 'E. Iowa' coordinates (-105.58, 42.88) appear to lie in Wyoming rather than Iowa - confirm the label
points = [
    [-112.0870977, 33.4942405, 'Phoenix'],
    [-111.5719556, 35.1842613, 'Flagstaff'],
    [-109.6450123, 33.958546, 'Greer'],
    [-111.6771445, 35.3498522, 'Mt. Humphrey'],
    [-105.5777349, 42.8788752, 'E. Iowa'],
    [-106.0264983, 39.6419848, 'Silverthorne'],
    [-90.3325451, 38.4984679, 'Eureka'],
    [-101.9310323, 34.8903525, 'Canyon'],
    [-91.6049041, 43.637534, 'S. Minn'],
    [-117.5877772, 33.6247486, 'S. California'],
]

#Turn the ten points into a pandas dataframe with named columns
df = pd.DataFrame(data=points, columns=['long', 'lat', 'Location'])

In [7]:
#Show the test dataframe
df

Unnamed: 0,long,lat,Location
0,-112.087098,33.49424,Phoenix
1,-111.571956,35.184261,Flagstaff
2,-109.645012,33.958546,Greer
3,-111.677144,35.349852,Mt. Humphrey
4,-105.577735,42.878875,E. Iowa
5,-106.026498,39.641985,Silverthorne
6,-90.332545,38.498468,Eureka
7,-101.931032,34.890352,Canyon
8,-91.604904,43.637534,S. Minn
9,-117.587777,33.624749,S. California


In [8]:
#Run the function on the lat and long columns of df; the ecoregion columns are added to df itself
get_ecoregions(df['lat'], df['long'], df)

In [9]:
#The get_ecoregions() function adds three new columns to dataframe df: Level_1, Level_2, and Level_3 epa ecoregions.
df

Unnamed: 0,long,lat,Location,Level_1,Level_2,Level_3
0,-112.087098,33.49424,Phoenix,10 NORTH AMERICAN DESERTS,10.2 WARM DESERTS,10.2.2 Sonoran Desert
1,-111.571956,35.184261,Flagstaff,13 TEMPERATE SIERRAS,13.1 UPPER GILA MOUNTAINS,13.1.1 Arizona/New Mexico Mountains
2,-109.645012,33.958546,Greer,13 TEMPERATE SIERRAS,13.1 UPPER GILA MOUNTAINS,13.1.1 Arizona/New Mexico Mountains
3,-111.677144,35.349852,Mt. Humphrey,13 TEMPERATE SIERRAS,13.1 UPPER GILA MOUNTAINS,13.1.1 Arizona/New Mexico Mountains
4,-105.577735,42.878875,E. Iowa,9 GREAT PLAINS,9.3 WEST-CENTRAL SEMIARID PRAIRIES,9.3.3 Northwestern Great Plains
5,-106.026498,39.641985,Silverthorne,6 NORTHWESTERN FORESTED MOUNTAINS,6.2 WESTERN CORDILLERA,6.2.14 Southern Rockies
6,-90.332545,38.498468,Eureka,8 EASTERN TEMPERATE FORESTS,8.3 SOUTHEASTERN USA PLAINS,8.3.2 Interior River Valleys and Hills
7,-101.931032,34.890352,Canyon,9 GREAT PLAINS,9.4 SOUTH CENTRAL SEMIARID PRAIRIES,9.4.1 High Plains
8,-91.604904,43.637534,S. Minn,8 EASTERN TEMPERATE FORESTS,8.1 MIXED WOOD PLAINS,8.1.5 Driftless Area
9,-117.587777,33.624749,S. California,11 MEDITERRANEAN CALIFORNIA,11.1 MEDITERRANEAN CALIFORNIA,"11.1.1 California Coastal Sage, Chaparral, an..."
