# Access data by URL
(João Palma - Master GreenDS)
ipynb - stands for **I**nteractive **PY**thon **N**ote**B**ook

# Access Biodiversity worldwide

## GBIF - Global Biodiversity Information Facility

https://www.gbif.org/developer/summary

## Requesting
beginners guide : https://data-blog.gbif.org/post/gbif-api-beginners-guide/

- Go to https://www.gbif.org
- Get Data -> Occurrences
- Create your search, e.g.:
  - Scientific name: Galerida cristata
  - Basis of record: Human observation
  - Administrative areas (gadm.org) : Beja - PRT.3_1
  - Year: 1900-2022

You will find that your browser updates the query string. You should have something like this:
- **https://www.gbif.org/occurrence/search**?basis_of_record=HUMAN_OBSERVATION&has_coordinate=true&has_geospatial_issue=false&taxon_key=2490669&year=1900,2022&gadm_gid=PRT.3_1

Now replace **https://www.gbif.org/occurrence/search** with **https://api.gbif.org/v1/occurrence/search** (https://www.gbif.org/developer/summary) to become

- https://api.gbif.org/v1/occurrence/search?basis_of_record=HUMAN_OBSERVATION&has_coordinate=true&has_geospatial_issue=false&taxon_key=2490669&year=1900,2022&gadm_gid=PRT.3_1

Copy-paste in your browser. See what comes out.

In [None]:
import urllib.request
import json
import time
import datetime
import pandas as pd
 
# Download GBIF human-observation occurrences for a list of bird species in
# one administrative region and collect them into a pandas DataFrame.

region = "PRT.3_1"  # GADM identifier of Beja; check https://gadm.org
years = "1900,2020"  # this is a range: "first_year,last_year"

# Species to query, one entry per bird:
# [scientific name, GBIF taxon key, species-group tag].
birds = [
    ["Galerida cristata", "2490669", "insect_agri"],
    ["Hippolais polyglotta", "2493214", "insect_agri"],
    ["Oenanthe oenanthe", "5231240", "insect_agri"],
    ["Anthus pratensis", "2490266", "insect_agri"],
    ["Saxicola rubicola", "4408759", "insect_agri"],
]

# The GBIF occurrence search returns at most 300 records per request, whatever
# `limit` is sent, so the results have to be fetched page by page via `offset`.
PAGE_SIZE = 300
# NOTE(review): GBIF documents a hard ceiling of 100,000 records reachable
# through paging on this endpoint; larger result sets need the download API.
MAX_RECORDS = 100000


def _iter_occurrences(taxon_key):
    """Yield every occurrence record of *taxon_key* for `region` and `years`.

    Pages through the GBIF search API until the service reports
    ``endOfRecords`` (or returns an empty page, or MAX_RECORDS is reached).
    Each HTTP response is closed before its records are yielded.
    """
    base_url = (
        "https://api.gbif.org/v1/occurrence/search"
        "?basis_of_record=HUMAN_OBSERVATION"
        "&has_coordinate=true"
        "&has_geospatial_issue=false"
        "&taxon_key=" + taxon_key
        + "&year=" + years
        + "&gadm_gid=" + region
    )
    offset = 0
    while offset < MAX_RECORDS:
        page_limit = min(PAGE_SIZE, MAX_RECORDS - offset)
        url = base_url + "&limit=" + str(page_limit) + "&offset=" + str(offset)
        with urllib.request.urlopen(url) as response:
            page = json.loads(response.read().decode('utf-8'))
        records = page["results"]
        yield from records
        # Stop on the last page; the empty-page check guards against looping
        # forever should the flag be missing or wrong.
        if page.get("endOfRecords", True) or not records:
            return
        offset += len(records)


# Column-oriented accumulator: one list per DataFrame column.
dic_res = {"count":[],"specie_grp":[],"specie":[],"decimalLongitude":[],"decimalLatitude":[],"day":[],"month":[],"year":[],"timestamp":[]}

count = 0  # running number of records seen, including the skipped ones
for specie_name, taxon_key, specie_grp in birds:
    for record in _iter_occurrences(taxon_key):
        count += 1
        if count % 300 == 0:
            print (count, " records found and counting...")
        # Records without a complete date cannot be time-stamped and are
        # skipped (an alternative would be to assume a mid-month day).
        if not all(key in record for key in ("day", "month", "year")):
            continue
        date_text = str(record["day"]) + "/" + str(record["month"]) + "/" + str(record["year"])
        # Seconds since the epoch for local midnight of the observation date.
        timestamp = time.mktime(datetime.datetime.strptime(date_text, "%d/%m/%Y").timetuple())
        dic_res["count"].append(count)
        dic_res["specie_grp"].append(specie_grp)
        dic_res["specie"].append(specie_name)
        dic_res["decimalLongitude"].append(record["decimalLongitude"])
        dic_res["decimalLatitude"].append(record["decimalLatitude"])
        dic_res["day"].append(record["day"])
        dic_res["month"].append(record["month"])
        dic_res["year"].append(record["year"])
        dic_res["timestamp"].append(timestamp)

df = pd.DataFrame(dic_res)
print ("Done! Found ", df.shape[0], " records")
print("dataset size (rows,cols):", df.shape)
print (df)

# Organize the dataset in groups: [display label, rows of that species group].
# NOTE(review): the label says "Granívoras" (granivorous) while the tag is
# "insect_agri" (insectivorous) — confirm which one is intended.
groups=[]
groups.append(["Granívoras agrícolas", df[df['specie_grp'] =="insect_agri"]])


In [None]:
# Now let's draw a heat map from the occurrence coordinates.
import plotly.express as px

# Only the first group is plotted; loop over `groups` to draw one map each.
group_title, group_df = groups[0]

# "open-street-map" is used as the base layer because it needs no access
# token; the Stamen tile styles are no longer served without a Stadia Maps
# account.
# NOTE(review): z='year' weights every point by its year value instead of
# simply counting observations — confirm this is the intended measure.
fig = px.density_mapbox(
    group_df,
    lat='decimalLatitude',
    lon='decimalLongitude',
    z='year',
    radius=10,
    center=dict(lat=38, lon=-7.9),
    zoom=7,
    mapbox_style="open-street-map",
    title=group_title,
)
fig.show()