# Data Download 
The purpose of this notebook is to troubleshoot the downloading of GBIF and city parks datasets in a programmatic way. 

Accessing GBIF occurrence records for City of Vancouver, requires providing the city boundary limits. City boundary was obtained from the [Vancouver Open Data Portal](https://opendata.vancouver.ca/explore/dataset/city-boundary/export/?location=11,49.23374,-123.13477)

# import libraries 


In [82]:
from pygbif import occurrences as occ
import geopandas as gpd
from shapely.geometry.multipolygon import MultiPolygon
import shapely.geometry as geom
import shapely.wkt
import pandas as pd
from zipfile import ZipFile

# load the city boundary 

In [38]:
url = 'https://opendata.vancouver.ca/explore/dataset/facet-grid-boundaries/download/?format=shp&timezone=America/Los_Angeles&lang=en'
city_boundary = gpd.read_file(url)

In [39]:
city_boundary.head()

Unnamed: 0,facet_text,geometry
0,X04,"POLYGON ((-123.01232 49.30354, -123.01232 49.2..."
1,X05,"POLYGON ((-123.01232 49.29905, -123.01232 49.2..."
2,X07,"POLYGON ((-123.02332 49.29005, -123.01232 49.2..."
3,W15,"POLYGON ((-123.02331 49.25407, -123.02330 49.2..."
4,X18,"POLYGON ((-123.01231 49.24058, -123.01231 49.2..."


In [71]:
poly_wkt = [str(poly_str) for poly_str in city_boundary.geometry]
poly_list = [shapely.wkt.loads(poly) for poly in poly_wkt]
geometry = str(MultiPolygon(poly_list))

# download GBIF data 

In [11]:
GBIF_USER = 'aromatic_toast'
GBIF_PWD = 'EspressoMach2029'
GBIF_EMAIL = 'lesley.miller@alumni.ubc.ca'

In [72]:
occ.download(['geometry =  geometry',
             'hasCoordinate = True',
             'year = 2019',
             'user = GBIF_USER', 
             'pwd = GBIF_PWD', 
             'email = GBIF_EMAIL'])

Exception: error: Instantiation of [simple type, class org.gbif.api.model.occurrence.predicate.EqualsPredicate] value failed: <value> may not be empty (through reference chain: org.gbif.api.model.occurrence.predicate.EqualsPredicate["value"]), with error status code 400check your number of active downloads.

In [73]:
occ.download_list(user = GBIF_USER, pwd = GBIF_PWD)

{'meta': {'offset': 0, 'limit': 20, 'endofrecords': True, 'count': 1},
 'results': [{'key': '0019909-190415153152247',
   'doi': '10.15468/dl.txhrd1',
   'license': 'http://creativecommons.org/licenses/by-nc/4.0/legalcode',
   'request': {'predicate': {'type': 'and',
     'predicates': [{'type': 'equals', 'key': 'COUNTRY', 'value': 'CA'},
      {'type': 'and',
       'predicates': [{'type': 'greaterThanOrEquals',
         'key': 'YEAR',
         'value': '2009'},
        {'type': 'lessThanOrEquals', 'key': 'YEAR', 'value': '2019'}]},
      {'type': 'or',
       'predicates': [{'type': 'within',
         'geometry': 'POLYGON((-180 -90,180 -90,180 90,-180 90,-180 -90))'},
        {'type': 'within',
         'geometry': 'POLYGON((-123.39727 49.02333,-122.53448 49.02333,-122.53448 49.4198,-123.39727 49.4198,-123.39727 49.02333))'}]},
      {'type': 'equals',
       'key': 'DATASET_KEY',
       'value': '50c9509d-22c7-4a22-a47d-8c48425ef4a7'},
      {'type': 'equals', 'key': 'HAS_COORDINATE

In [75]:
occ.download_get(key = '0019909-190415153152247', 
                 path = '/Users/lesley/data_science_portfolio/vancouver_park_biodiversity/data/')

Download file size: 56878354 bytes
On disk at /Users/lesley/data_science_portfolio/vancouver_park_biodiversity/data//0019909-190415153152247.zip


{'path': '/Users/lesley/data_science_portfolio/vancouver_park_biodiversity/data//0019909-190415153152247.zip',
 'size': 56878354,
 'key': '0019909-190415153152247'}

In [81]:
# extract the data 
zip = ZipFile('/Users/lesley/data_science_portfolio/vancouver_park_biodiversity/data/0019909-190415153152247.zip')
zip.extractall(path='data/')

# load in city parks data