# Intake examples

* Refer to `sample_catalog.yml` as an example
* Supported file types, whether the data lives in a Google Cloud Storage bucket, at a URL, or in a DCAT (open data catalog):
    * Tabular: csv, parquet
    * Geospatial: zipped shapefile, geojson
* For open data portals, look up the portal's `data.json` file
    * Ex: CA open data portal's URL is https://data.ca.gov
    * The corresponding `data.json` file is https://data.ca.gov/data.json
    * Each dataset listed in `data.json` has associated metadata, including `accessURL`, `landingPage`, etc. Find the dataset's `identifier` and use it as the catalog item

In [1]:
import intake

# Need this to get credentials to read from GCS bucket
import gcsfs

# Build one catalog object from every `.yml` file matched under `../catalogs`.
# The glob is a relative path, so it resolves against the notebook's
# working directory.
catalog = intake.open_catalog(
    "../catalogs/*.yml"
)

## Go through each item in the catalog and open it

In [2]:
# Entry nested under the `ca_open_data` sub-catalog: load it and preview
# the first rows.
(
    catalog.ca_open_data
    .cdcr_population_covid_tracking
    .read()
    .head()
)

Unnamed: 0,Date,InstitutionName,Latitude,Longitude,TotalConfirmed,TotalDeaths,DistinctPatientsTested,NewInTheLast14Days
0,2020-03-10,Avenal State Prison (ASP),35.930766,-120.10057,0,0,0,0
1,2020-03-10,California City Correctional Facility (CAC),35.139327,-117.91473,0,0,1,0
2,2020-03-10,Calipatria State Prison (CAL),33.131764,-115.51208,0,0,0,0
3,2020-03-10,California Correctional Center (CCC),40.446063,-120.664132,0,0,0,0
4,2020-03-10,California Correctional Institution (CCI),35.132673,-118.50781,0,0,0,0


In [3]:
# Entry nested under the `la_geohub` sub-catalog. Preview with `.head()`
# like every other cell in this notebook, so the rendered output stays
# bounded even if the source ever returns more than a handful of rows.
catalog.la_geohub.city_boundary.read().head()

Unnamed: 0,OBJECTID,CITY,geometry
0,1,IN,"POLYGON ((-118.42000 34.32917, -118.41999 34.3..."


In [4]:
# Entry nested under the `ca_geoportal` sub-catalog: load it and preview
# the first rows (the result carries a `geometry` column).
(
    catalog.ca_geoportal
    .calenviroscreen
    .read()
    .head()
)

Unnamed: 0,FID,SwisNo,Name,Activity,SiteScore,Link,geometry
0,1,01-AA-0001,Turk Island Landfill,Solid Waste Disposal Site,1,http://www.calrecycle.ca.gov/SWFacilities/Dire...,MULTIPOINT (-122.08529 37.58538)
1,2,01-AA-0003,Pleasanton Garbage Service SW TS,Large Volume Transfer/Proc Facility,5,http://www.calrecycle.ca.gov/SWFacilities/Dire...,MULTIPOINT (-121.85410 37.67460)
2,3,01-AA-0004,West Beach Sanitary Landfill,Solid Waste Disposal Site,1,http://www.calrecycle.ca.gov/SWFacilities/Dire...,MULTIPOINT (-122.32860 37.78500)
3,4,01-AA-0006,Davis Street Sanitary Landfill,Solid Waste Disposal Site,1,http://www.calrecycle.ca.gov/SWFacilities/Dire...,MULTIPOINT (-122.19480 37.71440)
4,5,01-AA-0007,Davis Street Transfer Station,Large Volume Transfer/Proc Facility,5,http://www.calrecycle.ca.gov/SWFacilities/Dire...,MULTIPOINT (-122.19372 37.71225)


In [5]:
# Top-level catalog entry (accessed directly on `catalog`, not through a
# sub-catalog): load it and preview the first rows.
(
    catalog.lehd_federal_jobs_by_tract
    .read()
    .head()
)

Unnamed: 0,st,stname,cty,ctyname,trct,trctname,year,c000,ca01,ca02,...,cr05,cr07,ct01,ct02,cd01,cd02,cd03,cd04,cs01,cs02
0,1,Alabama,1001,"Autauga County, AL",1001020100,"201 (Autauga, AL)",2011,3,0,2,...,0,0,3,0,0,3,0,0,0,3
1,1,Alabama,1001,"Autauga County, AL",1001020100,"201 (Autauga, AL)",2012,1,0,1,...,0,0,1,0,0,0,0,1,1,0
2,1,Alabama,1001,"Autauga County, AL",1001020100,"201 (Autauga, AL)",2013,2,0,1,...,0,0,2,0,0,2,0,0,2,0
3,1,Alabama,1001,"Autauga County, AL",1001020200,"202 (Autauga, AL)",2011,2,0,0,...,0,0,2,0,0,0,1,1,2,0
4,1,Alabama,1001,"Autauga County, AL",1001020200,"202 (Autauga, AL)",2012,4,0,3,...,0,0,4,0,0,0,3,1,3,1


In [6]:
# Tabular example: load the `test_csv` entry and preview the first rows.
(
    catalog.test_csv
    .read()
    .head()
)

Unnamed: 0,county,county_pop2020,county_fips
0,Alameda,1685886,6001
1,Alpine,1117,6003
2,Amador,38531,6005
3,Butte,217769,6007
4,Calaveras,44289,6009


In [7]:
# Geospatial example: load the `test_zipped_shapefile` entry and preview
# the first rows (the result carries a `geometry` column).
(
    catalog.test_zipped_shapefile
    .read()
    .head()
)

Unnamed: 0,Id,geometry
0,0,"LINESTRING (6504115.842 1741572.086, 6504123.7..."
1,0,"LINESTRING (6502785.651 1740966.650, 6502790.5..."
2,0,"LINESTRING (6502785.651 1740966.650, 6502783.5..."
3,0,"LINESTRING (6502780.671 1738173.745, 6502781.0..."
4,0,"LINESTRING (6503013.968 1738006.462, 6502887.7..."


In [8]:
# Geospatial example: load the `test_geojson` entry and preview the first
# rows (the result carries a `geometry` column).
(
    catalog.test_geojson
    .read()
    .head()
)

Unnamed: 0,OBJECTID,Refuge_Name,Delivery_Point,Type,geometry
0,1,Los Banos WA,Salt Slough,Lift,POINT (-120.76200 37.13649)
1,2,Los Banos WA,West Delta Canal Branch 2,Gravity,POINT (-120.78003 37.12726)
2,3,Los Banos WA,Boundary Drain,Lift,POINT (-120.78038 37.10903)
3,4,Los Banos WA,San Pedro Canal,Gravity,POINT (-120.79828 37.10802)
4,5,Los Banos WA,San Luis Canal,Gravity,POINT (-120.81691 37.12885)
