# Facilities to Shapefiles

Reads facilities data from the NYC Open Data API, creates an ESRI shapefile for each of five facilities subgroups (hospitals, colleges, libraries, public schools and private schools), and writes those shapefiles to disk in the 'outputs' folder.

## Variables

In [1]:
import os, pandas as pd, geopandas as gpd, requests, pandasql as ps, sqlite3 as lite, logging, datetime, re
from geopandas import GeoDataFrame
from shapely.geometry import Point
from fiona.crs import from_epsg

In [2]:
# Configure a logger that records ERROR-level (and higher) messages
# to "error_log.log" in the current working directory.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

# logging.getLogger returns the same logger object every time this cell runs,
# so reuse a file handler that is already attached instead of adding another
# one; otherwise every re-run would make each error appear one more time in
# the log file.
log_path = os.path.abspath("error_log.log")
handler = next(
    (
        h for h in logger.handlers
        if isinstance(h, logging.FileHandler) and h.baseFilename == log_path
    ),
    None,
)
if handler is None:
    handler = logging.FileHandler(log_path)
    logger.addHandler(handler)
handler.setLevel(logging.ERROR)
handler.setFormatter(formatter)

# monthYear (full month name followed by the year) is appended to all
# shapefile names
today = datetime.datetime.today()
month = today.strftime("%B")
year = today.year
monthYear = f"{month}{year}"

In [3]:
# Inputs


# Outputs: one shapefile path per facilities subgroup, all inside 'outputs'
h_path, c_path, l_path, pub_s_path, prv_s_path = (
    os.path.join('outputs', shp_name)
    for shp_name in (
        'hospitals.shp',
        'colleges.shp',
        'libraries.shp',
        'public_schools.shp',
        'private_schools.shp',
    )
)

## API Call

### Read in facilities data from API and create a pandas dataframe
Permalink for the facilities data: <https://data.cityofnewyork.us/City-Government/Facilities-Database/ji82-xba5>

In [4]:
# CSV endpoint for the Facilities Database; the $limit query parameter is set
# to 50000 rows.
# NOTE(review): if the dataset ever holds more rows than the limit, the
# download would presumably be truncated -- confirm against the API docs.
url = "https://data.cityofnewyork.us/resource/ji82-xba5.csv?$limit=50000"

# Where an optional local copy of the raw download would go (only used by the
# commented-out to_csv call below).
csv_path = os.path.join('outputs', 'facilities.csv')

facilities = pd.read_csv(url)
# facilities.to_csv(csv_path)

print(f"Created dataframe for {len(facilities)} facilities")
facilities.head()

Created dataframe for 36925 facilities


Unnamed: 0,geom,uid_merged,uid,idold,idagency,facname,addressnum,streetname,address,city,...,overlevel,overagency,overabbrev,agencyjuris,datasource,dataname,datalink,datadate,pgtable,facdomain
0,0101000020E6100000BB6246787B7B52C03BE466B8015B...,,151722,,,Bkn01g,,,Scholes Street,Brooklyn,...,NYCDSNY: City,NYC Department of Sanitation,NYCDSNY,,NYCDSNY,NYCDSNY: DSNY_select_facs_07262916,,NYCDSNY: 2016-07-26,dsny_facilities_mtsgaragemaintenance,Core Infrastructure and Transportation
1,0101000020E61000007F4C6BD3D87C52C01FBAA0BE655A...,,181303,,,"Eng 216,Lad 108,Bat. 35",445.0,Broadway,445 Broadway,Brooklyn,...,NYCFDNY: City,NYC Fire Department,NYCFDNY,NYCFDNY,NYCDCAS,NYCDCAS: City Owned and Leased Properties,NYCDCAS: http://www1.nyc.gov/site/planning/dat...,NYCDCAS: 2017-06-01,dcas_facilities_colp,"Public Safety, Emergency Services, and Adminis..."
2,0101000020E610000024ED461FF37C52C08236397CD25B...,,168047,,NYCDOT: 66,North 10th & Union Ave Plaza,,,North 10th,Brooklyn,...,NYCDOT: City,NYC Department of Transportation,NYCDOT,,NYCDOT,NYCDOT: Plaza Program,,NYCDOT: 2017-02-24,dot_facilities_pedplazas,"Parks, Gardens, and Historical Sites"
3,0101000020E6100000C91F0C3CF77C52C0D3156C239E5E...,,147855,,,Pulaski Bridge,,,,Brooklyn,...,NYCDOT: City,NYC Department of Transportation,NYCDOT,,NYCDOT,NYCDOT: Facilities Data - Bridge Houses,,NYCDOT: 2017-02-23,dot_facilities_bridgehouses,Core Infrastructure and Transportation
4,0101000020E6100000E2E995B20C7F52C0B41EBE4C1461...,,182240,,NYCDCP: M050100,Bertelsmann,1548.0,Broadway,1548 Broadway,New York,...,"NYCDCP,NYCDOB: City",NYC Department of Buildings;NYC Department of ...,NYCDCP;NYCDOB,,NYCDCP,NYCDCP: Privately Owned Public Spaces,,NYCDCP: 2017-10-23,dcp_pops,"Parks, Gardens, and Historical Sites"


In [5]:
# Translate borocode into a 5-digit FIPS code (for interoperability with
# US Census data) and store the result in a new 'bcode' column.
# Values of borocode that are not keys of the mapping are carried over as-is.
bcode_map = {
    1.0: '36061',
    2.0: '36005',
    3.0: '36047',
    4.0: '36081',
    5.0: '36085',
}
facilities['bcode'] = facilities['borocode'].replace(bcode_map)
facilities.head()

Unnamed: 0,geom,uid_merged,uid,idold,idagency,facname,addressnum,streetname,address,city,...,overagency,overabbrev,agencyjuris,datasource,dataname,datalink,datadate,pgtable,facdomain,bcode
0,0101000020E6100000BB6246787B7B52C03BE466B8015B...,,151722,,,Bkn01g,,,Scholes Street,Brooklyn,...,NYC Department of Sanitation,NYCDSNY,,NYCDSNY,NYCDSNY: DSNY_select_facs_07262916,,NYCDSNY: 2016-07-26,dsny_facilities_mtsgaragemaintenance,Core Infrastructure and Transportation,36047
1,0101000020E61000007F4C6BD3D87C52C01FBAA0BE655A...,,181303,,,"Eng 216,Lad 108,Bat. 35",445.0,Broadway,445 Broadway,Brooklyn,...,NYC Fire Department,NYCFDNY,NYCFDNY,NYCDCAS,NYCDCAS: City Owned and Leased Properties,NYCDCAS: http://www1.nyc.gov/site/planning/dat...,NYCDCAS: 2017-06-01,dcas_facilities_colp,"Public Safety, Emergency Services, and Adminis...",36047
2,0101000020E610000024ED461FF37C52C08236397CD25B...,,168047,,NYCDOT: 66,North 10th & Union Ave Plaza,,,North 10th,Brooklyn,...,NYC Department of Transportation,NYCDOT,,NYCDOT,NYCDOT: Plaza Program,,NYCDOT: 2017-02-24,dot_facilities_pedplazas,"Parks, Gardens, and Historical Sites",36047
3,0101000020E6100000C91F0C3CF77C52C0D3156C239E5E...,,147855,,,Pulaski Bridge,,,,Brooklyn,...,NYC Department of Transportation,NYCDOT,,NYCDOT,NYCDOT: Facilities Data - Bridge Houses,,NYCDOT: 2017-02-23,dot_facilities_bridgehouses,Core Infrastructure and Transportation,36047
4,0101000020E6100000E2E995B20C7F52C0B41EBE4C1461...,,182240,,NYCDCP: M050100,Bertelsmann,1548.0,Broadway,1548 Broadway,New York,...,NYC Department of Buildings;NYC Department of ...,NYCDCP;NYCDOB,,NYCDCP,NYCDCP: Privately Owned Public Spaces,,NYCDCP: 2017-10-23,dcp_pops,"Parks, Gardens, and Historical Sites",36061


## Subset facilities dataset into five subgroups using SQL queries

In [10]:
# Hospitals: rows with factype 'Hospital' whose idagency starts with 'NYSDOH',
# ordered by facility name.
# NOTE(review): SQLite's LTRIM(X, Y) removes any leading characters that
# appear in Y -- it is not a literal-prefix strip. The displayed results
# suggest the remaining IDs start with digits, so the outcome is the same
# here; confirm before reusing the pattern on other ID formats.
hospitals_sql = """SELECT uid, LTRIM(idagency,'NYSDOH: ') as idagency, facname, opname, address, city, zipcode, 
bcode, facsubgrp, factype, xcoord, ycoord
FROM facilities
WHERE factype = 'Hospital' 
AND idagency LIKE 'NYSDOH%'
ORDER BY facname; """

# The query only references the facilities table, so hand pandasql just that.
hospitals = ps.sqldf(hospitals_sql, {'facilities': facilities})
print(f"{len(hospitals)} hospitals:")
hospitals.head()

59 hospitals:


Unnamed: 0,uid,idagency,facname,opname,address,city,zipcode,bcode,facsubgrp,factype,xcoord,ycoord
0,122497,1438,Bellevue Hospital Center,New York City Health and Hospital Corporation,462 1 Avenue,New York,10016.0,36061,Hospitals and Clinics,Hospital,991144.9,208553.7637
1,154701,1178,BronxCare Hospital Center,BronxCare Health System,1650 Grand Concourse,Bronx,10457.0,36005,Hospitals and Clinics,Hospital,1008866.0,246591.3003
2,105716,1164,BronxCare Hospital Center,BronxCare Health System,1276 Fulton Avenue,Bronx,10456.0,36005,Hospitals and Clinics,Hospital,1011033.0,242195.8313
3,25716,1286,Brookdale Hospital Medical Center,"One Brooklyn Health System, Inc.",1 Brookdale Plaza,Brooklyn,11212.0,36047,Hospitals and Clinics,Hospital,1008551.0,177904.688
4,155377,1288,Brooklyn Hospital Center - Downtown Campus,The Brooklyn Hospital Center,121 Dekalb Avenue,Brooklyn,11201.0,36047,Hospitals and Clinics,Hospital,990381.7,190892.4034


In [11]:
# Libraries: rows with factype 'Public Libraries', leaving out facilities whose
# name contains any of the phrases listed below (presumably programs or service
# points rather than library branches -- confirm), ordered by facility name.
libraries_sql = """SELECT uid, idagency, facname, opname, address, city, zipcode, 
bcode, facsubgrp, factype, xcoord, ycoord
FROM facilities
WHERE factype = 'Public Libraries' 
AND facname NOT LIKE '%Reading and Writing%'
AND facname NOT LIKE '%Learning Center%'
AND facname NOT LIKE '%Information Center%'
AND facname NOT LIKE '%Circulation Services%'
AND facname NOT LIKE '%Family Literacy%'
AND facname NOT LIKE '%Cyber Center%'
AND facname NOT LIKE '%New Americas Program%'
AND facname NOT LIKE '%International Resource%'
ORDER BY facname; """

# The query only references the facilities table, so hand pandasql just that.
libraries = ps.sqldf(libraries_sql, {'facilities': facilities})
print(f"{len(libraries)} libraries:")
libraries.head()

213 libraries:


Unnamed: 0,uid,idagency,facname,opname,address,city,zipcode,bcode,facsubgrp,factype,xcoord,ycoord
0,129643,,115th Street,New York Public Library,203 West 115 Street,New York,10026.0,36061,Public Libraries,Public Libraries,997115.2,231826.4715
1,21658,,125th Street,New York Public Library,224 East 125 Street,New York,10035.0,36061,Public Libraries,Public Libraries,1002286.0,231844.9933
2,105120,,58th Street,New York Public Library,127 East 58 Street,New York,10022.0,36061,Public Libraries,Public Libraries,992748.0,216979.9407
3,134066,,67th Street,New York Public Library,328 East 67 Street,New York,10065.0,36061,Public Libraries,Public Libraries,995455.2,217957.2369
4,34151,,96th Street,New York Public Library,112 East 96 Street,New York,10128.0,36061,Public Libraries,Public Libraries,997595.4,225626.1266


In [12]:
# Colleges: rows with facsubgrp 'Colleges or Universities' whose idagency
# starts with 'NYSED', ordered by facility name.
# NOTE(review): SQLite's LTRIM(X, Y) removes any leading characters that
# appear in Y -- it is not a literal-prefix strip. The displayed IDs are
# numeric, so the outcome is the same here; confirm for other ID formats.
colleges_sql = """SELECT uid, LTRIM(idagency,'NYSED: ') as idagency, facname, opname, address, city, zipcode, 
bcode, facsubgrp, factype, xcoord, ycoord
FROM facilities
WHERE facsubgrp = 'Colleges or Universities' 
AND idagency LIKE 'NYSED%'
ORDER BY facname; """

# The query only references the facilities table, so hand pandasql just that.
colleges = ps.sqldf(colleges_sql, {'facilities': facilities})
print(f"{len(colleges)} colleges:")
colleges.head()

145 colleges:


Unnamed: 0,uid,idagency,facname,opname,address,city,zipcode,bcode,facsubgrp,factype,xcoord,ycoord
0,61832,310200772075,Amer Academy Of Dramatic Arts,Amer Academy Of Dramatic Arts,120 Madison Ave,New York,10016.0,36061,Colleges or Universities,2 Year Independent,988473.1807,210885.3396
1,115592,310200770000,American Acad Mcallister Inst,American Acad Mcallister Inst,619 W 54 St,New York,10019.0,36061,Colleges or Universities,2 Year Independent,985959.2149,219415.3536
2,178159,800000056189,American University Of Beirut,American University Of Beirut,850 3rd Ave,New York,10022.0,36061,Colleges or Universities,4-Year Independent,992464.3249,214960.9895
3,193614,800000066391,Art Institute Of New York City,Art Institute Of New York City,218 West 40 Street,New York,10018.0,36061,Colleges or Universities,2 Year Proprietary,987279.3582,214336.5084
4,23523,331300260006,"Asa College, Inc.","Asa College, Inc.",81 Willoughby St,Brooklyn,11201.0,36047,Colleges or Universities,2 Year Proprietary,988109.347,191490.4357


In [13]:
# Public schools: rows in the 'Public K-12 Schools' or 'Charter K-12 Schools'
# subgroups that either have a non-empty idagency or a boro starting with
# 'Staten'. Three factypes and three specific uids are excluded.
# NOTE(review): the reason for excluding uids 23745, 117627 and 122377 is not
# recorded in this notebook -- document it or move them to a named constant.
# NOTE(review): SQLite's ltrim(X, Y) strips any leading characters found in Y
# (a character set, not a prefix); fine while util values start with digits.
public_schools_sql = """SELECT uid, idagency, facname, opname, address, city, zipcode, bcode, facsubgrp, factype, ltrim(util,'NYCDOE: ') AS util, xcoord, ycoord 
FROM facilities 
WHERE (facsubgrp = 'Public K-12 Schools' OR facsubgrp = 'Charter K-12 Schools' ) AND (idagency !='' OR boro LIKE 'Staten%')
AND factype NOT IN ('Educational Skills Center','Special Education School','Vocational High School') AND uid NOT IN ('23745','117627','122377')
ORDER BY facname; """

# The query only references the facilities table, so hand pandasql just that.
public_schools = ps.sqldf(public_schools_sql, {'facilities': facilities})
print(f"{len(public_schools)} public schools:")
public_schools.head()

2203 public schools:


Unnamed: 0,uid,idagency,facname,opname,address,city,zipcode,bcode,facsubgrp,factype,util,xcoord,ycoord
0,146904,NYCDOE: M540-M540,A. Philip Randolph Campus High School,NYC Department of Education,443 West 135 Street,New York,10031.0,36061,Public K-12 Schools,High school - Public,1447.0,998115.9,237439.5975
1,97116,NYCDOE: Q290-Q290,A.C.E. Academy For Scholars At The Geraldine F...,NYC Department of Education,55-20 Metropolitan Avenue,Ridgewood,11385.0,36081,Public K-12 Schools,Elementary School - Public,546.0,1009681.0,198926.0539
2,107695,NYCDOE: K410-K410,Abraham Lincoln High School,NYC Department of Education,2800 Ocean Parkway,Brooklyn,11235.0,36047,Public K-12 Schools,High school - Public,2049.0,993061.3,151445.8003
3,61939,NYSED: 800000084067,Academic Leadership Charter Middle S,Academic Leadership Charter Middle School,470 Jackson Ave-3rd Fl,Bronx,10455.0,36005,Charter K-12 Schools,Charter School,,1009259.0,234806.1425
4,61862,NYCDOE: X491-X155,Academic Leadership Charter School,Academic Leadership Charter School,470 Jackson Avenue,Bronx,10455.0,36005,Charter K-12 Schools,K-8 School - Charter,250.0,1009341.0,234782.0136


In [14]:
# Private schools: rows with facsubgrp 'Non-Public K-12 Schools' whose idagency
# starts with 'NYSED', skipping factypes that start with 'Satellite Site',
# ordered by facility name.
# NOTE(review): SQLite's ltrim(X, Y) strips any leading characters found in Y
# (a character set, not a prefix); fine while the idagency and util values
# start with digits once the 'NYSED: ' label is gone -- confirm.
private_schools_sql = """SELECT uid, ltrim(idagency,'NYSED: ') as idagency, facname, opname, address, city, zipcode, 
bcode, facsubgrp, factype, ltrim(util,'NYSED: ') AS util, xcoord, ycoord
FROM facilities
WHERE facsubgrp = 'Non-Public K-12 Schools' 
AND idagency LIKE 'NYSED%' AND factype NOT LIKE 'Satellite Site%'
ORDER BY facname; """

# The query only references the facilities table, so hand pandasql just that.
private_schools = ps.sqldf(private_schools_sql, {'facilities': facilities})
print(f"{len(private_schools)} private schools:")
private_schools.head()

863 private schools:


Unnamed: 0,uid,idagency,facname,opname,address,city,zipcode,bcode,facsubgrp,factype,util,xcoord,ycoord
0,147055,342400995969,A Childs Place Too,A Childs Place Too,107-17 Northern Boulevard,East Elmhurst,11369.0,36081,Non-Public K-12 Schools,Elementary School - Non-public,53,1022691.0,215510.3419
1,90383,331500716881,A Fantis Parochial School,A Fantis Parochial School,195 State St,Brooklyn,11201.0,36047,Non-Public K-12 Schools,Elementary School - Non-public,131,986638.3,190741.1658
2,156680,331800996245,A Plus Kidz Academy,A Plus Kidz Academy,10504 Flatlands Ave,Brooklyn,11236.0,36047,Non-Public K-12 Schools,Elementary School - Non-public,37,1013618.0,175205.7129
3,84439,310200996009,Aaron School (8-12),Aaron School (8-12),42 East 30 Street,New York,10016.0,36061,Non-Public K-12 Schools,Middle School - Non-public,55,988712.4,210566.9125
4,157353,310200999964,Aaron School (K-7),Aaron School (K-7),309 E 45th St,New York,10017.0,36061,Non-Public K-12 Schools,Elementary School - Non-public,114,992554.6,213078.5208


## Create geodataframes in state plane CRS (EPSG 2263)

In [15]:
def create_point_shapes(df, x="xcoord", y="ycoord", epsg=2263):
    """ Create a point GeoDataFrame from a DataFrame with x,y coordinate columns

        Params:
            df (DataFrame): pandas DataFrame with one row per point
            x, y (str, optional): Default values x="xcoord", y="ycoord";
            column names for x and y coordinates
            epsg (int, optional): Default value epsg=2263; EPSG code of the
            coordinate system the x,y values are expressed in
        Returns:
            gdf: (GeoDataFrame) Point GeoDataFrame built from df, with a point
            geometry per row and the CRS given by epsg
        Raises:
            ValueError: if the x or y column contains null values
    """
    # Report exactly which coordinate column(s) hold nulls so the caller
    # knows what to clean up. ValueError is a subclass of Exception, so
    # callers that catch Exception keep working.
    null_cols = [col for col in (x, y) if df[col].isna().any()]
    if null_cols:
        raise ValueError(
            "DataFrame contains Null coordinates; "
            f"consider removing rows with Null values in {null_cols}"
        )

    points = [Point(xy) for xy in zip(df[x], df[y])]
    gdf = gpd.GeoDataFrame(df, geometry=points, crs=from_epsg(epsg))
    return gdf

In [16]:
def _named_geodataframe(df, name, label):
    """Build the point geodataframe for one facilities subgroup.

    The result gets a `name` attribute (used later when naming the output
    shapefile) and its row count is printed. Any failure is written to the
    error log and re-raised.
    """
    try:
        gdf = create_point_shapes(df)
        gdf.name = name
        print("Created geodataframe for", gdf.shape[0], label)
    except Exception:
        logger.exception(f"Could not create geodataframe from {label}")
        raise
    return gdf


hospitals_geo = _named_geodataframe(hospitals, 'hospitals', 'hospitals')
libraries_geo = _named_geodataframe(libraries, 'libraries', 'libraries')
colleges_geo = _named_geodataframe(colleges, 'colleges', 'colleges')
public_schools_geo = _named_geodataframe(public_schools, 'public_schools', 'public schools')
private_schools_geo = _named_geodataframe(private_schools, 'private_schools', 'private schools')

Created geodataframe for 59 hospitals
Created geodataframe for 213 libraries
Created geodataframe for 145 colleges
Created geodataframe for 2203 public schools
Created geodataframe for 863 private schools


## Write shapefiles to disk (in "outputs" folder)

In [16]:
def make_facilities_shapefiles(gdf, folder):
    """ Write one facilities geodataframe to disk as a point shapefile

        Rows sharing the same idagency, xcoord and ycoord are written once.
        The file is named "<name>_<monthyear>.shp", where <name> is gdf.name
        without a trailing "_geo" and <monthyear> is the lower-cased value of
        the module-level monthYear variable.

        Params:
            gdf (GeoDataFrame): geodataframe to write; it must carry a `name`
            attribute and the columns idagency, xcoord and ycoord (this
            notebook passes frames in NY State Plane (ft), EPSG 2263)
            folder (str): Name of the folder to put the facilities shapefile
            into; it is created when it does not exist yet

        Raises:
            Exception: any error is written to the error log and re-raised
    """
    try:
        # use the name of the geodataframe as naming convention for output
        name = re.sub(r"_geo$", "", gdf.name)

        unique_rows = gdf.drop_duplicates(subset=["idagency", "xcoord", "ycoord"])

        # make sure the output folder exists before writing into it
        os.makedirs(folder, exist_ok=True)

        # save the de-duplicated GeoDataFrame to a shapefile
        unique_rows.to_file(
            os.path.join(folder, f"{name}_{monthYear.lower()}.shp")
        )

        # report the number of rows actually written (after de-duplication)
        print("Created point shapefile for", unique_rows.shape[0], name)

    except Exception:
        logger.exception("Unexpected exception occurred")
        raise

In [17]:
# Write every subgroup to the 'outputs' folder, in the same order as above.
for facilities_gdf in (
    hospitals_geo,
    libraries_geo,
    colleges_geo,
    public_schools_geo,
    private_schools_geo,
):
    make_facilities_shapefiles(facilities_gdf, 'outputs')

Created point shapefile for 59 hospitals
Created point shapefile for 213 libraries
Created point shapefile for 145 colleges
Created point shapefile for 2203 public_schools
Created point shapefile for 863 private_schools
