# NYC Facilities Database

Retrieves data from the NYC Open Data API. Five tables are generated: hospitals, libraries, colleges, public schools, and private schools. The initial data retrieved from the API is written to a JSON file; the final output is written to a SQLite database.

https://www1.nyc.gov/site/planning/data-maps/open-data/dwn-selfac.page

## Variables

In [30]:
import os, pandas as pd, json, requests, pandasql as ps, sqlite3 as lite

## Download data from API and write to disk as a json file

In [5]:
# Location of the json dump file that stores the raw API data
fjsonpath = os.path.join(
    'outputs',
    'facilities.json',
)

In [6]:
# Request the records from the API and, on success, cache them in the json dump file.
# Code 200 = success, do not rerun this block unless it's necessary

fjson_url = "https://data.cityofnewyork.us/resource/ji82-xba5.json?$limit=50000"
# A timeout keeps the cell from hanging indefinitely if the server stops responding
response = requests.get(fjson_url, timeout=60)
if response.status_code == 200:
    facilities_data = response.json()
    # Make sure the dump folder exists so open() cannot fail on a fresh checkout
    dump_dir = os.path.dirname(fjsonpath)
    if dump_dir:
        os.makedirs(dump_dir, exist_ok=True)
    with open(fjsonpath, 'w') as f:
        json.dump(facilities_data, f)
    print('Data dumped to json file')
else:
    # Report the status code that came back (the original misspelled `response` here,
    # which raised a NameError instead of printing the diagnostic)
    print('Problem with retrieval, response code', response.status_code)

Data dumped to json file


## Load json file and create a pandas dataframe for all facilities

In [31]:
# Restore the cached API response from the json dump file, then build the
# facilities data frame from that same file
with open(fjsonpath, 'r') as dump_file:
    facilities_data = json.load(dump_file)

facilities = pd.read_json(fjsonpath, orient='records')
facilities.head()

Unnamed: 0,geom,uid,facname,address,city,boro,borocode,zipcode,latitude,longitude,...,datalink,idagency,uid_merged,area,areatype,capacity,captype,util,utilrate,idold
0,0101000020E6100000BB6246787B7B52C03BE466B8015B...,151722,Bkn01g,Scholes Street,Brooklyn,Brooklyn,3.0,11237.0,40.71099,-73.929411,...,,,,,,,,,,
1,0101000020E61000007F4C6BD3D87C52C01FBAA0BE655A...,181303,"Eng 216,Lad 108,Bat. 35",445 Broadway,Brooklyn,Brooklyn,3.0,11211.0,40.70623,-73.950734,...,NYCDCAS: http://www1.nyc.gov/site/planning/dat...,,,,,,,,,
2,0101000020E610000024ED461FF37C52C08236397CD25B...,168047,North 10th & Union Ave Plaza,North 10th,Brooklyn,Brooklyn,3.0,11211.0,40.717361,-73.952339,...,,NYCDOT: 66,,,,,,,,
3,0101000020E6100000C91F0C3CF77C52C0D3156C239E5E...,147855,Pulaski Bridge,,Brooklyn,Brooklyn,3.0,,40.739201,-73.95259,...,,,,,,,,,,
4,0101000020E6100000E2E995B20C7F52C0B41EBE4C1461...,182240,Bertelsmann,1548 Broadway,New York,Manhattan,1.0,10036.0,40.758432,-73.98515,...,,NYCDCP: M050100,,,,,,,,


In [32]:
# Add a bcode column: borocode translated into the 5-digit FIPS code,
# for interoperability with US Census data

bcode_map = {
    1.0: '36061',
    2.0: '36005',
    3.0: '36047',
    4.0: '36081',
    5.0: '36085',
}
# Values that are not keys of bcode_map are left as they are by replace()
facilities['bcode'] = facilities['borocode'].replace(bcode_map)
facilities.head()

Unnamed: 0,geom,uid,facname,address,city,boro,borocode,zipcode,latitude,longitude,...,idagency,uid_merged,area,areatype,capacity,captype,util,utilrate,idold,bcode
0,0101000020E6100000BB6246787B7B52C03BE466B8015B...,151722,Bkn01g,Scholes Street,Brooklyn,Brooklyn,3.0,11237.0,40.71099,-73.929411,...,,,,,,,,,,36047
1,0101000020E61000007F4C6BD3D87C52C01FBAA0BE655A...,181303,"Eng 216,Lad 108,Bat. 35",445 Broadway,Brooklyn,Brooklyn,3.0,11211.0,40.70623,-73.950734,...,,,,,,,,,,36047
2,0101000020E610000024ED461FF37C52C08236397CD25B...,168047,North 10th & Union Ave Plaza,North 10th,Brooklyn,Brooklyn,3.0,11211.0,40.717361,-73.952339,...,NYCDOT: 66,,,,,,,,,36047
3,0101000020E6100000C91F0C3CF77C52C0D3156C239E5E...,147855,Pulaski Bridge,,Brooklyn,Brooklyn,3.0,,40.739201,-73.95259,...,,,,,,,,,,36047
4,0101000020E6100000E2E995B20C7F52C0B41EBE4C1461...,182240,Bertelsmann,1548 Broadway,New York,Manhattan,1.0,10036.0,40.758432,-73.98515,...,NYCDCP: M050100,,,,,,,,,36061


## Subset facilities data frame into 5 facility subgroups

In [33]:
# Build the colleges data frame: facilities in the 'Colleges or Universities'
# subgroup whose idagency starts with NYSED, sorted by facility name.
# NOTE: SQLite's LTRIM(X, Y) strips every leading character that appears in Y
# (a character set, not a literal prefix), so this relies on the remaining id
# not starting with any of the characters in 'NYSED: '.

colleges_sql = """SELECT uid, LTRIM(idagency,'NYSED: ') as idagency, facname, opname, address, city, zipcode, 
bcode, facsubgrp, factype, xcoord, ycoord
FROM facilities
WHERE facsubgrp = 'Colleges or Universities' 
AND idagency LIKE 'NYSED%'
ORDER BY facname; """

# Hand pandasql only the frame the query reads from
colleges = ps.sqldf(colleges_sql, {'facilities': facilities})
print('Colleges:')
colleges.head()

Colleges:


Unnamed: 0,uid,idagency,facname,opname,address,city,zipcode,bcode,facsubgrp,factype,xcoord,ycoord
0,61832,310200772075,Amer Academy Of Dramatic Arts,Amer Academy Of Dramatic Arts,120 Madison Ave,New York,10016.0,36061,Colleges or Universities,2 Year Independent,988473.1807,210885.3396
1,115592,310200770000,American Acad Mcallister Inst,American Acad Mcallister Inst,619 W 54 St,New York,10019.0,36061,Colleges or Universities,2 Year Independent,985959.2149,219415.3536
2,178159,800000056189,American University Of Beirut,American University Of Beirut,850 3rd Ave,New York,10022.0,36061,Colleges or Universities,4-Year Independent,992464.3249,214960.9895
3,193614,800000066391,Art Institute Of New York City,Art Institute Of New York City,218 West 40 Street,New York,10018.0,36061,Colleges or Universities,2 Year Proprietary,987279.3582,214336.5084
4,23523,331300260006,"Asa College, Inc.","Asa College, Inc.",81 Willoughby St,Brooklyn,11201.0,36047,Colleges or Universities,2 Year Proprietary,988109.347,191490.4357


In [34]:
# Build the hospitals data frame: facilities of type 'Hospital' whose idagency
# starts with NYSDOH, sorted by facility name.
# NOTE: SQLite's LTRIM(X, Y) strips every leading character that appears in Y
# (a character set, not a literal prefix), so this relies on the remaining id
# not starting with any of the characters in 'NYSDOH: '.

hospitals_sql = """SELECT uid, LTRIM(idagency,'NYSDOH: ') as idagency, facname, opname, address, city, zipcode, 
bcode, facsubgrp, factype, xcoord, ycoord
FROM facilities
WHERE factype = 'Hospital' 
AND idagency LIKE 'NYSDOH%'
ORDER BY facname; """

# Hand pandasql only the frame the query reads from
hospitals = ps.sqldf(hospitals_sql, {'facilities': facilities})
print('Hospitals:')
hospitals.head()

Hospitals:


Unnamed: 0,uid,idagency,facname,opname,address,city,zipcode,bcode,facsubgrp,factype,xcoord,ycoord
0,122497,1438,Bellevue Hospital Center,New York City Health and Hospital Corporation,462 1 Avenue,New York,10016.0,36061,Hospitals and Clinics,Hospital,991144.9,208553.7637
1,154701,1178,BronxCare Hospital Center,BronxCare Health System,1650 Grand Concourse,Bronx,10457.0,36005,Hospitals and Clinics,Hospital,1008866.0,246591.3003
2,105716,1164,BronxCare Hospital Center,BronxCare Health System,1276 Fulton Avenue,Bronx,10456.0,36005,Hospitals and Clinics,Hospital,1011033.0,242195.8313
3,25716,1286,Brookdale Hospital Medical Center,"One Brooklyn Health System, Inc.",1 Brookdale Plaza,Brooklyn,11212.0,36047,Hospitals and Clinics,Hospital,1008551.0,177904.688
4,155377,1288,Brooklyn Hospital Center - Downtown Campus,The Brooklyn Hospital Center,121 Dekalb Avenue,Brooklyn,11201.0,36047,Hospitals and Clinics,Hospital,990381.7,190892.4034


In [35]:
# Build the libraries data frame: facilities of type 'Public Libraries',
# leaving out rows whose name matches one of the excluded phrases below,
# sorted by facility name.

libraries_sql = """SELECT uid, idagency, facname, opname, address, city, zipcode, 
bcode, facsubgrp, factype, xcoord, ycoord
FROM facilities
WHERE factype = 'Public Libraries' 
AND facname NOT LIKE '%Reading and Writing%'
AND facname NOT LIKE '%Learning Center%'
AND facname NOT LIKE '%Information Center%'
AND facname NOT LIKE '%Circulation Services%'
AND facname NOT LIKE '%Family Literacy%'
AND facname NOT LIKE '%Cyber Center%'
AND facname NOT LIKE '%New Americas Program%'
AND facname NOT LIKE '%International Resource%'
ORDER BY facname; """

# Hand pandasql only the frame the query reads from
libraries = ps.sqldf(libraries_sql, {'facilities': facilities})
print('Libraries:')
libraries.head()

Libraries:


Unnamed: 0,uid,idagency,facname,opname,address,city,zipcode,bcode,facsubgrp,factype,xcoord,ycoord
0,129643,,115th Street,New York Public Library,203 West 115 Street,New York,10026.0,36061,Public Libraries,Public Libraries,997115.2,231826.4715
1,21658,,125th Street,New York Public Library,224 East 125 Street,New York,10035.0,36061,Public Libraries,Public Libraries,1002286.0,231844.9933
2,105120,,58th Street,New York Public Library,127 East 58 Street,New York,10022.0,36061,Public Libraries,Public Libraries,992748.0,216979.9407
3,134066,,67th Street,New York Public Library,328 East 67 Street,New York,10065.0,36061,Public Libraries,Public Libraries,995455.2,217957.2369
4,34151,,96th Street,New York Public Library,112 East 96 Street,New York,10128.0,36061,Public Libraries,Public Libraries,997595.4,225626.1266


In [36]:
# Build the private schools data frame: facilities in the
# 'Non-Public K-12 Schools' subgroup whose idagency starts with NYSED,
# excluding satellite sites, sorted by facility name. The util column is
# exposed as "enrolled".
# NOTE: SQLite's ltrim(X, Y) strips every leading character that appears in Y
# (a character set, not a literal prefix), so this relies on the remaining
# value not starting with any of the characters in 'NYSED: '.

private_schools_sql = """SELECT uid, ltrim(idagency,'NYSED: ') as idagency, facname, opname, address, city, zipcode, 
bcode, facsubgrp, factype, ltrim(util,'NYSED: ') AS enrolled, xcoord, ycoord
FROM facilities
WHERE facsubgrp = 'Non-Public K-12 Schools' 
AND idagency LIKE 'NYSED%' AND factype NOT LIKE 'Satellite Site%'
ORDER BY facname; """

# Hand pandasql only the frame the query reads from
private_schools = ps.sqldf(private_schools_sql, {'facilities': facilities})
print('Private Schools:')
private_schools.head()

Private Schools:


Unnamed: 0,uid,idagency,facname,opname,address,city,zipcode,bcode,facsubgrp,factype,enrolled,xcoord,ycoord
0,147055,342400995969,A Childs Place Too,A Childs Place Too,107-17 Northern Boulevard,East Elmhurst,11369.0,36081,Non-Public K-12 Schools,Elementary School - Non-public,53,1022691.0,215510.3419
1,90383,331500716881,A Fantis Parochial School,A Fantis Parochial School,195 State St,Brooklyn,11201.0,36047,Non-Public K-12 Schools,Elementary School - Non-public,131,986638.3,190741.1658
2,156680,331800996245,A Plus Kidz Academy,A Plus Kidz Academy,10504 Flatlands Ave,Brooklyn,11236.0,36047,Non-Public K-12 Schools,Elementary School - Non-public,37,1013618.0,175205.7129
3,84439,310200996009,Aaron School (8-12),Aaron School (8-12),42 East 30 Street,New York,10016.0,36061,Non-Public K-12 Schools,Middle School - Non-public,55,988712.4,210566.9125
4,157353,310200999964,Aaron School (K-7),Aaron School (K-7),309 E 45th St,New York,10017.0,36061,Non-Public K-12 Schools,Elementary School - Non-public,114,992554.6,213078.5208


In [37]:
# Build the public schools data frame: facilities in the public or charter
# K-12 subgroups that either have a non-empty idagency or are in a borough
# whose name starts with 'Staten', minus three facility types and three
# specific uids, sorted by facility name. The util column is exposed as "enrolled".
# NOTE: SQLite's ltrim(X, Y) strips every leading character that appears in Y
# (a character set, not a literal prefix).

public_schools_sql = """SELECT uid, idagency, facname, opname, address, city, zipcode, bcode, facsubgrp, factype, ltrim(util,'NYCDOE: ') AS enrolled, xcoord, ycoord 
FROM facilities 
WHERE (facsubgrp = 'Public K-12 Schools' OR facsubgrp = 'Charter K-12 Schools' ) AND (idagency !='' OR boro LIKE 'Staten%')
AND factype NOT IN ('Educational Skills Center','Special Education School','Vocational High School') AND uid NOT IN ('23745','117627','122377')
ORDER BY facname; """

# Hand pandasql only the frame the query reads from
public_schools = ps.sqldf(public_schools_sql, {'facilities': facilities})
print('Public Schools:')
public_schools.head()

Public Schools:


Unnamed: 0,uid,idagency,facname,opname,address,city,zipcode,bcode,facsubgrp,factype,enrolled,xcoord,ycoord
0,146904,NYCDOE: M540-M540,A. Philip Randolph Campus High School,NYC Department of Education,443 West 135 Street,New York,10031.0,36061,Public K-12 Schools,High school - Public,1447.0,998115.9,237439.5975
1,97116,NYCDOE: Q290-Q290,A.C.E. Academy For Scholars At The Geraldine F...,NYC Department of Education,55-20 Metropolitan Avenue,Ridgewood,11385.0,36081,Public K-12 Schools,Elementary School - Public,546.0,1009681.0,198926.0539
2,107695,NYCDOE: K410-K410,Abraham Lincoln High School,NYC Department of Education,2800 Ocean Parkway,Brooklyn,11235.0,36047,Public K-12 Schools,High school - Public,2049.0,993061.3,151445.8003
3,61939,NYSED: 800000084067,Academic Leadership Charter Middle S,Academic Leadership Charter Middle School,470 Jackson Ave-3rd Fl,Bronx,10455.0,36005,Charter K-12 Schools,Charter School,,1009259.0,234806.1425
4,61862,NYCDOE: X491-X155,Academic Leadership Charter School,Academic Leadership Charter School,470 Jackson Avenue,Bronx,10455.0,36005,Charter K-12 Schools,K-8 School - Charter,250.0,1009341.0,234782.0136


## Insert each facility subgroup as a table into a SQLite database

In [38]:
# Open the SQLite output database and grab a cursor for the steps that follow.
# Prior to this step, create an empty spatialite database in the outputs folder using the spatialite GUI

db_path = os.path.join('outputs', 'facilities.sqlite')
con = lite.connect(db_path)
cur = con.cursor()
print('Connected to SQLite database: {}'.format(db_path))

Connected to SQLite database: outputs\facilities.sqlite


In [39]:
def write_table(table_name, create_sql, frame, index_label=None):
    """Rebuild one SQLite table from a data frame and print its row count.

    Drops ``table_name`` if it exists, recreates it with ``create_sql``,
    appends the rows of ``frame`` and prints how many rows the table holds.
    Works through the notebook-level connection ``con`` and cursor ``cur``.

    Parameters
    ----------
    table_name : str
        Table to (re)create. It is concatenated into the SQL text, so pass
        only the constant names used in this notebook.
    create_sql : str
        The CREATE TABLE statement for ``table_name``.
    frame : pandas.DataFrame
        Rows to insert; its column names must match columns of the table.
    index_label : str, optional
        When given, the frame's index is written as well, into the column
        with this name. The frame itself is never modified, so the cell can
        be re-run safely.
    """
    cur.execute("DROP TABLE IF EXISTS " + table_name)
    cur.execute(create_sql)

    frame.to_sql(name=table_name, if_exists='append', con=con,
                 index=index_label is not None, index_label=index_label)
    con.commit()

    cur.execute('SELECT COUNT(*) FROM ' + table_name + ';')
    row_count = cur.fetchone()[0]
    print(row_count, 'records written to', table_name)


# Create hospitals table, insert the hospitals data and print the row count
write_table('a_hospitals', """
CREATE TABLE a_hospitals (
uid TEXT,
idagency TEXT NOT NULL PRIMARY KEY,
facname TEXT,
opname TEXT,
address TEXT,
city TEXT,
zipcode TEXT,
bcode TEXT,
facsubgrp TEXT,
factype TEXT,
xcoord REAL,
ycoord REAL);
""", hospitals)

# Create libraries table, insert the libraries data and print the row count
write_table('a_libraries', """
CREATE TABLE a_libraries (
uid TEXT NOT NULL PRIMARY KEY,
idagency TEXT,
facname TEXT,
opname TEXT,
address TEXT,
city TEXT,
zipcode TEXT,
bcode TEXT,
facsubgrp TEXT,
factype TEXT,
xcoord REAL,
ycoord REAL);
""", libraries)

# Create colleges table, insert the colleges data and print the row count
write_table('a_colleges', """
CREATE TABLE a_colleges (
uid INTEGER,
idagency TEXT NOT NULL PRIMARY KEY,
facname TEXT,
opname TEXT,
address TEXT,
city TEXT,
zipcode TEXT,
bcode TEXT,
facsubgrp TEXT,
factype TEXT,
xcoord REAL,
ycoord REAL);
""", colleges)

# Create public schools table, insert the public schools data and print the row count.
# The frame's index supplies the rowid primary key.
write_table('a_schools_public', """
CREATE TABLE a_schools_public (
rowid INTEGER NOT NULL PRIMARY KEY,
uid INTEGER,
idagency TEXT,
facname TEXT,
opname TEXT,
address TEXT,
city TEXT,
zipcode TEXT,
bcode TEXT,
facsubgrp TEXT,
factype TEXT,
enrolled INTEGER,
xcoord REAL,
ycoord REAL);
""", public_schools, index_label='rowid')

# Create private schools table, insert the private schools data and print the row count.
# NOTE: this table declares no rowid column, so the index values written as
# "rowid" land in SQLite's implicit rowid; kept so the stored rows match what
# this cell has always produced.
write_table('a_schools_private', """
CREATE TABLE a_schools_private (
uid INTEGER,
idagency TEXT NOT NULL PRIMARY KEY,
facname TEXT,
opname TEXT,
address TEXT,
city TEXT,
zipcode TEXT,
bcode TEXT,
facsubgrp TEXT,
factype TEXT,
enrolled INTEGER,
xcoord REAL,
ycoord REAL);
""", private_schools, index_label='rowid')

59 records written to a_hospitals
213 records written to a_libraries
145 records written to a_colleges
2203 records written to a_schools_public
863 records written to a_schools_private


In [40]:
# Close the cursor and then the connection itself, so the database file is released
cur.close()
con.close()