In [1]:
pip install sodapy

Note: you may need to restart the kernel to use updated packages.


In [2]:
import yaml
import pandas as pd
from sodapy import Socrata

### Using the Socrata API to access 311 service requests

In [3]:
# Read the NYC Open Data app token from the local credentials file. The context
# manager closes the file handle, which a bare open() call would leave open.
# NOTE(review): yaml.FullLoader is kept unchanged; yaml.safe_load would be the
# stricter choice if credentials.yml only contains plain mappings -- confirm.
with open('credentials.yml') as credentials_file:
    creds = yaml.load(credentials_file, Loader=yaml.FullLoader)

data_url='data.cityofnewyork.us'
data_set='erm2-nwe9'
app_token= creds['nyc_credentials']['app_token']
client = Socrata(data_url,app_token)
client.timeout = 60

# Server-side filter: Encampment complaints routed to DHS, created inside the
# requested date window.
encampment_filter = "Agency='DHS'and complaint_type='Encampment' and created_date between '2021-09-01' and '2024-05-20'"

# The request is capped at `row_limit` rows; hitting the cap exactly means the
# result was probably truncated, so say so instead of failing silently.
row_limit = 30000
results = client.get(data_set, where=encampment_filter, limit=row_limit)
if len(results) >= row_limit:
    print(f"WARNING: received {len(results)} rows, which equals the request limit; "
          "the result may be truncated.")

encampment_complaints = pd.DataFrame.from_records(results)


In [4]:
# Fetch the dataset's metadata and display the name of every published column.
metadata = client.get_metadata(data_set)
[column['name'] for column in metadata['columns']]

['Unique Key',
 'Created Date',
 'Closed Date',
 'Agency',
 'Agency Name',
 'Complaint Type',
 'Descriptor',
 'Location Type',
 'Incident Zip',
 'Incident Address',
 'Street Name',
 'Cross Street 1',
 'Cross Street 2',
 'Intersection Street 1',
 'Intersection Street 2',
 'Address Type',
 'City',
 'Landmark',
 'Facility Type',
 'Status',
 'Due Date',
 'Resolution Description',
 'Resolution Action Updated Date',
 'Community Board',
 'BBL',
 'Borough',
 'X Coordinate (State Plane)',
 'Y Coordinate (State Plane)',
 'Open Data Channel Type',
 'Park Facility Name',
 'Park Borough',
 'Vehicle Type',
 'Taxi Company Borough',
 'Taxi Pick Up Location',
 'Bridge Highway Name',
 'Bridge Highway Direction',
 'Road Ramp',
 'Bridge Highway Segment',
 'Latitude',
 'Longitude',
 'Location',
 'Zip Codes',
 'Community Districts',
 'Borough Boundaries',
 'City Council Districts',
 'Police Precincts',
 'Police Precinct']

In [5]:
# Preview the first five downloaded complaint records.
encampment_complaints.head(n=5)

Unnamed: 0,unique_key,created_date,closed_date,agency,agency_name,complaint_type,descriptor,location_type,incident_zip,incident_address,...,open_data_channel_type,park_facility_name,park_borough,latitude,longitude,location,bridge_highway_name,road_ramp,bridge_highway_segment,bridge_highway_direction
0,61207002,2024-05-19T22:21:31.000,2024-05-25T15:18:32.000,DHS,Department of Homeless Services,Encampment,,Street/Sidewalk,10014.0,162 WEST 4 STREET,...,ONLINE,Unspecified,MANHATTAN,40.73193713547279,-74.00132781042846,"{'latitude': '40.73193713547279', 'longitude':...",,,,
1,61203501,2024-05-19T20:40:31.000,2024-05-24T11:29:21.000,DHS,Department of Homeless Services,Encampment,,Store/Commercial,10009.0,408 EAST 13 STREET,...,MOBILE,Unspecified,MANHATTAN,40.730453663275725,-73.9826161731861,"{'latitude': '40.730453663275725', 'longitude'...",,,,
2,61199983,2024-05-19T20:08:23.000,,DHS,Department of Homeless Services,Encampment,,Residential Building/House,10011.0,66 WEST 12 STREET,...,MOBILE,Unspecified,MANHATTAN,40.73574408128484,-73.99713854937615,"{'latitude': '40.73574408128484', 'longitude':...",,,,
3,61203512,2024-05-19T19:29:06.000,2024-05-20T13:21:43.000,DHS,Department of Homeless Services,Encampment,,Subway,,,...,ONLINE,Unspecified,MANHATTAN,40.75513193220205,-73.98411847753619,"{'latitude': '40.75513193220205', 'longitude':...",F,42 St & 6 Av NW and NE corners,Entrance,
4,61201154,2024-05-19T18:10:32.000,2024-05-20T00:25:23.000,DHS,Department of Homeless Services,Encampment,,Residential Building/House,10011.0,154 NINTH AVENUE,...,MOBILE,Unspecified,MANHATTAN,40.744293981051925,-74.00291595058681,"{'latitude': '40.744293981051925', 'longitude'...",,,,


In [6]:
# Parse the creation timestamps, then keep only their calendar-date part.
created_timestamps = pd.to_datetime(encampment_complaints["created_date"])
encampment_complaints["created_date"] = created_timestamps.dt.date

In [7]:
# Earliest creation date present in the download (sanity check on the window).
created_dates = encampment_complaints["created_date"]
min_date = created_dates.min()
print(min_date)

2021-09-01


In [8]:
# Persist the complaint records to CSV without writing the row index.
encampment_complaints.to_csv(
    path_or_buf="encampment_complaints.csv",
    index=False,
)

In [9]:
# Count distinct complaint keys for each value of created_date and turn the
# grouped result back into a two-column frame.
complaint_keys_by_date = encampment_complaints.groupby('created_date')['unique_key']
aggregated_encampment_complaints = complaint_keys_by_date.nunique().reset_index()

In [10]:
# Rename the two columns of the aggregated frame.
# NOTE(review): the frame was grouped by 'created_date', which holds calendar
# dates at this point, so each row is a per-day count even though the labels
# say "month" -- confirm whether a monthly roll-up or daily labels were intended.
aggregated_encampment_complaints.columns = ['month_year','encampment_complaints_by_month']

In [11]:
# Preview the first five rows of the aggregated counts.
aggregated_encampment_complaints.head(n=5)

Unnamed: 0,month_year,encampment_complaints_by_month
0,2021-09-01,15
1,2021-09-02,12
2,2021-09-03,18
3,2021-09-04,17
4,2021-09-05,21


In [12]:
# Persist the aggregated counts to CSV without writing the row index.
aggregated_encampment_complaints.to_csv(
    path_or_buf="aggregated_encampment_complaints.csv",
    index=False,
)

In [13]:
# Read the NYC Open Data app token from the local credentials file. The context
# manager closes the file handle, which a bare open() call would leave open.
# NOTE(review): yaml.FullLoader is kept unchanged; yaml.safe_load would be the
# stricter choice if credentials.yml only contains plain mappings -- confirm.
with open('credentials.yml') as credentials_file:
    creds = yaml.load(credentials_file, Loader=yaml.FullLoader)

data_url='data.cityofnewyork.us'
data_set='k46n-sa2m'
app_token= creds['nyc_credentials']['app_token']
client = Socrata(data_url,app_token)
client.timeout = 60

# A plain client.get() without `limit` only returns the API's default page
# (1000 rows), silently dropping the rest of the dataset. get_all() pages
# through every row instead. An explicit order keeps paging stable and the
# newest census first.
# NOTE(review): assumes one row per date_of_census; if dates can repeat, add a
# tie-breaker to the order clause.
results = list(client.get_all(data_set, order="date_of_census DESC"))
daily_dhs_report = pd.DataFrame.from_records(results)

In [14]:
# Preview the first five rows of the downloaded daily report.
daily_dhs_report.head(n=5)

Unnamed: 0,date_of_census,total_adults_in_shelter,total_children_in_shelter,total_individuals_in_shelter,single_adult_men_in_shelter,single_adult_women_in_shelter,total_single_adults_in_shelter,families_with_children_in_shelter,adults_in_families_with_children_in_shelter,children_in_families_with_children_in_shelter,total_individuals_in_families_with_children_in_shelter_,adult_families_in_shelter,individuals_in_adult_families_in_shelter
0,2024-05-28T00:00:00.000,53911,32552,86463,15393,4994,20387,18957,29545,32552,62097,1851,3979
1,2024-05-27T00:00:00.000,53793,32601,86394,15274,5013,20287,18977,29560,32601,62161,1833,3946
2,2024-05-26T00:00:00.000,53742,32605,86347,15243,5006,20249,18962,29535,32605,62140,1839,3958
3,2024-05-25T00:00:00.000,53648,32583,86231,15184,4999,20183,18942,29513,32583,62096,1836,3952
4,2024-05-24T00:00:00.000,53728,32569,86297,15235,5010,20245,18930,29519,32569,62088,1842,3964


In [15]:
# Parse the census timestamps and keep only their calendar-date part.
census_timestamps = pd.to_datetime(daily_dhs_report["date_of_census"])
daily_dhs_report["date_of_census"] = census_timestamps.dt.date

# Cast the count columns used in later calculations from str to int;
# errors='raise' makes any value that cannot be converted fail loudly.
int_columns = (
    "single_adult_men_in_shelter",
    "total_adults_in_shelter",
    "single_adult_women_in_shelter",
)
daily_dhs_report = daily_dhs_report.astype(dict.fromkeys(int_columns, int), errors='raise')

In [16]:
# Preview the first five rows after the date and dtype conversions.
daily_dhs_report.head(n=5)

Unnamed: 0,date_of_census,total_adults_in_shelter,total_children_in_shelter,total_individuals_in_shelter,single_adult_men_in_shelter,single_adult_women_in_shelter,total_single_adults_in_shelter,families_with_children_in_shelter,adults_in_families_with_children_in_shelter,children_in_families_with_children_in_shelter,total_individuals_in_families_with_children_in_shelter_,adult_families_in_shelter,individuals_in_adult_families_in_shelter
0,2024-05-28,53911,32552,86463,15393,4994,20387,18957,29545,32552,62097,1851,3979
1,2024-05-27,53793,32601,86394,15274,5013,20287,18977,29560,32601,62161,1833,3946
2,2024-05-26,53742,32605,86347,15243,5006,20249,18962,29535,32605,62140,1839,3958
3,2024-05-25,53648,32583,86231,15184,4999,20183,18942,29513,32583,62096,1836,3952
4,2024-05-24,53728,32569,86297,15235,5010,20245,18930,29519,32569,62088,1842,3964


In [17]:
# Persist the daily report to CSV without writing the row index.
daily_dhs_report.to_csv(
    path_or_buf="daily_dhs_report.csv",
    index=False,
)

In [18]:
# Read the NYC Open Data app token from the local credentials file. The context
# manager closes the file handle, which a bare open() call would leave open.
# NOTE(review): yaml.FullLoader is kept unchanged; yaml.safe_load would be the
# stricter choice if credentials.yml only contains plain mappings -- confirm.
with open('credentials.yml') as credentials_file:
    creds = yaml.load(credentials_file, Loader=yaml.FullLoader)

data_url='data.cityofnewyork.us'
data_set='3qem-6v3v'
app_token= creds['nyc_credentials']['app_token']
client = Socrata(data_url,app_token)
client.timeout = 60

# Server-side filter: reports dated inside the requested window.
report_window = "report_date between '2021-09-01' and '2024-05-20'"

# A plain client.get() without `limit` only returns the API's default page
# (1000 rows), silently dropping any further matches. get_all() pages through
# every matching row; ordering by the system row id keeps paging stable.
results = list(client.get_all(data_set, where=report_window, order=":id"))
dhs_buildings = pd.DataFrame.from_records(results)


In [19]:
# Fetch the dataset's metadata and display the name of every published column.
metadata = client.get_metadata(data_set)
[column['name'] for column in metadata['columns']]

['Report Date',
 'Borough',
 'Community District',
 'Adult Family Comm Hotel',
 'Adult Family Shelter',
 'Adult Shelter',
 'Adult Shelter Comm Hotel',
 'FWC Cluster',
 'FWC Comm Hotel',
 'FWC Shelter']

In [20]:
# Preview the first five downloaded building-count rows.
dhs_buildings.head(n=5)

Unnamed: 0,report_date,borough,community_district,adult_shelter,adult_shelter_comm_hotel,fwc_shelter,adult_family_shelter,fwc_cluster,fwc_comm_hotel,adult_family_comm_hotel
0,2021-09-30T00:00:00.000,Bronx,201,3.0,2.0,6,,,,
1,2021-09-30T00:00:00.000,Bronx,202,,,5,1.0,1.0,,
2,2021-09-30T00:00:00.000,Bronx,203,7.0,2.0,6,1.0,,,
3,2021-09-30T00:00:00.000,Bronx,204,2.0,,13,,2.0,,
4,2021-09-30T00:00:00.000,Bronx,205,3.0,,7,1.0,3.0,,


In [21]:
# Replace missing values with 0. Reassigning the result (instead of
# inplace=True) keeps the step explicit and chain-friendly.
# NOTE(review): presumably a missing cell means no buildings of that type --
# confirm. If the API delivers the counts as strings, the filled columns will
# mix str values with the integer 0; consider casting them to a numeric dtype.
dhs_buildings = dhs_buildings.fillna(0)

In [22]:
# Preview the first five rows after missing values were filled.
dhs_buildings.head(n=5)

Unnamed: 0,report_date,borough,community_district,adult_shelter,adult_shelter_comm_hotel,fwc_shelter,adult_family_shelter,fwc_cluster,fwc_comm_hotel,adult_family_comm_hotel
0,2021-09-30T00:00:00.000,Bronx,201,3,2,6,0,0,0,0
1,2021-09-30T00:00:00.000,Bronx,202,0,0,5,1,1,0,0
2,2021-09-30T00:00:00.000,Bronx,203,7,2,6,1,0,0,0
3,2021-09-30T00:00:00.000,Bronx,204,2,0,13,0,2,0,0
4,2021-09-30T00:00:00.000,Bronx,205,3,0,7,1,3,0,0


In [23]:
# Parse the report timestamps, then keep only the calendar date (YYYY-MM-DD).
report_timestamps = pd.to_datetime(dhs_buildings["report_date"])
dhs_buildings["report_date"] = report_timestamps.dt.date

In [24]:
# Persist the building counts to CSV without writing the row index.
dhs_buildings.to_csv(
    path_or_buf="dhs_buildings.csv",
    index=False,
)