In [1]:
pip install sodapy

Note: you may need to restart the kernel to use updated packages.


In [2]:
import yaml
import pandas as pd
from sodapy import Socrata

### Using Socrata API to access 311 service requests

In [3]:
# Read the NYC Open Data app token from the local credentials file. The file is
# opened in a context manager so the handle is closed as soon as parsing is done
# (the previous bare open() call never closed it).
# NOTE(review): FullLoader is kept as in the original; yaml.safe_load would be
# sufficient if the file only holds plain mappings and strings — confirm.
with open('credentials.yml') as credentials_file:
    creds = yaml.load(credentials_file, Loader=yaml.FullLoader)

data_url = 'data.cityofnewyork.us'
data_set = 'erm2-nwe9'  # dataset queried for the 311 service requests
app_token = creds['nyc_credentials']['app_token']

client = Socrata(data_url, app_token)
client.timeout = 60  # request timeout used by the client (presumably seconds — confirm)

# Filtering for Encampment complaints to DHS.
# NOTE(review): the request is capped at 10000 rows and gives no explicit `order`;
# if more rows match the filter, the result is a truncated subset — confirm this
# is intended.
results = client.get(
    data_set,
    where="Agency='DHS'and complaint_type='Encampment'",
    limit=10000,
)
encampment_complaints = pd.DataFrame.from_records(results)


In [4]:
# Fetch the metadata of the current dataset and list the display name of
# every column it declares.
metadata = client.get_metadata(data_set)
[column['name'] for column in metadata['columns']]

['Unique Key',
 'Created Date',
 'Closed Date',
 'Agency',
 'Agency Name',
 'Complaint Type',
 'Descriptor',
 'Location Type',
 'Incident Zip',
 'Incident Address',
 'Street Name',
 'Cross Street 1',
 'Cross Street 2',
 'Intersection Street 1',
 'Intersection Street 2',
 'Address Type',
 'City',
 'Landmark',
 'Facility Type',
 'Status',
 'Due Date',
 'Resolution Description',
 'Resolution Action Updated Date',
 'Community Board',
 'BBL',
 'Borough',
 'X Coordinate (State Plane)',
 'Y Coordinate (State Plane)',
 'Open Data Channel Type',
 'Park Facility Name',
 'Park Borough',
 'Vehicle Type',
 'Taxi Company Borough',
 'Taxi Pick Up Location',
 'Bridge Highway Name',
 'Bridge Highway Direction',
 'Road Ramp',
 'Bridge Highway Segment',
 'Latitude',
 'Longitude',
 'Location',
 'Zip Codes',
 'Community Districts',
 'Borough Boundaries',
 'City Council Districts',
 'Police Precincts',
 'Police Precinct']

In [5]:
# Preview the first five encampment complaint records.
encampment_complaints.head(n=5)

Unnamed: 0,unique_key,created_date,agency,agency_name,complaint_type,descriptor,location_type,incident_zip,incident_address,street_name,...,latitude,longitude,location,resolution_action_updated_date,closed_date,resolution_description,bridge_highway_name,road_ramp,bridge_highway_segment,bridge_highway_direction
0,61236678,2024-05-22T23:37:26.000,DHS,Department of Homeless Services,Encampment,,Street/Sidewalk,10016,11 EAST 29 STREET,EAST 29 STREET,...,40.744946414604016,-73.98597587253091,"{'latitude': '40.744946414604016', 'longitude'...",,,,,,,
1,61235332,2024-05-22T20:18:28.000,DHS,Department of Homeless Services,Encampment,,Store/Commercial,11103,47-07 BROADWAY,BROADWAY,...,40.75628060536484,-73.91325289282614,"{'latitude': '40.756280605364836', 'longitude'...",2024-05-22T22:32:36.000,,,,,,
2,61232565,2024-05-22T20:12:02.000,DHS,Department of Homeless Services,Encampment,,Residential Building/House,10003,24 ST MARKS PLACE,ST MARKS PLACE,...,40.728972240538575,-73.98857340070353,"{'latitude': '40.728972240538575', 'longitude'...",,,,,,,
3,61238027,2024-05-22T19:12:54.000,DHS,Department of Homeless Services,Encampment,,Store/Commercial,11214,2444 86 STREET,86 STREET,...,40.598614479390086,-73.9883616460881,"{'latitude': '40.598614479390086', 'longitude'...",,,,,,,
4,61239341,2024-05-22T18:12:53.000,DHS,Department of Homeless Services,Encampment,,Street/Sidewalk,10025,2814 BROADWAY,BROADWAY,...,40.80336985390443,-73.96726396631105,"{'latitude': '40.80336985390443', 'longitude':...",,,,,,,


In [6]:
# Parse the creation timestamps, then keep only the calendar date
# (the time-of-day part is dropped).
created_timestamps = pd.to_datetime(encampment_complaints["created_date"])
encampment_complaints["created_date"] = created_timestamps.dt.date

In [7]:
# Persist the complaint records to disk, leaving out the positional index.
encampment_complaints.to_csv(path_or_buf="encampment_complaints.csv", index=False)

In [8]:
# Count the distinct request keys per creation date, then turn the grouped
# result back into an ordinary two-column frame.
aggregated_encampment_complaints = (
    encampment_complaints
    .groupby('created_date')['unique_key']
    .nunique()
    .reset_index()
)

In [9]:
 # Relabel the two columns of the aggregate, in order: grouping date, then count.
 # NOTE(review): the labels mention "month", but the frame is grouped by
 # `created_date`, which the notebook reduces to a calendar day — confirm the
 # intended granularity.
 aggregated_encampment_complaints = aggregated_encampment_complaints.set_axis(
     ['mon_year', 'encampment_complaints_by_month'], axis='columns'
 )

In [10]:
# Persist the per-date complaint counts to disk, leaving out the positional index.
aggregated_encampment_complaints.to_csv(
    path_or_buf="aggregated_encampment_complaints.csv",
    index=False,
)

In [11]:
# Preview the first five rows of the per-date complaint counts.
aggregated_encampment_complaints.head(n=5)

Unnamed: 0,mon_year,encampment_complaints_by_month
0,2023-05-10,2
1,2023-05-11,19
2,2023-05-12,47
3,2023-05-13,28
4,2023-05-14,23


In [12]:
# Read the NYC Open Data app token from the local credentials file. The file is
# opened in a context manager so the handle is closed as soon as parsing is done
# (the previous bare open() call never closed it).
# NOTE(review): FullLoader is kept as in the original; yaml.safe_load would be
# sufficient if the file only holds plain mappings and strings — confirm.
with open('credentials.yml') as credentials_file:
    creds = yaml.load(credentials_file, Loader=yaml.FullLoader)

data_url = 'data.cityofnewyork.us'
data_set = 'k46n-sa2m'  # dataset loaded into `daily_dhs_report`
app_token = creds['nyc_credentials']['app_token']

client = Socrata(data_url, app_token)
client.timeout = 60  # request timeout used by the client (presumably seconds — confirm)

# NOTE(review): no `limit` is passed, so the API's default page size applies
# (presumably 1000 rows — confirm); older rows may be silently missing.
results = client.get(data_set)
daily_dhs_report = pd.DataFrame.from_records(results)

In [13]:
# Preview the first five rows of the daily DHS report as returned by the API.
daily_dhs_report.head(n=5)

Unnamed: 0,date_of_census,total_adults_in_shelter,total_children_in_shelter,total_individuals_in_shelter,single_adult_men_in_shelter,single_adult_women_in_shelter,total_single_adults_in_shelter,families_with_children_in_shelter,adults_in_families_with_children_in_shelter,children_in_families_with_children_in_shelter,total_individuals_in_families_with_children_in_shelter_,adult_families_in_shelter,individuals_in_adult_families_in_shelter
0,2024-05-22T00:00:00.000,53751,32578,86329,15278,5019,20297,18927,29504,32578,62082,1834,3950
1,2024-05-21T00:00:00.000,53799,32612,86411,15322,5016,20338,18942,29527,32612,62139,1828,3934
2,2024-05-20T00:00:00.000,53776,32576,86352,15309,5009,20318,18937,29515,32576,62091,1831,3943
3,2024-05-19T00:00:00.000,53739,32555,86294,15269,5012,20281,18934,29519,32555,62074,1828,3939
4,2024-05-18T00:00:00.000,53663,32519,86182,15257,4983,20240,18914,29489,32519,62008,1826,3934


In [14]:
# Keep only the calendar date of each census timestamp.
census_timestamps = pd.to_datetime(daily_dhs_report["date_of_census"])
daily_dhs_report["date_of_census"] = census_timestamps.dt.date

# The counts arrive as strings; cast the columns used in later calculations
# to int. errors='raise' makes any value that cannot be converted fail loudly.
count_columns = [
    "single_adult_men_in_shelter",
    "total_adults_in_shelter",
    "single_adult_women_in_shelter",
]
daily_dhs_report = daily_dhs_report.astype(
    dict.fromkeys(count_columns, int),
    errors='raise',
)

In [15]:
# Add two per-day ratio columns: the total adult count divided by the number
# of single adult men, and by the number of single adult women.
# NOTE(review): the "avg_num_..." names suggest an average, but the value is
# total adults per single man / woman — confirm the intended metric.
total_adults = daily_dhs_report["total_adults_in_shelter"]
daily_dhs_report["avg_num_single_adult_men_in_shelter"] = total_adults.div(
    daily_dhs_report["single_adult_men_in_shelter"]
)
daily_dhs_report["avg_num_single_adult_women_in_shelter"] = total_adults.div(
    daily_dhs_report["single_adult_women_in_shelter"]
)

In [16]:
# Preview the first five rows, now including the two ratio columns.
daily_dhs_report.head(n=5)

Unnamed: 0,date_of_census,total_adults_in_shelter,total_children_in_shelter,total_individuals_in_shelter,single_adult_men_in_shelter,single_adult_women_in_shelter,total_single_adults_in_shelter,families_with_children_in_shelter,adults_in_families_with_children_in_shelter,children_in_families_with_children_in_shelter,total_individuals_in_families_with_children_in_shelter_,adult_families_in_shelter,individuals_in_adult_families_in_shelter,avg_num_single_adult_men_in_shelter,avg_num_single_adult_women_in_shelter
0,2024-05-22,53751,32578,86329,15278,5019,20297,18927,29504,32578,62082,1834,3950,3.518196,10.709504
1,2024-05-21,53799,32612,86411,15322,5016,20338,18942,29527,32612,62139,1828,3934,3.511226,10.725478
2,2024-05-20,53776,32576,86352,15309,5009,20318,18937,29515,32576,62091,1831,3943,3.512705,10.735875
3,2024-05-19,53739,32555,86294,15269,5012,20281,18934,29519,32555,62074,1828,3939,3.519484,10.722067
4,2024-05-18,53663,32519,86182,15257,4983,20240,18914,29489,32519,62008,1826,3934,3.517271,10.769215


In [17]:
# Persist the daily DHS report to disk, leaving out the positional index.
daily_dhs_report.to_csv(path_or_buf="daily_dhs_report.csv", index=False)

In [18]:
# Read the NYC Open Data app token from the local credentials file. The file is
# opened in a context manager so the handle is closed as soon as parsing is done
# (the previous bare open() call never closed it).
# NOTE(review): FullLoader is kept as in the original; yaml.safe_load would be
# sufficient if the file only holds plain mappings and strings — confirm.
with open('credentials.yml') as credentials_file:
    creds = yaml.load(credentials_file, Loader=yaml.FullLoader)

data_url = 'data.cityofnewyork.us'
data_set = '3qem-6v3v'  # dataset loaded into `dhs_buildings`
app_token = creds['nyc_credentials']['app_token']

client = Socrata(data_url, app_token)
client.timeout = 60  # request timeout used by the client (presumably seconds — confirm)

# NOTE(review): no `limit` is passed, so the API's default page size applies
# (presumably 1000 rows — confirm); older rows may be silently missing.
results = client.get(data_set)
dhs_buildings = pd.DataFrame.from_records(results)

In [19]:
# Fetch the metadata of the current dataset and list the display name of
# every column it declares.
metadata = client.get_metadata(data_set)
[column['name'] for column in metadata['columns']]

['Report Date',
 'Borough',
 'Community District',
 'Adult Family Comm Hotel',
 'Adult Family Shelter',
 'Adult Shelter',
 'Adult Shelter Comm Hotel',
 'FWC Cluster',
 'FWC Comm Hotel',
 'FWC Shelter']

In [20]:
# Preview the first five rows of the buildings data as returned by the API.
dhs_buildings.head(n=5)

Unnamed: 0,report_date,borough,community_district,adult_shelter,fwc_comm_hotel,fwc_shelter,adult_family_shelter,adult_shelter_comm_hotel,adult_family_comm_hotel
0,2024-04-30T00:00:00.000,Bronx,212,3.0,4.0,3.0,,,
1,2024-04-30T00:00:00.000,Queens,414,1.0,3.0,1.0,1.0,,
2,2024-04-30T00:00:00.000,Queens,404,,1.0,1.0,,1.0,
3,2024-04-30T00:00:00.000,Brooklyn,318,,1.0,3.0,1.0,,
4,2024-04-30T00:00:00.000,Bronx,211,,,,,,


In [21]:
# Replace every missing value in the frame with 0 and rebind the name to the
# filled result.
dhs_buildings = dhs_buildings.fillna(0)

In [22]:
# Preview the first five rows after the missing values were filled with 0.
dhs_buildings.head(n=5)

Unnamed: 0,report_date,borough,community_district,adult_shelter,fwc_comm_hotel,fwc_shelter,adult_family_shelter,adult_shelter_comm_hotel,adult_family_comm_hotel
0,2024-04-30T00:00:00.000,Bronx,212,3,4,3,0,0,0
1,2024-04-30T00:00:00.000,Queens,414,1,3,1,1,0,0
2,2024-04-30T00:00:00.000,Queens,404,0,1,1,0,1,0
3,2024-04-30T00:00:00.000,Brooklyn,318,0,1,3,1,0,0
4,2024-04-30T00:00:00.000,Bronx,211,0,0,0,0,0,0


In [23]:
# Parse the report timestamps, then keep only the calendar date
# (rendered as YYYY-MM-DD; the time-of-day part is dropped).
report_timestamps = pd.to_datetime(dhs_buildings["report_date"])
dhs_buildings["report_date"] = report_timestamps.dt.date

In [24]:
# Persist the buildings data to disk, leaving out the positional index.
dhs_buildings.to_csv(path_or_buf="dhs_buildings.csv", index=False)