# New York City Recycling and Diversion Rates

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import folium
from folium import Choropleth

In [5]:
#reading in recycle and diversion file
recycle = pd.read_csv('https://data.cityofnewyork.us/resource/gaq9-z3hz.csv')
recycle.head()

Unnamed: 0,_zone,district,fiscal_month_number,fiscal_year,month_name,diversion_rate_total_total_recycling_total_waste_,capture_rate_paper_total_paper_max_paper_,capture_rate_mgp_total_mgp_max_mgp_,capture_rate_total_total_recycling_leaves_recycling_max_paper_max_mgp_x100
0,Brooklyn North,BKN01,10,2019,April,14.687093,44.90916,43.034062,44.146764
1,Brooklyn North,BKN02,10,2019,April,19.950181,34.19402,57.947031,41.2137
2,Brooklyn North,BKN03,10,2019,April,12.164161,33.521557,44.919731,38.155937
3,Brooklyn North,BKN04,10,2019,April,15.541803,35.211361,68.51126,48.750755
4,Brooklyn North,BKN05,10,2019,April,10.051845,22.26543,45.051791,31.530129


In [7]:
#reading in shapefile of community districts
dist = gpd.read_file('Community Districts/geo_export_362b8b30-acf1-46f8-acdc-971fc2ff0c6e.shp')
dist.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 71 entries, 0 to 70
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   boro_cd     71 non-null     float64 
 1   shape_area  71 non-null     float64 
 2   shape_leng  71 non-null     float64 
 3   geometry    71 non-null     geometry
dtypes: float64(3), geometry(1)
memory usage: 2.3 KB


In [8]:
#reading in sanitation districts
dsny = pd.read_csv('DSNY_Districts.csv')
dsny_codes = dsny[['DISTRICT', 'DISTRICTCODE']]
dsny_codes.head()

Unnamed: 0,DISTRICT,DISTRICTCODE
0,SI03,503
1,05599998 40.54236129800006,-74.14129590299996 40.54234409500003
2,SI02,502
3,02884299994 40.59100730500006,-74.20206877699997 40.59115276000006
4,SI01,501


In [9]:
#getting rid of all rows that are not decimal/numeric in DISTRICTCODE
codes = dsny_codes.loc[dsny_codes['DISTRICTCODE'].str.isdecimal()]
codes
#resetting index to count from 0
codes = codes.reset_index(drop=True)

In [10]:
codes['DISTRICTCODE_f'] = codes.DISTRICTCODE.astype(float)
codes.head()

Unnamed: 0,DISTRICT,DISTRICTCODE,DISTRICTCODE_f
0,SI03,503,503.0
1,SI02,502,502.0
2,SI01,501,501.0
3,BX10,210,210.0
4,BX02,202,202.0


In [11]:
#merging dist and codes to get  geometry for the sanitation districts
merged = pd.merge(dist, codes, left_on = 'boro_cd', right_on = 'DISTRICTCODE_f')
merged.head()

Unnamed: 0,boro_cd,shape_area,shape_leng,geometry,DISTRICT,DISTRICTCODE,DISTRICTCODE_f
0,101.0,42689170.0,74168.753335,"MULTIPOLYGON (((-74.04388 40.69019, -74.04351 ...",MN01,101,101.0
1,102.0,37689200.0,34130.593766,"POLYGON ((-74.00779 40.74197, -74.00697 40.741...",MN02,102,102.0
2,103.0,46879760.0,30468.302497,"POLYGON ((-73.97177 40.72582, -73.97179 40.725...",MN03,103,103.0
3,104.0,49311800.0,67623.951686,"POLYGON ((-73.99557 40.77387, -73.99394 40.773...",MN04,104,104.0
4,105.0,43790300.0,35288.30459,"POLYGON ((-73.98149 40.76788, -73.97908 40.766...",MN05,105,105.0


In [12]:
#reading in file for monthly recycling tonnage data
tonnage = pd.read_csv('https://data.cityofnewyork.us/resource/ebb7-mvp5.csv')
tonnage.info()
#using the api only gives us 1000 of like 27,000 rows

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 11 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   month                1000 non-null   object 
 1   borough              1000 non-null   object 
 2   communitydistrict    1000 non-null   object 
 3   refusetonscollected  997 non-null    float64
 4   papertonscollected   997 non-null    float64
 5   mgptonscollected     997 non-null    float64
 6   resorganicstons      103 non-null    float64
 7   schoolorganictons    18 non-null     float64
 8   leavesorganictons    22 non-null     float64
 9   xmastreetons         60 non-null     float64
 10  borough_id           997 non-null    float64
dtypes: float64(8), object(3)
memory usage: 86.1+ KB


In [13]:
def cd_codes(boro):
    if boro == 'Bronx':
        val = 'BX'
    elif boro == 'Brooklyn':
        val = 'BK'
    elif boro == 'Queens':
        val = 'Q'
    elif boro == 'Manhattan':
        val = 'MN'
    else:
        val = 'SI'
    return val
tonnage['cd_code'] = ''
tonnage['cd_code'] = tonnage.apply(lambda x: cd_codes(x['borough']), axis = 1)

#tonnage.head(50)
tonnage['cd'] = tonnage['cd_code'] + tonnage['communitydistrict']
tonnage.head(50)
#set index to cd
tonnage.set_index('cd', inplace = True)
tonnage.head(50)

Unnamed: 0_level_0,month,borough,communitydistrict,refusetonscollected,papertonscollected,mgptonscollected,resorganicstons,schoolorganictons,leavesorganictons,xmastreetons,borough_id,cd_code
cd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
BX01,2021 / 07,Bronx,1,496.01,25.05,28.9,,,,,2.0,BX
BX02,2021 / 07,Bronx,2,413.81,33.12,38.15,,,,,2.0,BX
BX03,2021 / 07,Bronx,3,491.32,37.15,51.37,,,,,2.0,BX
BX04,2021 / 07,Bronx,4,970.41,57.11,72.22,,,,,2.0,BX
BX05,2021 / 07,Bronx,5,867.76,56.87,75.28,,,,,2.0,BX
BX06,2021 / 07,Bronx,6,512.17,59.13,67.57,,,,,2.0,BX
BX07,2021 / 07,Bronx,7,858.11,70.66,134.5,,,,,2.0,BX
BX08,2021 / 07,Bronx,8,601.53,80.84,77.28,,,,,2.0,BX
BX09,2021 / 07,Bronx,9,957.13,62.07,90.83,,,,,2.0,BX
BX10,2021 / 07,Bronx,10,640.29,79.98,99.27,,,,,2.0,BX


In [14]:
def reform(cd):
    if 'S' in cd[2] or 'N' in cd[2]:
        new = cd[:2] + cd[3:]
    elif 'E' in cd[1] or 'W' in cd[1]:
        new = cd[:1] + cd[2:]
    else:
        new = cd
    return new

merged['re_codes'] = merged.apply(lambda x: reform(x['DISTRICT']), axis = 1)
merged.head(20)

Unnamed: 0,boro_cd,shape_area,shape_leng,geometry,DISTRICT,DISTRICTCODE,DISTRICTCODE_f,re_codes
0,101.0,42689170.0,74168.753335,"MULTIPOLYGON (((-74.04388 40.69019, -74.04351 ...",MN01,101,101.0,MN01
1,102.0,37689200.0,34130.593766,"POLYGON ((-74.00779 40.74197, -74.00697 40.741...",MN02,102,102.0,MN02
2,103.0,46879760.0,30468.302497,"POLYGON ((-73.97177 40.72582, -73.97179 40.725...",MN03,103,103.0,MN03
3,104.0,49311800.0,67623.951686,"POLYGON ((-73.99557 40.77387, -73.99394 40.773...",MN04,104,104.0,MN04
4,105.0,43790300.0,35288.30459,"POLYGON ((-73.98149 40.76788, -73.97908 40.766...",MN05,105,105.0,MN05
5,106.0,38733650.0,43163.835799,"MULTIPOLYGON (((-73.96128 40.73016, -73.96128 ...",MN06,106,106.0,MN06
6,107.0,53152820.0,39863.701364,"POLYGON ((-73.96838 40.78660, -73.96885 40.785...",MN07,107,107.0,MN07
7,108.0,55172440.0,53927.099287,"MULTIPOLYGON (((-73.93557 40.77192, -73.93566 ...",MN08,108,108.0,MN08
8,309.0,45326330.0,29944.474194,"POLYGON ((-73.94508 40.65572, -73.94716 40.655...",BKN09,309,309.0,BK09
9,109.0,41898030.0,34958.004577,"POLYGON ((-73.94020 40.82865, -73.94154 40.826...",MN09,109,109.0,MN09


In [15]:
#getting the mean for all
ref = tonnage.groupby('cd')['refusetonscollected'].mean()
san_dist = merged[['re_codes', 'geometry']].set_index('re_codes')
san_dist.info()
ref.head()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 59 entries, MN01 to SI03
Data columns (total 1 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   geometry  59 non-null     geometry
dtypes: geometry(1)
memory usage: 944.0+ bytes


cd
BK01    5398.843529
BK02    2425.151765
BK03    4219.329412
BK04    3292.947059
BK05    4878.291765
Name: refusetonscollected, dtype: float64

In [16]:
#make choropleth map to visualize tonnage within community districts
nyc = folium.Map(location = [40.7128, -74.0060], tiles = 'cartodbpositron', zoom_start = 10)
Choropleth(geo_data = san_dist.__geo_interface__,
          data = ref,
          key_on = 'feature.id',
          fill_color = 'YlGnBu',
          legend_name = 'Average Total Refuse Collected in Tons by Sanitation District 2020 - 2021').add_to(nyc)
def embed_map(m, file_name):
    from IPython.display import IFrame
    m.save(file_name)
    return IFrame(file_name, width = '100%', height = '500px')
embed_map(nyc, 'nyc.html')

In [17]:
#splitting month column into month, year columns
tonnage['month']
tonnage[['year','mon']] = tonnage.month.str.split(" /",expand=True)
tonnage

Unnamed: 0_level_0,month,borough,communitydistrict,refusetonscollected,papertonscollected,mgptonscollected,resorganicstons,schoolorganictons,leavesorganictons,xmastreetons,borough_id,cd_code,year,mon
cd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
BX01,2021 / 07,Bronx,01,496.01,25.05,28.90,,,,,2.0,BX,2021,07
BX02,2021 / 07,Bronx,02,413.81,33.12,38.15,,,,,2.0,BX,2021,07
BX03,2021 / 07,Bronx,03,491.32,37.15,51.37,,,,,2.0,BX,2021,07
BX04,2021 / 07,Bronx,04,970.41,57.11,72.22,,,,,2.0,BX,2021,07
BX05,2021 / 07,Bronx,05,867.76,56.87,75.28,,,,,2.0,BX,2021,07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Q07,2020 / 03,Queens,07,6163.72,788.01,785.12,104.57,,,,4.0,Q,2020,03
Q08,2020 / 03,Queens,08,3454.12,422.73,417.58,102.89,,,,4.0,Q,2020,03
Q09,2020 / 03,Queens,09,3613.19,423.53,556.55,158.20,21.61,,,4.0,Q,2020,03
Q10,2020 / 03,Queens,10,3461.11,351.01,515.83,54.60,,,,4.0,Q,2020,03


In [18]:
#getting only 2020 refuse
tonnage['year'] = tonnage['year'].astype(str).astype(int)
#tonnage['year']
ref_2020 = tonnage[tonnage.year == 2020]
ref_2020

Unnamed: 0_level_0,month,borough,communitydistrict,refusetonscollected,papertonscollected,mgptonscollected,resorganicstons,schoolorganictons,leavesorganictons,xmastreetons,borough_id,cd_code,year,mon
cd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
BX01,2020 / 12,Bronx,01,2275.73,177.02,181.35,,,,,2.0,BX,2020,12
BX02,2020 / 12,Bronx,02,1723.33,163.03,188.08,,,4.5,,2.0,BX,2020,12
BX03,2020 / 12,Bronx,03,2281.47,225.45,259.82,,,,,2.0,BX,2020,12
BX04,2020 / 12,Bronx,04,4395.54,339.69,405.88,,,,,2.0,BX,2020,12
BX05,2020 / 12,Bronx,05,3740.06,311.92,398.73,,,,,2.0,BX,2020,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Q07,2020 / 03,Queens,07,6163.72,788.01,785.12,104.57,,,,4.0,Q,2020,03
Q08,2020 / 03,Queens,08,3454.12,422.73,417.58,102.89,,,,4.0,Q,2020,03
Q09,2020 / 03,Queens,09,3613.19,423.53,556.55,158.20,21.61,,,4.0,Q,2020,03
Q10,2020 / 03,Queens,10,3461.11,351.01,515.83,54.60,,,,4.0,Q,2020,03


In [19]:
ref_2020

Unnamed: 0_level_0,month,borough,communitydistrict,refusetonscollected,papertonscollected,mgptonscollected,resorganicstons,schoolorganictons,leavesorganictons,xmastreetons,borough_id,cd_code,year,mon
cd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
BX01,2020 / 12,Bronx,01,2275.73,177.02,181.35,,,,,2.0,BX,2020,12
BX02,2020 / 12,Bronx,02,1723.33,163.03,188.08,,,4.5,,2.0,BX,2020,12
BX03,2020 / 12,Bronx,03,2281.47,225.45,259.82,,,,,2.0,BX,2020,12
BX04,2020 / 12,Bronx,04,4395.54,339.69,405.88,,,,,2.0,BX,2020,12
BX05,2020 / 12,Bronx,05,3740.06,311.92,398.73,,,,,2.0,BX,2020,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Q07,2020 / 03,Queens,07,6163.72,788.01,785.12,104.57,,,,4.0,Q,2020,03
Q08,2020 / 03,Queens,08,3454.12,422.73,417.58,102.89,,,,4.0,Q,2020,03
Q09,2020 / 03,Queens,09,3613.19,423.53,556.55,158.20,21.61,,,4.0,Q,2020,03
Q10,2020 / 03,Queens,10,3461.11,351.01,515.83,54.60,,,,4.0,Q,2020,03


In [20]:
#creating map for 2020
nyc_2020 = folium.Map(location = [40.7128, -74.0060], tiles = 'cartodbpositron', zoom_start = 10)
Choropleth(geo_data = san_dist.__geo_interface__,
          data = ref_2020['refusetonscollected'],
          key_on = 'feature.id',
          fill_color = 'YlGnBu',
          legend_name = 'Total Refuse Collected in Tons by Sanitation District for 2020').add_to(nyc_2020)

def embed_map(m, file_name):
    from IPython.display import IFrame
    m.save(file_name)
    return IFrame(file_name, width = '100%', height = '500px')
embed_map(nyc_2020, 'nyc2020.html')