In [51]:
%matplotlib notebook
import pandas as pd
import matplotlib.pyplot as plt
from citipy import citipy
import gmaps
from config import gkey
gmaps.configure(api_key=gkey)

# Chromium Data

In [4]:
chromium_csv = "Database/Chromium/Chromium.csv"
chromium = pd.read_csv(chromium_csv)
chromium.head()

Unnamed: 0,STATE_ABBR,AMA_SITE_CODE,AQS_POC,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,AQS_PARAMETER_CODE,AQS_PARAMETER_NAME,...,AQS_QUALIFIER_10,AQS_METHOD_CODE,SAMPLE_COLLECTION_DESC,SAMPLE_ANALYSIS_DESC,SAMPLE_VALUE_FLAG,BELOW_MDL_FLAG,CENSUS_TRACT_ID,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
0,NJ,340010006,1,IMPROVE,2014,4,14-Oct-14,00:00,88112,Chromium (PM2.5),...,,800,IMPROVE Module A with Cyclone Inlet-Teflon Fil...,X-Ray Fluorescence,,,34001010505,3044,39.464872,-74.448736
1,NJ,340010006,1,IMPROVE,2014,4,25-Nov-14,00:00,88112,Chromium (PM2.5),...,,800,IMPROVE Module A with Cyclone Inlet-Teflon Fil...,X-Ray Fluorescence,,,34001010505,3044,39.464872,-74.448736
2,NJ,340010006,1,IMPROVE,2014,3,02-Sep-14,00:00,88112,Chromium (PM2.5),...,,800,IMPROVE Module A with Cyclone Inlet-Teflon Fil...,X-Ray Fluorescence,,,34001010505,3044,39.464872,-74.448736
3,NJ,340010006,1,IMPROVE,2014,3,05-Sep-14,00:00,88112,Chromium (PM2.5),...,,800,IMPROVE Module A with Cyclone Inlet-Teflon Fil...,X-Ray Fluorescence,,,34001010505,3044,39.464872,-74.448736
4,NJ,340010006,1,IMPROVE,2014,3,08-Sep-14,00:00,88112,Chromium (PM2.5),...,,800,IMPROVE Module A with Cyclone Inlet-Teflon Fil...,X-Ray Fluorescence,,,34001010505,3044,39.464872,-74.448736


In [6]:
chromium = chromium[['PROGRAM', 'YEAR', 'QUARTER', 'SAMPLE_DATE', 'SAMPLE_START_TIME', 'DURATION_DESC', 
'SAMPLE_VALUE_REPORTED', 'UNIT_DESC', 'SAMPLE_VALUE_STD_FINAL_TYPE', 'MDL_STD_UG_M3', 'CENSUS_TRACT_POPULATION_2010', 'MONITOR_LATITUDE', 
'MONITOR_LONGITUDE']]
chromium.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,UNIT_DESC,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
0,IMPROVE,2014,4,14-Oct-14,00:00,24 HOURS,0.0001,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736
1,IMPROVE,2014,4,25-Nov-14,00:00,24 HOURS,0.0002,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736
2,IMPROVE,2014,3,02-Sep-14,00:00,24 HOURS,0.0005,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736
3,IMPROVE,2014,3,05-Sep-14,00:00,24 HOURS,0.0003,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736
4,IMPROVE,2014,3,08-Sep-14,00:00,24 HOURS,0.0001,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736


In [10]:
chromium_df = pd.DataFrame(chromium)
chromium_df.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,UNIT_DESC,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
0,IMPROVE,2014,4,14-Oct-14,00:00,24 HOURS,0.0001,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736
1,IMPROVE,2014,4,25-Nov-14,00:00,24 HOURS,0.0002,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736
2,IMPROVE,2014,3,02-Sep-14,00:00,24 HOURS,0.0005,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736
3,IMPROVE,2014,3,05-Sep-14,00:00,24 HOURS,0.0003,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736
4,IMPROVE,2014,3,08-Sep-14,00:00,24 HOURS,0.0001,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736


In [11]:
program = chromium_df["PROGRAM"].value_counts()
program

PM2.5 SPECIATION NETWORK      6935
IMPROVE                       2921
CSN SUPPLEMENTAL               320
COMMUNITY-SCALE MONITORING     249
SLAMS                           40
Name: PROGRAM, dtype: int64

In [12]:
# Please use this cell
plt.figure(figsize=(10,5))
chromium_df.plot.scatter(x='YEAR', y='SAMPLE_VALUE_REPORTED')
plt.xlim(1990,2017)
plt.title('Chromium (microg/c3) by Year in NJ', size=15)
plt.xlabel('Year', size=12)
plt.ylabel('Sample Value Reported', size=12)
# plt.savefig('Images/sample_value_reported_year.png')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Sample Value Reported')

In [14]:
PM2_program_df = chromium_df.loc[chromium_df["PROGRAM"] =="PM2.5 SPECIATION NETWORK"]
PM2_program_df.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,UNIT_DESC,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
992,PM2.5 SPECIATION NETWORK,2003,1,15-Jan-03,00:00,24 HOURS,0.00176,UG/CU METER (LC),L,0.0023,6058,39.923042,-75.097617
993,PM2.5 SPECIATION NETWORK,2003,1,10-Mar-03,00:00,24 HOURS,0.00245,UG/CU METER (LC),L,0.002,6058,39.923042,-75.097617
994,PM2.5 SPECIATION NETWORK,2002,4,28-Nov-02,00:00,24 HOURS,,UG/CU METER (LC),L,0.0015,6058,39.923042,-75.097617
995,PM2.5 SPECIATION NETWORK,2002,4,01-Dec-02,00:00,24 HOURS,0.00176,UG/CU METER (LC),L,0.0023,6058,39.923042,-75.097617
996,PM2.5 SPECIATION NETWORK,2002,4,04-Dec-02,00:00,24 HOURS,,UG/CU METER (LC),L,0.0015,6058,39.923042,-75.097617


In [16]:
sorted_date_PM2 = PM2_program_df.sort_values("YEAR", ascending=False)
sorted_date_PM2.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,UNIT_DESC,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
9652,PM2.5 SPECIATION NETWORK,2016,1,31-Mar-16,00:00,24 HOURS,-0.00053,UG/CU METER (LC),L,0.00366,3685,40.64144,-74.208365
6260,PM2.5 SPECIATION NETWORK,2016,3,20-Jul-16,00:00,24 HOURS,0.0096,UG/CU METER (LC),L,0.00367,3453,40.720989,-74.192892
6248,PM2.5 SPECIATION NETWORK,2016,2,14-Jun-16,00:00,24 HOURS,0.00385,UG/CU METER (LC),L,0.00368,3453,40.720989,-74.192892
6249,PM2.5 SPECIATION NETWORK,2016,2,20-Jun-16,00:00,24 HOURS,-0.00046,UG/CU METER (LC),L,0.00366,3453,40.720989,-74.192892
6250,PM2.5 SPECIATION NETWORK,2016,3,28-Aug-16,00:00,24 HOURS,0.00306,UG/CU METER (LC),L,0.00367,3453,40.720989,-74.192892


In [17]:
grouped_city= PM2_program_df.groupby(["YEAR"])

year = pd.DataFrame(grouped_city["SAMPLE_VALUE_REPORTED"].max())

year.head()

Unnamed: 0_level_0,SAMPLE_VALUE_REPORTED
YEAR,Unnamed: 1_level_1
2001,0.0322
2002,0.0539
2003,0.0343
2004,0.0566
2005,0.279


In [18]:
coordinates = [
    (40.64144, -74.208365),
    (40.720989, -74.192892),
    (40.472825, -74.422403),
    (40.787628, -74.676301),
    (39.923042, -75.097617)
]

In [19]:
figure_layout = {
    'width': '800px',
    'height': '300px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'
}
fig = gmaps.figure(layout=figure_layout)

In [20]:
markers = gmaps.marker_layer(coordinates)

fig.add_layer(markers)
fig

Figure(layout=FigureLayout(border='1px solid black', height='300px', margin='0 auto 0 auto', padding='1px', wi…

In [21]:
improve_program_df = chromium_df.loc[chromium_df["PROGRAM"] =="IMPROVE"]
improve_program_df.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,UNIT_DESC,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
0,IMPROVE,2014,4,14-Oct-14,00:00,24 HOURS,0.0001,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736
1,IMPROVE,2014,4,25-Nov-14,00:00,24 HOURS,0.0002,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736
2,IMPROVE,2014,3,02-Sep-14,00:00,24 HOURS,0.0005,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736
3,IMPROVE,2014,3,05-Sep-14,00:00,24 HOURS,0.0003,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736
4,IMPROVE,2014,3,08-Sep-14,00:00,24 HOURS,0.0001,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736


In [22]:
sorted_date_improve = improve_program_df.sort_values("YEAR", ascending=False)
sorted_date_improve.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,UNIT_DESC,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
267,IMPROVE,2016,4,02-Dec-16,00:00,24 HOURS,7e-05,UG/CU METER (LC),L,0.00015,3044,39.464872,-74.448736
287,IMPROVE,2016,3,08-Jul-16,00:00,24 HOURS,0.00029,UG/CU METER (LC),L,0.00016,3044,39.464872,-74.448736
285,IMPROVE,2016,3,02-Jul-16,00:00,24 HOURS,2e-05,UG/CU METER (LC),L,0.00015,3044,39.464872,-74.448736
284,IMPROVE,2016,2,29-Jun-16,00:00,24 HOURS,8e-05,UG/CU METER (LC),L,0.00012,3044,39.464872,-74.448736
283,IMPROVE,2016,2,26-Jun-16,00:00,24 HOURS,0.00012,UG/CU METER (LC),L,0.00012,3044,39.464872,-74.448736


In [23]:
grouped_city= improve_program_df.groupby(["YEAR"])

year = pd.DataFrame(grouped_city["SAMPLE_VALUE_REPORTED"].max())

year.head()

Unnamed: 0_level_0,SAMPLE_VALUE_REPORTED
YEAR,Unnamed: 1_level_1
1991,0.00045
1992,0.00093
1993,0.00322
1994,0.00275
1995,0.00555


In [25]:
community_program_df = chromium_df.loc[chromium_df["PROGRAM"] =="COMMUNITY-SCALE MONITORING"]
community_program_df.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,UNIT_DESC,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
310,COMMUNITY-SCALE MONITORING,2008,2,05-Jun-08,00:00,24 HOURS,36.641,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758
312,COMMUNITY-SCALE MONITORING,2008,1,13-Mar-08,00:00,24 HOURS,73.458,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758
313,COMMUNITY-SCALE MONITORING,2008,1,19-Mar-08,00:00,24 HOURS,76.403,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758
314,COMMUNITY-SCALE MONITORING,2008,1,25-Mar-08,00:00,24 HOURS,47.462,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758
315,COMMUNITY-SCALE MONITORING,2008,1,31-Mar-08,00:00,24 HOURS,68.641,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758


In [26]:
sorted_date_community = community_program_df.sort_values("YEAR", ascending=False)
sorted_date_community.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,UNIT_DESC,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
310,COMMUNITY-SCALE MONITORING,2008,2,05-Jun-08,00:00,24 HOURS,36.641,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758
1649,COMMUNITY-SCALE MONITORING,2008,1,08-Mar-08,06:00,12 HOUR,33.842,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758
1636,COMMUNITY-SCALE MONITORING,2008,1,29-Feb-08,00:00,24 HOURS,40.047,Nanograms/cubic meter (LC),L,2e-06,6127,40.816809,-74.043673
1637,COMMUNITY-SCALE MONITORING,2008,1,23-Feb-08,00:00,24 HOURS,,Nanograms/cubic meter (LC),L,2e-06,6127,40.816809,-74.043673
1638,COMMUNITY-SCALE MONITORING,2008,1,17-Feb-08,00:00,24 HOURS,,Nanograms/cubic meter (LC),L,2e-06,6127,40.816809,-74.043673


In [27]:
grouped_city= community_program_df.groupby(["YEAR"])

year = pd.DataFrame(grouped_city["SAMPLE_VALUE_REPORTED"].max())

year.head()

Unnamed: 0_level_0,SAMPLE_VALUE_REPORTED
YEAR,Unnamed: 1_level_1
2007,157.163
2008,826.511


In [28]:
csn_program_df = chromium_df.loc[chromium_df["PROGRAM"] =="CSN SUPPLEMENTAL"]
csn_program_df.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,UNIT_DESC,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
1148,CSN SUPPLEMENTAL,2016,4,09-Oct-16,00:00,24 HOURS,0.01815,UG/CU METER (LC),L,0.00364,2328,39.934446,-75.125291
1149,CSN SUPPLEMENTAL,2016,4,15-Oct-16,00:00,24 HOURS,0.00287,UG/CU METER (LC),L,0.00365,2328,39.934446,-75.125291
1150,CSN SUPPLEMENTAL,2016,4,21-Oct-16,00:00,24 HOURS,0.00355,UG/CU METER (LC),L,0.00361,2328,39.934446,-75.125291
1151,CSN SUPPLEMENTAL,2016,4,27-Oct-16,00:00,24 HOURS,0.00364,UG/CU METER (LC),L,0.00366,2328,39.934446,-75.125291
1152,CSN SUPPLEMENTAL,2016,4,02-Nov-16,00:00,24 HOURS,0.00393,UG/CU METER (LC),L,0.00364,2328,39.934446,-75.125291


In [29]:
sorted_date_csn = csn_program_df.sort_values("YEAR", ascending=False)
sorted_date_csn.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,UNIT_DESC,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
1148,CSN SUPPLEMENTAL,2016,4,09-Oct-16,00:00,24 HOURS,0.01815,UG/CU METER (LC),L,0.00364,2328,39.934446,-75.125291
1811,CSN SUPPLEMENTAL,2016,2,17-Jun-16,00:00,24 HOURS,,UG/CU METER (LC),L,0.00344,2328,39.934446,-75.125291
1821,CSN SUPPLEMENTAL,2016,2,29-Jun-16,00:00,24 HOURS,0.00712,UG/CU METER (LC),L,0.00359,2328,39.934446,-75.125291
1820,CSN SUPPLEMENTAL,2016,3,16-Aug-16,00:00,24 HOURS,0.00139,UG/CU METER (LC),L,0.00357,2328,39.934446,-75.125291
1819,CSN SUPPLEMENTAL,2016,2,23-Jun-16,00:00,24 HOURS,,UG/CU METER (LC),L,0.00344,2328,39.934446,-75.125291


In [30]:
grouped_city= csn_program_df.groupby(["YEAR"])

year = pd.DataFrame(grouped_city["SAMPLE_VALUE_REPORTED"].max())

year.head()

Unnamed: 0_level_0,SAMPLE_VALUE_REPORTED
YEAR,Unnamed: 1_level_1
2013,0.0511
2014,0.113
2015,0.0335
2016,0.05085


In [31]:
slams_program_df = chromium_df.loc[chromium_df["PROGRAM"] =="SLAMS"]
slams_program_df.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,UNIT_DESC,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
5708,SLAMS,2016,4,02-Nov-16,00:00,24 HOURS,0.00406,UG/CU METER (LC),L,0.00367,5231,40.462182,-74.429439
5710,SLAMS,2016,4,29-Nov-16,00:00,24 HOURS,0.00159,UG/CU METER (LC),L,0.00367,5231,40.462182,-74.429439
5711,SLAMS,2016,4,02-Dec-16,00:00,24 HOURS,,UG/CU METER (LC),L,0.00367,5231,40.462182,-74.429439
5712,SLAMS,2016,4,08-Dec-16,00:00,24 HOURS,0.01855,UG/CU METER (LC),L,0.00367,5231,40.462182,-74.429439
5713,SLAMS,2016,4,11-Dec-16,00:00,24 HOURS,0.02432,UG/CU METER (LC),L,0.00367,5231,40.462182,-74.429439


In [32]:
sorted_date_slams = slams_program_df.sort_values("YEAR", ascending=False)
sorted_date_slams.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,UNIT_DESC,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
5708,SLAMS,2016,4,02-Nov-16,00:00,24 HOURS,0.00406,UG/CU METER (LC),L,0.00367,5231,40.462182,-74.429439
5710,SLAMS,2016,4,29-Nov-16,00:00,24 HOURS,0.00159,UG/CU METER (LC),L,0.00367,5231,40.462182,-74.429439
5735,SLAMS,2016,4,14-Nov-16,00:00,24 HOURS,0.00181,UG/CU METER (LC),L,0.00367,5231,40.462182,-74.429439
5736,SLAMS,2016,4,08-Nov-16,00:00,24 HOURS,0.00088,UG/CU METER (LC),L,0.00367,5231,40.462182,-74.429439
5737,SLAMS,2016,4,17-Nov-16,00:00,24 HOURS,0.01003,UG/CU METER (LC),L,0.00367,5231,40.462182,-74.429439


In [33]:
grouped_city= slams_program_df.groupby(["YEAR"])

year = pd.DataFrame(grouped_city["SAMPLE_VALUE_REPORTED"].max())

year.head()

Unnamed: 0_level_0,SAMPLE_VALUE_REPORTED
YEAR,Unnamed: 1_level_1
2016,0.0249


In [34]:
breakpoints_sensitive = chromium_df.loc[(chromium_df["SAMPLE_VALUE_REPORTED"]>35.5) &
                              (chromium_df["SAMPLE_VALUE_REPORTED"]<= 55.4)]
breakpoints_sensitive = pd.DataFrame(breakpoints_sensitive)
breakpoints_sensitive.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,UNIT_DESC,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
310,COMMUNITY-SCALE MONITORING,2008,2,05-Jun-08,00:00,24 HOURS,36.641,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758
314,COMMUNITY-SCALE MONITORING,2008,1,25-Mar-08,00:00,24 HOURS,47.462,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758
317,COMMUNITY-SCALE MONITORING,2008,2,12-Apr-08,00:00,24 HOURS,44.975,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758
322,COMMUNITY-SCALE MONITORING,2008,2,12-May-08,00:00,24 HOURS,51.231,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758
323,COMMUNITY-SCALE MONITORING,2008,2,18-May-08,00:00,24 HOURS,55.226,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758


In [35]:
program_count = breakpoints_sensitive["PROGRAM"].value_counts()
program_count.head()

COMMUNITY-SCALE MONITORING    59
Name: PROGRAM, dtype: int64

In [36]:
year_count = breakpoints_sensitive["YEAR"].value_counts()
year_count

2008    45
2007    14
Name: YEAR, dtype: int64

In [37]:
locations = breakpoints_sensitive[['MONITOR_LATITUDE', 'MONITOR_LONGITUDE']]
weights = breakpoints_sensitive['SAMPLE_VALUE_REPORTED']
fig = gmaps.figure()
fig.add_layer(gmaps.heatmap_layer(locations, weights=weights))
fig

Figure(layout=FigureLayout(height='420px'))

In [39]:
breakpoints_unhealthy = chromium_df.loc[(chromium_df["SAMPLE_VALUE_REPORTED"]>=55.5) &
                              (chromium_df["SAMPLE_VALUE_REPORTED"]<= 150.4)]
breakpoints_unhealthy= pd.DataFrame(breakpoints_unhealthy)

breakpoints_unhealthy.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,UNIT_DESC,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
312,COMMUNITY-SCALE MONITORING,2008,1,13-Mar-08,00:00,24 HOURS,73.458,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758
313,COMMUNITY-SCALE MONITORING,2008,1,19-Mar-08,00:00,24 HOURS,76.403,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758
315,COMMUNITY-SCALE MONITORING,2008,1,31-Mar-08,00:00,24 HOURS,68.641,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758
320,COMMUNITY-SCALE MONITORING,2008,2,30-Apr-08,00:00,24 HOURS,108.443,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758
325,COMMUNITY-SCALE MONITORING,2008,3,11-Jul-08,00:00,24 HOURS,75.764,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758


In [40]:
program_count = breakpoints_unhealthy["PROGRAM"].value_counts()
program_count

COMMUNITY-SCALE MONITORING    110
Name: PROGRAM, dtype: int64

In [41]:
year_count = breakpoints_unhealthy["YEAR"].value_counts()
year_count

2008    86
2007    24
Name: YEAR, dtype: int64

In [42]:
locations = breakpoints_unhealthy[['MONITOR_LATITUDE', 'MONITOR_LONGITUDE']]
weights = breakpoints_unhealthy['SAMPLE_VALUE_REPORTED']
fig = gmaps.figure()
fig.add_layer(gmaps.heatmap_layer(locations, weights=weights))
fig

Figure(layout=FigureLayout(height='420px'))

In [43]:
breakpoints_veryunhealthy = chromium_df.loc[(chromium_df["SAMPLE_VALUE_REPORTED"]>=150.5) &
                              (chromium_df["SAMPLE_VALUE_REPORTED"]<= 250.4)]

breakpoints_veryunhealthy.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,UNIT_DESC,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
318,COMMUNITY-SCALE MONITORING,2008,2,18-Apr-08,00:00,24 HOURS,243.557,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758
319,COMMUNITY-SCALE MONITORING,2008,2,24-Apr-08,00:00,24 HOURS,158.336,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758
321,COMMUNITY-SCALE MONITORING,2008,2,06-May-08,00:00,24 HOURS,160.772,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758
346,COMMUNITY-SCALE MONITORING,2007,3,21-Sep-07,00:00,24 HOURS,157.163,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758
359,COMMUNITY-SCALE MONITORING,2008,1,05-Feb-08,00:00,24 HOURS,189.486,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758


In [44]:
program_count = breakpoints_veryunhealthy["PROGRAM"].value_counts()
program_count

COMMUNITY-SCALE MONITORING    15
Name: PROGRAM, dtype: int64

In [45]:
year_count = breakpoints_veryunhealthy["YEAR"].value_counts()
year_count

2008    14
2007     1
Name: YEAR, dtype: int64

In [46]:
locations = breakpoints_veryunhealthy[['MONITOR_LATITUDE', 'MONITOR_LONGITUDE']]
weights = breakpoints_veryunhealthy['SAMPLE_VALUE_REPORTED']
fig = gmaps.figure()
fig.add_layer(gmaps.heatmap_layer(locations, weights=weights))
fig

Figure(layout=FigureLayout(height='420px'))

In [47]:
breakpoints_hazardous = chromium_df.loc[(chromium_df["SAMPLE_VALUE_REPORTED"]>=250.5)]

breakpoints_hazardous

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,UNIT_DESC,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
326,COMMUNITY-SCALE MONITORING,2008,3,03-Sep-08,00:00,24 HOURS,826.511,Nanograms/cubic meter (LC),L,2e-06,6127,40.817218,-74.043758


In [48]:
program_count = breakpoints_hazardous["PROGRAM"].value_counts()
program_count

COMMUNITY-SCALE MONITORING    1
Name: PROGRAM, dtype: int64

In [49]:
year_count = breakpoints_hazardous["YEAR"].value_counts()
year_count

2008    1
Name: YEAR, dtype: int64

In [50]:
locations = breakpoints_hazardous[['MONITOR_LATITUDE', 'MONITOR_LONGITUDE']]
weights = breakpoints_hazardous['SAMPLE_VALUE_REPORTED']
fig = gmaps.figure()
fig.add_layer(gmaps.heatmap_layer(locations, weights=weights))
fig

Figure(layout=FigureLayout(height='420px'))

In [52]:
coordinates = []
for index, row in chromium_df.iterrows(): 
    if (row['MONITOR_LATITUDE'], row['MONITOR_LONGITUDE']) not in coordinates:
        coordinates.append((row['MONITOR_LATITUDE'],row['MONITOR_LONGITUDE']))
# coordinates
cities = []
for coordinate_pair in coordinates:
    lat, lon = coordinate_pair
    cities.append(citipy.nearest_city(lat, lon))
# cities
city_names = []
for city in cities:
    city_names.append(city.city_name)
city_names

['brigantine',
 'secaucus',
 'camden',
 'camden',
 'little ferry',
 'secaucus',
 'highland park',
 'hopatcong',
 'new brunswick',
 'newark',
 'elizabeth']

In [54]:
city_data = pd.DataFrame({'Coordinates':coordinates,'City Name': city_names})
city_data
city_index = []
for index, row in chromium_df.iterrows():
    for cindex, crow in city_data.iterrows():
        if crow['Coordinates'][1] == row['MONITOR_LONGITUDE']:
            city_index.append(crow['City Name'])
            continue
chromium_df['City'] = city_index            
chromium_df.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,UNIT_DESC,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE,City
0,IMPROVE,2014,4,14-Oct-14,00:00,24 HOURS,0.0001,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736,brigantine
1,IMPROVE,2014,4,25-Nov-14,00:00,24 HOURS,0.0002,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736,brigantine
2,IMPROVE,2014,3,02-Sep-14,00:00,24 HOURS,0.0005,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736,brigantine
3,IMPROVE,2014,3,05-Sep-14,00:00,24 HOURS,0.0003,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736,brigantine
4,IMPROVE,2014,3,08-Sep-14,00:00,24 HOURS,0.0001,UG/CU METER (LC),L,0.0001,3044,39.464872,-74.448736,brigantine


In [55]:
grouped_city= chromium_df.groupby(["City"])

city = pd.DataFrame(grouped_city["SAMPLE_VALUE_REPORTED"].max())

city.head()

Unnamed: 0_level_0,SAMPLE_VALUE_REPORTED
City,Unnamed: 1_level_1
brigantine,0.0112
camden,0.113
elizabeth,0.279
highland park,0.524
hopatcong,0.19


In [56]:
city_list = grouped_city['City'].value_counts()
city_list
city_list = ['Brigantine', 'Camden', 'Elizabeth', 
             'Highland Park', 'Hopatcong', 'Little Ferry', 'New Brunswick','Newark','Seacaucus']

In [59]:
plt.figure(figsize=(10,5))
plt.bar(city_list,grouped_city['SAMPLE_VALUE_REPORTED'].mean(), alpha=0.7)
# plt.xlim(1989,2017)
plt.title('Sample Final Mean by City (microg/c3)', size=20)
plt.xlabel('City', size=16)
plt.xticks(rotation=45)
plt.ylabel('Sample Value Mean', size=16)
# plt.savefig('Images/Sample_Final_Mean_by_City.png')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Sample Value Mean')

In [60]:
grouped_city= chromium_df.groupby(["City", "YEAR", "PROGRAM"])

year = pd.DataFrame(grouped_city["SAMPLE_VALUE_REPORTED"].max())

year.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SAMPLE_VALUE_REPORTED
City,YEAR,PROGRAM,Unnamed: 3_level_1
brigantine,1991,IMPROVE,0.00045
brigantine,1992,IMPROVE,0.00093
brigantine,1993,IMPROVE,0.00322
brigantine,1994,IMPROVE,0.00275
brigantine,1995,IMPROVE,0.00555


In [85]:
brigantine = pd.DataFrame(year.query('City == [\'brigantine\']'))

camden = pd.DataFrame(year.query('City == [\'camden\']'))

elizabeth = pd.DataFrame(year.query('City == [\'elizabeth\']'))

highland_park = pd.DataFrame(year.query('City == [\'highland park\']'))

little_ferry = pd.DataFrame(year.query('City == [\'little ferry\']'))

hopatcong = pd.DataFrame(year.query('City == [\'hopatcong\']'))

new_brunswick = pd.DataFrame(year.query('City == [\'new brunswick\']'))

newark = pd.DataFrame(year.query('City == [\'newark\']'))

secaucus = pd.DataFrame(year.query('City == [\'secaucus\']'))

# Please take into account Little Ferry and Secaucus

fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(20,15))

brigantine.plot(kind='bar', color='black', ylim=(0,0.75), ax=axes[0,0], legend=False)
axes[0,0].set_title("Brigantine")

camden.plot(kind='bar', color='green', ylim=(0,0.75), ax=axes[0,1], legend=False)
axes[0,1].set_title("Camden")

elizabeth.plot(kind='bar', ylim=(0,0.75), ax=axes[0,2], legend=False)
axes[0,2].set_title("Elizabeth")

highland_park.plot(kind='bar', color='blue', ylim =(0,0.75), ax=axes[1,0], legend=False)
axes[1,0].set_title("Highland Park")

little_ferry.plot(kind='bar', color='gray', ax=axes[1,1], legend=False)
axes[1,1].set_title("Little Ferry")

hopatcong.plot(kind='bar', color='red', ylim = (0,0.75), ax=axes[1,2], legend=False)
axes[1,2].set_title("Hopatcong")

new_brunswick.plot(kind='bar', color = 'yellow', ylim =(0,0.75), ax=axes[2,0], legend=False)
axes[2,0].set_title("New Brunswick")

newark.plot(kind='bar', color='purple', ylim=(0,0.75), ax=axes[2,1], legend=False)
axes[2,1].set_title("Newark")

secaucus.plot(kind='bar', color='gray', ax=axes[2,2], legend=False)
axes[2,2].set_title("Secaucus")

fig.subplots_adjust(hspace=0.75)
plt.tight_layout()

<IPython.core.display.Javascript object>

In [89]:
year_list = [1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016]

fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(20,15))

brigantine.plot(kind='scatter',x = 'SAMPLE_VALUE_REPORTED', y = 'YEAR' ,color='black', ylim=(0,0.75), ax=axes[0,0], legend=False)
# axes[0,0].set_title("Brigantine")

camden.plot(kind='scatter', y= year_list, color='green', ylim=(0,0.75), ax=axes[0,1], legend=False)
# axes[0,1].set_title("Camden")

elizabeth.plot(kind='scatter', y = year_list, ylim=(0,0.75), ax=axes[0,2], legend=False)
# axes[0,2].set_title("Elizabeth")

highland_park.plot(kind='scatter', y=year_list, color='blue', ylim =(0,0.75), ax=axes[1,0], legend=False)
# axes[1,0].set_title("Highland Park")

little_ferry.plot(kind='scatter', y = year_list, color='gray', ax=axes[1,1], legend=False)
# axes[1,1].set_title("Little Ferry")

hopatcong.plot(kind='scatter', y = year_list, color='red', ylim = (0,0.75), ax=axes[1,2], legend=False)
# axes[1,2].set_title("Hopatcong")

new_brunswick.plot(kind='scatter', y = year_list,  color = 'yellow', ylim =(0,0.75), ax=axes[2,0], legend=False)
# axes[2,0].set_title("New Brunswick")

newark.plot(kind='scatter', y = year_list, color='purple', ylim=(0,0.75), ax=axes[2,1], legend=False)
# axes[2,1].set_title("Newark")

secaucus.plot(kind='scatter', y= year_list, color='brown', ax=axes[2,2], legend=False)
# axes[2,2].set_title("Secaucus")

fig.subplots_adjust(hspace=0.75)
plt.tight_layout()

<IPython.core.display.Javascript object>

KeyError: 'YEAR'

# Benzene Data

In [90]:
benzenepath = "Database/Benzene/Benzene.csv"
benzene = pd.read_csv(benzenepath)
benzene.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,STATE_ABBR,AMA_SITE_CODE,AQS_POC,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,AQS_PARAMETER_CODE,AQS_PARAMETER_NAME,...,AQS_QUALIFIER_10,AQS_METHOD_CODE,SAMPLE_COLLECTION_DESC,SAMPLE_ANALYSIS_DESC,SAMPLE_VALUE_FLAG,BELOW_MDL_FLAG,CENSUS_TRACT_ID,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
0,NJ,340390004,1,SLAMS,2016,3,16-Sep-16,17:00,45201,Benzene,...,,92,Tenax GR/Trap,Thermal Desorber GC/PID,,,34039030600,3685,40.64144,-74.208365
1,NJ,340390004,1,SLAMS,2016,3,18-Sep-16,15:00,45201,Benzene,...,,92,Tenax GR/Trap,Thermal Desorber GC/PID,,,34039030600,3685,40.64144,-74.208365
2,NJ,340390004,1,SLAMS,2016,3,17-Sep-16,07:00,45201,Benzene,...,,92,Tenax GR/Trap,Thermal Desorber GC/PID,,,34039030600,3685,40.64144,-74.208365
3,NJ,340390004,1,SLAMS,2016,3,17-Sep-16,06:00,45201,Benzene,...,,92,Tenax GR/Trap,Thermal Desorber GC/PID,,,34039030600,3685,40.64144,-74.208365
4,NJ,340390004,1,SLAMS,2016,3,17-Sep-16,05:00,45201,Benzene,...,,92,Tenax GR/Trap,Thermal Desorber GC/PID,,,34039030600,3685,40.64144,-74.208365


In [91]:
columns=['PROGRAM', 'YEAR', 'QUARTER',
       'SAMPLE_DATE', 'SAMPLE_START_TIME', 'DURATION_DESC',
       'SAMPLE_VALUE_REPORTED', 'AQS_UNIT_CODE', 'UNIT_DESC',
       'SAMPLING_FREQUENCY_CODE', 'SAMPLE_VALUE_STD_FINAL_UG_M3',
       'SAMPLE_VALUE_STD_FINAL_TYPE', 'MDL_STD_UG_M3',
       'MDL_TYPE', 'CENSUS_TRACT_POPULATION_2010', 'MONITOR_LATITUDE', 'MONITOR_LONGITUDE']
benzene_clean = benzene.filter(items=columns)

In [92]:
year_group = benzene_clean.groupby(by='YEAR')

In [93]:
year_group['SAMPLE_VALUE_STD_FINAL_UG_M3'].mean()

YEAR
1990    3.847527
1991    3.245165
1992    4.663434
1993    3.420350
1994    4.043942
1995    0.742762
1996    0.898580
1997    0.968244
1998    0.839327
1999    0.722443
2000    0.682980
2001    0.675213
2002    0.615717
2003    0.680170
2004    0.574209
2005    0.525133
2006    0.461789
2007    0.406142
2008    0.506511
2009    0.307327
2010    0.291549
2011    0.252588
2012    0.242723
2013    0.237689
2014    0.236264
2015    0.423141
2016    0.798671
Name: SAMPLE_VALUE_STD_FINAL_UG_M3, dtype: float64

In [94]:
year_list = year_group['YEAR'].unique()

In [117]:
plt.figure(figsize=(10,5))
plt.bar(year_list,year_group['SAMPLE_VALUE_STD_FINAL_UG_M3'].mean(), alpha=0.7)
plt.xlim(1989,2017)
plt.title('Sample Final Mean by Year (μg/m$^3$))', size=20)
plt.xlabel('Year', size=16)
plt.ylabel('Sample Value Mean', size=16)
# plt.savefig('Images/sample_final_mean.png')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Sample Value Mean')

In [101]:
benzene_clean['Day'], benzene_clean['Month'], benzene_clean['Year Abbrev'] = benzene_clean['SAMPLE_DATE'].str.split('-', 2).str
benzene_clean.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,AQS_UNIT_CODE,UNIT_DESC,SAMPLING_FREQUENCY_CODE,SAMPLE_VALUE_STD_FINAL_UG_M3,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,MDL_TYPE,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE,Day,Month,Year Abbrev
0,SLAMS,2016,3,16-Sep-16,17:00,1 HOUR,0.139,8,PARTS PER BILLION,1,0.456957,L,0.319486,FEDERAL_MDL,3685,40.64144,-74.208365,16,Sep,16
1,SLAMS,2016,3,18-Sep-16,15:00,1 HOUR,0.384,8,PARTS PER BILLION,1,1.214223,L,0.319486,FEDERAL_MDL,3685,40.64144,-74.208365,18,Sep,16
2,SLAMS,2016,3,17-Sep-16,07:00,1 HOUR,0.904,8,PARTS PER BILLION,1,2.986913,L,0.319486,FEDERAL_MDL,3685,40.64144,-74.208365,17,Sep,16
3,SLAMS,2016,3,17-Sep-16,06:00,1 HOUR,0.814,8,PARTS PER BILLION,1,2.71547,L,0.319486,FEDERAL_MDL,3685,40.64144,-74.208365,17,Sep,16
4,SLAMS,2016,3,17-Sep-16,05:00,1 HOUR,0.484,8,PARTS PER BILLION,1,1.617723,L,0.319486,FEDERAL_MDL,3685,40.64144,-74.208365,17,Sep,16


In [102]:
month_group = benzene_clean.groupby('Month')
month_group['SAMPLE_VALUE_STD_FINAL_UG_M3'].mean()

Month
Apr    0.772463
Aug    0.601456
Dec    1.120478
Feb    1.267400
Jan    1.449801
Jul    0.588440
Jun    0.581206
Mar    1.109304
May    0.790647
Nov    1.093993
Oct    0.898026
Sep    0.770132
Name: SAMPLE_VALUE_STD_FINAL_UG_M3, dtype: float64

In [103]:
month_list = ['Apr','Aug','Dec','Feb','Jan','Jul','Jun','Mar','May','Nov','Oct','Sep']
# month_list

In [116]:
plt.figure(figsize=(10,5))
plt.bar(month_list,month_group['SAMPLE_VALUE_STD_FINAL_UG_M3'].mean(), alpha=0.7)
# plt.xlim(1989,2017)
plt.title('Sample Final Mean by Month (μg/m$^3$)', size=20)
plt.xlabel('Month', size=16)
plt.ylabel('Sample Value Mean', size=16)

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Sample Value Mean')

In [106]:
coordinates = []
    
for index, row in benzene_clean.iterrows(): 
    if (row['MONITOR_LATITUDE'], row['MONITOR_LONGITUDE']) not in coordinates:
        coordinates.append((row['MONITOR_LATITUDE'],row['MONITOR_LONGITUDE']))
coordinates

[(40.64144, -74.208365),
 (40.601725, -74.441067),
 (40.652053, -74.199861),
 (40.598028, -74.453808),
 (40.462182, -74.429439),
 (40.283091999999996, -74.742644),
 (40.472825, -74.422403),
 (39.923041999999995, -75.097617),
 (40.720989, -74.192892),
 (40.67025, -74.126081),
 (40.726668, -74.143741),
 (39.83709, -75.24401),
 (40.915746999999996, -74.176733),
 (40.927458, -74.160611),
 (40.918381, -74.168092),
 (40.787628000000005, -74.676301),
 (39.934446, -75.125291),
 (40.853578999999996, -73.966212)]

In [107]:
cities = []
for coordinate_pair in coordinates:
    lat, lon = coordinate_pair
    cities.append(citipy.nearest_city(lat, lon))
cities

[<citipy.citipy.City at 0x11ae76160>,
 <citipy.citipy.City at 0x11ae7ce48>,
 <citipy.citipy.City at 0x11ae76160>,
 <citipy.citipy.City at 0x11ae7ce48>,
 <citipy.citipy.City at 0x11ae7ca20>,
 <citipy.citipy.City at 0x11ae76390>,
 <citipy.citipy.City at 0x11ae791d0>,
 <citipy.citipy.City at 0x11ae72358>,
 <citipy.citipy.City at 0x11ae7cc50>,
 <citipy.citipy.City at 0x11ae6fba8>,
 <citipy.citipy.City at 0x11ae76f98>,
 <citipy.citipy.City at 0x11aee58d0>,
 <citipy.citipy.City at 0x11ae7f588>,
 <citipy.citipy.City at 0x11ae7f588>,
 <citipy.citipy.City at 0x11ae7f588>,
 <citipy.citipy.City at 0x11ae794a8>,
 <citipy.citipy.City at 0x11ae72358>,
 <citipy.citipy.City at 0x11ae767b8>]

In [108]:
city_names = []
for city in cities:
    city_names.append(city.city_name)
city_names

['elizabeth',
 'north plainfield',
 'elizabeth',
 'north plainfield',
 'new brunswick',
 'ewing',
 'highland park',
 'camden',
 'newark',
 'bayonne',
 'harrison',
 'darby',
 'paterson',
 'paterson',
 'paterson',
 'hopatcong',
 'camden',
 'fort lee']

In [109]:
city_data = pd.DataFrame({'Coordinates':coordinates,'City Name': city_names})
city_data.head()

Unnamed: 0,Coordinates,City Name
0,"(40.64144, -74.208365)",elizabeth
1,"(40.601725, -74.441067)",north plainfield
2,"(40.652053, -74.199861)",elizabeth
3,"(40.598028, -74.453808)",north plainfield
4,"(40.462182, -74.429439)",new brunswick


In [110]:
city_index = []
for index, row in benzene_clean.iterrows():
    for cindex, crow in city_data.iterrows():
        if crow['Coordinates'][1] == row['MONITOR_LONGITUDE']:
            city_index.append(crow['City Name'])
            continue
            
benzene_clean['City'] = city_index            
benzene_clean.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,AQS_UNIT_CODE,UNIT_DESC,SAMPLING_FREQUENCY_CODE,...,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,MDL_TYPE,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE,Day,Month,Year Abbrev,City
0,SLAMS,2016,3,16-Sep-16,17:00,1 HOUR,0.139,8,PARTS PER BILLION,1,...,L,0.319486,FEDERAL_MDL,3685,40.64144,-74.208365,16,Sep,16,elizabeth
1,SLAMS,2016,3,18-Sep-16,15:00,1 HOUR,0.384,8,PARTS PER BILLION,1,...,L,0.319486,FEDERAL_MDL,3685,40.64144,-74.208365,18,Sep,16,elizabeth
2,SLAMS,2016,3,17-Sep-16,07:00,1 HOUR,0.904,8,PARTS PER BILLION,1,...,L,0.319486,FEDERAL_MDL,3685,40.64144,-74.208365,17,Sep,16,elizabeth
3,SLAMS,2016,3,17-Sep-16,06:00,1 HOUR,0.814,8,PARTS PER BILLION,1,...,L,0.319486,FEDERAL_MDL,3685,40.64144,-74.208365,17,Sep,16,elizabeth
4,SLAMS,2016,3,17-Sep-16,05:00,1 HOUR,0.484,8,PARTS PER BILLION,1,...,L,0.319486,FEDERAL_MDL,3685,40.64144,-74.208365,17,Sep,16,elizabeth


In [111]:
city_group = benzene_clean.groupby('City')
city_group['SAMPLE_VALUE_STD_FINAL_UG_M3'].mean()

City
bayonne             0.789630
camden              0.944523
darby               1.068948
elizabeth           1.292370
ewing               0.480158
fort lee            0.642609
harrison            3.263275
highland park       0.770077
hopatcong           0.557511
new brunswick       0.407624
newark              0.662322
north plainfield    4.350398
paterson            0.464650
Name: SAMPLE_VALUE_STD_FINAL_UG_M3, dtype: float64

In [112]:
city_list = city_group['City'].value_counts()
city_list
city_list = ['Bayonne', 'Camden', 'Darby','Elizabeth','Ewing','Fort Lee','Harrison','Highland Park','Hopcatong','New Brunswick','Newark','North Plainfield','Paterson']

In [120]:
city_year_group = benzene_clean.groupby(by=['YEAR', 'City'])
city_year_mean = pd.DataFrame(city_year_group['SAMPLE_VALUE_STD_FINAL_UG_M3'].mean())
city_year_mean

Unnamed: 0_level_0,Unnamed: 1_level_0,SAMPLE_VALUE_STD_FINAL_UG_M3
YEAR,City,Unnamed: 2_level_1
1990,camden,4.219280
1990,harrison,2.939274
1990,north plainfield,3.475300
1991,camden,3.394541
1991,harrison,5.281108
...,...,...
2016,elizabeth,1.291118
2016,fort lee,0.642609
2016,hopatcong,0.339812
2016,new brunswick,0.171683


In [115]:
plt.figure(figsize=(10,5))
plt.bar(city_list,city_group['SAMPLE_VALUE_STD_FINAL_UG_M3'].mean(), alpha=0.7)
# plt.xlim(1989,2017)
plt.title('Sample Final Mean by City (microg/c3)', size=20)
plt.xlabel('City', size=16)
plt.xticks(rotation=45)
plt.ylabel('Sample Value Mean', size=16)
# plt.savefig('Images/sample_final_mean_city.png')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Sample Value Mean')

In [121]:
elizabeth = pd.DataFrame(city_year_mean.query('City == [\'elizabeth\']'))
# elizabeth

camden = pd.DataFrame(city_year_mean.query('City == [\'camden\']'))
# camden

hopatcong = pd.DataFrame(city_year_mean.query('City == [\'hopatcong\']'))
# hopatcong

new_brunswick = pd.DataFrame(city_year_mean.query('City == [\'new brunswick\']'))
# new_brunswick

ewing = pd.DataFrame(city_year_mean.query('City == [\'ewing\']'))
# ewing

highland_park = pd.DataFrame(city_year_mean.query('City == [\'highland park\']'))
# highland_park

fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(20,15))
fig.suptitle("Air Quality Change Over Time (μg/$m^3$)", size = 24, va= 'top')

elizabeth.plot(kind='bar', ax=axes[0,0], ylim=(0,4.25), legend=False)
axes[0,0].set_title("Elizabeth")

camden.plot(kind='bar', color='black', ax=axes[0,1], ylim=(0,4.25), legend=False)
axes[0,1].set_title("Camden")

hopatcong.plot(kind='bar', color='red', ax=axes[0,2], ylim=(0,4.25), legend=False)
axes[0,2].set_title("Hopatcong")

new_brunswick.plot(kind='bar', color = 'green', ax=axes[1,0], ylim=(0,4.25), legend=False)
axes[1,0].set_title("New Brunswick")

highland_park.plot(kind='bar', color='gray', ax=axes[1,1], ylim=(0,4.25), legend=False)
axes[1,1].set_title("Highland Park")

ewing.plot(kind='bar', color='purple', ax=axes[1,2], ylim=(0,4.25), legend=False)
axes[1,2].set_title("Ewing")

# fig.subplots_adjust(hspace=0.45,wspace=0.1)
plt.tight_layout(pad=6)
# plt.savefig('Images/air_quality_change_city.png')

<IPython.core.display.Javascript object>

# Toluene Data

# Cadmium Data

In [122]:
cadmium_df = pd.read_csv("Database/Cadmium/Cadmium.csv.csv")
cadmium_df.head()

Unnamed: 0,STATE_ABBR,AMA_SITE_CODE,AQS_POC,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,AQS_PARAMETER_CODE,AQS_PARAMETER_NAME,...,AQS_QUALIFIER_10,AQS_METHOD_CODE,SAMPLE_COLLECTION_DESC,SAMPLE_ANALYSIS_DESC,SAMPLE_VALUE_FLAG,BELOW_MDL_FLAG,CENSUS_TRACT_ID,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
0,NJ,340230006,5,PM2.5 SPECIATION NETWORK,2007,4,21-Oct-07,00:00,88110,Cadmium (PM2.5),...,,811,Met One SASS Teflon,Energy dispersive XRF,,Y,34023006206,1841,40.472825,-74.422403
1,NJ,340230006,5,PM2.5 SPECIATION NETWORK,2007,3,03-Sep-07,00:00,88110,Cadmium (PM2.5),...,,811,Met One SASS Teflon,Energy dispersive XRF,ND,,34023006206,1841,40.472825,-74.422403
2,NJ,340230006,5,PM2.5 SPECIATION NETWORK,2007,3,06-Sep-07,00:00,88110,Cadmium (PM2.5),...,,811,Met One SASS Teflon,Energy dispersive XRF,ND,,34023006206,1841,40.472825,-74.422403
3,NJ,340230006,5,PM2.5 SPECIATION NETWORK,2007,3,09-Sep-07,00:00,88110,Cadmium (PM2.5),...,,811,Met One SASS Teflon,Energy dispersive XRF,ND,,34023006206,1841,40.472825,-74.422403
4,NJ,340230006,5,PM2.5 SPECIATION NETWORK,2007,3,12-Sep-07,00:00,88110,Cadmium (PM2.5),...,,811,Met One SASS Teflon,Energy dispersive XRF,ND,,34023006206,1841,40.472825,-74.422403


In [123]:
cadmium_clean= cadmium_df.filter(items=['PROGRAM', 'YEAR', 'QUARTER',
       'SAMPLE_DATE', 'SAMPLE_START_TIME', 'DURATION_DESC',
       'SAMPLE_VALUE_REPORTED', 'AQS_UNIT_CODE', 'UNIT_DESC',
       'SAMPLING_FREQUENCY_CODE', 'SAMPLE_VALUE_STD_FINAL_UG_M3',
       'SAMPLE_VALUE_STD_FINAL_TYPE', 'MDL_STD_UG_M3',
       'CENSUS_TRACT_POPULATION_2010', 'MONITOR_LATITUDE', 'MONITOR_LONGITUDE'])
cadmium_clean.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,AQS_UNIT_CODE,UNIT_DESC,SAMPLING_FREQUENCY_CODE,SAMPLE_VALUE_STD_FINAL_UG_M3,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE
0,PM2.5 SPECIATION NETWORK,2007,4,21-Oct-07,00:00,24 HOURS,0.00175,105,UG/CU METER (LC),3,0.00175,L,0.01,1841,40.472825,-74.422403
1,PM2.5 SPECIATION NETWORK,2007,3,03-Sep-07,00:00,24 HOURS,0.0,105,UG/CU METER (LC),3,0.0,L,0.017,1841,40.472825,-74.422403
2,PM2.5 SPECIATION NETWORK,2007,3,06-Sep-07,00:00,24 HOURS,0.0,105,UG/CU METER (LC),3,0.0,L,0.019,1841,40.472825,-74.422403
3,PM2.5 SPECIATION NETWORK,2007,3,09-Sep-07,00:00,24 HOURS,0.0,105,UG/CU METER (LC),3,0.0,L,0.019,1841,40.472825,-74.422403
4,PM2.5 SPECIATION NETWORK,2007,3,12-Sep-07,00:00,24 HOURS,0.0,105,UG/CU METER (LC),3,0.0,L,0.01,1841,40.472825,-74.422403


In [143]:
cadmium_24hours= cadmium_clean[cadmium_clean['DURATION_DESC'] == '24 HOURS']
cadmium_24hours.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,AQS_UNIT_CODE,UNIT_DESC,SAMPLING_FREQUENCY_CODE,SAMPLE_VALUE_STD_FINAL_UG_M3,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE,City
0,PM2.5 SPECIATION NETWORK,2007,4,21-Oct-07,00:00,24 HOURS,0.00175,105,UG/CU METER (LC),3,0.00175,L,0.01,1841,40.472825,-74.422403,highland park
1,PM2.5 SPECIATION NETWORK,2007,3,03-Sep-07,00:00,24 HOURS,0.0,105,UG/CU METER (LC),3,0.0,L,0.017,1841,40.472825,-74.422403,highland park
2,PM2.5 SPECIATION NETWORK,2007,3,06-Sep-07,00:00,24 HOURS,0.0,105,UG/CU METER (LC),3,0.0,L,0.019,1841,40.472825,-74.422403,highland park
3,PM2.5 SPECIATION NETWORK,2007,3,09-Sep-07,00:00,24 HOURS,0.0,105,UG/CU METER (LC),3,0.0,L,0.019,1841,40.472825,-74.422403,highland park
4,PM2.5 SPECIATION NETWORK,2007,3,12-Sep-07,00:00,24 HOURS,0.0,105,UG/CU METER (LC),3,0.0,L,0.01,1841,40.472825,-74.422403,highland park


In [144]:
cadmium_years_24hours=cadmium_24hours.groupby(by='YEAR')
cadmium_years_24hours.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,AQS_UNIT_CODE,UNIT_DESC,SAMPLING_FREQUENCY_CODE,SAMPLE_VALUE_STD_FINAL_UG_M3,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE,City
0,PM2.5 SPECIATION NETWORK,2007,4,21-Oct-07,00:00,24 HOURS,0.00175,105,UG/CU METER (LC),3,0.00175,L,0.010,1841,40.472825,-74.422403,highland park
1,PM2.5 SPECIATION NETWORK,2007,3,03-Sep-07,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.017,1841,40.472825,-74.422403,highland park
2,PM2.5 SPECIATION NETWORK,2007,3,06-Sep-07,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.019,1841,40.472825,-74.422403,highland park
3,PM2.5 SPECIATION NETWORK,2007,3,09-Sep-07,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.019,1841,40.472825,-74.422403,highland park
4,PM2.5 SPECIATION NETWORK,2007,3,12-Sep-07,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.010,1841,40.472825,-74.422403,highland park
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1603,PM2.5 SPECIATION NETWORK,2014,2,16-Jun-14,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.017,1841,40.472825,-74.422403,highland park
1625,PM2.5 SPECIATION NETWORK,2013,1,01-Jan-13,00:00,24 HOURS,0.01050,105,UG/CU METER (LC),3,0.01050,L,0.010,1841,40.472825,-74.422403,highland park
1713,PM2.5 SPECIATION NETWORK,2013,2,09-Jun-13,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.010,1841,40.472825,-74.422403,highland park
1714,PM2.5 SPECIATION NETWORK,2013,2,22-Apr-13,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.019,1841,40.472825,-74.422403,highland park


In [126]:
mean_values=cadmium_years_24hours['SAMPLE_VALUE_STD_FINAL_UG_M3'].mean()
new_df=pd.DataFrame(mean_values)
new_df=new_df.rename(columns={'SAMPLE_VALUE_STD_FINAL_UG_M3':'FINAL VALUE MEAN UG M3'})

In [127]:
njyears_at_risk= new_df[new_df['FINAL VALUE MEAN UG M3']>0.0018]
njyears_at_risk.head()

Unnamed: 0_level_0,FINAL VALUE MEAN UG M3
YEAR,Unnamed: 1_level_1
2002,0.002405
2003,0.002354
2004,0.002504
2005,0.002126
2009,0.002151


In [129]:
njyears_at_risk.plot(kind='bar', figsize=(10,5))
plt.title('(Cadmium) Years at Risk in NJ', fontsize=14)
plt.xlabel('Year', fontsize=14)
plt.ylabel('FINAL VALUE MEAN UG M3', fontsize=14)

<IPython.core.display.Javascript object>

Text(0, 0.5, 'FINAL VALUE MEAN UG M3')

In [130]:
coordinates = []
    
for index, row in cadmium_clean.iterrows(): 
    if (row['MONITOR_LATITUDE'], row['MONITOR_LONGITUDE']) not in coordinates:
        coordinates.append((row['MONITOR_LATITUDE'],row['MONITOR_LONGITUDE']))
coordinates

[(40.472825, -74.422403),
 (40.816809, -74.043673),
 (39.923041999999995, -75.097617),
 (39.934446, -75.125291),
 (40.833110999999995, -74.043459),
 (40.720989, -74.192892),
 (40.64144, -74.208365),
 (40.787628000000005, -74.676301),
 (40.462182, -74.429439)]

In [131]:
cities = []
for coordinate_pair in coordinates:
    lat, lon = coordinate_pair
    cities.append(citipy.nearest_city(lat, lon))
# cities
city_names = []
for city in cities:
    city_names.append(city.city_name)
city_names

['highland park',
 'secaucus',
 'camden',
 'camden',
 'little ferry',
 'newark',
 'elizabeth',
 'hopatcong',
 'new brunswick']

In [132]:
city_data = pd.DataFrame({'Coordinates':coordinates,'City Name': city_names})
city_data

Unnamed: 0,Coordinates,City Name
0,"(40.472825, -74.422403)",highland park
1,"(40.816809, -74.043673)",secaucus
2,"(39.923041999999995, -75.097617)",camden
3,"(39.934446, -75.125291)",camden
4,"(40.833110999999995, -74.043459)",little ferry
5,"(40.720989, -74.192892)",newark
6,"(40.64144, -74.208365)",elizabeth
7,"(40.787628000000005, -74.676301)",hopatcong
8,"(40.462182, -74.429439)",new brunswick


In [133]:
figure_layout = {
    'width': '800px',
    'height': '300px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'
}
fig = gmaps.figure(layout=figure_layout)

In [134]:
# Assign the marker layer to a variable
markers = gmaps.marker_layer(coordinates)
# Add the layer to the map
fig.add_layer(markers)
fig

Figure(layout=FigureLayout(border='1px solid black', height='300px', margin='0 auto 0 auto', padding='1px', wi…

In [142]:
city_index = []
for index, row in cadmium_clean.iterrows():
    for cindex, crow in city_data.iterrows():
        if crow['Coordinates'][1] == row['MONITOR_LONGITUDE']:
            city_index.append(crow['City Name'])
            continue
            
cadmium_clean['City'] = city_index            
cadmium_clean.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,AQS_UNIT_CODE,UNIT_DESC,SAMPLING_FREQUENCY_CODE,SAMPLE_VALUE_STD_FINAL_UG_M3,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE,City
0,PM2.5 SPECIATION NETWORK,2007,4,21-Oct-07,00:00,24 HOURS,0.00175,105,UG/CU METER (LC),3,0.00175,L,0.01,1841,40.472825,-74.422403,highland park
1,PM2.5 SPECIATION NETWORK,2007,3,03-Sep-07,00:00,24 HOURS,0.0,105,UG/CU METER (LC),3,0.0,L,0.017,1841,40.472825,-74.422403,highland park
2,PM2.5 SPECIATION NETWORK,2007,3,06-Sep-07,00:00,24 HOURS,0.0,105,UG/CU METER (LC),3,0.0,L,0.019,1841,40.472825,-74.422403,highland park
3,PM2.5 SPECIATION NETWORK,2007,3,09-Sep-07,00:00,24 HOURS,0.0,105,UG/CU METER (LC),3,0.0,L,0.019,1841,40.472825,-74.422403,highland park
4,PM2.5 SPECIATION NETWORK,2007,3,12-Sep-07,00:00,24 HOURS,0.0,105,UG/CU METER (LC),3,0.0,L,0.01,1841,40.472825,-74.422403,highland park


In [145]:
cadmium_24hours=cadmium_24hours.dropna()
cadmium_24hours

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,AQS_UNIT_CODE,UNIT_DESC,SAMPLING_FREQUENCY_CODE,SAMPLE_VALUE_STD_FINAL_UG_M3,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE,City
0,PM2.5 SPECIATION NETWORK,2007,4,21-Oct-07,00:00,24 HOURS,0.00175,105,UG/CU METER (LC),3,0.00175,L,0.010,1841,40.472825,-74.422403,highland park
1,PM2.5 SPECIATION NETWORK,2007,3,03-Sep-07,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.017,1841,40.472825,-74.422403,highland park
2,PM2.5 SPECIATION NETWORK,2007,3,06-Sep-07,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.019,1841,40.472825,-74.422403,highland park
3,PM2.5 SPECIATION NETWORK,2007,3,09-Sep-07,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.019,1841,40.472825,-74.422403,highland park
4,PM2.5 SPECIATION NETWORK,2007,3,12-Sep-07,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.010,1841,40.472825,-74.422403,highland park
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7307,PM2.5 SPECIATION NETWORK,2007,4,14-Nov-07,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.019,1649,40.787628,-74.676301,hopatcong
7308,PM2.5 SPECIATION NETWORK,2007,4,17-Nov-07,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.017,1649,40.787628,-74.676301,hopatcong
7309,PM2.5 SPECIATION NETWORK,2007,4,20-Nov-07,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.010,1649,40.787628,-74.676301,hopatcong
7310,PM2.5 SPECIATION NETWORK,2007,4,26-Nov-07,00:00,24 HOURS,0.00024,105,UG/CU METER (LC),3,0.00024,L,0.010,1649,40.787628,-74.676301,hopatcong


In [147]:
city_group = cadmium_24hours.groupby(by=['YEAR','City'])
city_group.head()

Unnamed: 0,PROGRAM,YEAR,QUARTER,SAMPLE_DATE,SAMPLE_START_TIME,DURATION_DESC,SAMPLE_VALUE_REPORTED,AQS_UNIT_CODE,UNIT_DESC,SAMPLING_FREQUENCY_CODE,SAMPLE_VALUE_STD_FINAL_UG_M3,SAMPLE_VALUE_STD_FINAL_TYPE,MDL_STD_UG_M3,CENSUS_TRACT_POPULATION_2010,MONITOR_LATITUDE,MONITOR_LONGITUDE,City
0,PM2.5 SPECIATION NETWORK,2007,4,21-Oct-07,00:00,24 HOURS,0.00175,105,UG/CU METER (LC),3,0.00175,L,0.010,1841,40.472825,-74.422403,highland park
1,PM2.5 SPECIATION NETWORK,2007,3,03-Sep-07,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.017,1841,40.472825,-74.422403,highland park
2,PM2.5 SPECIATION NETWORK,2007,3,06-Sep-07,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.019,1841,40.472825,-74.422403,highland park
3,PM2.5 SPECIATION NETWORK,2007,3,09-Sep-07,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.019,1841,40.472825,-74.422403,highland park
4,PM2.5 SPECIATION NETWORK,2007,3,12-Sep-07,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.010,1841,40.472825,-74.422403,highland park
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7083,PM2.5 SPECIATION NETWORK,2008,3,19-Aug-08,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.010,1649,40.787628,-74.676301,hopatcong
7084,PM2.5 SPECIATION NETWORK,2008,3,05-Jul-08,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.019,1649,40.787628,-74.676301,hopatcong
7085,PM2.5 SPECIATION NETWORK,2008,3,08-Jul-08,00:00,24 HOURS,0.00000,105,UG/CU METER (LC),3,0.00000,L,0.019,1649,40.787628,-74.676301,hopatcong
7086,PM2.5 SPECIATION NETWORK,2008,3,14-Jul-08,00:00,24 HOURS,0.00467,105,UG/CU METER (LC),3,0.00467,L,0.010,1649,40.787628,-74.676301,hopatcong


In [148]:
city_df = pd.DataFrame(city_group['SAMPLE_VALUE_STD_FINAL_UG_M3'].mean())
city_df

Unnamed: 0_level_0,Unnamed: 1_level_0,SAMPLE_VALUE_STD_FINAL_UG_M3
YEAR,City,Unnamed: 2_level_1
2001,camden,0.002403
2001,elizabeth,0.001725
2001,highland park,0.001341
2001,hopatcong,0.002016
2002,camden,0.002494
...,...,...
2016,elizabeth,0.003252
2016,highland park,0.004366
2016,hopatcong,0.003459
2016,new brunswick,0.002672


In [149]:
#Get cities with Risk of getting cancer by Cadmium EPA at Risk levels by mean levels of the data duing the time evaluated
cadmiumcities_at_risk= city_df[city_df['SAMPLE_VALUE_STD_FINAL_UG_M3'] > 0.0018]
cadmiumcities_at_risk=cadmiumcities_at_risk.rename(columns={'SAMPLE_VALUE_STD_FINAL_UG_M3':'FINAL VALUE MEAN UG M3'})
cadmiumcities_at_risk.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,FINAL VALUE MEAN UG M3
YEAR,City,Unnamed: 2_level_1
2001,camden,0.002403
2001,hopatcong,0.002016
2002,camden,0.002494
2002,elizabeth,0.002805
2002,hopatcong,0.003017


In [151]:
cadmiumcities_at_risk.plot(kind='bar', figsize=(10,5))
plt.title('(Cadmium) Cities and Years at Risk in NJ', fontsize=14)
plt.xlabel('Year', fontsize=14)
plt.ylabel('FINAL VALUE MEAN UG M3', fontsize=14)

<IPython.core.display.Javascript object>

Text(0, 0.5, 'FINAL VALUE MEAN UG M3')

In [159]:
city_df_pivot=city_df.reset_index().pivot('YEAR','City','SAMPLE_VALUE_STD_FINAL_UG_M3')
city_df_pivot=city_df_pivot.fillna(0)

city_df_pivot

City,camden,elizabeth,highland park,hopatcong,new brunswick,newark,secaucus
YEAR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2001,0.002403,0.001725,0.001341,0.002016,0.0,0.0,0.0
2002,0.002494,0.002805,0.001767,0.003017,0.0,0.0,0.0
2003,0.001763,0.003077,0.001957,0.002705,0.0,0.0,0.0
2004,0.002728,0.002668,0.0023,0.002433,0.0,0.0,0.0
2005,0.002041,0.002181,0.002248,0.001966,0.0,0.0,0.0
2006,0.001851,0.002006,0.001617,0.001591,0.0,0.0,0.0
2007,0.000321,0.000425,0.000629,0.000874,0.0,0.0,0.0
2008,0.000743,0.001681,0.000878,0.001058,0.0,0.0,0.002031
2009,0.0,0.002028,0.002165,0.002253,0.0,0.0,0.0
2010,0.0,0.002301,0.002922,0.002728,0.0,0.002002,0.0


In [160]:

city_df_elizabeth=city_df_pivot['elizabeth']
city_df_camden=city_df_pivot['camden']
city_df_highland_park=city_df_pivot['highland park']
city_df_hopatcong=city_df_pivot['hopatcong']

In [162]:
fig=plt.figure(figsize=(10,5))
city_df_elizabeth.plot(x='YEAR', y='elizabeth', color="r", marker='o', markersize=5, linestyle="dashed", linewidth=0.50, label='elizabeth')
city_df_camden.plot(x='YEAR', y='camden', color="b", marker='^', markersize=5, linestyle="dashed", linewidth=0.50, label='camden')
city_df_highland_park.plot(x='YEAR', y='highland park', color="g", marker='s', markersize=5, linestyle="dashed", linewidth=0.50, label='highland park')
city_df_hopatcong.plot(x='YEAR', y='hopatcong', color="k", marker='d', markersize=5, linestyle="dashed", linewidth=0.50, label='hopatcong')
plt.title('Change of Mean Values Over Time',fontsize=12)
plt.xlabel('Time (Years)',fontsize=12)
plt.ylabel('FINAL VALUE MEAN UG M3',fontsize=10)
plt.legend(loc="best", fontsize="small", fancybox=True)
plt.show()

<IPython.core.display.Javascript object>

In [154]:
#Out of the City with Highest Risk Show change of Mean over the time evaluated
city_at_higher_risk=cadmium_24hours[cadmium_24hours['City'] == 'highland park']
city_at_higher_risk_year=city_at_higher_risk.groupby(by='YEAR')
city_at_higher_risk_df = pd.DataFrame(city_at_higher_risk_year['SAMPLE_VALUE_STD_FINAL_UG_M3'].mean())
city_at_higher_risk_df=city_at_higher_risk_df.rename(columns={'SAMPLE_VALUE_STD_FINAL_UG_M3':'FINAL VALUE MEAN UG M3'})
city_at_higher_risk_df
# city_at_higher_risk

Unnamed: 0_level_0,FINAL VALUE MEAN UG M3
YEAR,Unnamed: 1_level_1
2001,0.001341
2002,0.001767
2003,0.001957
2004,0.0023
2005,0.002248
2006,0.001617
2007,0.000629
2008,0.000878
2009,0.002165
2010,0.002922


In [156]:
year_list = ['2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011',
            '2012', '2013', '2014', '2015', '2016']

In [155]:
Final_value_mean=city_at_higher_risk_df['FINAL VALUE MEAN UG M3'].values
Final_value_mean

array([0.00134137, 0.00176671, 0.00195727, 0.00229973, 0.00224786,
       0.00161702, 0.0006287 , 0.00087795, 0.00216494, 0.00292157,
       0.00288884, 0.00162296, 0.00140616, 0.00192406, 0.00214506,
       0.00436608])

In [158]:
#Show change of mean Value of City with highest risk level over the time evaluated
plt.figure(figsize=(10,5))
plt.plot(year_list, Final_value_mean, marker='o')
plt.title('Highest at Risk City (Highland Park)', fontsize=14)
plt.xlabel('Year', fontsize=14)
plt.ylabel('FINAL VALUE MEAN UG M3', fontsize=14)
plt.grid(True)
plt.show()

<IPython.core.display.Javascript object>