In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as st

In [3]:
# Load the 2006-2010 pollution readings from the project's Resources folder
pollution_csv_path = "Resources/pollution_2006_2010.csv"
pollution_data_df = pd.read_csv(pollution_csv_path)

# Show the loaded frame
pollution_data_df

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,County,City,Year,NO2 Units,NO2 Mean,NO2 1st Max Value,NO2 1st Max Hour,NO2 AQI,...,SO2 Units,SO2 Mean,SO2 1st Max Value,SO2 1st Max Hour,SO2 AQI,CO Units,CO Mean,CO 1st Max Value,CO 1st Max Hour,CO AQI
0,216292,216292,Contra Costa,Concord,2006,Parts per billion,9.478261,21.0,2,20,...,Parts per billion,0.045455,1.0,18,1.0,Parts per million,0.395652,0.7,9,
1,216293,216293,Contra Costa,Concord,2006,Parts per billion,9.478261,21.0,2,20,...,Parts per billion,0.045455,1.0,18,1.0,Parts per million,0.388889,0.5,6,6.0
2,216294,216294,Contra Costa,Concord,2006,Parts per billion,9.478261,21.0,2,20,...,Parts per billion,0.042857,0.3,20,,Parts per million,0.395652,0.7,9,
3,216295,216295,Contra Costa,Concord,2006,Parts per billion,9.478261,21.0,2,20,...,Parts per billion,0.042857,0.3,20,,Parts per million,0.388889,0.5,6,6.0
4,216296,216296,Contra Costa,Concord,2006,Parts per billion,11.521739,20.0,8,19,...,Parts per billion,0.227273,2.0,5,3.0,Parts per million,0.413043,0.6,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
181235,397527,397527,Solano,Vallejo,2010,Parts per billion,11.956522,30.7,6,28,...,Parts per billion,0.385714,0.7,23,,Parts per million,0.287500,0.5,23,6.0
181236,397528,397528,Solano,Vallejo,2010,Parts per billion,12.943478,23.6,0,22,...,Parts per billion,0.834783,1.8,1,1.0,Parts per million,0.578261,1.6,1,
181237,397529,397529,Solano,Vallejo,2010,Parts per billion,12.943478,23.6,0,22,...,Parts per billion,0.834783,1.8,1,1.0,Parts per million,0.641667,1.2,5,14.0
181238,397530,397530,Solano,Vallejo,2010,Parts per billion,12.943478,23.6,0,22,...,Parts per billion,0.757143,1.5,2,,Parts per million,0.578261,1.6,1,


In [4]:
# Store 'Year' as text and strip a trailing '.0' left over from float-typed years.
# The pattern is a raw string (so '\.' is not an invalid escape sequence) and is
# anchored with '$' so that only a trailing '.0' is removed, never one inside the value.
pollution_data_df['Year'] = (
    pollution_data_df['Year']
    .astype(str)
    .replace(r'\.0$', '', regex=True)
)
pollution_data_df.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,County,City,Year,NO2 Units,NO2 Mean,NO2 1st Max Value,NO2 1st Max Hour,NO2 AQI,...,SO2 Units,SO2 Mean,SO2 1st Max Value,SO2 1st Max Hour,SO2 AQI,CO Units,CO Mean,CO 1st Max Value,CO 1st Max Hour,CO AQI
0,216292,216292,Contra Costa,Concord,2006,Parts per billion,9.478261,21.0,2,20,...,Parts per billion,0.045455,1.0,18,1.0,Parts per million,0.395652,0.7,9,
1,216293,216293,Contra Costa,Concord,2006,Parts per billion,9.478261,21.0,2,20,...,Parts per billion,0.045455,1.0,18,1.0,Parts per million,0.388889,0.5,6,6.0
2,216294,216294,Contra Costa,Concord,2006,Parts per billion,9.478261,21.0,2,20,...,Parts per billion,0.042857,0.3,20,,Parts per million,0.395652,0.7,9,
3,216295,216295,Contra Costa,Concord,2006,Parts per billion,9.478261,21.0,2,20,...,Parts per billion,0.042857,0.3,20,,Parts per million,0.388889,0.5,6,6.0
4,216296,216296,Contra Costa,Concord,2006,Parts per billion,11.521739,20.0,8,19,...,Parts per billion,0.227273,2.0,5,3.0,Parts per million,0.413043,0.6,0,


####  Relationship between years and pollutants:

####  1- Years versus NO2 AQI (Nitrogen dioxide Air Quality Index):

In [5]:
# Mean NO2 AQI for each year.
# The aggregation is named by string ('mean') instead of passing np.mean: pandas 2.1+
# emits a FutureWarning for NumPy callables in agg, and the resulting two-level
# column header ('NO2 AQI', 'mean') is the same either way.
avg_no2_df = pollution_data_df.groupby('Year').agg({'NO2 AQI': ['mean']})
avg_no2_df

Unnamed: 0_level_0,NO2 AQI
Unnamed: 0_level_1,mean
Year,Unnamed: 1_level_2
2006,25.066858
2007,23.675629
2008,23.720774
2009,22.838768
2010,21.96083


In [6]:
# Collapse the two-level ('NO2 AQI', 'mean') header into a single flat column name
avg_no2_df = avg_no2_df.set_axis(['NO2_AQI_Mean'], axis='columns')
avg_no2_df

Unnamed: 0_level_0,NO2_AQI_Mean
Year,Unnamed: 1_level_1
2006,25.066858
2007,23.675629
2008,23.720774
2009,22.838768
2010,21.96083


In [7]:
# Move 'Year' out of the index into a regular column.
# Guarded so the cell is safe to re-run: calling reset_index() a second time would
# otherwise insert the old 0..n-1 index as a stray 'index' column.
if 'Year' not in avg_no2_df.columns:
    avg_no2_df = avg_no2_df.reset_index()
avg_no2_df

Unnamed: 0,Year,NO2_AQI_Mean
0,2006,25.066858
1,2007,23.675629
2,2008,23.720774
3,2009,22.838768
4,2010,21.96083


In [8]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np

# Use DataFrame.plot() in order to create a bar chart of the data
avg_no2_df.plot(kind="bar", figsize=(8,5))

# Give our chart some labels and a tile
plt.title("Years Vs. NO2 AQI Levels for California State")
plt.xlabel("Years")
plt.ylabel("NO2 AQI Levels")
plt.show()
plt.tight_layout()

plt.savefig('mq_charts/years_no2.png')

<IPython.core.display.Javascript object>

####  2- Years versus SO2 AQI (Sulphur dioxide Air Quality Index):

In [9]:
# Mean SO2 AQI for each year.
# The aggregation is named by string ('mean') instead of passing np.mean: pandas 2.1+
# emits a FutureWarning for NumPy callables in agg, and the resulting two-level
# column header ('SO2 AQI', 'mean') is the same either way.
avg_so2_df = pollution_data_df.groupby('Year').agg({'SO2 AQI': ['mean']})
avg_so2_df

Unnamed: 0_level_0,SO2 AQI
Unnamed: 0_level_1,mean
Year,Unnamed: 1_level_2
2006,4.758542
2007,3.937925
2008,3.550363
2009,2.835863
2010,2.085824


In [10]:
# Collapse the two-level ('SO2 AQI', 'mean') header into a single flat column name
avg_so2_df = avg_so2_df.set_axis(['SO2_AQI_Mean'], axis='columns')
avg_so2_df

Unnamed: 0_level_0,SO2_AQI_Mean
Year,Unnamed: 1_level_1
2006,4.758542
2007,3.937925
2008,3.550363
2009,2.835863
2010,2.085824


In [11]:
# Move 'Year' out of the index into a regular column.
# Guarded so the cell is safe to re-run: calling reset_index() a second time would
# otherwise insert the old 0..n-1 index as a stray 'index' column.
if 'Year' not in avg_so2_df.columns:
    avg_so2_df = avg_so2_df.reset_index()
avg_so2_df

Unnamed: 0,Year,SO2_AQI_Mean
0,2006,4.758542
1,2007,3.937925
2,2008,3.550363
3,2009,2.835863
4,2010,2.085824


In [12]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np

# Use DataFrame.plot() in order to create a bar chart of the data
avg_so2_df.plot(kind="bar", figsize=(8,5))

# Give our chart some labels and a tile
plt.title("Years Vs. SO2 AQI Levels for California State")
plt.xlabel("Years")
plt.ylabel("SO2 AQI Levels")
plt.show()
plt.tight_layout()

plt.savefig('mq_charts/years_so2.png')

<IPython.core.display.Javascript object>

####  3- Years versus CO AQI (Carbon monoxide Air Quality Index):

In [13]:
# Mean CO AQI for each year.
# The aggregation is named by string ('mean') instead of passing np.mean: pandas 2.1+
# emits a FutureWarning for NumPy callables in agg, and the resulting two-level
# column header ('CO AQI', 'mean') is the same either way.
avg_co_df = pollution_data_df.groupby('Year').agg({'CO AQI': ['mean']})
avg_co_df

Unnamed: 0_level_0,CO AQI
Unnamed: 0_level_1,mean
Year,Unnamed: 1_level_2
2006,7.403125
2007,6.735081
2008,6.336745
2009,6.420749
2010,5.539193


In [14]:
# Collapse the two-level ('CO AQI', 'mean') header into a single flat column name
avg_co_df = avg_co_df.set_axis(['CO_AQI_Mean'], axis='columns')
avg_co_df

Unnamed: 0_level_0,CO_AQI_Mean
Year,Unnamed: 1_level_1
2006,7.403125
2007,6.735081
2008,6.336745
2009,6.420749
2010,5.539193


In [15]:
# Move 'Year' out of the index into a regular column.
# Guarded so the cell is safe to re-run: calling reset_index() a second time would
# otherwise insert the old 0..n-1 index as a stray 'index' column.
if 'Year' not in avg_co_df.columns:
    avg_co_df = avg_co_df.reset_index()
avg_co_df

Unnamed: 0,Year,CO_AQI_Mean
0,2006,7.403125
1,2007,6.735081
2,2008,6.336745
3,2009,6.420749
4,2010,5.539193


In [16]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np

# Use DataFrame.plot() in order to create a bar chart of the data
avg_co_df.plot(kind="bar", figsize=(8,5))

# Give our chart some labels and a tile
plt.title("Years Vs. CO AQI Levels for California State")
plt.xlabel("Years")
plt.ylabel("CO AQI Levels")
plt.show()
plt.tight_layout()

plt.savefig('mq_charts/years_co.png')

<IPython.core.display.Javascript object>

####  4- Years versus O3 AQI (Ozone Air Quality Index):

In [17]:
# Mean O3 AQI for each year.
# The aggregation is named by string ('mean') instead of passing np.mean: pandas 2.1+
# emits a FutureWarning for NumPy callables in agg, and the resulting two-level
# column header ('O3 AQI', 'mean') is the same either way.
avg_o3_df = pollution_data_df.groupby('Year').agg({'O3 AQI': ['mean']})
avg_o3_df

Unnamed: 0_level_0,O3 AQI
Unnamed: 0_level_1,mean
Year,Unnamed: 1_level_2
2006,34.933054
2007,34.514307
2008,36.653721
2009,35.336751
2010,33.543562


In [18]:
# Collapse the two-level ('O3 AQI', 'mean') header into a single flat column name
avg_o3_df = avg_o3_df.set_axis(['O3_AQI_Mean'], axis='columns')
avg_o3_df

Unnamed: 0_level_0,O3_AQI_Mean
Year,Unnamed: 1_level_1
2006,34.933054
2007,34.514307
2008,36.653721
2009,35.336751
2010,33.543562


In [19]:
# Move 'Year' out of the index into a regular column.
# Guarded so the cell is safe to re-run: calling reset_index() a second time would
# otherwise insert the old 0..n-1 index as a stray 'index' column.
if 'Year' not in avg_o3_df.columns:
    avg_o3_df = avg_o3_df.reset_index()
avg_o3_df

Unnamed: 0,Year,O3_AQI_Mean
0,2006,34.933054
1,2007,34.514307
2,2008,36.653721
3,2009,35.336751
4,2010,33.543562


In [20]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np

# Use DataFrame.plot() in order to create a bar chart of the data
avg_o3_df.plot(kind="bar", figsize=(8,5))

# Give our chart some labels and a tile
plt.title("Years Vs. O3 AQI Levels for California State")
plt.xlabel("Years")
plt.ylabel("O3 AQI Levels")
plt.show()
plt.tight_layout()

plt.savefig('mq_charts/years_o3.png')

<IPython.core.display.Javascript object>

In [21]:
# Year labels for the combined chart, taken from the O3 summary frame
year = list(avg_o3_df['Year'])

# One plain list of yearly means per pollutant
co, so2, no2, o3 = (
    list(summary_frame[mean_column])
    for summary_frame, mean_column in (
        (avg_co_df, 'CO_AQI_Mean'),
        (avg_so2_df, 'SO2_AQI_Mean'),
        (avg_no2_df, 'NO2_AQI_Mean'),
        (avg_o3_df, 'O3_AQI_Mean'),
    )
)

In [22]:
# Combine the four yearly-mean lists into one frame indexed by year,
# one column per pollutant
pollutant_means = {
    'carbon monoxide': co,
    'sulphur dioxide': so2,
    'nitrogen dioxide': no2,
    'ozone': o3,
}
df = pd.DataFrame(pollutant_means, index=year)

# One line per pollutant, with a diamond marker at every year
lines = df.plot(kind='line', marker='D', title='Average Pollutants Air Quality Index Vs. Years')
plt.legend(bbox_to_anchor=(1, 0.5), loc='best')

# Axis labels
lines.set_xlabel("Years (2006 - 2010)")
lines.set_ylabel("Pollutants AQI levels")

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Pollutants AQI levels')

In [23]:
# Save the combined line chart through the Axes' own figure (`lines` is the Axes returned
# by df.plot.line) instead of pyplot's "current figure", which is not guaranteed to still
# be this chart once its cell has finished on every backend.
# bbox_inches='tight' keeps the legend, which is anchored at the right edge of the axes,
# from being cropped out of the saved image.
lines.get_figure().savefig('mq_charts/years_avg_pollutants.png', bbox_inches='tight')