In [2]:
## importing libraries
import pandas as pd
import matplotlib.pyplot as plt
import altair as alt
from altair import datum
import openpyxl

In [3]:
## cleaning data

# Upper-case spellings of station names are mapped to one canonical spelling.
# This is done on every frame BEFORE merging: 'Site Name' is a merge key, so
# two spellings of the same station would otherwise never land on the same row.
SITE_NAME_FIXES = {
    'VON HILLERN ST': 'Von Hillern ST',
    'DUDLEY SQUARE ROXBURY': 'Dudley Square Roxbury',
    'BOSTON KENMORE SQ': 'Boston Kenmore SQ',
}
MERGE_KEYS = ['Date', 'Site Name']


def load_aqi(path, drop_columns):
    """Read one pollutant CSV, drop the unused columns and normalise site names.

    Parameters
    ----------
    path : str
        CSV file to read; it must contain a 'Site Name' column.
    drop_columns : list of str
        Columns to remove from the loaded frame.

    Returns
    -------
    pandas.DataFrame
        The remaining columns, with 'Site Name' values rewritten through
        SITE_NAME_FIXES.
    """
    frame = pd.read_csv(path).drop(columns=drop_columns)
    frame['Site Name'] = frame['Site Name'].replace(SITE_NAME_FIXES)
    return frame


# carbon monoxide air quality data
# NOTE(review): unlike the other files, 'Main Pollutant' is not dropped here,
# so that column is carried into merged_df3 - confirm this is intended.
carbonmonoxideaqi = load_aqi("CarbonMonoxideAQIValuesuffockcounty2022.csv", ['Source', 'Site ID'])

# pm2.5 air quality data
pm25aqi = load_aqi("pm2.5 air quality.csv", ['Source', 'Site ID', 'Main Pollutant'])

# pm10 air quality data
pm10aqi = load_aqi("pm10 air quality.csv", ['Source', 'Site ID', 'Main Pollutant'])

# ozone air quality data
ozoneaqi = load_aqi("ozone air quality.csv", ['Source', 'Site ID', 'Main Pollutant'])

# NO2 air quality data
# NOTE(review): the file name says "Nitrogen monoxide" while the merged column
# is 'Nitrogen Dioxide AQI Value' - confirm the file holds NO2 readings.
no2aqui = load_aqi("Nitrogen monoxide air quality data.csv", ['Source', 'Site ID', 'Main Pollutant'])

# Outer-merge the frames on 'Date' and 'Site Name' so every reading is kept,
# even when a station/date is missing from some of the files
merged_df = pd.merge(pm25aqi, pm10aqi, on=MERGE_KEYS, how='outer')
merged_df1 = pd.merge(ozoneaqi, no2aqui, on=MERGE_KEYS, how='outer')
merged_df2 = pd.merge(merged_df1, merged_df, on=MERGE_KEYS, how='outer')
merged_df3 = pd.merge(merged_df2, carbonmonoxideaqi, on=MERGE_KEYS, how='outer')

merged_df3

Unnamed: 0,Date,Ozone AQI Value,Site Name,Nitrogen Dioxide AQI Value,PM2.5 AQI Value,PM10 AQI Value,Carbon Monoxide AQI Value,Main Pollutant
0,01/01/2022,9.0,Dudley Square Roxbury,24.0,,,,
1,01/02/2022,29.0,Dudley Square Roxbury,16.0,,,,
2,01/03/2022,27.0,Dudley Square Roxbury,,,,,
3,01/06/2022,22.0,Dudley Square Roxbury,22.0,,,,
4,01/07/2022,24.0,Dudley Square Roxbury,,,,,
...,...,...,...,...,...,...,...,...
963,11/29/2022,,Von Hillern ST,,,,5.0,
964,12/03/2022,,Von Hillern ST,,,,6.0,
965,12/04/2022,,Von Hillern ST,,,,7.0,
966,12/06/2022,,Von Hillern ST,,,,20.0,


In [4]:
# Melt the DataFrame to convert the pollutants into a single column.
# Only the "... AQI Value" columns are melted: merged_df3 also carries a
# 'Main Pollutant' column, which is not an AQI reading and would otherwise be
# folded into the 'AQI Value' column as extra rows.
aqi_columns = [col for col in merged_df3.columns if col.endswith('AQI Value')]
melted_df = merged_df3.melt(
    id_vars=['Site Name', 'Date'],
    value_vars=aqi_columns,
    var_name='Pollutant',
    value_name='AQI Value',
)
# Dropping empty AQI levels and rows whose site name is 'Not Available'
melted_df = melted_df.dropna(subset=['AQI Value'])
melted_df = melted_df[melted_df['Site Name'] != 'Not Available']
# Persist the long-format table (index omitted from the file)
melted_df.to_csv('melted_data.csv', index=False)

melted_df

Unnamed: 0,Site Name,Date,Pollutant,AQI Value
0,Dudley Square Roxbury,01/01/2022,Ozone AQI Value,9.0
1,Dudley Square Roxbury,01/02/2022,Ozone AQI Value,29.0
2,Dudley Square Roxbury,01/03/2022,Ozone AQI Value,27.0
3,Dudley Square Roxbury,01/06/2022,Ozone AQI Value,22.0
4,Dudley Square Roxbury,01/07/2022,Ozone AQI Value,24.0
...,...,...,...,...
4835,Von Hillern ST,11/29/2022,Carbon Monoxide AQI Value,5.0
4836,Von Hillern ST,12/03/2022,Carbon Monoxide AQI Value,6.0
4837,Von Hillern ST,12/04/2022,Carbon Monoxide AQI Value,7.0
4838,Von Hillern ST,12/06/2022,Carbon Monoxide AQI Value,20.0


In [5]:
# Keep only rows that have both a Nitrogen Dioxide and an Ozone reading,
# then discard rows whose site name is 'Not Available'
merged_df4 = (
    merged_df3
    .dropna(subset=['Nitrogen Dioxide AQI Value', 'Ozone AQI Value'])
    .loc[lambda frame: frame['Site Name'] != 'Not Available']
)

In [11]:
# Define the brush selection: an interval over both axes. The scatter plots
# expose it (add_params) and the data tables below filter on it.
brush = alt.selection_interval(encodings=['x', 'y'])

# Base chart for the scatter plots: date on x, brushed points fully opaque and
# the rest half-transparent
base = alt.Chart(merged_df4).mark_circle().encode(
    x=alt.X('Date:T', axis=alt.Axis(title='Month', format='%b'), timeUnit='monthdate'),
    opacity=alt.condition(brush, alt.value(1.0), alt.value(0.5))
).add_params(
    brush
).properties(
    width=400,
    height=250
)

# Index label of the highest Nitrogen Dioxide reading (not used by the charts below)
max_index = merged_df4['Nitrogen Dioxide AQI Value'].idxmax()

# Pop-out rule shared by both scatter plots: points where the two AQI values
# are equal are drawn blue, everything else green. (The previous
# alt.expr.max(<single value>) wrapper was a no-op around this same comparison.)
equal_readings = alt.datum['Nitrogen Dioxide AQI Value'] == alt.datum['Ozone AQI Value']
popout_color = alt.condition(equal_readings, alt.value('blue'), alt.value('green'))

# Scatter plot for Nitrogen Dioxide AQI Value
# (tooltip dates use the same month/day/year format as the table below)
chart_nitrogen = base.encode(
    y='Nitrogen Dioxide AQI Value',
    color=popout_color,
    tooltip=[
        alt.Tooltip('Date:T', title='Date', format='%m/%d/%y'),
        'Nitrogen Dioxide AQI Value:Q',
    ],
)

# Scatter plot for Ozone AQI Value
chart_ozone = base.encode(
    y='Ozone AQI Value',
    color=popout_color,
    tooltip=[
        alt.Tooltip('Date:T', title='Date', format='%m/%d/%y'),
        'Ozone AQI Value:Q',
    ],
)

# Combine scatter plots side by side
chart_nitrogen_and_ozone = alt.hconcat(chart_nitrogen, chart_ozone)

# Data tables: text marks stacked by row number, restricted to the brushed
# points and to row numbers below 15
ranked_text = alt.Chart(merged_df4).mark_text(align='center').encode(
    y=alt.Y('row_number:O').axis(None)
).transform_filter(
    brush
).transform_window(
    row_number='row_number()'
).transform_filter(
    alt.datum.row_number < 15
)

# One text column per field.
# NOTE(review): `name` (the Site Name column) is built but is not part of the
# combined table below - confirm whether it should be displayed.
name = ranked_text.encode(text='Site Name:N').properties(
    width=200,
    title=alt.Title(text='Site Name', align='center')
)
ndAQI = ranked_text.encode(text='Nitrogen Dioxide AQI Value:N').properties(
    width=100,
    title=alt.Title(text='Nitrogen Dioxide AQI Value', align='center')
)
ozAQI = ranked_text.encode(text='Ozone AQI Value:N').properties(
    width=100,
    title=alt.Title(text='Ozone AQI Value', align='center')
)
date = ranked_text.encode(text=alt.Text('Date:T', format='%m/%d/%y')).properties(
    width=100,
    title=alt.Title(text='Date', align='center')
)

# Combine data tables: date column first, then the two AQI columns
text = alt.hconcat(date)
text2 = alt.hconcat(ndAQI, ozAQI)
text3 = alt.hconcat(text, text2)

# Stack the scatter plots above the data tables
chart_nitrogen_and_ozone_with_table = alt.vconcat(chart_nitrogen_and_ozone, text3)

# Set the title (pollutant names corrected: Nitrogen Dioxide and Ozone)
chart_nitrogen_and_ozone_with_table = chart_nitrogen_and_ozone_with_table.properties(
    title="Nitrogen Dioxide AQI value and Ozone AQI value in Boston - 2022"
)

chart_nitrogen_and_ozone_with_table

## Design Justification:

### Popout:
For our popout, we decided to single out the data points where the two AQI values are equal, because these could be of potential significance: the values being the same on a given day may have a common cause. Nitrogen Dioxide contributes to Ozone production, so exploring this relationship is important, and the popout color draws attention to the specific points on each graph where the two values are equal.

### Color Choices:
We chose the color green to represent all our data points as it fits thematically with the concept of the environment.

### Brushing & Linking:
The brushing and linking in this chart are important for showing how the values vary from day to day. Brushing a region selects the same day or time frame on both charts, which makes them easy to compare. Additionally, we included a table at the bottom that summarizes the selected data points.

### Tooltips:
Within each scatterplot we also included tooltips because we felt they would make the chart more user friendly: you can see the specific value of a data point just by hovering over it while browsing the graph.