In [163]:
import pandas as pd
import altair as alt
import datetime as dt
from datetime import timedelta

# Select Altair's "default" renderer for chart output.
alt.renderers.enable('default')
# Load the raw export; the path is relative to the notebook's working directory
# and the file is decoded as windows-1252.
df = pd.read_csv("../appa_data_26-07-2022.csv", encoding='windows-1252')

# Reference limit for each pollutant, keyed by the pollutant name found in the data.
# The daily means are later divided by these values, so 1.0 means "exactly at the limit".
# NOTE(review): the unit of each limit is not visible here and the unit column of the
# data is dropped during cleaning -- confirm every limit uses the unit of its readings.
pollutant_limit = {
    "Ossido di Carbonio": 10, # daily mean maximum, correct
    "Biossido di Azoto": 200, # daily mean limit missing, hourly limit used instead: approximated
    "Biossido Zolfo": 125,    # daily mean maximum, correct
    "Ozono": 120,             # daily mean maximum, correct
    "PM10": 50,               # daily mean maximum, correct
    "PM2.5": 25               # daily mean limit missing, yearly limit used instead: approximated
}

In [141]:

############################
# DATA AND INDEX REPLACING #
############################
# Cleans the raw frame in place: builds a datetime index named "date" from the
# "Data" and "Ora" columns, drops the helper columns, converts the readings to
# float and renames the remaining columns to English.
# NOTE: this cell consumes the "Ora" column, so it can only run once per load of `df`.

# replace hour 24 with 00
# Only the hour is rewritten; the calendar date is left untouched, so the reading
# labelled "hour 24" of a day becomes 00:00 of that same day.
df.loc[df["Ora"] == 24, "Ora"] = 0
# add 0 before numbers with 1 digit
df["Ora"] = df["Ora"].astype(str).str.zfill(2)
# add Date and Hour, then convert to datetime object
df["Data"] = pd.to_datetime(
    df["Data"] + " " + df["Ora"],
    format="%Y-%m-%d %H"
)

# remove useless columns
# The unit-of-measure header contains an accented character that depends on how the
# CSV (and this notebook) were decoded, so it is matched by its ASCII prefix instead
# of by a literal that may be mis-encoded.
unit_columns = [column for column in df.columns if str(column).startswith("Unit")]
df.drop(["Ora", *unit_columns], axis=1, inplace=True)

# mark missing readings as NaN
# "n.d." and empty cells must not become a numeric sentinel such as -1: the sentinel
# would be averaged into the daily means. NaN is skipped by mean() instead.
df["Valore"] = df["Valore"].mask(df["Valore"].astype(str) == "n.d.").astype(float)

df.set_index("Data", inplace = True)


# renaming of axis
df.rename(
    columns={
        "Stazione": "station",
        "Inquinante": "pollutant",
        "Valore": "value"
    },
    inplace=True
)
df.index.rename("date", inplace=True)

df

Unnamed: 0_level_0,station,pollutant,value
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-06-26 01:00:00,Parco S. Chiara,PM10,18.0
2022-06-26 02:00:00,Parco S. Chiara,PM10,17.0
2022-06-26 03:00:00,Parco S. Chiara,PM10,17.0
2022-06-26 04:00:00,Parco S. Chiara,PM10,16.0
2022-06-26 05:00:00,Parco S. Chiara,PM10,17.0
...,...,...,...
2022-07-26 20:00:00,Monte Gaza,Ozono,155.0
2022-07-26 21:00:00,Monte Gaza,Ozono,148.0
2022-07-26 22:00:00,Monte Gaza,Ozono,137.0
2022-07-26 23:00:00,Monte Gaza,Ozono,126.0


In [142]:
###################################################
# PER DAY MEAN OF EACH POLLUTANT FOR EACH STATION #
###################################################
# Builds `daily_mean_data`: one row per (station, pollutant, day) holding the mean of
# that day's readings, indexed by "date" with columns station, pollutant, value.

# Per-series frames are collected in a list and concatenated once at the end instead
# of growing a DataFrame inside the loop.
daily_frames = []

for station in df["station"].dropna().unique():
    station_rows = df[df["station"] == station]
    for pollutant in station_rows["pollutant"].dropna().unique():
        # Only the numeric "value" column is averaged: calling mean() on a frame that
        # still contains the string columns fails on pandas >= 2.0.
        daily_values = (
            station_rows.loc[station_rows["pollutant"] == pollutant, ["value"]]
            .reset_index()
            .groupby(pd.Grouper(key="date", freq="D"))["value"]
            .mean()
        )
        daily_frames.append(
            pd.DataFrame({
                "station": station,
                "pollutant": pollutant,
                "value": daily_values
            })
        )

if daily_frames:
    daily_mean_data = pd.concat(daily_frames)
else:
    # no readings at all: keep the expected columns so later cells still find them
    daily_mean_data = pd.DataFrame(columns=["station", "pollutant", "value"])
daily_mean_data.index.rename("date", inplace = True)

daily_mean_data

Unnamed: 0_level_0,station,pollutant,value
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-06-26,Parco S. Chiara,PM10,18.125000
2022-06-27,Parco S. Chiara,PM10,31.916667
2022-06-28,Parco S. Chiara,PM10,36.708333
2022-06-29,Parco S. Chiara,PM10,21.166667
2022-06-30,Parco S. Chiara,PM10,26.083333
...,...,...,...
2022-07-22,Monte Gaza,Ozono,168.541667
2022-07-23,Monte Gaza,Ozono,157.916667
2022-07-24,Monte Gaza,Ozono,144.541667
2022-07-25,Monte Gaza,Ozono,168.375000


In [143]:
######################################################
# CONVERT READINGS WITH PERCENTAGES OF MAXIMUM LEVEL #
######################################################
# Expresses every daily mean as a fraction of its pollutant's limit
# (1.0 == 100% of the limit). "date" becomes a regular column.
station_data_percentage = daily_mean_data.reset_index()

# look up the limit of every row at once instead of looping over the rows
row_limits = station_data_percentage["pollutant"].map(pollutant_limit)

# a pollutant without a configured limit is an error, not a silent NaN
unknown_pollutants = station_data_percentage.loc[row_limits.isna(), "pollutant"].unique()
if len(unknown_pollutants) > 0:
    raise KeyError(
        "no limit defined in pollutant_limit for: "
        + ", ".join(sorted(map(str, unknown_pollutants)))
    )

station_data_percentage["value"] = station_data_percentage["value"] / row_limits

station_data_percentage

Unnamed: 0,date,station,pollutant,value
0,2022-06-26,Parco S. Chiara,PM10,0.362500
1,2022-06-27,Parco S. Chiara,PM10,0.638333
2,2022-06-28,Parco S. Chiara,PM10,0.734167
3,2022-06-29,Parco S. Chiara,PM10,0.423333
4,2022-06-30,Parco S. Chiara,PM10,0.521667
...,...,...,...,...
860,2022-07-22,Monte Gaza,Ozono,1.404514
861,2022-07-23,Monte Gaza,Ozono,1.315972
862,2022-07-24,Monte Gaza,Ozono,1.204514
863,2022-07-25,Monte Gaza,Ozono,1.403125


In [173]:
###################
# DATE SEPARATION #
###################

# reference day that splits the series in two
today = dt.date(2022, 7, 10)

# Both comparisons are inclusive, so the rows dated exactly `today`
# appear in both halves.
data_before = station_data_percentage.loc[
    lambda frame: frame["date"] <= pd.to_datetime(today)
]
data_previs = station_data_percentage.loc[
    lambda frame: frame["date"] >= pd.to_datetime(today)
]

print(data_before, data_previs, sep="\n")

          date          station pollutant     value
0   2022-06-26  Parco S. Chiara      PM10  0.362500
1   2022-06-27  Parco S. Chiara      PM10  0.638333
2   2022-06-28  Parco S. Chiara      PM10  0.734167
3   2022-06-29  Parco S. Chiara      PM10  0.423333
4   2022-06-30  Parco S. Chiara      PM10  0.521667
..         ...              ...       ...       ...
844 2022-07-06       Monte Gaza     Ozono  0.999603
845 2022-07-07       Monte Gaza     Ozono  1.044792
846 2022-07-08       Monte Gaza     Ozono  0.866319
847 2022-07-09       Monte Gaza     Ozono  0.902431
848 2022-07-10       Monte Gaza     Ozono  0.913194

[417 rows x 4 columns]
          date          station pollutant     value
14  2022-07-10  Parco S. Chiara      PM10  0.235000
15  2022-07-11  Parco S. Chiara      PM10  0.277500
16  2022-07-12  Parco S. Chiara      PM10  0.370833
17  2022-07-13  Parco S. Chiara      PM10  0.354167
18  2022-07-14  Parco S. Chiara      PM10  0.439167
..         ...              ...       ..

In [174]:
##########################
# GRAPH DATA PREPARATION #
##########################

# Shared x encoding: temporal "date" field, initially showing the window
# from three days before to three days after `today`.
x_axis_config = alt.X(
    "date:T",
    axis=alt.Axis(title="Date", format="%e %B %Y", labelAngle=-35),
    scale=alt.Scale(
        domain=[
            pd.to_datetime(today + timedelta(days=day_offset))
            for day_offset in (-3, 3)
        ]
    ),
)

# Shared y encoding: quantitative "value" field rendered with a percent format;
# the visible range is fixed to 0..2, i.e. 0% to 200%.
y_axis_config = alt.Y(
    "value:Q",
    axis=alt.Axis(title="Legal limit percentage", format="%"),
    scale=alt.Scale(domain=[0, 2]),
)


In [175]:
#########################
# MAIN GRAPH GENERATION #
#########################

def _station_line_chart(source, station, **mark_options):
    """Build the interactive line chart of one station's pollutant series.

    Args:
        source: frame with "date", "station", "pollutant" and "value" columns.
        station: station name; only rows whose "station" equals it are plotted.
        **mark_options: forwarded to ``mark_line`` (e.g. ``strokeDash``).

    Returns:
        An Altair chart with one line per pollutant, using the shared
        ``x_axis_config`` / ``y_axis_config`` encodings.
    """
    station_data = source[source["station"] == station]

    return alt.Chart(
        station_data
    ).mark_line(
        **mark_options
    ).encode(
        x = x_axis_config,
        y = y_axis_config,
        color = "pollutant"
    ).interactive(
        bind_y = False  # zoom/pan on the x axis only
    ).properties(
        # Both the past and the future chart end up as layers of the same
        # figure, so they have to declare one common size.
        width = 1000,
        height = 500
    )


def get_station_past_graph(station):
    """Return the solid-line chart of `station` built from `data_before`."""
    return _station_line_chart(data_before, station)


def get_station_future_graph(station):
    """Return the dashed-line chart of `station` built from `data_previs`.

    The chart uses the same size as the past chart: the two are combined with
    ``alt.layer``, where differing width/height values are inconsistent.
    """
    return _station_line_chart(data_previs, station, strokeDash = [1,1])


In [176]:
########################
# GENERATE STATIC BARS #
########################

# Horizontal rule at y = 1, drawn in red with a stroke width of 5.
fixed_line_data_frame = pd.DataFrame({'y': [1]})
horizont_fixed_line = (
    alt.Chart(fixed_line_data_frame)
    .mark_rule(strokeWidth=5, color="red")
    .encode(y="y")
)

# Vertical dashed rule placed at `today`.
# NOTE(review): the "color" column is part of the data but is not bound to any
# encoding channel, so it has no visible effect -- confirm whether a green rule was intended.
vertical_fixed_line = (
    alt.Chart(
        pd.DataFrame({
            "Date": pd.to_datetime(today),
            "color": ["green"]
        })
    )
    .mark_rule(strokeDash=[1, 1])
    .encode(x="Date:T")
)


In [177]:
#################
# GRAPH DISPLAY #
#################

# One figure per station: the two fixed rules plus the past and future lines.
for station in station_data_percentage["station"].unique():
    past_chart = get_station_past_graph(station)
    future_chart = get_station_future_graph(station)

    # stack the four charts into a single layered figure
    layered_chart = alt.layer(
        horizont_fixed_line,
        vertical_fixed_line,
        past_chart,
        future_chart,
    )

    # apply the shared axis / legend styling, then render
    styled_chart = layered_chart.configure_axis(
        labelFontSize=15,
        titleFontSize=20,
    ).configure_legend(
        orient="right",
        labelFontSize=20,
    )
    styled_chart.display()