In [161]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import seaborn as sns

from utils import *

datasets_folder = './datasets'
starting_date = datetime(2019,1,1)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
air_pollution_dataset = os.path.join(datasets_folder,'pollution/pollution.csv')

air_pollution_df = pd.read_csv(air_pollution_dataset, sep=';')
air_pollution_df.rename(columns={
    'COD_STAZ': 'Station', 
    'AGENTE': 'Agent', 
    'DATA_INIZIO': 'Date', 
    'DATA_FINE': 'Ending_date', 
    'VALORE': 'Value', 
    'UM': 'Unit'
}, inplace=True)

air_pollution_df.drop(columns=['Ending_date'], inplace=True)
air_pollution_df=air_pollution_df.sort_values(by='Date')
air_pollution_df['Date'] = pd.to_datetime(air_pollution_df['Date'].apply(lambda x: ' '.join(x.split('T')).split('+')[0]))


air_pollution_df.head(3)

Unnamed: 0,Station,Agent,Date,Value,Unit
6726,GIARDINI MARGHERITA,PM2.5,2019-01-01 00:01:00,37.0,ug/m3
8067,GIARDINI MARGHERITA,PM10,2019-01-01 00:01:00,50.0,ug/m3
419631,PORTA SAN FELICE,PM2.5,2019-01-01 00:01:00,41.0,ug/m3


## Station estimations

In [181]:
giardini_df = air_pollution_df[air_pollution_df['Station'] == 'GIARDINI MARGHERITA']
san_felice_df = air_pollution_df[air_pollution_df['Station'] == 'PORTA SAN FELICE']
chiarini_df = air_pollution_df[air_pollution_df['Station'] == 'VIA CHIARINI']

### Giardini Margherita

In [None]:
def resample_agents(df):
    agents = np.unique(df['Agent'])
    dfs = []
    
    for agent in agents:
        agent_df = df[df['Agent'] == agent]
        agent_df_resampled = agent_df.resample('1h', on='Date').mean(numeric_only=True)
        dfs.append(agent_df_resampled)
    
    return pd.concat(dfs, ignore_index=True)

In [182]:
giardini_df

Unnamed: 0,Station,Agent,Date,Value,Unit
6726,GIARDINI MARGHERITA,PM2.5,2019-01-01 00:01:00,37.0,ug/m3
8067,GIARDINI MARGHERITA,PM10,2019-01-01 00:01:00,50.0,ug/m3
139658,GIARDINI MARGHERITA,O3 (OZONO),2019-01-01 00:01:00,2.0,ug/m3
14649,GIARDINI MARGHERITA,NO2 (BIOSSIDO DI AZOTO),2019-01-01 00:01:00,29.0,ug/m3
19857,GIARDINI MARGHERITA,O3 (OZONO),2019-01-01 01:01:00,6.0,ug/m3
...,...,...,...,...,...
217052,GIARDINI MARGHERITA,O3 (OZONO),2024-12-31 21:01:00,31.0,ug/m3
359903,GIARDINI MARGHERITA,NO2 (BIOSSIDO DI AZOTO),2024-12-31 22:01:00,22.0,ug/m3
217054,GIARDINI MARGHERITA,O3 (OZONO),2024-12-31 22:01:00,23.0,ug/m3
247963,GIARDINI MARGHERITA,NO2 (BIOSSIDO DI AZOTO),2024-12-31 23:01:00,21.0,ug/m3


In [180]:
giardini_df=resample_agents(giardini_df)
giardini_df

Unnamed: 0,Value
0,29.0
1,
2,23.0
3,29.0
4,26.0
...,...
210381,
210382,
210383,
210384,


In [178]:
start=datetime(2019,1,1)
end=datetime(2024,12,31)

for agent in np.unique(giardini_df['Agent']):
    mask=(giardini_df['Agent']==agent)&(giardini_df['Date']>start)&(giardini_df['Date']<end)
    timedeltas_hours = giardini_df['Date'][mask].diff().dt.total_seconds()[1:] / 3600  # Convert to hours

    plt.figure(figsize=(40, 5))
    plt.plot(giardini_df['Date'][mask][1:], timedeltas_hours, marker='o', linestyle='-')
    plt.xlabel('Datetime')
    plt.ylim(0,max(timedeltas_hours)+1)
    plt.ylabel('Time Difference (hours)')
    plt.title(f'{agent}')
    plt.xticks(rotation=45)  # Rotate labels for better readability
    plt.grid(True)
    plt.show()

KeyError: 'Agent'

### Porta San Felice

### Via Chiarini