## Config

In [2]:
# parameters
# Input directories for the interim and processed datasets
INPUT_DATA_PATH = "../data/interim/"
INPUT_PROCESSED_DATA_PATH = "../data/processed/"

# Years to load: 2018 through 2024 inclusive
YEARS = list(range(2018, 2025))

# Output switch and the directory results are written to
SAVE_OUTPUT = True
OUTPUT_DATA_PATH = "../data/interim/"


In [3]:
# Import all necessary libraries
import time
start = time.time()
import geopandas as gpd
import pandas as pd
from functools import reduce
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
import plotly.express as px

import warnings
from pathlib import Path


# Load data

In [4]:
# Read the hourly observations: one cleaned parquet file per configured year
input_dir = Path(INPUT_DATA_PATH)
data_all_years = []
for YEAR in YEARS:
    data_all_years.append(pd.read_parquet(input_dir / f'cleaned_data{YEAR}.parquet'))

# Stack the per-year frames into a single table
counters_hour = pd.concat(data_all_years)


# Quick look at the combined data: dimensions, column names and first rows
print(counters_hour.shape)
print(counters_hour.columns)
counters_hour.head()

(11319864, 11)
Index(['Id_aforament', 'date', 'hour', 'intensity', 'prev_hour_count',
       'Daily_Counts', 'duration', 'Number_of_hrs_data_collected', 'day',
       'month', 'Number_of_days_data_collected'],
      dtype='object')


Unnamed: 0,Id_aforament,date,hour,intensity,prev_hour_count,Daily_Counts,duration,Number_of_hrs_data_collected,day,month,Number_of_days_data_collected
0,20005,2018-12-01,0,132.0,,1835.0,1,24,1,12,30
1,20005,2018-12-01,1,72.0,,1835.0,1,24,1,12,30
2,20005,2018-12-01,2,34.0,,1835.0,1,24,1,12,30
3,20005,2018-12-01,3,19.0,,1835.0,1,24,1,12,30
4,20005,2018-12-01,4,12.0,,1835.0,1,24,1,12,30


## Fix formats

In [5]:
# Inspect the column dtypes before any conversion is applied
counters_hour.dtypes

Id_aforament                              int32
date                             datetime64[ns]
hour                                     object
intensity                               float64
prev_hour_count                         float64
Daily_Counts                            float64
duration                                  int64
Number_of_hrs_data_collected              int64
day                                       int32
month                                     int32
Number_of_days_data_collected             int64
dtype: object

In [6]:
# Normalise the column name and dtypes, then derive calendar features from the date.
# `assign` evaluates its arguments in order, so `weekday` and `year` are computed
# from the already-converted `date` column.
counters_hour = (
    counters_hour
    .rename(columns={'Id_aforament': 'id'})
    .assign(
        intensity=lambda df: df["intensity"].astype(int),
        date=lambda df: pd.to_datetime(df['date']),
        weekday=lambda df: df["date"].dt.weekday,
        hour=lambda df: df["hour"].astype(int),
        year=lambda df: df["date"].dt.year,
    )
)

# Visualize data

## Visualize by year

In [7]:
# Mean intensity and number of distinct counter ids for every (hour, year) pair
counters_by_year = (
    counters_hour
    .groupby(['hour', 'year'])
    .agg({'intensity': 'mean', 'id': 'nunique'})
    .reset_index()
)

# One line per year; the hover tooltip also shows the distinct-counter count (`id`)
fig = px.line(
    counters_by_year,
    x='hour',
    y='intensity',
    color='year',
    markers=True,
    hover_data={'id': True},
    title='Average intensity by hour of the day for each year',
)
fig.update_layout(xaxis_title='Hour', yaxis_title='Intensity')
fig.show()


In [8]:
# Display the aggregated table: mean intensity and distinct-counter count (`id`) per hour and year
counters_by_year

Unnamed: 0,hour,year,intensity,id
0,0,2018,41.147107,159
1,0,2019,43.599085,219
2,0,2020,28.322915,240
3,0,2021,45.318087,256
4,0,2022,71.950377,319
...,...,...,...,...
163,23,2020,45.591106,240
164,23,2021,66.422983,256
165,23,2022,98.310990,319
166,23,2023,111.112276,347


## Visualize by month

In [9]:
# Mean intensity for every (hour, month) combination
counters_by_month = (
    counters_hour
    .groupby(['hour', 'month'])['intensity']
    .mean()
    .reset_index()
)

# One line per month, coloured with the cyclical Phase palette
fig = px.line(
    counters_by_month,
    x='hour',
    y='intensity',
    color='month',
    markers=True,
    color_discrete_sequence=px.colors.cyclical.Phase,
    title='Average intensity by hour of the day for each month',
)
fig.update_layout(xaxis_title='Hour', yaxis_title='Intensity')
fig.show()


## Visualize by weekday

In [16]:

# Mean intensity for every (hour, weekday) combination
counters_by_weekday = (
    counters_hour
    .groupby(['hour', 'weekday'])['intensity']
    .mean()
    .reset_index()
)

# One line per weekday, coloured with the diverging Spectral palette
fig = px.line(
    counters_by_weekday,
    x='hour',
    y='intensity',
    color='weekday',
    markers=True,
    color_discrete_sequence=px.colors.diverging.Spectral,
    title='Average intensity by hour of the day for each weekday',
)
fig.update_layout(xaxis_title='Hour', yaxis_title='Intensity')
fig.show()


# Save output