In [None]:
from os.path import join

import pandas as pd
import plotly.graph_objs as go
from plotly.subplots import make_subplots

from config import data_raw_dir

# Load air quality data

In [None]:
# Path of the 2022 air-quality CSV inside the raw-data directory from `config`.
filename = join(data_raw_dir, "urban-platform-air-quality-2022.csv")

In [None]:
# Read the CSV with its first line as the header and the "timestamp" column
# as the index; `parse_dates=True` asks pandas to parse that index as dates.
# `infer_datetime_format` is intentionally omitted: it is deprecated since
# pandas 2.0 (format inference is now the default) and only triggers a warning.
# `low_memory=False` reads the file in one pass so column dtypes are inferred
# from the whole column rather than chunk by chunk.
df = pd.read_csv(
    filename,
    header=0,
    index_col="timestamp",
    parse_dates=True,
    low_memory=False,
)

In [None]:
# Number of missing values in each column.
df.isna().sum()

# Group by location name

In [None]:
# Split the frame into one group per distinct value of the "name" column.
locations = df.groupby("name")

# Map each location name to its sub-DataFrame. Iterating the GroupBy yields
# (name, frame) pairs; `iter` is required because the GroupBy object itself
# exposes a non-callable `keys` attribute that `dict()` would try to call.
sub_dfs = dict(iter(locations))

In [None]:
# Location names available as keys of the sub-DataFrame dictionary.
sub_dfs.keys()

# EDA - Parque da Cidade

In [None]:
# Take the "Parque da Cidade" sub-frame and, in the same expression, discard
# the columns listed below so only the remaining columns are explored.
pc = sub_dfs["Parque da Cidade"].drop(
    columns=[
        "latitude",
        "longitude",
        "name",
        "entity_id",
        "entity_type",
        "date_observed",
        "time_observed",
    ]
)

In [None]:
# First row of the Parque da Cidade frame.
pc.head(1)

In [None]:
# Last row of the Parque da Cidade frame.
pc.tail(1)

In [None]:
# Column labels present in the Parque da Cidade frame.
pc.columns

In [None]:
# Draw one stacked subplot per column of `pc`, all sharing the x-axis.
# `make_subplots` documents `subplot_titles` as a list of str, so the pandas
# Index is converted to a plain list once and reused everywhere below.
columns = list(pc.columns)

fig = make_subplots(
    rows=len(columns), cols=1, shared_xaxes=True, subplot_titles=columns
)

# Add each column's line trace and label its y-axis in a single pass
# (plotly subplot rows are 1-based, hence start=1).
for row, col_name in enumerate(columns, start=1):
    fig.add_trace(
        go.Scatter(x=pc.index, y=pc[col_name], mode="lines", name=col_name),
        row=row,
        col=1,
    )
    fig.update_yaxes(title_text=col_name, row=row, col=1)

fig.update_layout(
    title="Air Quality 2022 - Parque da Cidade",
    showlegend=False,  # each subplot is already titled with its column name
    height=1000,  # overall figure height
)

fig.show()