In [1]:
import sys
from io import StringIO
from pathlib import Path

import altair as alt
import folium
import geopandas as gpd
import numpy as np
import pandas as pd
import streamlit as st
from branca.colormap import linear
from folium.plugins import HeatMap, HeatMapWithTime, MarkerCluster

import utils

alt.renderers.set_embed_options(format_locale="pt-BR", time_format_locale="pt-BR")
sys.path.append(str(Path().resolve()))

In [2]:
from importlib import reload

reload(utils)

<module 'utils' from 'c:\\Users\\lucas\\OneDrive\\Desktop\\Desafio_IHC\\src\\utils\\__init__.py'>

In [3]:
# Lê o arquivo CSV removendo aspas duplas
# O CSV original é um CSV "sujo", criado com Excel, que adiciona aspas duplas
# em torno de cada campo, o que causa problemas na leitura.
with open(
    "../data/dados_exemplo_poluentes_no_acentos.csv", "r", encoding="utf-8-sig"
) as f:
    linhas = [linha.replace('"', "") for linha in f]

# Converte as linhas limpas para um buffer em memória
csv_buffer = StringIO("".join(linhas))

# Lê o CSV normalmente após a limpeza
df = pd.read_csv(csv_buffer, sep=",")

# Remove a coluna "unit" se todos os valores forem iguais
# Neste caso, todos os valores são "mg/L", então a coluna é desnecessária
if "unit" in df.columns and df["unit"].nunique() == 1:
    df = df.drop(columns=["unit"])

# Converte as colunas de latitude e longitude para o tipo numérico
df["lat"] = pd.to_numeric(df["lat"], errors="coerce")
df["lon"] = pd.to_numeric(df["lon"], errors="coerce")

# Remove Estacao do nome das estações e converte para categoria
df["station_name"] = df["station_name"].str.replace("Estacao", "")
df["station_name"] = df["station_name"].str.strip()
df["station_name"] = df["station_name"].astype("category")

# Converte a coluna de data para o tipo datetime
df["sample_dt"] = pd.to_datetime(df["sample_dt"], format="%Y-%m-%d")

# Remove a coluna station_id, já que station_name é suficiente para identificar os pontos de coleta
# Cada station_id tem um station_name único
df = df.drop(columns=["station_id"])

# Transforma o DataFrame para o formato longo (long format) para facilitar a plotagem
# com bibliotecas de visualização
df_longo = df.melt(
    id_vars=["station_name", "lat", "lon", "sample_dt"],
    value_vars=["pol_a", "pol_b"],
    var_name="pollutant",
    value_name="value",
)

gdf = gpd.GeoDataFrame(
    df, geometry=gpd.points_from_xy(df["lon"], df["lat"]), crs="EPSG:4326"
)

gdf_longo = gpd.GeoDataFrame(
    df_longo,
    geometry=gpd.points_from_xy(df_longo["lon"], df_longo["lat"]),
    crs="EPSG:4326",
)

In [4]:
df

Unnamed: 0,station_name,lat,lon,sample_dt,pol_a,pol_b
0,L1,-22.717512,-41.511239,2025-03-02,3.15,7.15
1,H1,-22.995127,-43.843707,2025-05-01,9.98,3.03
2,B1,-22.314549,-41.720023,2025-04-01,7.60,5.63
3,M1,-20.947215,-41.693095,2025-05-01,5.11,104.76
4,Z1,-21.097152,-42.468495,2025-03-02,6.45,8.29
...,...,...,...,...,...,...
254,D1,-20.949884,-43.151648,2025-03-02,6.12,3.11
255,J2,-22.849573,-43.616518,2025-03-02,9.19,9.61
256,M1,-20.947215,-41.693095,2025-03-02,9.43,99.75
257,T1,-20.983199,-41.918786,2025-01-01,2.88,0.87


In [5]:
# Criar GeoDataFrame
geo = gpd.GeoDataFrame(gdf, geometry="geometry", crs="EPSG:4326")
pol_a_log = np.log(geo["pol_a"])
pol_b_log = np.log(geo["pol_b"])
# Colormap para pol_a
colormap = linear.BuGn_06.scale(pol_a_log.min(), pol_a_log.max())
# colormap = linear.BuGn_06.scale(geo["pol_a"].min(), geo["pol_a"].max())
colormap = colormap.to_step(n=10)

m = folium.Map(location=[-15, -55], zoom_start=4)
marker_cluster = MarkerCluster().add_to(m)

for idx, row in geo.iterrows():
    folium.CircleMarker(
        location=[row["lat"], row["lon"]],
        radius=6,
        fill=True,
        color=colormap(row["pol_a"]),
        fill_opacity=0.7,
        popup=folium.Popup(
            f"""
            <b>Estação:</b> {row["station_name"].replace("Estacao", "")}<br>
            <b>Poluente A:</b> {row["pol_a"]}<br>
            <b>Poluente B:</b> {row["pol_b"]}<br>
            <b>Data:</b> {row["sample_dt"]}
        """,
            max_width=250,
        ),
    ).add_to(marker_cluster)

colormap.caption = "ln do poluente A (mg/L)"
colormap.add_to(m)

m

In [6]:
estados_sudeste = ["ES", "MG", "RJ", "SP"]
municipios_sudeste = utils.json_municipios(estados_sudeste)

pontos_coleta = (
    gdf[["station_name", "geometry"]].drop_duplicates().reset_index(drop=True)
)

pontos_coleta_municipios = gpd.sjoin(
    municipios_sudeste, pontos_coleta, how="inner", predicate="intersects"
).drop(columns="index_right")

pontos_coleta_municipios = utils.gpd_merge(
    gdf,
    pontos_coleta_municipios[["station_name", "city", "state"]],
    on="station_name",
    how="left",
)

non_cat_cols = pontos_coleta_municipios.select_dtypes(exclude=["category"]).columns
pontos_coleta_municipios[non_cat_cols] = pontos_coleta_municipios[non_cat_cols].fillna(
    "N/A"
)

In [7]:
utils.cria_mapa_geodataframe(
    pontos_coleta_municipios,
    cols=["station_name", "city", "state"],
    aliases=["Estação", "Cidade", "Estado"],
)


In [8]:
df_longo_formatado = df_longo.copy(deep=True)
df_longo_formatado["pollutant"] = df_longo_formatado["pollutant"].replace(
    {"pol_a": "A", "pol_b": "B"}
)

source = df_longo_formatado[df_longo["station_name"] == "M1"]

points = (
    alt.Chart(source)
    .mark_line(point=alt.OverlayMarkDef(filled=False, fill="white"))
    .encode(
        x=alt.X("sample_dt:T", axis=alt.Axis(format="%d %b", title="Data")),
        y=alt.Y("value:Q", title="Valor (mg/L)"),
        color=alt.Color(
            "pollutant:N",
            legend=alt.Legend(title="Poluente"),
            scale=alt.Scale(range=["green", "purple"]),
        ),
        tooltip=[
            alt.Tooltip("station_name:N", title="Estação"),
            alt.Tooltip("sample_dt:T", title="Data da coleta", format="%d-%m-%Y"),
            alt.Tooltip("value:Q", title="Valor (mg/L)"),
        ],
    )
).add_params()

points


In [9]:
locale = utils.carrega_locale_altair("pt-BR")


In [10]:
def cria_mapa_com_graficos(
    df_mapa: gpd.GeoDataFrame, df_longo: pd.DataFrame
) -> folium.Map:
    centroide = utils.obtem_centroide(pontos=pontos_coleta_municipios["geometry"])
    m = folium.Map(location=centroide, zoom_start=8)

    df_longo["pollutant"] = df_longo["pollutant"].replace({"pol_a": "A", "pol_b": "B"})
    marker_group = folium.FeatureGroup(name="Coletas").add_to(m)
    for _, row in df_mapa.iterrows():
        nome_estacao = row["station_name"]
        cidade = row["city"]
        estado = row["state"]
        lat = row.geometry.y
        lon = row.geometry.x

        dados_estacao = df_longo[df_longo["station_name"] == nome_estacao]

        if dados_estacao.empty:
            continue

        # Cria o gráfico exibido no popup
        chart = (
            alt.Chart(dados_estacao)
            .mark_line(point=alt.OverlayMarkDef(filled=False, fill="white"))
            .encode(
                x=alt.X("sample_dt:T", axis=alt.Axis(format="%d %b", title="Data")),
                y=alt.Y("value:Q", title="Valor (mg/L)"),
                color=alt.Color(
                    "pollutant:N",
                    legend=alt.Legend(title="Poluente"),
                    scale=alt.Scale(range=["green", "purple"]),
                ),
                tooltip=[
                    alt.Tooltip("station_name:N", title="Estação"),
                    alt.Tooltip(
                        "sample_dt:T", title="Data da coleta", format="%d-%m-%Y"
                    ),
                    alt.Tooltip("value:Q", title="Valor (mg/L)"),
                ],
            )
            .properties(
                width=280,
                height=220,
                title=(
                    f"Estação {nome_estacao}"
                    f"{'' if cidade == 'N/A' else f' - {cidade}'}"
                    f"{'' if estado == 'N/A' else f'/{estado}'}"
                ),
            )
            .configure_title(fontSize=14, font="Courier", color="gray", anchor="start")
            .configure_legend(labelFontSize=10, titleFontSize=12)
        ).configure(locale=locale)

        vega = folium.VegaLite(chart, width=400, height=260)
        popup = folium.Popup(max_width=400)
        vega.add_to(popup)

        tooltip = f"<b>Estação:</b> {nome_estacao}<br><b>Cidade:</b> {cidade}<br><b>Estado:</b> {estado}"

        # Cria os marcadores no mapa

        marker = folium.Marker(
            location=(lat, lon),
            icon=folium.Icon(icon="flask", prefix="fa", color="green"),
            tooltip=tooltip,
            popup=popup,
        )

        # Adiciona o popup ao marcador e o marcador ao mapa
        popup.add_to(marker)
        marker.add_to(marker_group)

    m.add_child(marker_group)
    return m


In [11]:
m = cria_mapa_com_graficos(pontos_coleta_municipios, df_longo)
# m.save("mapa_com_graficos.html")

In [12]:
pontos_coleta_municipios_longo = pontos_coleta_municipios.melt(
    id_vars=["station_name", "lat", "lon", "sample_dt"],
    value_vars=["pol_a", "pol_b"],
    var_name="pollutant",
    value_name="value",
)

pontos_coleta_municipios_longo["sample_dt"] = pd.to_datetime(
    pontos_coleta_municipios_longo["sample_dt"], format="%d/%m/%Y"
)

pontos_coleta_municipios_longo = pontos_coleta_municipios_longo.sort_values("sample_dt")

In [13]:
pol_a_longo = pontos_coleta_municipios_longo[
    pontos_coleta_municipios_longo["pollutant"] == "pol_a"
]

map_data = []
time_data = []

for date, sample in pol_a_longo.groupby("sample_dt"):
    map_data.append(sample[["lat", "lon", "value"]].values.tolist())
    time_data.append(date.strftime(format="%d/%m/%Y"))

# m2 = folium.Map(
#     location=[pol_a_longo["lat"].mean(), pol_a_longo["lon"].mean()], zoom_start=10
# )

HeatMapWithTime(data=map_data, index=time_data, name="Poluente A", radius=100).add_to(m)

folium.LayerControl().add_to(m)
m.save("heatmap_pol_a.html")

# Streamlit

In [14]:
from streamlit_jupyter import StreamlitPatcher, tqdm

StreamlitPatcher().jupyter()


ModuleNotFoundError: No module named 'streamlit_jupyter'

In [None]:
st.write("Hello, world!")

2025-09-28 13:36:42.342 
  command:

    streamlit run c:\Users\lucas\OneDrive\Desktop\Desafio_IHC\.venv\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
