In [6]:
import pandas as pd
import json
from urllib.request import urlopen
import plotly.express as px

# ── 1. Load & annotate GeoJSON ───────────────────────
# State-boundary GeoJSON hosted in the codeforamerica/click_that_hood repository.
BRAZIL_GEOJSON_URL = (
    'https://raw.githubusercontent.com/codeforamerica/click_that_hood/'
    'master/public/data/brazil-states.geojson'
)

# The timeout keeps a stalled connection from hanging the kernel indefinitely;
# without it urlopen waits for as long as the socket stays open.
with urlopen(BRAZIL_GEOJSON_URL, timeout=30) as response:
    brazil_geo = json.load(response)  # parsed GeoJSON dict; its "features" list is used below

# Numeric state id (the values found in the CSV's `dep_id` column) → state name.
DEP_NAMES = {
    11: 'Rondônia',
    12: 'Acre',
    13: 'Amazonas',
    14: 'Roraima',
    15: 'Pará',
    16: 'Amapá',
    17: 'Tocantins',
    21: 'Maranhão',
    22: 'Piauí',
    23: 'Ceará',
    24: 'Rio Grande do Norte',
    25: 'Paraíba',
    26: 'Pernambuco',
    27: 'Alagoas',
    28: 'Sergipe',
    29: 'Bahia',
    31: 'Minas Gerais',
    32: 'Espírito Santo',
    33: 'Rio de Janeiro',
    35: 'São Paulo',
    41: 'Paraná',
    42: 'Santa Catarina',
    43: 'Rio Grande do Sul',
    50: 'Mato Grosso do Sul',
    51: 'Mato Grosso',
    52: 'Goiás',
    53: 'Distrito Federal',
}
# Reverse lookup: state name → numeric id.
DEP_NAMES_INV = dict(zip(DEP_NAMES.values(), DEP_NAMES.keys()))

# Give every feature a top-level numeric "id" so the choropleth can join it to dep_id.
# All names are checked up front: an unrecognised state name then fails with one
# descriptive KeyError before any feature is modified, rather than a bare KeyError
# part-way through the loop that leaves the GeoJSON half annotated.
unknown_names = sorted(
    {feature["properties"]["name"] for feature in brazil_geo["features"]}
    .difference(DEP_NAMES_INV)
)
if unknown_names:
    raise KeyError(f"GeoJSON state names missing from DEP_NAMES: {unknown_names}")

for feat in brazil_geo["features"]:
    name = feat["properties"]["name"]
    feat["id"] = DEP_NAMES_INV[name]  # numeric id ← name

# ── 2. Load & reshape dengue data ────────────────────
df = pd.read_csv("./code/raw_ensemble_predictions_Brazil.csv")

# Year/Month are pinned to day 1 so they parse as a date; the 'YYYY-MM' label
# derived from it is what the animation later steps through.
month_start = pd.to_datetime(df[['Year', 'Month']].assign(DAY=1))
df = df.assign(
    date=month_start,
    month_str=month_start.dt.strftime('%Y-%m'),
    state_name=df['dep_id'].map(DEP_NAMES),
)

# Long format: one row per (state, month, series) with the value in 'cases'.
series_labels = {'actual_all': 'Actual', 'prediction_all': 'Predicted'}
df_long = df.melt(
    id_vars=['dep_id', 'state_name', 'month_str'],
    value_vars=list(series_labels),
    var_name='type',
    value_name='cases',
)
df_long = df_long.replace({'type': series_labels})

# ensure full grid: every (state, month, series) combination gets a row, so each
# GeoJSON state is present in every animation frame. Combinations that are absent
# from the data are filled with 0.
all_states = [feature["id"] for feature in brazil_geo["features"]]
all_months = sorted(df_long['month_str'].unique())
all_types  = df_long['type'].unique()

grid_keys = ['dep_id', 'month_str', 'type']
idx = pd.MultiIndex.from_product([all_states, all_months, all_types], names=grid_keys)

df_full = df_long.set_index(grid_keys)
df_full = df_full.reindex(idx, fill_value=0)
df_full = df_full.reset_index()

# The fill value also landed in state_name for the added rows; rebuild it from the id.
df_full['state_name'] = df_full['dep_id'].map(DEP_NAMES)

# ── 3. Plot with featureidkey="id" ─────────────────
# A single colour range taken over the whole table, so colours are comparable
# across animation frames and between the two facets.
case_counts = df_full['cases']
vmin, vmax = case_counts.min(), case_counts.max()

choropleth_options = dict(
    geojson=brazil_geo,
    locations='dep_id',
    featureidkey='id',                 # join df_full.dep_id to each feature's top-level id
    color='cases',
    animation_frame='month_str',       # one frame per 'YYYY-MM' label
    facet_col='type',                  # one map per value of 'type'
    projection='mercator',
    range_color=[vmin, vmax],
    color_continuous_scale='OrRd',
    hover_name='state_name',
    hover_data={'dep_id': False, 'month_str': False},  # hide the join/frame keys in the tooltip
    facet_col_spacing=0.02,
)
fig = px.choropleth(df_full, **choropleth_options)

# Zoom each map to the plotted states; the base map is hidden except for
# country outlines and land fill.
geo_style = dict(
    fitbounds="locations",
    visible=False,
    showcountries=True,
    showland=True,
)
fig.update_geos(**geo_style)

# Thin white borders between states.
fig.update_traces(marker=dict(line=dict(width=0.5, color='white')))

layout_options = dict(
    title_text='Dengue in Brazil: Actual vs Predicted',
    coloraxis_colorbar=dict(title='Cases'),
    margin=dict(r=0, t=50, l=0, b=0),
    height=700,
)
fig.update_layout(**layout_options)

# Export the interactive figure as an HTML file.
fig.write_html('dengue_brazil_improved.html')


In [13]:
import pandas as pd
import json
from urllib.request import urlopen
import plotly.express as px
import math

# ── 1. Load & annotate GeoJSON ───────────────────────
# State-boundary GeoJSON hosted in the codeforamerica/click_that_hood repository.
BRAZIL_GEOJSON_URL = (
    'https://raw.githubusercontent.com/codeforamerica/click_that_hood/'
    'master/public/data/brazil-states.geojson'
)

# The timeout keeps a stalled connection from hanging the kernel indefinitely;
# without it urlopen waits for as long as the socket stays open.
with urlopen(BRAZIL_GEOJSON_URL, timeout=30) as response:
    brazil_geo = json.load(response)  # parsed GeoJSON dict; its 'features' list is used below

# Numeric state id (the values found in the CSV's `dep_id` column) → state name.
DEP_NAMES = {
    11: 'Rondônia',
    12: 'Acre',
    13: 'Amazonas',
    14: 'Roraima',
    15: 'Pará',
    16: 'Amapá',
    17: 'Tocantins',
    21: 'Maranhão',
    22: 'Piauí',
    23: 'Ceará',
    24: 'Rio Grande do Norte',
    25: 'Paraíba',
    26: 'Pernambuco',
    27: 'Alagoas',
    28: 'Sergipe',
    29: 'Bahia',
    31: 'Minas Gerais',
    32: 'Espírito Santo',
    33: 'Rio de Janeiro',
    35: 'São Paulo',
    41: 'Paraná',
    42: 'Santa Catarina',
    43: 'Rio Grande do Sul',
    50: 'Mato Grosso do Sul',
    51: 'Mato Grosso',
    52: 'Goiás',
    53: 'Distrito Federal',
}
# Reverse lookup: state name → numeric id.
INV = dict(zip(DEP_NAMES.values(), DEP_NAMES.keys()))

# Give every feature a top-level numeric 'id' so the choropleth can join it to dep_id.
# All names are checked up front: an unrecognised state name then fails with one
# descriptive KeyError before any feature is modified, rather than a bare KeyError
# part-way through the loop that leaves the GeoJSON half annotated.
unknown_names = sorted(
    {feature['properties']['name'] for feature in brazil_geo['features']}
    .difference(INV)
)
if unknown_names:
    raise KeyError(f"GeoJSON state names missing from DEP_NAMES: {unknown_names}")

for feat in brazil_geo['features']:
    feat['id'] = INV[feat['properties']['name']]

# ── 2. Load & reshape dengue data ────────────────────
df = pd.read_csv("./code/raw_ensemble_predictions_Brazil.csv")

# Fixed colour range for the maps: 0 up to the largest actual or predicted value,
# rounded up to the next multiple of 100.
data_min = 0
raw_max = max(df['actual_all'].max(), df['prediction_all'].max(), 0)
data_max = 100 * math.ceil(raw_max / 100)

# Year/Month are pinned to day 1 so they parse as a date; the 'YYYY-MM' label
# derived from it is what the animation later steps through.
month_start = pd.to_datetime(df[['Year', 'Month']].assign(DAY=1))
df = df.assign(
    date=month_start,
    month_str=month_start.dt.strftime('%Y-%m'),
    state_name=df['dep_id'].map(DEP_NAMES),
)

# Melt to long form: one row per (state, month, series) with the value in 'cases'.
series_labels = {'actual_all': 'Actual', 'prediction_all': 'Predicted'}
df_long = df.melt(
    id_vars=['dep_id', 'state_name', 'month_str'],
    value_vars=list(series_labels),
    var_name='type',
    value_name='cases',
)
df_long = df_long.replace({'type': series_labels})

# Compute MAE & MAPE per state/month
# pivot_table reduces the data to one row per (dep_id, state_name, month_str); with
# no aggfunc given it uses its default aggregation for any repeated keys.
wide = df.pivot_table(
    index=['dep_id','state_name','month_str'],
    values=['actual_all','prediction_all']
).reset_index()

# Absolute error |actual − predicted| for each state/month row.
wide['MAE'] = (wide['actual_all'] - wide['prediction_all']).abs()

# Percentage error is undefined when the actual value is 0. Those denominators are
# masked to NaN with .where(), which keeps the column float64; replace(0, pd.NA)
# would upcast the whole computation to object dtype. Masked rows are then
# reported as 0, exactly as before.
nonzero_actual = wide['actual_all'].where(wide['actual_all'] != 0)
wide['MAPE'] = (wide['MAE'] / nonzero_actual * 100).fillna(0)

# Merge MAE/MAPE back into the long frame. The join is on state and month only, so
# the 'Actual' and 'Predicted' rows of a state/month carry the same metric values.
df_full = df_long.merge(
    wide[['dep_id','month_str','MAE','MAPE']],
    on=['dep_id','month_str'], how='left'
)

# Ensure full grid so every GeoJSON state appears in every frame: each
# (state, month, series) combination gets a row, and combinations absent from the
# data are filled with 0 in every column.
all_states = [feature['id'] for feature in brazil_geo['features']]
all_months = sorted(df_full['month_str'].unique())
all_types  = df_full['type'].unique()

grid_keys = ['dep_id', 'month_str', 'type']
idx = pd.MultiIndex.from_product([all_states, all_months, all_types], names=grid_keys)

df_full = df_full.set_index(grid_keys)
df_full = df_full.reindex(idx, fill_value=0)
df_full = df_full.reset_index()

# The fill value also landed in state_name for the added rows; rebuild it from the id.
df_full['state_name'] = df_full['dep_id'].map(DEP_NAMES)

# ── 3. Plot with custom hover & colorscale ────────────
# Custom nine-step colorscale, listed from the low end to the high end.
custom_scale = [
    "#115f9a",
    "#1984c5",
    "#22a7f0",
    "#48b5c4",
    "#76c68f",
    "#a6d75b",
    "#c9e52f",
    "#d0ee11",
    "#d0f400",
]

# Observed extremes of the plotted column. The figure below does not use them:
# its colour range is [data_min, data_max].
case_counts = df_full['cases']
vmin, vmax = case_counts.min(), case_counts.max()

# Tooltip contents: hide the join/frame keys, show both metrics with one decimal.
hover_fields = {
    'dep_id': False,
    'month_str': False,
    'MAE': ':.1f',
    'MAPE': ':.1f',
}

choropleth_options = dict(
    geojson=brazil_geo,
    locations='dep_id',
    featureidkey='id',                 # join df_full.dep_id to each feature's top-level id
    color='cases',
    animation_frame='month_str',       # one frame per 'YYYY-MM' label
    facet_col='type',                  # one map per value of 'type'
    projection='mercator',
    color_continuous_scale=custom_scale,
    range_color=[data_min, data_max],
    hover_name='state_name',
    hover_data=hover_fields,
    facet_col_spacing=0.02,
    title='Dengue in Brazil: Actual vs Predicted',
)
fig = px.choropleth(df_full, **choropleth_options)

# Zoom each map to the plotted states; the base map is hidden except for
# country outlines and land fill.
fig.update_geos(
    scope='south america',
    fitbounds='locations',
    visible=False,
    showcountries=True,               # show country outlines
    showland=True
)

# Thin white borders between states.
fig.update_traces(
    marker_line_width=0.5,
    marker_line_color='white'
)

# Colour-bar tick spacing. A fixed step of 100 is readable only while data_max is
# at most about 1000; beyond that it produces hundreds of overlapping labels. The
# step therefore scales with the range, aiming for roughly ten ticks. It stays a
# multiple of 100 and never drops below 100, so it is unchanged for data_max <= 1000.
tick_step = max(100, math.ceil(data_max / 1000) * 100)

fig.update_coloraxes(
    cmin=data_min,
    cmax=data_max,
    colorbar=dict(
        tickmode="linear",
        tick0=data_min,
        dtick=tick_step
    )
)
fig.update_layout(
    coloraxis_colorbar=dict(title='Cases'),
    margin={'r':0,'t':50,'l':0,'b':0},
    height=700
)

# Export the interactive figure as an HTML file.
fig.write_html('dengue_brazil_with_metrics.html')
