Computes the necessary files for the three Datawrapper data visualization pieces.

In [279]:
from babel.dates import format_date
from credentials import access_token # Create a credentials.py file with your API key in a 'key' variable
from datetime import datetime, timedelta
from datawrapper import Datawrapper
import pandas as  pd

In [280]:
dw = Datawrapper(access_token=access_token)

In [281]:
dw.account_info()

This method is deprecated and will be removed in a future version. Use get_account_info instead.


{'id': 539155,
 'email': 'rodrigo.menegat-schuinski@dw.com',
 'name': None,
 'role': 'editor',
 'language': 'en-US',
 'presenceColor': 'color-dw-presence-40',
 'avatar': None,
 'customAvatar': None,
 'teams': [{'id': 'dw',
   'name': 'Deutsche Welle',
   'url': '/v3/teams/dw',
   'active': True}],
 'chartCount': 212,
 'url': '/v3/users/539155',
 'activeTeam': 'dw',
 'entitlements': {}}

#### Reading data

In [282]:
df = pd.read_csv("../output/city-recent-daily-means.csv")

In [283]:
df = df.set_index(pd.to_datetime(df.date))

In [284]:
df = df.rename(columns={'mean':'daily_mean'})

#### Detects the months needed and merges

In [285]:
# Adds the month
df['month'] = df.index.month

In [286]:
# Gets the latest month
latest_month = df['month'].idxmax().month

In [287]:
# Keeps only the latest month
df = df[df['month']==latest_month]

In [288]:
# Reads the associated data
month_history = pd.read_csv(f"../output/city-monthly-means/City_Month_{latest_month}_Mean")
month_history = month_history.rename(columns={"mean":"historical_mean"})

In [289]:
# Adds historical mean
df = df.merge(month_history)

In [290]:
# Converts from kg/m3 to microgram/m3
df['daily_mean'], df['historical_mean'] = df['daily_mean'] * 1e9, df['historical_mean'] * 1e9

In [291]:
# Gets the variation from the mean
df['anomaly'] = df['daily_mean'] / df['historical_mean']

In [292]:
df.head()

Unnamed: 0,CTR_MN_ISO,CTR_MN_NM,ID_HDC_G0,P15,UC_NM_LST,UC_NM_MN,date,daily_mean,month,lat,lon,historical_mean,anomaly
0,ARG,Argentina,962,441985.894928,San Juan,San Juan,2024-09-01,5.152734,9,-31.536676,-68.542058,5.142087,1.002071
1,ARG,Argentina,962,441985.894928,San Juan,San Juan,2024-09-02,17.472118,9,-31.536676,-68.542058,5.142087,3.397865
2,ARG,Argentina,962,441985.894928,San Juan,San Juan,2024-09-03,3.206359,9,-31.536676,-68.542058,5.142087,0.623552
3,ARG,Argentina,962,441985.894928,San Juan,San Juan,2024-09-04,5.140476,9,-31.536676,-68.542058,5.142087,0.999687
4,ARG,Argentina,962,441985.894928,San Juan,San Juan,2024-09-05,7.467998,9,-31.536676,-68.542058,5.142087,1.452328


#### Selecting cities

In [293]:
# Brazilian capitals, with the correct correspondence
br_cities =  {
    'Porto Alegre': 'Porto Alegre (RS)',
    'Florianopolis': 'Florianópolis (SC)',
    'Curitiba': 'Curitiba (PR)',
    'Sao Paulo': 'São Paulo (SP)',
    'Belo Horizonte': 'Belo Horizonte (MG)',
    'Rio de Janeiro': 'Rio de Janeiro (RJ)',
    'Vila Velha': 'Vitória (ES)', # Dado leva o nome de Vila Velha, mas é da região metropolitana de Vitória
    'Cuiaba': 'Cuiabá (MT)',
    'Campo Grande': 'Campo Grande (MS)',
    'Brasilia': 'Brasília (DF)',
    'Goiania': 'Goiânia (GO)',
    'Palmas': 'Palmas (TO)',
    'Manaus': 'Manaus (AM)',
    'Belem': 'Belém (PA)',
    'Rio Branco': 'Rio Branco (AC)',
    'Boa Vista': 'Boa Vista (RR)',
    'Macapa': 'Macapá (AP)',
    'Porto Velho': 'Porto Velho (RO)',
    'Sao Luis': 'São Luís (MA)',
    'Teresina': 'Teresina (PI)',
    'Natal': 'Natal (RN)',
    'Fortaleza': 'Fortaleza (CE)',
    'Recife': 'Recife (PE)',
    'Joao Pessoa': 'João Pessoa (PB)',
    'Aracaju': 'Aracaju (SE)',
    'Maceio': 'Maceió (AL)',
    'Salvador': 'Salvador (BA)'
}

In [294]:
# The Brazilian charts will derive from here
df_br = df[df.UC_NM_MN.isin(br_cities.keys())]
df_br['UC_NM_MN'] = df_br['UC_NM_MN'].replace(br_cities)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_br['UC_NM_MN'] = df_br['UC_NM_MN'].replace(br_cities)


In [295]:
# The South American charts will derive from here
# TO DO

In [296]:
df_br['datetime'] = pd.to_datetime(df.date)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_br['datetime'] = pd.to_datetime(df.date)


#### Multiline chart

In [297]:
# Datawrapper forma
dw_multiline_br = df_br.pivot(index='date', columns=['UC_NM_MN'], values='anomaly')

In [298]:
# Show in percentage points of total
dw_multiline_br = dw_multiline_br * 100

In [299]:
dw_multiline_br['baseline'] = 100

In [300]:
# Date should be its own column
dw_multiline_br = dw_multiline_br.reset_index()

In [301]:
# This chart was created manually beforehand, on the id specified.
# We can simply update the data using the API.
chart_id = 'eIXW7'
dw.add_data(chart_id, dw_multiline_br)

True

'16/09/2024'

In [309]:
dw.get_chart(chart_id)['metadata']['annotate']

{'notes': 'Dados atualizados em 16/09/2024'}

In [313]:
dw.update_metadata(chart_id, {
    "annotate": {"notes":
                f"Datos atualizados em {datetime.now().strftime("%d/%m/%Y")}"}
})

SyntaxError: f-string: unmatched '(' (3543541013.py, line 3)

In [302]:
dw.publish_chart(chart_id, display=True)

In [None]:
dw_multiline_br.to_csv("../output/datawrapper/datawrapper-multiline-br.csv", index=False)

#### Pollution stripes

In [None]:
dw_stripes_br = df_br.pivot(index='UC_NM_MN', columns='date', values='daily_mean').reset_index()

In [None]:
# Adds placeholder columns for sorting the most polluted first
dw_stripes_br['weekly_mean'] = dw_stripes_br.loc[1:, dw_stripes_br.columns[2] : dw_stripes_br.columns[-1]].mean(axis=1)

In [None]:
dw_stripes_br = dw_stripes_br.sort_values(by='weekly_mean', ascending=False)\
    .drop(columns='weekly_mean')\
    .reset_index(drop=True)

In [None]:
# Adds the necessary extra row for information
dw_stripes_br.loc[-1] = ""  # adding a row
dw_stripes_br.index = dw_stripes_br.index + 1  # shifting index
dw_stripes_br = dw_stripes_br.sort_index()  # sorting by index

In [None]:
# Insert new column in specific position
dw_stripes_br.insert(loc=1, column="label", value="")

In [None]:
# Adds other informative values
min_date = pd.to_datetime(dw_stripes_br.columns[2]).strftime("%d/%m")
max_date = pd.to_datetime(dw_stripes_br.columns[-1]).strftime("%d/%m")

dw_stripes_br.loc[0, 'label'] = f'{min_date} – {max_date}'
dw_stripes_br.loc[0, 'UC_NM_MN'] = 'Cidade'

In [None]:
dw_stripes_br.to_csv("../output/datawrapper/datawrapper-stripes-br.csv", index=False)

#### Map

In [None]:
# Geet only the desired columns
dw_map_br = df_br[['UC_NM_MN', 'CTR_MN_NM', 'daily_mean', 'date', 'datetime', 'lat', 'lon']]

In [None]:
# Keeps only the last seven days
max_date = dw_map_br.datetime.max()
week_ago = max_date - timedelta(days=7)

dw_map_br = dw_map_br[(dw_map_br.datetime <= max_date) & (dw_map_br.datetime > week_ago)]

In [None]:
dw_map_br = dw_map_br.groupby(['UC_NM_MN', 'lat', 'lon'])['daily_mean'].mean().reset_index()

In [None]:
start, end = week_ago.strftime('%d/%m'), max_date.strftime('%d/%m')
time_interval = f"Média semanal de partículas PM 2.5 por metro cúbico entre {start} e {end}"
time_interval

In [None]:
dw_map_br = dw_map_br.rename(columns={'daily_mean':time_interval})

In [None]:
dw_map_br.head()

In [None]:
dw_map_br.to_csv("../output/datawrapper/datwrapper-map-br.csv", index=False)