Computes the necessary files for the three Datawrapper data visualization pieces.

In [318]:
from babel.dates import format_date
from credentials import access_token # Create a credentials.py file with your API key in a 'key' variable
from datetime import datetime, timedelta
from datawrapper import Datawrapper
import pandas as  pd

In [319]:
dw = Datawrapper(access_token=access_token)

In [320]:
dw.account_info()

This method is deprecated and will be removed in a future version. Use get_account_info instead.


{'id': 539155,
 'email': 'rodrigo.menegat-schuinski@dw.com',
 'name': None,
 'role': 'editor',
 'language': 'en-US',
 'presenceColor': 'color-dw-presence-40',
 'avatar': None,
 'customAvatar': None,
 'teams': [{'id': 'dw',
   'name': 'Deutsche Welle',
   'url': '/v3/teams/dw',
   'active': True}],
 'chartCount': 212,
 'url': '/v3/users/539155',
 'activeTeam': 'dw',
 'entitlements': {}}

#### Reading data

In [321]:
df = pd.read_csv("../output/city-recent-daily-means.csv")

In [322]:
df = df.set_index(pd.to_datetime(df.date))

In [323]:
df = df.rename(columns={'mean':'daily_mean'})

#### Detects the months needed and merges

In [324]:
# Adds the month
df['month'] = df.index.month

In [325]:
# Gets the latest month
latest_month = df['month'].idxmax().month

In [326]:
# Keeps only the latest month
df = df[df['month']==latest_month]

In [327]:
# Reads the associated data
month_history = pd.read_csv(f"../output/city-monthly-means/City_Month_{latest_month}_Mean")
month_history = month_history.rename(columns={"mean":"historical_mean"})

In [328]:
# Adds historical mean
df = df.merge(month_history)

In [329]:
# Converts from kg/m3 to microgram/m3
df['daily_mean'], df['historical_mean'] = df['daily_mean'] * 1e9, df['historical_mean'] * 1e9

In [330]:
# Gets the variation from the mean
df['anomaly'] = df['daily_mean'] / df['historical_mean']

In [331]:
df.head()

Unnamed: 0,CTR_MN_ISO,CTR_MN_NM,ID_HDC_G0,P15,UC_NM_LST,UC_NM_MN,date,daily_mean,month,lat,lon,historical_mean,anomaly
0,ARG,Argentina,962,441985.894928,San Juan,San Juan,2024-09-01,5.152734,9,-31.536676,-68.542058,5.142087,1.002071
1,ARG,Argentina,962,441985.894928,San Juan,San Juan,2024-09-02,17.472118,9,-31.536676,-68.542058,5.142087,3.397865
2,ARG,Argentina,962,441985.894928,San Juan,San Juan,2024-09-03,3.206359,9,-31.536676,-68.542058,5.142087,0.623552
3,ARG,Argentina,962,441985.894928,San Juan,San Juan,2024-09-04,5.140476,9,-31.536676,-68.542058,5.142087,0.999687
4,ARG,Argentina,962,441985.894928,San Juan,San Juan,2024-09-05,7.467998,9,-31.536676,-68.542058,5.142087,1.452328


#### Selecting cities

In [332]:
# Brazilian capitals, with the correct correspondence
br_cities =  {
    'Porto Alegre': 'Porto Alegre (RS)',
    'Florianopolis': 'Florianópolis (SC)',
    'Curitiba': 'Curitiba (PR)',
    'Sao Paulo': 'São Paulo (SP)',
    'Belo Horizonte': 'Belo Horizonte (MG)',
    'Rio de Janeiro': 'Rio de Janeiro (RJ)',
    'Vila Velha': 'Vitória (ES)', # Dado leva o nome de Vila Velha, mas é da região metropolitana de Vitória
    'Cuiaba': 'Cuiabá (MT)',
    'Campo Grande': 'Campo Grande (MS)',
    'Brasilia': 'Brasília (DF)',
    'Goiania': 'Goiânia (GO)',
    'Palmas': 'Palmas (TO)',
    'Manaus': 'Manaus (AM)',
    'Belem': 'Belém (PA)',
    'Rio Branco': 'Rio Branco (AC)',
    'Boa Vista': 'Boa Vista (RR)',
    'Macapa': 'Macapá (AP)',
    'Porto Velho': 'Porto Velho (RO)',
    'Sao Luis': 'São Luís (MA)',
    'Teresina': 'Teresina (PI)',
    'Natal': 'Natal (RN)',
    'Fortaleza': 'Fortaleza (CE)',
    'Recife': 'Recife (PE)',
    'Joao Pessoa': 'João Pessoa (PB)',
    'Aracaju': 'Aracaju (SE)',
    'Maceio': 'Maceió (AL)',
    'Salvador': 'Salvador (BA)'
}

In [333]:
# The Brazilian charts will derive from here
df_br = df[df.UC_NM_MN.isin(br_cities.keys())]
df_br['UC_NM_MN'] = df_br['UC_NM_MN'].replace(br_cities)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_br['UC_NM_MN'] = df_br['UC_NM_MN'].replace(br_cities)


In [334]:
# The South American charts will derive from here
# TO DO

In [335]:
df_br['datetime'] = pd.to_datetime(df.date)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_br['datetime'] = pd.to_datetime(df.date)


#### Multiline chart

In [336]:
# This chart was created manually beforehand, on the id specified.
# We can simply update the data using the API.
chart_id = 'eIXW7'

In [337]:
# Datawrapper format
dw_multiline_br = df_br.pivot(index='date', columns=['UC_NM_MN'], values='anomaly')

In [338]:
# Show in percentage points of total
dw_multiline_br = dw_multiline_br * 100

In [339]:
dw_multiline_br['baseline'] = 100

In [340]:
# Date should be its own column
dw_multiline_br = dw_multiline_br.reset_index()

In [341]:
# Adds the new fetched data
dw.add_data(chart_id, dw_multiline_br)

True

In [342]:
dw.update_metadata(chart_id, {
    "annotate": {"notes":
                f"Dados atualizados em {datetime.now().strftime('%d/%m/%Y')}"}
})

This method is deprecated and will be removed in a future version. Use update_chart instead.


{'publicId': 'eIXW7',
 'language': 'pt-BR',
 'theme': 'deutsche-welle-v2',
 'id': 'eIXW7',
 'type': 'multiple-lines',
 'title': 'Capitais brasileiras atingiram níveis de poluição de mais de 400% da média histórica',
 'lastEditStep': 5,
 'publishedAt': '2024-09-16T20:59:17.000Z',
 'publicUrl': 'https://datawrapper.dwcdn.net/eIXW7/33/',
 'publicVersion': 33,
 'deleted': False,
 'deletedAt': None,
 'forkable': False,
 'isFork': False,
 'metadata': {'data': {'changes': [],
   'transpose': False,
   'vertical-header': True,
   'horizontal-header': True,
   'external-data': 'https://raw.githubusercontent.com/dw-data/latam-pollution-levels/main/output/datawrapper/datawrapper-multiline-br.csv',
   'upload-method': 'copy',
   'use-datawrapper-cdn': True},
  'describe': {'source-name': 'Análise da DW com dados do Copernicus Atmosphere Monitoring Service (CAMS)',
   'source-url': '',
   'intro': 'Concentração média diária de partículas PM 2.5, em comparação com a média observada no mês de Setembr

In [343]:
dw.publish_chart(chart_id, display=True)

In [377]:
dw.publish_chart(chart_id, display=True)

In [344]:
dw_multiline_br.to_csv("../output/datawrapper/datawrapper-multiline-br.csv", index=False)

#### Pollution stripes

In [347]:
# Redefines chart id
chart_id = 'KiCH4'

In [348]:
# Formats data in Datawrapper format
dw_stripes_br = df_br.pivot(index='UC_NM_MN', columns='date', values='daily_mean').reset_index()

In [349]:
# Adds placeholder columns for sorting the most polluted first
dw_stripes_br['weekly_mean'] = dw_stripes_br.loc[1:, dw_stripes_br.columns[2] : dw_stripes_br.columns[-1]].mean(axis=1)

In [350]:
dw_stripes_br = dw_stripes_br.sort_values(by='weekly_mean', ascending=False)\
    .drop(columns='weekly_mean')\
    .reset_index(drop=True)

In [351]:
# Adds the necessary extra row for information
dw_stripes_br.loc[-1] = ""  # adding a row
dw_stripes_br.index = dw_stripes_br.index + 1  # shifting index
dw_stripes_br = dw_stripes_br.sort_index()  # sorting by index

In [352]:
# Insert new column in specific position
dw_stripes_br.insert(loc=1, column="label", value="")

In [353]:
# Adds other informative values
min_date = pd.to_datetime(dw_stripes_br.columns[2]).strftime("%d/%m")
max_date = pd.to_datetime(dw_stripes_br.columns[-1]).strftime("%d/%m")

dw_stripes_br.loc[0, 'label'] = f'{min_date} – {max_date}'
dw_stripes_br.loc[0, 'UC_NM_MN'] = 'Cidade'

In [364]:
# Tries formatting columns using the API
metadata = dw.get_chart(chart_id)

In [370]:
# The elements value columns need
column_format = {'style': {'bold': False,
   'color': False,
   'italic': False,
   'fontSize': 1,
   'underline': False,
   'background': False},
  'width': 0.05,
  'append': '',
  'format': '0,0',
  'heatmap': {'enabled': True},
  'prepend': '',
  'barColor': 0,
  'barStyle': 'normal',
  'minWidth': 1,
  'sortable': True,
  'alignment': 'auto',
  'flagStyle': '1x1',
  'showAsBar': False,
  'sparkline': {'area': False,
   'type': 'line',
   'color': 0,
   'title': '',
   'format': '0.[0]a',
   'height': 20,
   'stroke': 2,
   'dotLast': True,
   'enabled': False,
   'colorNeg': 0,
   'dotFirst': True,
   'rangeMax': '',
   'rangeMin': '',
   'labelDiff': False},
  'borderLeft': 'none',
  'fixedWidth': False,
  'barRangeMax': '',
  'barRangeMin': '',
  'borderRight': 'none',
  'compactMode': False,
  'customColor': False,
  'replaceFlags': False,
  'showOnMobile': True,
  'customColorBy': 0,
  'showOnDesktop': True,
  'customBarColor': False,
  'barNoBackground': False,
  'borderLeftColor': '#333333',
  'customColorText': {'__object': True},
  'barColorNegative': False,
  'customBarColorBy': 0,
  'alignmentVertical': 'middle',
  'customColorBackground': {'__object': True},
  'customColorBarBackground': {'__object': True}}

In [371]:
# Selects all the columns that will display data
date_columns = dw_stripes_br.columns[2:]

In [373]:
# And create a dictionary with info for them
date_columns = { date: column_format for date in date_columns}

In [375]:
# First updates the data
# Adds the new fetched data
dw.add_data(chart_id, dw_stripes_br)

True

In [376]:
# Then adds metadata
dw.update_metadata(chart_id, {
    "visualize": {
        "columns": date_columns
    }
})

This method is deprecated and will be removed in a future version. Use update_chart instead.


{'publicId': 'KiCH4',
 'language': 'pt-BR',
 'theme': 'deutsche-welle-v2',
 'id': 'KiCH4',
 'type': 'tables',
 'title': 'Rio Branco e Porto Velho tiveram maior concentração de poluentes',
 'lastEditStep': 5,
 'publishedAt': '2024-09-12T20:30:00.000Z',
 'publicUrl': 'https://datawrapper.dwcdn.net/KiCH4/5/',
 'publicVersion': 5,
 'deleted': False,
 'deletedAt': None,
 'forkable': False,
 'isFork': False,
 'metadata': {'data': {'changes': [],
   'transpose': False,
   'vertical-header': True,
   'horizontal-header': True,
   'external-data': 'https://raw.githubusercontent.com/dw-data/latam-pollution-levels/main/output/datawrapper-stripes-br.csv',
   'upload-method': 'copy',
   'use-datawrapper-cdn': True},
  'describe': {'source-name': 'Análise da DW com dados do Copernicus Atmosphere Monitoring Service (CAMS)',
   'source-url': '',
   'intro': 'Média diária de partículas PM 2.5 por metro cúbico nas capitais estaduais',
   'byline': '',
   'aria-description': '',
   'number-format': '-',


In [None]:
dw_stripes_br.to_csv("../output/datawrapper/datawrapper-stripes-br.csv", index=False)

#### Map

In [None]:
# Geet only the desired columns
dw_map_br = df_br[['UC_NM_MN', 'CTR_MN_NM', 'daily_mean', 'date', 'datetime', 'lat', 'lon']]

In [None]:
# Keeps only the last seven days
max_date = dw_map_br.datetime.max()
week_ago = max_date - timedelta(days=7)

dw_map_br = dw_map_br[(dw_map_br.datetime <= max_date) & (dw_map_br.datetime > week_ago)]

In [None]:
dw_map_br = dw_map_br.groupby(['UC_NM_MN', 'lat', 'lon'])['daily_mean'].mean().reset_index()

In [None]:
start, end = week_ago.strftime('%d/%m'), max_date.strftime('%d/%m')
time_interval = f"Média semanal de partículas PM 2.5 por metro cúbico entre {start} e {end}"
time_interval

In [None]:
dw_map_br = dw_map_br.rename(columns={'daily_mean':time_interval})

In [None]:
dw_map_br.head()

In [None]:
dw_map_br.to_csv("../output/datawrapper/datwrapper-map-br.csv", index=False)