Computes the necessary files for the three Datawrapper data visualization pieces.

In [365]:
from datetime import datetime, timedelta
from datawrapper import Datawrapper
import os
import pandas as  pd
from pathlib import Path

In [366]:
# Resolves enviornment valuables locally.
# If on GH Actions, this won't run.
from dotenv import load_dotenv

env_path = Path('.env')
if env_path.exists():
    load_dotenv()

In [367]:
# Fetch the environment variables
EARTH_ENGINE_AUTH = os.getenv("EARTH_ENGINE_AUTH")
DATAWRAPPER_AUTH = os.getenv("DATAWRAPPER_AUTH")

In [368]:
dw = Datawrapper(access_token=DATAWRAPPER_AUTH)

#### Reading data

In [369]:
df = pd.read_csv("../output/city-recent-daily-means.csv")

In [370]:
df = df.set_index(pd.to_datetime(df.date))

In [371]:
df = df.rename(columns={'mean':'daily_mean'})

#### Detects the months needed and merges

In [372]:
# Adds the month
df['month'] = df.index.month

In [373]:
# Gets the latest month
latest_month = df['month'].idxmax().month

In [374]:
# Keeps only the latest month
df = df[df['month']==latest_month]

In [375]:
# Reads the associated data
month_history = pd.read_csv(f"../output/city-monthly-means/City_Month_{latest_month}_Mean")
month_history = month_history.rename(columns={"mean":"historical_mean"})

In [376]:
# Adds historical mean
df = df.merge(month_history)

In [377]:
# Converts from kg/m3 to microgram/m3
df['daily_mean'], df['historical_mean'] = df['daily_mean'] * 1e9, df['historical_mean'] * 1e9

In [378]:
# Gets the variation from the mean
df['anomaly'] = df['daily_mean'] / df['historical_mean']

In [379]:
df.head()

Unnamed: 0,CTR_MN_ISO,CTR_MN_NM,ID_HDC_G0,P15,UC_NM_LST,UC_NM_MN,date,daily_mean,month,lat,lon,historical_mean,anomaly
0,ARG,Argentina,962,441985.894928,San Juan,San Juan,2024-09-01,5.152734,9,-31.536676,-68.542058,5.142087,1.002071
1,ARG,Argentina,962,441985.894928,San Juan,San Juan,2024-09-02,17.472118,9,-31.536676,-68.542058,5.142087,3.397865
2,ARG,Argentina,962,441985.894928,San Juan,San Juan,2024-09-03,3.206359,9,-31.536676,-68.542058,5.142087,0.623552
3,ARG,Argentina,962,441985.894928,San Juan,San Juan,2024-09-04,5.140476,9,-31.536676,-68.542058,5.142087,0.999687
4,ARG,Argentina,962,441985.894928,San Juan,San Juan,2024-09-05,7.467998,9,-31.536676,-68.542058,5.142087,1.452328


#### Selecting cities

In [380]:
# Brazilian capitals, with the correct correspondence
br_cities =  {
    'Porto Alegre': 'Porto Alegre (RS)',
    'Florianopolis': 'Florianópolis (SC)',
    'Curitiba': 'Curitiba (PR)',
    'Sao Paulo': 'São Paulo (SP)',
    'Belo Horizonte': 'Belo Horizonte (MG)',
    'Rio de Janeiro': 'Rio de Janeiro (RJ)',
    'Vila Velha': 'Vitória (ES)', # Dado leva o nome de Vila Velha, mas é da região metropolitana de Vitória
    'Cuiaba': 'Cuiabá (MT)',
    'Campo Grande': 'Campo Grande (MS)',
    'Brasilia': 'Brasília (DF)',
    'Goiania': 'Goiânia (GO)',
    'Palmas': 'Palmas (TO)',
    'Manaus': 'Manaus (AM)',
    'Belem': 'Belém (PA)',
    'Rio Branco': 'Rio Branco (AC)',
    'Boa Vista': 'Boa Vista (RR)',
    'Macapa': 'Macapá (AP)',
    'Porto Velho': 'Porto Velho (RO)',
    'Sao Luis': 'São Luís (MA)',
    'Teresina': 'Teresina (PI)',
    'Natal': 'Natal (RN)',
    'Fortaleza': 'Fortaleza (CE)',
    'Recife': 'Recife (PE)',
    'Joao Pessoa': 'João Pessoa (PB)',
    'Aracaju': 'Aracaju (SE)',
    'Maceio': 'Maceió (AL)',
    'Salvador': 'Salvador (BA)'
}

In [381]:
# The Brazilian charts will derive from here
df_br = df[df.UC_NM_MN.isin(br_cities.keys())]
df_br['UC_NM_MN'] = df_br['UC_NM_MN'].replace(br_cities)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_br['UC_NM_MN'] = df_br['UC_NM_MN'].replace(br_cities)


In [382]:
df_br['datetime'] = pd.to_datetime(df.date)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_br['datetime'] = pd.to_datetime(df.date)


#### Multiline chart

In [383]:
# This chart was created manually beforehand, on the id specified.
# We can simply update the data using the API.
chart_id = 'eIXW7'

In [384]:
# Datawrapper format
dw_multiline_br = df_br.pivot(index='date', columns=['UC_NM_MN'], values='anomaly')

In [385]:
# Show in percentage points of total
dw_multiline_br = dw_multiline_br * 100

In [386]:
dw_multiline_br['baseline'] = 100

In [387]:
# Date should be its own column
dw_multiline_br = dw_multiline_br.reset_index()

In [388]:
# Adds the new fetched data
dw.add_data(chart_id, dw_multiline_br)

True

In [389]:
dw.update_metadata(chart_id, {
    "annotate": {"notes":
                f"Dados atualizados em {datetime.now().strftime('%d/%m/%Y')}"}
})

This method is deprecated and will be removed in a future version. Use update_chart instead.


{'publicId': 'eIXW7',
 'language': 'pt-BR',
 'theme': 'deutsche-welle-v2',
 'id': 'eIXW7',
 'type': 'multiple-lines',
 'title': 'Capitais brasileiras atingiram níveis de poluição de mais de 400% da média histórica',
 'lastEditStep': 5,
 'publishedAt': '2024-09-17T15:27:24.000Z',
 'publicUrl': 'https://datawrapper.dwcdn.net/eIXW7/42/',
 'publicVersion': 42,
 'deleted': False,
 'deletedAt': None,
 'forkable': False,
 'isFork': False,
 'metadata': {'data': {'changes': [],
   'transpose': False,
   'vertical-header': True,
   'horizontal-header': True,
   'external-data': 'https://raw.githubusercontent.com/dw-data/latam-pollution-levels/main/output/datawrapper/datawrapper-multiline-br.csv',
   'upload-method': 'copy',
   'use-datawrapper-cdn': True},
  'describe': {'source-name': 'Análise da DW com dados do Copernicus Atmosphere Monitoring Service (CAMS)',
   'source-url': '',
   'intro': 'Concentração média diária de partículas PM 2.5, em comparação com a média observada no mês de Setembr

In [391]:
dw.publish_chart(chart_id, display=True)

In [392]:
dw_multiline_br.to_csv("../output/datawrapper/datawrapper-multiline-br.csv", index=False)

#### Pollution stripes

In [393]:
# Redefines chart id
chart_id = 'KiCH4'

In [394]:
# Formats data in Datawrapper format
dw_stripes_br = df_br.pivot(index='UC_NM_MN', columns='date', values='daily_mean').reset_index()

In [395]:
df_br.date.max()

'2024-09-16'

In [396]:
# Adds placeholder columns for sorting the most polluted first
dw_stripes_br['weekly_mean'] = dw_stripes_br.loc[1:, dw_stripes_br.columns[2] : dw_stripes_br.columns[-1]].mean(axis=1)

In [397]:
dw_stripes_br = dw_stripes_br.sort_values(by='weekly_mean', ascending=False)\
    .drop(columns='weekly_mean')\
    .reset_index(drop=True)

In [398]:
# Adds the necessary extra row for information
dw_stripes_br.loc[-1] = ""  # adding a row
dw_stripes_br.index = dw_stripes_br.index + 1  # shifting index
dw_stripes_br = dw_stripes_br.sort_index()  # sorting by index

In [399]:
# Insert new column in specific position
dw_stripes_br.insert(loc=1, column="label", value="")

In [400]:
# Most recent date
max_date = pd.to_datetime(dw_stripes_br.columns[-1])

# Minimum date
min_date = pd.to_datetime(dw_stripes_br.columns[2])

In [401]:
# Format to string
min_date_str = min_date.strftime("%d/%m")
max_date_str = max_date.strftime("%d/%m")

In [402]:
# Adds other informative values
dw_stripes_br.loc[0, 'label'] = f'{min_date_str} – {max_date_str}'
dw_stripes_br.loc[0, 'UC_NM_MN'] = 'Cidade'

In [403]:
# Tries formatting columns using the API
metadata = dw.get_chart(chart_id)

In [404]:
# The elements value columns need
column_format = {'style': {'bold': False,
   'color': False,
   'italic': False,
   'fontSize': 1,
   'underline': False,
   'background': False},
  'width': 0.05,
  'append': '',
  'format': '0,0',
  'heatmap': {'enabled': True},
  'prepend': '',
  'barColor': 0,
  'barStyle': 'normal',
  'minWidth': 1,
  'sortable': True,
  'alignment': 'auto',
  'flagStyle': '1x1',
  'showAsBar': False,
  'sparkline': {'area': False,
   'type': 'line',
   'color': 0,
   'title': '',
   'format': '0.[0]a',
   'height': 20,
   'stroke': 2,
   'dotLast': True,
   'enabled': False,
   'colorNeg': 0,
   'dotFirst': True,
   'rangeMax': '',
   'rangeMin': '',
   'labelDiff': False},
  'borderLeft': 'none',
  'fixedWidth': False,
  'barRangeMax': '',
  'barRangeMin': '',
  'borderRight': 'none',
  'compactMode': False,
  'customColor': False,
  'replaceFlags': False,
  'showOnMobile': True,
  'customColorBy': 0,
  'showOnDesktop': True,
  'customBarColor': False,
  'barNoBackground': False,
  'borderLeftColor': '#333333',
  'customColorText': {'__object': True},
  'barColorNegative': False,
  'customBarColorBy': 0,
  'alignmentVertical': 'middle',
  'customColorBackground': {'__object': True},
  'customColorBarBackground': {'__object': True}}

In [405]:
# Selects all the columns that will display data
date_columns = dw_stripes_br.columns[2:]

In [406]:
# And create a dictionary with info for them
date_columns = { date: column_format for date in date_columns}

In [407]:
# Adds the new fetched data
dw.add_data(chart_id, dw_stripes_br)

True

In [408]:
# Then adds metadata
dw.update_metadata(chart_id, {
    "visualize": {
        "columns": date_columns
    }
})

This method is deprecated and will be removed in a future version. Use update_chart instead.


{'publicId': 'KiCH4',
 'language': 'pt-BR',
 'theme': 'deutsche-welle-v2',
 'id': 'KiCH4',
 'type': 'tables',
 'title': 'Rio Branco e Porto Velho tiveram maior concentração de poluentes',
 'lastEditStep': 5,
 'publishedAt': '2024-09-17T15:27:52.000Z',
 'publicUrl': 'https://datawrapper.dwcdn.net/KiCH4/11/',
 'publicVersion': 11,
 'deleted': False,
 'deletedAt': None,
 'forkable': False,
 'isFork': False,
 'metadata': {'data': {'changes': [],
   'transpose': False,
   'vertical-header': True,
   'horizontal-header': True,
   'external-data': 'https://raw.githubusercontent.com/dw-data/latam-pollution-levels/main/output/datawrapper-stripes-br.csv',
   'upload-method': 'copy',
   'use-datawrapper-cdn': True},
  'describe': {'source-name': 'Análise da DW com dados do Copernicus Atmosphere Monitoring Service (CAMS)',
   'source-url': '',
   'intro': 'Média diária de partículas PM 2.5 por metro cúbico nas capitais estaduais',
   'byline': '',
   'aria-description': '',
   'number-format': '-'

In [409]:
dw.update_metadata(chart_id, {
    "annotate": {"notes":
                f"Dados atualizados em {datetime.now().strftime('%d/%m/%Y')}"}
})

This method is deprecated and will be removed in a future version. Use update_chart instead.


{'publicId': 'KiCH4',
 'language': 'pt-BR',
 'theme': 'deutsche-welle-v2',
 'id': 'KiCH4',
 'type': 'tables',
 'title': 'Rio Branco e Porto Velho tiveram maior concentração de poluentes',
 'lastEditStep': 5,
 'publishedAt': '2024-09-17T15:27:52.000Z',
 'publicUrl': 'https://datawrapper.dwcdn.net/KiCH4/11/',
 'publicVersion': 11,
 'deleted': False,
 'deletedAt': None,
 'forkable': False,
 'isFork': False,
 'metadata': {'data': {'changes': [],
   'transpose': False,
   'vertical-header': True,
   'horizontal-header': True,
   'external-data': 'https://raw.githubusercontent.com/dw-data/latam-pollution-levels/main/output/datawrapper-stripes-br.csv',
   'upload-method': 'copy',
   'use-datawrapper-cdn': True},
  'describe': {'source-name': 'Análise da DW com dados do Copernicus Atmosphere Monitoring Service (CAMS)',
   'source-url': '',
   'intro': 'Média diária de partículas PM 2.5 por metro cúbico nas capitais estaduais',
   'byline': '',
   'aria-description': '',
   'number-format': '-'

In [410]:
dw.publish_chart(chart_id, display=True)

In [411]:
dw_stripes_br.to_csv("../output/datawrapper/datawrapper-stripes-br.csv", index=False)

#### Map

In [412]:
# Chart ID
chart_id = 'bxlJm'
metadata = dw.get_chart(chart_id)

In [413]:
# Geet only the desired columns
dw_map_br = df_br[['UC_NM_MN', 'CTR_MN_NM', 'daily_mean', 'date', 'datetime', 'lat', 'lon']]

In [414]:
# Keeps only the last seven days
max_date = dw_map_br.datetime.max()
week_ago = max_date - timedelta(days=7)

dw_map_br = dw_map_br[(dw_map_br.datetime <= max_date) & (dw_map_br.datetime > week_ago)]

In [415]:
dw_map_br = dw_map_br.groupby(['UC_NM_MN', 'lat', 'lon'])['daily_mean'].mean().reset_index()

In [416]:
start, end = week_ago.strftime('%d/%m'), max_date.strftime('%d/%m')
time_interval_legend = f"Média semanal de partículas PM 2.5 por metro cúbico entre {start} e {end}"
time_interval_legend

'Média semanal de partículas PM 2.5 por metro cúbico entre 09/09 e 16/09'

In [417]:
dw_map_br = dw_map_br.rename(columns={'daily_mean':'weekly_mean'})

In [418]:
dw_map_br

Unnamed: 0,UC_NM_MN,lat,lon,weekly_mean
0,Aracaju (SE),-10.921559,-37.070369,8.116293
1,Belo Horizonte (MG),-19.900457,-44.008427,9.719217
2,Belém (PA),-1.373024,-48.431316,7.466232
3,Boa Vista (RR),2.814118,-60.713712,12.557356
4,Brasília (DF),-15.768424,-47.881512,5.644465
5,Campo Grande (MS),-20.481465,-54.618454,48.464004
6,Cuiabá (MT),-15.619976,-56.091379,54.913855
7,Curitiba (PR),-25.442783,-49.261484,23.547951
8,Florianópolis (SC),-27.595206,-48.617162,22.57131
9,Fortaleza (CE),-3.785749,-38.560313,8.492437


In [419]:
# Adds the new fetched data
dw.add_data(chart_id, dw_map_br)

True

In [420]:
dw.update_metadata(chart_id, {
    'describe': {
        'intro': (
            'Média da concentração de PM 2.5 nas capitais brasileiras em um intervalo entre ' + 
            start + ' e ' + end
        )
    }
})

This method is deprecated and will be removed in a future version. Use update_chart instead.


{'publicId': 'bxlJm',
 'language': 'pt-BR',
 'theme': 'deutsche-welle-v2',
 'id': 'bxlJm',
 'type': 'd3-maps-symbols',
 'title': 'Nível de poluição nas capitais brasileiras na última semana',
 'lastEditStep': 5,
 'publishedAt': '2024-09-17T15:28:03.000Z',
 'publicUrl': 'https://datawrapper.dwcdn.net/bxlJm/11/',
 'publicVersion': 11,
 'deleted': False,
 'deletedAt': None,
 'forkable': False,
 'isFork': False,
 'metadata': {'data': {'changes': [],
   'transpose': False,
   'vertical-header': True,
   'horizontal-header': True,
   'auto-geocode': False,
   'column-order': [0, 2, 3, 1],
   'column-format': {'lat': {'type': 'number'},
    'lon': {'type': 'number'},
    'UC_NM_MN': {'type': 'text'}},
   'upload-method': 'copy'},
  'describe': {'source-name': 'Análise da DW com dados do Copernicus Atmosphere Monitoring Service (CAMS) ',
   'source-url': '',
   'intro': 'Média da concentração de PM 2.5 nas capitais brasileiras em um intervalo entre 09/09 e 16/09',
   'byline': '',
   'aria-des

In [421]:
dw.update_metadata(chart_id, {
    "annotate": {"notes":
                f"Dados atualizados em {datetime.now().strftime('%d/%m/%Y')}"}
})

This method is deprecated and will be removed in a future version. Use update_chart instead.


{'publicId': 'bxlJm',
 'language': 'pt-BR',
 'theme': 'deutsche-welle-v2',
 'id': 'bxlJm',
 'type': 'd3-maps-symbols',
 'title': 'Nível de poluição nas capitais brasileiras na última semana',
 'lastEditStep': 5,
 'publishedAt': '2024-09-17T15:28:03.000Z',
 'publicUrl': 'https://datawrapper.dwcdn.net/bxlJm/11/',
 'publicVersion': 11,
 'deleted': False,
 'deletedAt': None,
 'forkable': False,
 'isFork': False,
 'metadata': {'data': {'changes': [],
   'transpose': False,
   'vertical-header': True,
   'horizontal-header': True,
   'auto-geocode': False,
   'column-order': [0, 2, 3, 1],
   'column-format': {'lat': {'type': 'number'},
    'lon': {'type': 'number'},
    'UC_NM_MN': {'type': 'text'}},
   'upload-method': 'copy'},
  'describe': {'source-name': 'Análise da DW com dados do Copernicus Atmosphere Monitoring Service (CAMS) ',
   'source-url': '',
   'intro': 'Média da concentração de PM 2.5 nas capitais brasileiras em um intervalo entre 09/09 e 16/09',
   'byline': '',
   'aria-des

In [423]:
dw.update_metadata(chart_id, metadata={ 
        'visualize': {
            'legends': {
                'color': {
                    'title': "Concentração de partículas PM 2.5 por metro cúbico" }}}})

This method is deprecated and will be removed in a future version. Use update_chart instead.


{'publicId': 'bxlJm',
 'language': 'pt-BR',
 'theme': 'deutsche-welle-v2',
 'id': 'bxlJm',
 'type': 'd3-maps-symbols',
 'title': 'Nível de poluição nas capitais brasileiras na última semana',
 'lastEditStep': 5,
 'publishedAt': '2024-09-17T15:28:03.000Z',
 'publicUrl': 'https://datawrapper.dwcdn.net/bxlJm/11/',
 'publicVersion': 11,
 'deleted': False,
 'deletedAt': None,
 'forkable': False,
 'isFork': False,
 'metadata': {'data': {'changes': [],
   'transpose': False,
   'vertical-header': True,
   'horizontal-header': True,
   'auto-geocode': False,
   'column-order': [0, 2, 3, 1],
   'column-format': {'lat': {'type': 'number'},
    'lon': {'type': 'number'},
    'UC_NM_MN': {'type': 'text'}},
   'upload-method': 'copy'},
  'describe': {'source-name': 'Análise da DW com dados do Copernicus Atmosphere Monitoring Service (CAMS) ',
   'source-url': '',
   'intro': 'Média da concentração de PM 2.5 nas capitais brasileiras em um intervalo entre 09/09 e 16/09',
   'byline': '',
   'aria-des

In [424]:
tooltip_data = {'body': (
    'Média de {{ ROUND(weekly_mean) }} microgramas de partículas PM 2.5 por metro cúbico entre ' + start + ' e ' + end
    ),
 'title': '{{ uc_nm_mn }}',
 'sticky': True,
 'enabled': True}

dw.update_metadata(chart_id, metadata={
    'visualize': {'tooltip': tooltip_data }
})

This method is deprecated and will be removed in a future version. Use update_chart instead.


{'publicId': 'bxlJm',
 'language': 'pt-BR',
 'theme': 'deutsche-welle-v2',
 'id': 'bxlJm',
 'type': 'd3-maps-symbols',
 'title': 'Nível de poluição nas capitais brasileiras na última semana',
 'lastEditStep': 5,
 'publishedAt': '2024-09-17T15:28:03.000Z',
 'publicUrl': 'https://datawrapper.dwcdn.net/bxlJm/11/',
 'publicVersion': 11,
 'deleted': False,
 'deletedAt': None,
 'forkable': False,
 'isFork': False,
 'metadata': {'data': {'changes': [],
   'transpose': False,
   'vertical-header': True,
   'horizontal-header': True,
   'auto-geocode': False,
   'column-order': [0, 2, 3, 1],
   'column-format': {'lat': {'type': 'number'},
    'lon': {'type': 'number'},
    'UC_NM_MN': {'type': 'text'}},
   'upload-method': 'copy'},
  'describe': {'source-name': 'Análise da DW com dados do Copernicus Atmosphere Monitoring Service (CAMS) ',
   'source-url': '',
   'intro': 'Média da concentração de PM 2.5 nas capitais brasileiras em um intervalo entre 09/09 e 16/09',
   'byline': '',
   'aria-des

In [425]:
metadata['metadata']['visualize']['tooltip']

{'body': 'Média de {{ ROUND(weekly_mean) }} microgramas de partículas PM 2.5 por metro cúbico entre 09/09 e 16/09',
 'title': '{{ uc_nm_mn }}',
 'sticky': True,
 'enabled': True}

In [426]:
dw.publish_chart(chart_id, display=True)

In [427]:
dw_map_br.to_csv("../output/datawrapper/datwrapper-map-br.csv", index=False)