# Imports

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas
from pathlib import Path

# Constants

In [None]:
# Input CSVs are resolved relative to the directory the notebook is run from:
# both live under its parent directory.
DATA_FILEPATH = Path.cwd().joinpath('..', 'data_concelhos.csv')
POPULATION_FILEPATH = Path.cwd().joinpath(
    '..', 'extra', 'populacional', 'PORDATA_Estimativas-a-31-12_concelhos.csv'
)

# Reading the data

In [None]:
# Load the per-concelho case counts, using the first column as a DatetimeIndex.
# The dates in this file are day-first (the same file is parsed with
# dayfirst=True further down), so state that explicitly instead of relying on
# format inference; `infer_datetime_format` is deprecated in pandas 2.x.
# NOTE(review): assumes the first column is the 'data' date column - confirm.
data = pd.read_csv(DATA_FILEPATH, parse_dates=[0], index_col=[0], dayfirst=True)

# Plotting a chart

In [None]:
# Line chart of the raw series for the two largest cities.
data[['LISBOA', 'PORTO']].plot.line(
    title='Cases in Lisboa and Porto',
    linestyle='--',
    marker='o',
    rot=45,
    fontsize=8,
)

# Merge with Population

In [None]:
# Population estimates: add an upper-cased 'Concelho' column derived from the
# 'Anos' column so it can be used as a join key.
population = pd.read_csv(POPULATION_FILEPATH).assign(
    Concelho=lambda frame: frame['Anos'].str.upper()
)

# Re-read the case counts with the date kept as a regular 'data' column,
# converted from a day-first date string to a proper datetime.
data = pd.read_csv(DATA_FILEPATH).assign(
    data=lambda frame: pd.to_datetime(frame['data'], dayfirst=True)
)

In [None]:
# Build a date x concelho table of 14-day case increases per 100k inhabitants.
#
# Only rows after 2020-07-12 are kept (per the original note, the data is
# weekly from that point on - which is what makes "2 rows = 14 days" valid).
df = data[data['data'] > pd.Timestamp('2020-07-12')]

# Long format: one row per (date, concelho) with the case count in 'Casos'.
df = df.melt(id_vars=['data'], var_name='Concelho', value_name='Casos')

# Attach the 2019 population estimate; concelhos without a match get NaN.
df = df.merge(population[['Concelho', '2019']], how='left', on='Concelho')

# Missing case counts are treated as zero, but a missing population is left as
# NaN: filling it with 0 would turn the ratio below into a division by zero
# (inf / NaN) instead of an honest "no data".
df['Casos'] = df['Casos'].fillna(0)
df['Racio'] = (df['Casos'] * 100_000 / df['2019']).round(1)
# A population that is literally 0 in the source still divides to +/-inf.
df['Racio'] = df['Racio'].replace([np.inf, -np.inf], np.nan)

# Back to wide format, indexed by date. dropna=False keeps concelhos whose
# ratio is entirely NaN as columns, so they show up as "no data" downstream
# rather than silently disappearing.
df = df.pivot_table(values='Racio', index='data', columns='Concelho', dropna=False)

# Difference against the row two steps earlier: with weekly rows that is the
# increase over 14 days.
df = df.diff(2)

In [None]:
# 14-day incidence per 100k for three selected concelhos.
df[['LISBOA', 'PORTO', 'PAÇOS DE FERREIRA']].plot.line(
    title='Cases in Lisboa, Porto, and Paços de Ferreira, 14 days, per 100k',
    linestyle='--',
    marker='o',
    rot=45,
    fontsize=8,
)

# Maps

In [None]:
# Directory holding the concelhos shapefile, relative to the working directory.
PATH_MAP = "../extra/mapas/concelhos/"
# Join with Path so the trailing slash in PATH_MAP does not produce "//".
df_map = geopandas.read_file(Path(PATH_MAP) / "concelhos.shp")

# Concelho as upper case, for matching against the case-data column names.
df_map['Concelho'] = df_map['NAME_2'].str.upper()

# Rename the shapefile entries whose names differ from the ones used in the
# case data. Use .loc[mask, column] rather than chained indexing
# (df['col'][mask] = ...): the chained form triggers SettingWithCopyWarning and
# is a silent no-op under pandas Copy-on-Write.
df_map.loc[df_map['Concelho'] == 'PONTE DE SÔR', 'Concelho'] = 'PONTE DE SOR'
df_map.loc[df_map['Concelho'] == 'PRAIA DA VITÓRIA', 'Concelho'] = 'VILA DA PRAIA DA VITÓRIA'
# LAGOA and CALHETA are disambiguated by their NAME_1 region.
df_map.loc[
    (df_map['Concelho'] == 'LAGOA') & (df_map['NAME_1'] == 'Faro'), 'Concelho'
] = 'LAGOA (FARO)'
df_map.loc[
    (df_map['Concelho'] == 'CALHETA') & (df_map['NAME_1'] == 'Azores'), 'Concelho'
] = 'CALHETA (AÇORES)'

# Keep an independent copy of the cleaned map so later cells can rebuild
# df_map from it any number of times.
df_map_orig = df_map.copy()


In [None]:
# Validate concelhos: report names that appear on only one side of the
# data <-> map join.
# TODO: Concelhos missing from map: 2: {'TAVIRA', 'GUIMARÃES'}

# Names from the case data are the columns of df (column level 'Concelho');
# names from the map are the values of its 'Concelho' column.
concelhos1 = list(df.columns.get_level_values('Concelho'))
concelhos2 = df_map['Concelho'].tolist()
common = list(set(concelhos1).intersection(concelhos2))

# In the data but not on the map.
diff1 = set(concelhos1) - set(common)
if diff1:
    print(f"Concelhos missing from map: {len(diff1)}: {diff1}")

# On the map but not in the data.
diff2 = set(concelhos2) - set(common)
if diff2:
    print(f"Concelhos missing from data.csv: {len(diff2)}: {diff2}")


In [None]:
# Most recent row of df, reshaped to one row per concelho
# (columns: 'Concelho', 'value').
categories = df.iloc[[-1]].melt(id_vars=[])

# Split into categories:
# 0 = NaN (no data)
# 1 = 0 (negative values, i.e. downward corrections, are also put here)
# 2 = ]0, 120[
# 3 = [120, 240[
# 4 = [240, ...
#
# The previous `1 + ceil(value / 120)` put exactly 120 in category 2 and
# exactly 240 in category 3, and left NaN as NaN, contradicting the legend
# above. Floor-based binning makes the lower bounds inclusive as documented.
# Rounding first removes float noise at the bin edges, since `value` is a
# difference of already-rounded numbers.
incidence = categories['value'].round(1)
category = 2 + np.floor(incidence.div(120))  # ]0,120[ -> 2, [120,240[ -> 3, ...
category = category.clip(upper=4)            # everything from 240 up is 4
category = category.mask(incidence <= 0, 1)  # zero (or negative) incidence
categories['category'] = category.fillna(0)  # no data


In [None]:
# Attach each concelho's category to the map geometry (left join, so map
# entries without a category are kept), then drop the Azores and Madeira rows
# so that only the continent is shown.
df_map = (
    df_map_orig
    .merge(categories[['Concelho', 'category']], how='left', on='Concelho')
    .loc[lambda gdf: ~gdf['NAME_1'].isin(['Azores', 'Madeira'])]
)


In [None]:
# Choropleth of the GeoDataFrame with Matplotlib, coloured by 'category'.
fig, ax = plt.subplots(figsize=(15, 6))
ax.set_title("Portugal", loc="left", pad=12.0)
ax.axis('off')

# Light-grey thin borders between concelhos, orange colour ramp, with legend.
choropleth_style = dict(
    cmap='Oranges',
    legend=True,
    linewidth=0.5,
    edgecolor='0.8',
)
df_map.plot(column='category', ax=ax, **choropleth_style)

fig.tight_layout()
# To export the figure, uncomment:
# plt.savefig('map.png', dpi=300, bbox_inches='tight')
plt.show()