In [None]:
import re
import json
import string
import requests
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [None]:
def get_provincias(timeout=60):
    """Fetch the province catalogue from the Flowmaps API.

    Parameters
    ----------
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` waits indefinitely, which would hang the notebook.

    Returns
    -------
    dict
        Mapping of ``cod_prov`` to ``provincia`` for every document in the
        ``_items`` list of the response.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    """
    url = "https://flowmaps.life.bsc.es/api/cnig.provincias?max_results=100"
    response = requests.get(url, timeout=timeout)
    # Fail with a clear HTTP error instead of an obscure JSON/KeyError later on
    response.raise_for_status()
    items = response.json()["_items"]
    return {doc['cod_prov']: doc['provincia'] for doc in items}


def get_incoming_risk(pro_id, timeout=60):
    """Fetch the incoming-risk history for one target province.

    Parameters
    ----------
    pro_id : str
        Code of the target province, sent as the ``target`` filter.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per document in the ``_items`` list of the response, with the
        columns ``source``, ``trips``, ``date``, ``source_active_cases_14``,
        ``source_population`` and ``incoming_risk``, sorted by ``date``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    KeyError
        If a returned document lacks one of the expected columns.
    """
    filters = {"source_layer": "cnig_provincias", "target_layer": "cnig_provincias",
               "target": pro_id, "ev": "ES.covid_cpro", "total": False}
    columns = ['source', 'trips', 'date', 'source_active_cases_14',
               'source_population', 'incoming_risk']

    url_base = "https://flowmaps.life.bsc.es/api/incoming_risk_history"
    # Let requests URL-encode the JSON filter rather than concatenating raw
    # braces, quotes and spaces into the query string by hand.
    response = requests.get(url_base, params={"where": json.dumps(filters)},
                            timeout=timeout)
    response.raise_for_status()

    # NOTE(review): only the `_items` of this single response are read; confirm
    # the endpoint returns the full history in one page.
    items = response.json()['_items']
    data = [[document[col] for col in columns] for document in items]

    df = pd.DataFrame(data, columns=columns)
    return df.sort_values(by='date')
    
    
def get_k_top_mar_source(df, top_k=4):
    """Keep only the rows belonging to the ``top_k`` highest-risk sources.

    Sources are ranked by the sum of their ``incoming_risk`` over all rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``source`` and ``incoming_risk``.
    top_k : int, optional
        Number of sources to keep.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` (original order and columns) whose ``source`` is
        one of the ``top_k`` sources with the largest total incoming risk.
    """
    risk_by_source = df.groupby('source')[['incoming_risk']].sum()
    ranking = risk_by_source.sort_values('incoming_risk', ascending=False)
    best_sources = ranking.index[:top_k]
    return df[df['source'].isin(best_sources)]


def apply_rolling_average(df, rolling=7, other_cols=['date', 'Province'],
                          data_cols=['prob_inf', 'trips', 'incoming_risk']):
    """Smooth the data columns with a per-province rolling mean.

    A ``prob_inf`` column is first derived as
    ``source_active_cases_14 / source_population``. Each column in
    ``data_cols`` is then replaced, province by province, with its rolling
    mean over ``rolling`` consecutive rows. Rows are taken in the order they
    appear in ``df``, so the caller should pass them in chronological order.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Province``, ``source_active_cases_14``,
        ``source_population`` and every column named in ``other_cols`` and
        ``data_cols`` (``prob_inf`` excepted, since it is computed here).
        The input frame is not modified.
    rolling : int, optional
        Window size, in rows, of the rolling mean.
    other_cols : list of str, optional
        Columns carried over unchanged.
    data_cols : list of str, optional
        Columns to smooth.

    Returns
    -------
    pandas.DataFrame
        Columns ``other_cols + data_cols``; rows whose smoothed ``trips`` is
        NaN (the incomplete leading windows) are dropped, and ``date`` is
        converted to datetime.
    """
    other_cols, data_cols = list(other_cols), list(data_cols)

    df = df.copy()
    df['prob_inf'] = df['source_active_cases_14'] / df['source_population']
    # Explicit copy: the selection is written to below
    df = df[other_cols + data_cols].copy()
    # Rolling means are floats; cast first so integer columns are not
    # partially overwritten with values of an incompatible dtype.
    df[data_cols] = df[data_cols].astype(float)

    # Iterate over the provinces actually present in the data instead of
    # relying on a module-level lookup table.
    for province in df['Province'].unique():
        mask = df['Province'] == province
        for col in data_cols:
            df.loc[mask, col] = df.loc[mask, col].rolling(rolling).mean()

    df = df[~df["trips"].isna()].copy()
    df['date'] = pd.to_datetime(df['date'])
    return df


def plot_risk(df_data, rolling=7):
    """Draw a three-panel figure of the smoothed risk components per province.

    The panels, top to bottom, show ``prob_inf``, ``trips`` and
    ``incoming_risk`` against ``date``, with one line per ``Province``.

    Parameters
    ----------
    df_data : pandas.DataFrame
        Raw (unsmoothed) data; it is passed through ``apply_rolling_average``
        before plotting.
    rolling : int, optional
        Window size forwarded to ``apply_rolling_average``.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the three stacked axes.
    """
    data_cols = ['prob_inf', 'trips', 'incoming_risk']
    ylabels = [r'$I^{14D}/N$', 'Trips', 'Incoming MAR']

    # Smooth the frame that was passed in, using the requested window, rather
    # than reaching for a notebook-level variable.
    df_data = apply_rolling_average(df_data, rolling=rolling)

    fig, axes = plt.subplots(3, 1, figsize=(9, 4.5), dpi=300, sharex=True)

    for i, (ax, y, ylabel) in enumerate(zip(axes, data_cols, ylabels)):
        ax.set_ylabel(ylabel)

        # Panel letter (a, b, c) just above the top-left corner of the axes
        ax.text(-0.025, 1.08, string.ascii_lowercase[i], fontsize=14,
                transform=ax.transAxes, weight='bold', color='#333333')

        sns.lineplot(x="date", y=y, hue="Province", style="Province", data=df_data, ax=ax)
        ax.legend(loc="upper left", ncol=4, fontsize='small')

    # The x axis is shared, so configuring the bottom panel is enough.
    bottom_ax = axes[-1]
    # min/max instead of first/last row so the limits do not depend on row order
    bottom_ax.set(xlabel="", xlim=(df_data['date'].min(), df_data['date'].max()))
    bottom_ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
    bottom_ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))
    fig.tight_layout()

    return fig



In [None]:
# Province code -> province name lookup, held as a Series named 'Province'
# (sorted by code) so it can be merged onto the risk table below.
provincias = pd.Series(get_provincias()).rename('Province').sort_index()

pro_id = "33" # Asturias
top_k = 4 # top k sources (sorted by risk)

print("Fetching incomming MAR sources for %s" % provincias[pro_id])
df_in_mar = get_incoming_risk(pro_id)

# Append the source province name (column 'Province') by joining the 'source'
# code against the index of the lookup Series.
df_in_mar = pd.merge(df_in_mar, provincias, left_on='source', right_index=True)

print("Filtering the top %i sources:" % top_k)
df_top_k = get_k_top_mar_source(df_in_mar, top_k=top_k)
print(" / ".join(df_top_k.Province.unique()))

sns.set_style("whitegrid")
fig = plot_risk(df_top_k, rolling=7)
# savefig does not create missing directories: '../out' must already exist.
fig.savefig('../out/Figure_9.png')