In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell runs and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import copy
import os
import yaml
from datetime import timedelta

import sys
sys.path.append('../../')

from data.processing.processing import generate_simulated_data
from data.dataloader import SimulatedDataLoader

from main.seir.fitting import single_fitting_cycle
from utils.generic.config import read_config
from utils.fitting.loss import Loss_Calculator
from utils.fitting.smooth_jump import smooth_big_jump

from viz.data import plot_data
from utils.generic.enums.columns import *
from viz.utils import setup_plt, axis_formatter

In [None]:
# Experiment configuration shared by every cell below.
# NOTE(review): read_config is given a bare file name; the directory it is
# resolved against is decided inside read_config and is not visible here.
config_filename = 'simulate_data.yaml'
config = read_config(config_filename)

In [None]:
def perform_smoothing_experiment(config, smoothing_method='weighted-mag', verbose=True):
    """Run one spike-then-smooth trial on simulated data and score the result.

    Steps:
      1. Load the simulated-data YAML named in
         ``config['fitting']['data']['dataloading_params']['config_file']``
         and generate a clean data frame with ``SimulatedDataLoader``.
      2. Inject a spike that starts at the configured ``start_date`` and lasts
         a random number of days drawn from NumPy's global RNG
         (``np.random.randint(14, 28)``, i.e. 14..27 inclusive).
      3. Smooth the spiked frame with ``smooth_big_jump`` using
         ``smoothing_method``.
      4. Compare the smoothed frame against the clean frame over
         ``[start_date, end_date]`` (both ends inclusive).

    Args:
        config: Parsed experiment config. Only
            ``config['fitting']['data']['dataloading_params']`` is read; it is
            deep-copied, so the caller's config is never modified.
        smoothing_method: Method name forwarded to ``smooth_big_jump`` as the
            last element of the smoothing-params entry.
        verbose: When True (default, the historical behaviour) print the spike
            and smoothing parameters. Pass False when calling in a large loop
            to avoid flooding the notebook output.

    Returns:
        list[float]: ``[active_error, recovered_error]`` - the mean absolute
        percentage error (in percent) of the smoothed ``'active'`` and
        ``'recovered'`` columns relative to the clean data.

    Note:
        The error divides by the clean values without a zero guard, so a zero
        in the clean ``'active'``/``'recovered'`` data inside the window
        yields ``inf``/``nan`` for that column.
    """
    # Load synthetic data config file
    config_file = config['fitting']['data']['dataloading_params']['config_file']
    with open(os.path.join("../../configs/simulated_data/", config_file)) as configfile:
        sim_config = yaml.load(configfile, Loader=yaml.SafeLoader)

    # Generate synthetic (unspiked) data
    loader = SimulatedDataLoader()
    df = loader.load_data(**sim_config)['data_frame']

    # Deep copy so that writing end_date below cannot leak into the caller's config
    dl_params = copy.deepcopy(config['fitting']['data']['dataloading_params'])
    spike_params = dl_params['simulate_spike_params']

    # Randomly choose the spike length; `high` is exclusive, so 14..27 days
    sdate = spike_params['start_date']
    spike_days = np.random.randint(low=14, high=28)
    edate = sdate + timedelta(days=spike_days)
    spike_params['end_date'] = edate

    # Simulate Spike
    if verbose:
        print('spike generation params', spike_params)
    df_spike = loader.simulate_spike(df=df, **spike_params)

    # Create smoothing params on the basis of start and end date of spikes.
    # The positional layout of the list is defined by smooth_big_jump (not
    # visible here); it is reproduced exactly as the notebook always passed it.
    comp = spike_params['comp']
    smooth_params = {
        edate - timedelta(days=1): [sdate, comp, 'active', False, smoothing_method],
    }

    # Perform smoothing
    if verbose:
        print('smoothing params', smooth_params)
    df_spike_smooth, _ = smooth_big_jump(df_spike, smooth_params)

    def _spike_window(frame):
        """Return the 'active'/'recovered' values of `frame` inside [sdate, edate]."""
        day = frame['date'].dt.date
        return frame.loc[(day >= sdate) & (day <= edate), ['active', 'recovered']].to_numpy()

    # Compare df_spike_smooth and df
    processed_vals = _spike_window(df_spike_smooth)
    true_vals = _spike_window(df)
    return (np.mean(np.abs(processed_vals - true_vals)/true_vals, axis=0)*100).tolist()

In [None]:
# Seed NumPy's global RNG so the np.random.randint spike-length draws in the
# cells below are repeatable from a fresh kernel.
np.random.seed(0)

In [None]:
# Number of independent spike/smooth trials to average over.
N_TRIALS = 5000

# Each trial contributes one [active_error_pct, recovered_error_pct] pair for
# the 'uniform' smoothing method. An explicit loop (rather than a
# comprehension) keeps the partial `results` if the run is interrupted.
results = []
for _trial in range(N_TRIALS):
    results.append(perform_smoothing_experiment(config, 'uniform'))

In [None]:
# Column-wise average over all trials: [mean 'active' error %, mean 'recovered' error %]
np.mean(np.asarray(results), axis=0)

In [None]:
# Single illustrative run of the same pipeline as perform_smoothing_experiment,
# executed at notebook level (with the 'weighted-mag' method) so that df,
# df_spike and df_spike_smooth stay available to the plotting cells below.

# Load synthetic data config file
config_file = config['fitting']['data']['dataloading_params']['config_file']
with open(os.path.join("../../configs/simulated_data/", config_file)) as configfile:
    sim_config = yaml.load(configfile, Loader=yaml.SafeLoader)
# Generate synthetic data
loader = SimulatedDataLoader()
res = loader.load_data(**sim_config)
df = res['data_frame']

# Work on a deep copy so the end_date written below does not leak into `config`
dl_params = copy.deepcopy(config['fitting']['data']['dataloading_params'])
dl_params['simulate_spike'] = True
# Draw the spike length in days from NumPy's global RNG (`high` is exclusive: 14..27)
start_date = dl_params['simulate_spike_params']['start_date']
spike_days = np.random.randint(low=14, high=28)
# Simulate Spike
dl_params['simulate_spike_params']['end_date'] = start_date + timedelta(days=spike_days)
print('spike generation params', dl_params['simulate_spike_params'])
df_spike = loader.simulate_spike(df=df, **dl_params['simulate_spike_params'])

# Create smoothing params on the basis of start and end date of spikes
# (the positional layout of the list is defined by smooth_big_jump, not visible here)
comp = dl_params['simulate_spike_params']['comp']
edate = dl_params['simulate_spike_params']['end_date']
sdate = dl_params['simulate_spike_params']['start_date']
smooth_params = {}
smooth_params[edate - timedelta(days=1)] = [sdate, comp, 'active', False, 'weighted-mag']

# Perform smoothing
print('smoothing params', smooth_params)
df_spike_smooth, _ = smooth_big_jump(df_spike, smooth_params)

# Compare df_spike_smooth and df: 'active'/'recovered' values inside [sdate, edate], inclusive.
# NOTE(review): processed_vals and true_vals are not read by any later cell visible here.
processed_vals = df_spike_smooth.loc[(df_spike_smooth['date'].dt.date >= sdate) & 
                                     (df_spike_smooth['date'].dt.date <= edate), ['active', 'recovered']].to_numpy()
true_vals = df.loc[(df['date'].dt.date >= sdate) & (df['date'].dt.date <= edate), ['active', 'recovered']].to_numpy()

In [None]:
# Quick visual check of the smoothed frame using the project's plot_data helper.
# NOTE(review): the return value is presumably a matplotlib figure - confirm in viz.data.
fig = plot_data(df_spike_smooth)

In [None]:
import matplotlib as mpl
## Paper-style text: LaTeX rendering, 20pt, Palatino family
## (for other serif fonts change the `family` value)
plt.rc('text', usetex=True)
plt.rc('font', size=20, family='Palatino')

In [None]:
fig, ax = plt.subplots(figsize=(12, 12))

# All three frames are indexed by the same date column; look its name up once.
date_col = compartments['date'][0].name
# One entry per data source: (frame, format string, label template, extra plot kwargs)
sources = (
    (df, '--', 'Simulated Data, Unspiked ({})', {}),
    (df_spike, '-o', 'Simulated Data, Spiked ({})', {'ms': 5}),
    (df_spike_smooth, '-', 'Smoothed Data ({})', {}),
)
for comp in ['active', 'total', 'recovered', 'deceased']:
    compartment = Columns.from_name(comp)
    # Draw unspiked, spiked and smoothed curves for this compartment, in that order
    for frame, fmt, label_template, extra in sources:
        ax.plot(frame[date_col].to_numpy(), frame[compartment.name].to_numpy(), fmt,
                color=compartment.color, label=label_template.format(compartment.label), **extra)

# Legend 1: line style -> data source. It is re-attached with add_artist so the
# second ax.legend call below does not replace it.
first_legend = ax.legend(
    handles=[
        Line2D([0], [0], ls='--', color='black', label='Simulated Data, Unspiked'),
        Line2D([0], [0], ls='-', marker='o', ms=5, color='black', label='Simulated Data, Spiked'),
        Line2D([0], [0], ls='-', color='black', label='Smoothed Data'),
    ],
    loc='upper left',
)
ax.add_artist(first_legend)

# Legend 2: colour -> compartment.
# NOTE(review): these colours are hard-coded rather than taken from
# compartment.color; they are assumed to match the Columns colours - confirm.
legend_elements = [
    Line2D([0], [0], ls='-', color=colour, label=text)
    for colour, text in (
        ('C0', 'Confirmed Cases'),
        ('orange', 'Active Cases'),
        ('green', 'Recovered'),
        ('red', 'Deceased'),
    )
]
ax.legend(handles=legend_elements, loc=[0.015, 0.64])

axis_formatter(ax)
ax.set_title('Comparison of smoothing algorithm with ground truth simulated data')
plt.tight_layout()
fig.savefig('../../../paper/plots/smoothing-simulated.pdf', format='pdf', bbox_inches='tight', pad_inches=0)