## Estudo de critérios de transição de sigmoides para modelagem matemática de múltiplas ondas epidemiológicas

Testando diferentes filtros

Importando bibliotecas e módulos implementados

In [1]:
# External libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from scipy.signal import butter, lfilter

# Implemented modules
import optimize_tg
# Obs: modified version of the new_wave module
import new_wave_filt
from new_wave_filt import get_transition_points

Definindo condições iniciais de cada sigmoide no modelo

In [2]:
# Initial Conditions

def initial_cond_0(y_t):
    """Build the starting guess ``[A0, tp0, delta0, nu0]`` for one sigmoid.

    The guess depends only on the series itself: ``A0`` is twice its maximum,
    ``tp0`` is two thirds of its length, ``delta0`` a quarter of its length and
    ``nu0`` is fixed at 1.
    """
    n_points = len(y_t)
    peak = max(y_t)
    return [2*peak, (2/3)*n_points, (1/4)*n_points, 1]

# Override optimize_tg's module-level `initial_cond` attribute with the guess
# builder defined above (presumably read by the fitting routine -- verify in optimize_tg).
optimize_tg.initial_cond = initial_cond_0

def update_cond_nw(A0, tp0):
    """Return ``A0`` and ``tp0`` unchanged, packed into a new two-element list."""
    updated = list((A0, tp0))
    return updated
# Override optimize_tg's module-level `update_cond` attribute with the pass-through
# defined above (how optimize_tg uses it is not visible here -- verify there).
optimize_tg.update_cond = update_cond_nw

## Testando diferentes filtros

Implementação dos filtros

In [3]:
# Moving average filter
def moving_average(x, win_size):
    """Moving average over ``win_size`` samples, left-padded with zeros.

    Each full window of ``x`` is averaged ('valid' convolution with a box
    kernel); ``win_size - 1`` zeros are placed in front of the averages, so for
    ``len(x) >= win_size`` the result has the same length as ``x``.
    """
    window_sums = np.convolve(x, np.ones(win_size), mode='valid')
    leading_zeros = np.zeros(win_size - 1)  # slots for which no full window exists
    return np.concatenate((leading_zeros, window_sums / win_size))

# Median filter
def median_filter(x, win_size):
    """Sliding median over ``win_size`` samples, left-padded with zeros.

    A strided view exposes every full window of ``x`` (hop of one sample) as a
    row; the row-wise medians are preceded by ``win_size - 1`` zeros, so for
    ``len(x) >= win_size`` the result has the same length as ``x``.
    """
    samples = np.array(x)
    hop = 1
    item_stride = samples.strides[0]
    n_windows = (samples.size - win_size) // hop + 1
    windows = np.lib.stride_tricks.as_strided(
        samples,
        shape=(n_windows, win_size),
        strides=(hop * item_stride, item_stride),
    )
    medians = np.median(windows, axis=1)
    leading_zeros = np.zeros(win_size - 1)  # slots for which no full window exists
    return np.append(leading_zeros, medians)

def butterworth_lowpass_filter(data, cutoff_freq, fs, order=2):
    """Apply a digital Butterworth low-pass filter to ``data``.

    ``cutoff_freq`` is normalised by the Nyquist frequency ``0.5 * fs`` (``fs``
    being the sampling rate) before the filter is designed; the filter is then
    run once, forward, along the data with ``lfilter``.
    """
    nyquist = 0.5 * fs
    coefficients = butter(order, cutoff_freq / nyquist, btype='low', analog=False)
    # coefficients is the (b, a) pair expected by lfilter
    return lfilter(*coefficients, data)

In [44]:
# Filter pipelines under comparison.
#
# Each pipeline chains one to three basic filters in the order given by its
# name (MA = moving average, ME = median, BW = Butterworth low-pass) and then
# drops the first `n_days_shift` samples, which advances the filtered signal
# to reduce the delay introduced by the filtering.

def _apply_filter_chain(data, stages, n_days_shift):
    """Run ``data`` through ``stages`` in order and drop the leading samples.

    Parameters
    ----------
    data : sequence of numbers
        Signal to filter.
    stages : sequence of (kind, window) pairs
        ``kind`` is ``'ma'``, ``'me'`` or ``'bw'``; ``window`` is the window
        size for ``'ma'``/``'me'`` and is ignored for ``'bw'``.
    n_days_shift : int
        Number of leading samples removed from the filtered signal.

    Raises
    ------
    ValueError
        If a stage kind is not one of the three known filters.
    """
    # Every pipeline uses a 2nd-order Butterworth with cutoff 14 and passes the
    # length of the *input* signal as sampling rate, i.e. the normalised cutoff
    # is 14 / (0.5 * len(data)).
    order = 2
    fs = len(data)
    cutoff = 14

    filtered_data = data
    for kind, window in stages:
        if kind == 'ma':
            filtered_data = moving_average(filtered_data, window)
        elif kind == 'me':
            filtered_data = median_filter(filtered_data, window)
        elif kind == 'bw':
            filtered_data = butterworth_lowpass_filter(filtered_data, cutoff, fs, order)
        else:
            raise ValueError(f"unknown filter stage: {kind!r}")
    return filtered_data[n_days_shift:]


# Stage descriptors shared by the pipelines below
_MA14 = ('ma', 14)   # moving average, 14-day window
_ME14 = ('me', 14)   # median filter, 14-day window
_BW = ('bw', None)   # 2nd-order Butterworth low-pass (no window)


# 1 MA
def ma(data):
    """Moving average with a 21-day window, advanced by 10 days."""
    return _apply_filter_chain(data, [('ma', 21)], 10)

# 2 ME
def me(data):
    """Median filter (14-day window), advanced by 10 days."""
    return _apply_filter_chain(data, [_ME14], 10)

# 3 BW
def bw(data):
    """Butterworth low-pass only, advanced by 15 days."""
    return _apply_filter_chain(data, [_BW], 15)

# 4 MA BW
def ma_bw(data):
    """Moving average, then Butterworth; advanced by 20 days."""
    return _apply_filter_chain(data, [_MA14, _BW], 20)

# 5 MA ME
def ma_me(data):
    """Moving average, then median; advanced by 10 days."""
    return _apply_filter_chain(data, [_MA14, _ME14], 10)

# 6 BW MA
def bw_ma(data):
    """Butterworth, then moving average; advanced by 20 days."""
    return _apply_filter_chain(data, [_BW, _MA14], 20)

# 7 BW ME
def bw_me(data):
    """Butterworth, then median; advanced by 15 days."""
    return _apply_filter_chain(data, [_BW, _ME14], 15)

# 8 ME MA
def me_ma(data):
    """Median, then moving average; advanced by 15 days."""
    return _apply_filter_chain(data, [_ME14, _MA14], 15)

# 9 ME BW
def me_bw(data):
    """Median, then Butterworth; advanced by 15 days."""
    return _apply_filter_chain(data, [_ME14, _BW], 15)

# 10 MA BW ME (Default)
def ma_bw_me(data):
    """Moving average, Butterworth, median (default chain); advanced by 25 days."""
    return _apply_filter_chain(data, [_MA14, _BW, _ME14], 25)

# 11 MA ME BW
def ma_me_bw(data):
    """Moving average, median, Butterworth; advanced by 25 days."""
    return _apply_filter_chain(data, [_MA14, _ME14, _BW], 25)

# 12 BW MA ME
def bw_ma_me(data):
    """Butterworth, moving average, median; advanced by 25 days."""
    return _apply_filter_chain(data, [_BW, _MA14, _ME14], 25)

# 13 BW ME MA
def bw_me_ma(data):
    """Butterworth, median, moving average; advanced by 25 days."""
    return _apply_filter_chain(data, [_BW, _ME14, _MA14], 25)

# 14 ME MA BW
def me_ma_bw(data):
    """Median, moving average, Butterworth; advanced by 25 days."""
    return _apply_filter_chain(data, [_ME14, _MA14, _BW], 25)

# 15 ME BW MA
def me_bw_ma(data):
    """Median, Butterworth, moving average; advanced by 25 days."""
    return _apply_filter_chain(data, [_ME14, _BW, _MA14], 25)

### Filtragem padrão (todos os filtros)

In [45]:
# Result container for one test run; 'Interval' holds the labels of the five
# cumulative wave ranges (presumably the row labels of the per-run tables).
rel_rmse_by_test = {
    'Interval': ['Wave 1', 'Waves 1 to 2', 'Waves 1 to 3', 'Waves 1 to 4', 'Waves 1 to 5'],
}

# Names of the filter pipelines to compare: single filters first, then the
# two-stage and three-stage combinations.
filter_name = [
    'ma', 'me', 'bw',
    'ma_bw', 'ma_me', 'bw_ma', 'bw_me', 'me_ma', 'me_bw',
    'ma_bw_me', 'ma_me_bw', 'bw_ma_me', 'bw_me_ma', 'me_ma_bw', 'me_bw_ma',
]

In [46]:
# Batch comparison: fit every prefecture once per filter pipeline and collect
# the relative RMSE lists in filter_result[filter][prefecture].

# Most populous prefectures in Japan
japan_pref = ['Tokyo','Kanagawa','Osaka','Aichi','Saitama','Chiba','Hyōgo','Hokkaido','Fukuoka','Shizuoka']
indicator = 'cases'
scaling_factor = 500

# The datasets do not depend on the filter under test, so each CSV is read
# once here instead of once per filter.
raw_data_by_pref = {
    pref_name: pd.read_csv(f"../Datasets/{pref_name.lower()}.csv")
    for pref_name in japan_pref
}

filter_result = {}
for current_filter in filter_name:
    # Select the filter that new_wave_filt will use for this run
    selected_filter = globals()[current_filter]
    if not callable(selected_filter):
        # Later cells rebind some pipeline names (e.g. ma_bw) to result dicts;
        # fail loudly instead of handing a dict to new_wave_filt.
        raise TypeError(
            f"'{current_filter}' is not a filter function "
            f"(found {type(selected_filter).__name__}); re-run the cell that defines the filters"
        )
    new_wave_filt.filter_data = selected_filter

    rel_rmse_by_test = {}
    rel_rmse_by_test['Interval'] = ['Wave 1', 'Waves 1 to 2', 'Waves 1 to 3', 'Waves 1 to 4', 'Waves 1 to 5']

    for city_name in japan_pref:
        data = raw_data_by_pref[city_name]

        # Fresh scaled copies for every run, so a fit can never see data
        # modified by a previous run.
        acc_data = data.cumulative_confirmed / scaling_factor  # (Japan)
        daily_data = list(data.new_confirmed / scaling_factor)

        # Transition points; when more than five are found, entries 1..5 are kept
        x_nw = get_transition_points(acc_data, visual=False, city_name=city_name, threshold=2e-6, indicator=indicator)
        if len(x_nw) > 5:
            x_nw = x_nw[1:6]

        print('x_nw:', x_nw)

        sig_params, rel_rmse_list = optimize_tg.fit_data(
            acc_data,
            daily_data,
            city_name,
            x_nw,
            indicator=indicator,
            n_weeks_pred=0,
            scaling_factor=scaling_factor,
            visual=False,
        )

        rel_rmse_by_test[city_name] = rel_rmse_list

    filter_result[current_filter] = rel_rmse_by_test
    

x_nw: [51, 58, 63, 70, 76]
Sigmoid #1 - A0:10.865003846310563 | tp0:74.19860786065563 | delta0:18.536287923220208 | nu0:1.0 
(optimal) Sigmoid #1 - A0:10.865003846310563 | tp0:74.19860786065563 | delta0:18.536287923220208 | nu0:1.0 
Sigmoid #2 - A0:1.6213134874229584 | tp0:78.40571792001172 | delta0:11.88458116599592 | nu0:1.0 
(optimal) Sigmoid #2 - A0:1.6213134874229584 | tp0:78.40571792001172 | delta0:11.88458116599592 | nu0:1.0 
Sigmoid #3 - A0:1.1102230246251565e-16 | tp0:78.58363351900276 | delta0:11.580058219422034 | nu0:1.0 
(optimal) Sigmoid #3 - A0:1.1102230246251565e-16 | tp0:78.58363351900276 | delta0:11.580058219422034 | nu0:1.0 
Sigmoid #4 - A0:1.1102230246251565e-16 | tp0:78.58363351900276 | delta0:11.580058219422034 | nu0:1.0 
(optimal) Sigmoid #4 - A0:1.1102230246251565e-16 | tp0:78.58363351900276 | delta0:11.580058219422034 | nu0:1.0 
Sigmoid #5 - A0:1.1102230246251565e-16 | tp0:78.58363351900276 | delta0:11.580058219422034 | nu0:1.0 
x_nw: [64, 77, 79, 86, 148]
Sigmo

  return A / ((1 + nu * np.exp(-1*(t - tp)/(delta)))**(1/nu))


Sigmoid #5 - A0:6.782366825038056 | tp0:118.65059560064334 | delta0:11.769791036152961 | nu0:1.0 
x_nw: [64, 70, 77, 161, 165]
Sigmoid #1 - A0:0.555456400805971 | tp0:42.68458323934739 | delta0:16.00408032723155 | nu0:1.0 
(optimal) Sigmoid #1 - A0:0.555456400805971 | tp0:42.68458323934739 | delta0:16.00408032723155 | nu0:1.0 
Sigmoid #2 - A0:0.1357885917695625 | tp0:42.68985492311457 | delta0:16.004307157780048 | nu0:1.0 
(optimal) Sigmoid #2 - A0:0.1357885917695625 | tp0:42.68985492311457 | delta0:16.004307157780048 | nu0:1.0 
Sigmoid #3 - A0:2.411982766885692 | tp0:73.04252696240316 | delta0:3.0133301893306736 | nu0:1.0 
(optimal) Sigmoid #3 - A0:2.411982766885692 | tp0:73.04252696240316 | delta0:3.0133301893306736 | nu0:1.0 
Sigmoid #4 - A0:15.194159009957742 | tp0:102.83920966001028 | delta0:11.545263368860466 | nu0:1.0 
(optimal) Sigmoid #4 - A0:15.194159009957742 | tp0:102.83920966001028 | delta0:11.545263368860466 | nu0:1.0 
Sigmoid #5 - A0:0.07200900483929303 | tp0:104.5788377

  return A / ((1 + nu * np.exp(-1*(t - tp)/(delta)))**(1/nu))
  return (A * g(t))/(delta * (1 + nu*g(t))**((nu+1)/nu))


(optimal) Sigmoid #3 - A0:0.37822201228616104 | tp0:94.56516583077263 | delta0:13.07114991433107 | nu0:1.0 
Sigmoid #4 - A0:0.00030239631779127585 | tp0:94.56607106127333 | delta0:13.071946362945877 | nu0:1.0 
(optimal) Sigmoid #4 - A0:0.00030239631779127585 | tp0:94.56607106127333 | delta0:13.071946362945877 | nu0:1.0 
Sigmoid #5 - A0:0.00732812580877695 | tp0:94.56607194224287 | delta0:13.071945862443986 | nu0:1.0 
x_nw: [74, 79, 88, 179, 181]
Sigmoid #1 - A0:0.7667668959633396 | tp0:57.21948842941457 | delta0:5.938174060513298 | nu0:1.0 
(optimal) Sigmoid #1 - A0:0.7667668959633396 | tp0:57.21948842941457 | delta0:5.938174060513298 | nu0:1.0 
Sigmoid #2 - A0:0.11665144078630635 | tp0:57.23523609602303 | delta0:5.948933907070261 | nu0:1.0 
(optimal) Sigmoid #2 - A0:0.11665144078630635 | tp0:57.23523609602303 | delta0:5.948933907070261 | nu0:1.0 
Sigmoid #3 - A0:1.3672953527083993 | tp0:82.29018691552112 | delta0:3.499601848250031 | nu0:1.0 
(optimal) Sigmoid #3 - A0:1.367295352708399

  g = lambda x: np.exp(-1*(t - tp)/delta)
  return (A * g(t))/(delta * (1 + nu*g(t))**((nu+1)/nu))


x_nw: [63, 86, 191, 193, 202]
Sigmoid #1 - A0:0.6842212361289706 | tp0:42.00766056054716 | delta0:15.751592429613961 | nu0:1.0 
(optimal) Sigmoid #1 - A0:0.6842212361289706 | tp0:42.00766056054716 | delta0:15.751592429613961 | nu0:1.0 
Sigmoid #2 - A0:6.377177476987463 | tp0:87.30743803581282 | delta0:4.917159393141348 | nu0:1.0 
(optimal) Sigmoid #2 - A0:6.377177476987463 | tp0:87.30743803581282 | delta0:4.917159393141348 | nu0:1.0 
Sigmoid #3 - A0:1.9266148326859687 | tp0:117.60695268400175 | delta0:4.92064542626497 | nu0:1.0 
(optimal) Sigmoid #3 - A0:1.9266148326859687 | tp0:117.60695268400175 | delta0:4.92064542626497 | nu0:1.0 
Sigmoid #4 - A0:0.00975356136194161 | tp0:117.62716566456946 | delta0:4.939079657531619 | nu0:1.0 
(optimal) Sigmoid #4 - A0:0.00975356136194161 | tp0:117.62716566456946 | delta0:4.939079657531619 | nu0:1.0 
Sigmoid #5 - A0:0.0775279423406244 | tp0:117.62716107541713 | delta0:4.939017456665894 | nu0:1.0 
x_nw: [74, 172, 179, 183, 190]
Sigmoid #1 - A0:0.056

  return A / ((1 + nu * np.exp(-1*(t - tp)/(delta)))**(1/nu))


(optimal) Sigmoid #2 - A0:0.5220348725459308 | tp0:122.99647846917915 | delta0:26.592167115314517 | nu0:1.0 
Sigmoid #3 - A0:39.84455117980939 | tp0:258.27793042027247 | delta0:13.73776293761707 | nu0:1.0 
(optimal) Sigmoid #3 - A0:39.84455117980939 | tp0:258.27793042027247 | delta0:13.73776293761707 | nu0:1.0 
Sigmoid #4 - A0:0.23642305532388044 | tp0:267.0710276030683 | delta0:21.939168207798332 | nu0:1.0 
(optimal) Sigmoid #4 - A0:0.23642305532388044 | tp0:267.0710276030683 | delta0:21.939168207798332 | nu0:1.0 
Sigmoid #5 - A0:18.15343617113276 | tp0:365.7881818594431 | delta0:14.359975003432222 | nu0:1.0 
x_nw: [221, 229, 305, 418, 593]
Sigmoid #1 - A0:696.1281227972937 | tp0:453.6794470003836 | delta0:58.67064222584109 | nu0:1.0 
(optimal) Sigmoid #1 - A0:696.1281227972937 | tp0:453.6794470003836 | delta0:58.67064222584109 | nu0:1.0 
Sigmoid #2 - A0:695.6972268594362 | tp0:459.23402450046166 | delta0:32.039670307355564 | nu0:1.0 
(optimal) Sigmoid #2 - A0:695.6972268594362 | tp0:

  outputs = ufunc(*inputs)


Sigmoid #5 - A0:-2.773277208301901e-09 | tp0:466.0794403861479 | delta0:0.001000000000030532 | nu0:1.0 
x_nw: [237, 332, 421, 593, 683]
Sigmoid #1 - A0:33.91182085740017 | tp0:210.0815223119781 | delta0:21.993073076839806 | nu0:1.0 
(optimal) Sigmoid #1 - A0:33.91182085740017 | tp0:210.0815223119781 | delta0:21.993073076839806 | nu0:1.0 
Sigmoid #2 - A0:14.576830756176868 | tp0:264.62854780905525 | delta0:14.31669995089438 | nu0:1.0 
(optimal) Sigmoid #2 - A0:14.576830756176868 | tp0:264.62854780905525 | delta0:14.31669995089438 | nu0:1.0 
Sigmoid #3 - A0:41.71424537485416 | tp0:373.1928542702754 | delta0:9.955655594551688 | nu0:1.0 
(optimal) Sigmoid #3 - A0:41.71424537485416 | tp0:373.1928542702754 | delta0:9.955655594551688 | nu0:1.0 
Sigmoid #4 - A0:39.25234859572762 | tp0:465.68839178776483 | delta0:10.563789775538208 | nu0:1.0 
(optimal) Sigmoid #4 - A0:39.25234859572762 | tp0:465.68839178776483 | delta0:10.563789775538208 | nu0:1.0 
Sigmoid #5 - A0:267.35156735512203 | tp0:645.1

In [48]:
# Tabulate the batch results: the outer keys of filter_result (filter names)
# become columns, the inner keys ('Interval' and each prefecture) become rows.
fr_df = pd.DataFrame(filter_result)

In [13]:
# Write the per-filter results table to an Excel workbook (path is relative to the working directory)
fr_df.to_excel('filter_results.xlsx')

In [49]:
def _percent_to_fraction(text):
    """Convert a percentage string such as ``'12.5%'`` to the fraction ``0.125``."""
    # Drop the trailing '%' and rescale
    return float(text[:-1]) / 100

# Element-wise converter for sequences of percentage strings.
# otypes is given explicitly so that an empty input yields an empty float array
# instead of raising ValueError (np.vectorize cannot infer the output type from
# zero elements); for non-empty input the result is float64 as before.
percent_fmt = np.vectorize(_percent_to_fraction, otypes=[float])

In [50]:
from collections import defaultdict

# Mean relative RMSE over the fitted intervals, per filter and prefecture,
# stored back as a percentage string rounded to three decimals.
filter_result_mean = defaultdict(dict)
for fname in filter_name:
    for pref in japan_pref:
        rmse_fractions = percent_fmt(filter_result[fname][pref])
        mean_percent = round(rmse_fractions.mean()*100, 3)
        mean_rel_rmse = str(mean_percent) + '%'
        filter_result_mean[fname][pref] = mean_rel_rmse

In [51]:
# Table of mean relative RMSE: one column per filter, one row per prefecture
frm_df = pd.DataFrame(filter_result_mean)
# Optional export of the table to Excel (currently disabled)
#frm_df.to_excel('filter_result_mean.xlsx')

In [52]:
# Display the mean relative RMSE table
frm_df


Unnamed: 0,ma,me,bw,ma_bw,ma_me,bw_ma,bw_me,me_ma,me_bw,ma_bw_me,ma_me_bw,bw_ma_me,bw_me_ma,me_ma_bw,me_bw_ma
Tokyo,6.844%,3.726%,1.467%,0.864%,1.815%,0.864%,64.883%,1.0%,0.911%,0.88%,0.879%,0.88%,0.879%,0.886%,0.886%
Kanagawa,7.19%,9.212%,4.282%,1.009%,68.138%,1.009%,0.779%,7.958%,0.851%,1.035%,1.023%,1.035%,1.034%,0.827%,0.827%
Osaka,5.286%,6.379%,0.58%,0.563%,3.708%,0.563%,0.738%,0.496%,0.66%,0.58%,0.58%,0.58%,0.58%,0.566%,0.566%
Aichi,2.487%,1.985%,0.793%,0.815%,1.913%,0.815%,0.913%,0.852%,0.952%,16.313%,0.877%,16.313%,16.313%,0.896%,0.896%
Saitama,2.735%,11.075%,2.879%,0.964%,1.877%,0.964%,1.272%,1.406%,1.04%,0.981%,0.987%,0.981%,0.981%,0.99%,0.99%
Chiba,5.039%,38.28%,1.307%,1.563%,1.15%,1.563%,28.679%,1.016%,1.207%,1.549%,1.788%,1.549%,1.549%,1.202%,1.202%
Hyōgo,11.683%,13.819%,162.653%,173.848%,7.338%,173.848%,199.479%,3.529%,179.696%,181.953%,182.248%,181.953%,182.248%,152.359%,152.359%
Hokkaido,6.622%,7.243%,1.134%,0.811%,1.755%,0.811%,1.156%,1.79%,0.819%,0.824%,0.815%,0.824%,0.815%,0.812%,0.812%
Fukuoka,4.109%,6.004%,0.785%,2.157%,2.268%,2.157%,0.935%,1.205%,0.95%,0.837%,0.831%,0.837%,0.831%,0.851%,0.851%
Shizuoka,6.762%,8.421%,319.241%,260.049%,7.444%,260.049%,199.213%,2.957%,180.233%,224.037%,224.297%,224.037%,224.297%,232.881%,232.881%


In [None]:
# Relative RMSE per wave for the ten prefectures: default chain vs Butterworth only.

# Both series come from the batch run stored in filter_result: the default
# chain MA+BW+ME ('ma_bw_me') and the Butterworth-only run ('bw'). Defining them
# here keeps the cell runnable on a fresh kernel.
standard_filt = filter_result['ma_bw_me']
bw_filt = filter_result['bw']

labels = ['Wave 1', 'Wave 2', 'Wave 3', 'Wave 4', 'Wave 5']
x = np.arange(len(labels))
width = 0.30  # the width of the bars
n_cols = 5

fig, axs = plt.subplots(2, n_cols, figsize=(25,10))
for i, pref in enumerate(japan_pref):
    # Fill the grid in reading order: first five prefectures on the top row
    row, col = divmod(i, n_cols)
    ax = axs[row][col]

    standard = percent_fmt(standard_filt[pref])
    bw_only = percent_fmt(bw_filt[pref])

    ax.set_title(pref)
    ax.bar(x - width/2, standard, width, label=f'MA+BW+ME (média: {str(round(standard.mean()*100, 3)) + "%"})')
    ax.bar(x + width/2, bw_only, width, label=f'BW (média: {str(round(bw_only.mean()*100, 3)) + "%"})')
    ax.set_ylabel('Relative RMSE')
    # The values are fractions; show the tick labels as percentages
    ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.2%}'.format(y)))
    ax.legend()
    ax.set_xticks(x)
    ax.set_xticklabels(labels)
plt.tight_layout()
plt.savefig('Figuras/10-pref-filter-test.png', dpi=200, facecolor='white')
plt.show()

## Anotações

Filtragem padrão (MA+BW+ME) foi melhor em: **Tokyo, Kanagawa, Saitama, Hokkaido, Shizuoka**


Filtragem só com Butterworth (BW) foi melhor em: **Osaka, Aichi, Chiba, Hyōgo, Fukuoka**


### MA + BW

In [None]:
def filter_data(data):
    """MA+BW filter with a plot of every stage.

    Applies a 14-day moving average followed by the 2nd-order Butterworth
    low-pass, plots the raw signal and each intermediate result, and returns
    the filtered signal with its first 20 samples removed.

    Reads the notebook-level ``city_name`` for the figure title, so it must be
    set before the function is called.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.set_title(f"Processo de filtragem - {city_name}")
    ax.set_xlabel("t (dias)")
    ax.set_ylabel("número de casos por dia normalizado")

    # Raw signal for reference
    ax.plot(data, label="sinal não filtrado", c='silver', linestyle='dashed')

    # Stage 1: moving average, 14-day window
    averaged = moving_average(data, 14)
    ax.plot(averaged, label="sinal filtrado (m.móvel[14])")

    # Stage 2: 2nd-order Butterworth low-pass (cutoff 14, sampling rate = signal length)
    smoothed = butterworth_lowpass_filter(averaged, 14, len(data), 2)
    ax.plot(smoothed, label="sinal filtrado (m.móvel[14] + bw-passa-baixas)")

    # Advance the signal by 20 samples to reduce the delay added by the filters
    n_days_shift = 20
    filtered_data = smoothed[n_days_shift:]
    ax.plot(filtered_data, label=f"sinal filtrado e adiantado({n_days_shift} dias)")

    ax.legend()
    plt.show()

    return filtered_data

# Make new_wave_filt use this filter
new_wave_filt.filter_data = filter_data

In [None]:
# Relative RMSE lists collected in this run, keyed by prefecture; 'Interval'
# holds the labels of the five cumulative wave ranges.
rel_rmse_by_test = {
    'Interval': ['Wave 1', 'Waves 1 to 2', 'Waves 1 to 3', 'Waves 1 to 4', 'Waves 1 to 5'],
}

# Most populous prefectures in Japan
japan_pref = [
    'Tokyo', 'Kanagawa', 'Osaka', 'Aichi', 'Saitama',
    'Chiba', 'Hyōgo', 'Hokkaido', 'Fukuoka', 'Shizuoka',
]

filtered_data_by_pref, filtered_daily_data_by_pref = {}, {}

for city_name in japan_pref:
    indicator = 'cases'
    scaling_factor = 500

    # Load this prefecture's dataset
    data = pd.read_csv(f"../Datasets/{city_name.lower()}.csv")

    # Cumulative series: max-normalised list copy, time axis, then the scaled series
    acc_data = data['cumulative_confirmed']  # (Japan)
    normalized_acc_data = (acc_data / max(acc_data)).tolist()
    t = np.linspace(0, len(acc_data)-1, len(acc_data))
    acc_data = acc_data / scaling_factor

    # Daily series, scaled by the same factor
    daily_data = list(data['new_confirmed'] / scaling_factor)

    # Run (and plot) the filter under test on both series
    filtered_data_by_pref[city_name] = filter_data(acc_data)
    filtered_daily_data_by_pref[city_name] = filter_data(daily_data)

    # Transition points; when more than five are found, entries 1..5 are kept
    x_nw = get_transition_points(
        acc_data,
        visual=False,
        city_name=city_name,
        threshold=2e-6,
        indicator=indicator,
    )
    if len(x_nw) > 5:
        x_nw = x_nw[1:6]
    print('x_nw:', x_nw)

    # Fit the sigmoids and keep the relative RMSE list for this prefecture
    sig_params, rel_rmse_list = optimize_tg.fit_data(
        acc_data,
        daily_data,
        city_name,
        x_nw,
        indicator=indicator,
        n_weeks_pred=0,
        scaling_factor=scaling_factor,
        visual=False,
    )
    rel_rmse_by_test[city_name] = rel_rmse_list

    

In [None]:
# Keep the MA+BW results under their own name and show them as a table.
# NOTE(review): this rebinds `ma_bw`, which is also the name of the MA+BW filter
# function that the batch-comparison cell looks up through globals(); re-running
# that cell after this one would find this dict instead of the function.
ma_bw = rel_rmse_by_test
pd.DataFrame(ma_bw)

### BW + ME

In [None]:
def filter_data(data):
    """BW+ME filter with a plot of every stage.

    Applies the 2nd-order Butterworth low-pass followed by a 14-day median
    filter, plots the raw signal and each intermediate result, and returns the
    filtered signal with its first 20 samples removed.

    Reads the notebook-level ``city_name`` for the figure title, so it must be
    set before the function is called.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.set_title(f"Processo de filtragem - {city_name}")
    ax.set_xlabel("t (dias)")
    ax.set_ylabel("número de casos por dia normalizado")

    # Raw signal for reference
    ax.plot(data, label="sinal não filtrado", c='silver', linestyle='dashed')

    # Stage 1: 2nd-order Butterworth low-pass (cutoff 14, sampling rate = signal length)
    order = 2
    fs = len(data)  # sampling rate
    cutoff = 14  # cutoff freq.
    filtered_data = butterworth_lowpass_filter(data, cutoff, fs, order)
    # Legend fixed: no moving average is applied in this chain
    ax.plot(filtered_data, label="sinal filtrado (bw-passa-baixas)")

    # Stage 2: median filter, 14-day window
    filtered_data = median_filter(filtered_data, 14)
    ax.plot(filtered_data, label="sinal filtrado (bw-passa-baixas + mediana[14])")

    # Advance the signal by 20 samples to reduce the delay added by the filters.
    # NOTE(review): the batch version of this chain (bw_me) uses a 15-sample
    # shift -- confirm which value is intended.
    n_days_shift = 20
    filtered_data = filtered_data[n_days_shift:]
    ax.plot(filtered_data, label=f"sinal filtrado e adiantado({n_days_shift} dias)")

    ax.legend()
    plt.show()

    return filtered_data

# Make new_wave_filt use this filter
new_wave_filt.filter_data = filter_data

In [None]:
# Relative RMSE lists collected in this run, keyed by prefecture; 'Interval'
# holds the labels of the five cumulative wave ranges.
rel_rmse_by_test = {
    'Interval': ['Wave 1', 'Waves 1 to 2', 'Waves 1 to 3', 'Waves 1 to 4', 'Waves 1 to 5'],
}

# Most populous prefectures in Japan
japan_pref = [
    'Tokyo', 'Kanagawa', 'Osaka', 'Aichi', 'Saitama',
    'Chiba', 'Hyōgo', 'Hokkaido', 'Fukuoka', 'Shizuoka',
]

filtered_data_by_pref, filtered_daily_data_by_pref = {}, {}

for city_name in japan_pref:
    indicator = 'cases'
    scaling_factor = 500

    # Load this prefecture's dataset
    data = pd.read_csv(f"../Datasets/{city_name.lower()}.csv")

    # Cumulative series: max-normalised list copy, time axis, then the scaled series
    acc_data = data['cumulative_confirmed']  # (Japan)
    normalized_acc_data = (acc_data / max(acc_data)).tolist()
    t = np.linspace(0, len(acc_data)-1, len(acc_data))
    acc_data = acc_data / scaling_factor

    # Daily series, scaled by the same factor
    daily_data = list(data['new_confirmed'] / scaling_factor)

    # Run (and plot) the filter under test on both series
    filtered_data_by_pref[city_name] = filter_data(acc_data)
    filtered_daily_data_by_pref[city_name] = filter_data(daily_data)

    # Transition points; when more than five are found, entries 1..5 are kept
    x_nw = get_transition_points(
        acc_data,
        visual=False,
        city_name=city_name,
        threshold=2e-6,
        indicator=indicator,
    )
    if len(x_nw) > 5:
        x_nw = x_nw[1:6]
    print('x_nw:', x_nw)

    # Fit the sigmoids and keep the relative RMSE list for this prefecture
    sig_params, rel_rmse_list = optimize_tg.fit_data(
        acc_data,
        daily_data,
        city_name,
        x_nw,
        indicator=indicator,
        n_weeks_pred=0,
        scaling_factor=scaling_factor,
        visual=False,
    )
    rel_rmse_by_test[city_name] = rel_rmse_list

    

In [None]:
# Keep the BW+ME results under their own name and show them as a table.
# NOTE(review): this rebinds `bw_me`, which is also the name of the BW+ME filter
# function that the batch-comparison cell looks up through globals(); re-running
# that cell after this one would find this dict instead of the function.
bw_me = rel_rmse_by_test
pd.DataFrame(bw_me)

### BW + MA + ME

In [None]:
def filter_data(data):
    """BW+MA+ME filter with a plot of every stage.

    Applies the 2nd-order Butterworth low-pass, a 14-day moving average and a
    14-day median filter, in that order; plots the raw signal and each
    intermediate result, and returns the filtered signal with its first 25
    samples removed.

    Reads the notebook-level ``city_name`` for the figure title, so it must be
    set before the function is called.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.set_title(f"Processo de filtragem - {city_name}")
    ax.set_xlabel("t (dias)")
    ax.set_ylabel("número de casos por dia normalizado")

    # Raw signal for reference
    ax.plot(data, label="sinal não filtrado", c='silver', linestyle='dashed')

    # Stage 1: 2nd-order Butterworth low-pass (cutoff 14, sampling rate = signal length)
    order = 2
    fs = len(data)  # sampling rate
    cutoff = 14  # cutoff freq.
    filtered_data = butterworth_lowpass_filter(data, cutoff, fs, order)
    # Legends fixed so that each one lists exactly the stages applied so far
    ax.plot(filtered_data, label="sinal filtrado (bw-passa-baixas)")

    # Stage 2: moving average, 14-day window
    filtered_data = moving_average(filtered_data, 14)
    ax.plot(filtered_data, label="sinal filtrado (bw-passa-baixas + m.móvel[14])")

    # Stage 3: median filter, 14-day window
    filtered_data = median_filter(filtered_data, 14)
    ax.plot(filtered_data, label="sinal filtrado (bw-passa-baixas + m.móvel[14] + mediana[14])")

    # Advance the signal by 25 samples to reduce the delay added by the filters
    n_days_shift = 25
    filtered_data = filtered_data[n_days_shift:]
    ax.plot(filtered_data, label=f"sinal filtrado e adiantado({n_days_shift} dias)")

    ax.legend()
    plt.show()

    return filtered_data

# Make new_wave_filt use this filter
new_wave_filt.filter_data = filter_data

In [None]:
# Relative RMSE lists collected in this run, keyed by prefecture; 'Interval'
# holds the labels of the five cumulative wave ranges.
rel_rmse_by_test = {
    'Interval': ['Wave 1', 'Waves 1 to 2', 'Waves 1 to 3', 'Waves 1 to 4', 'Waves 1 to 5'],
}

# Most populous prefectures in Japan
japan_pref = [
    'Tokyo', 'Kanagawa', 'Osaka', 'Aichi', 'Saitama',
    'Chiba', 'Hyōgo', 'Hokkaido', 'Fukuoka', 'Shizuoka',
]

filtered_data_by_pref, filtered_daily_data_by_pref = {}, {}

for city_name in japan_pref:
    indicator = 'cases'
    scaling_factor = 500

    # Load this prefecture's dataset
    data = pd.read_csv(f"../Datasets/{city_name.lower()}.csv")

    # Cumulative series: max-normalised list copy, time axis, then the scaled series
    acc_data = data['cumulative_confirmed']  # (Japan)
    normalized_acc_data = (acc_data / max(acc_data)).tolist()
    t = np.linspace(0, len(acc_data)-1, len(acc_data))
    acc_data = acc_data / scaling_factor

    # Daily series, scaled by the same factor
    daily_data = list(data['new_confirmed'] / scaling_factor)

    # Run (and plot) the filter under test on both series
    filtered_data_by_pref[city_name] = filter_data(acc_data)
    filtered_daily_data_by_pref[city_name] = filter_data(daily_data)

    # Transition points; when more than five are found, entries 1..5 are kept
    x_nw = get_transition_points(
        acc_data,
        visual=False,
        city_name=city_name,
        threshold=2e-6,
        indicator=indicator,
    )
    if len(x_nw) > 5:
        x_nw = x_nw[1:6]
    print('x_nw:', x_nw)

    # Fit the sigmoids and keep the relative RMSE list for this prefecture
    sig_params, rel_rmse_list = optimize_tg.fit_data(
        acc_data,
        daily_data,
        city_name,
        x_nw,
        indicator=indicator,
        n_weeks_pred=0,
        scaling_factor=scaling_factor,
        visual=False,
    )
    rel_rmse_by_test[city_name] = rel_rmse_list

    

In [None]:
# Keep the BW+MA+ME results under their own name and show them as a table.
# NOTE(review): this rebinds `bw_ma_me`, which is also the name of the BW+MA+ME
# filter function that the batch-comparison cell looks up through globals();
# re-running that cell after this one would find this dict instead of the function.
bw_ma_me = rel_rmse_by_test
pd.DataFrame(bw_ma_me)

### MA+ME+BW

In [None]:
def filter_data(data):
    """MA+ME+BW filter with a plot of every stage.

    Applies a 14-day moving average, a 14-day median filter and the 2nd-order
    Butterworth low-pass, in that order; plots the raw signal and each
    intermediate result, and returns the filtered signal with its first 25
    samples removed.

    Reads the notebook-level ``city_name`` for the figure title, so it must be
    set before the function is called.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.set_title(f"Processo de filtragem - {city_name}")
    ax.set_xlabel("t (dias)")
    ax.set_ylabel("número de casos por dia normalizado")

    # Raw signal for reference
    ax.plot(data, label="sinal não filtrado", c='silver', linestyle='dashed')

    # Stage 1: moving average, 14-day window
    filtered_data = moving_average(data, 14)
    ax.plot(filtered_data, label="sinal filtrado (m.móvel[14])")

    # Stage 2: median filter, 14-day window
    filtered_data = median_filter(filtered_data, 14)
    # Legends fixed so that each one lists exactly the stages applied so far
    ax.plot(filtered_data, label="sinal filtrado (m.móvel[14] + mediana[14])")

    # Stage 3: 2nd-order Butterworth low-pass (cutoff 14, sampling rate = signal length)
    order = 2
    fs = len(data)  # sampling rate
    cutoff = 14  # cutoff freq.
    filtered_data = butterworth_lowpass_filter(filtered_data, cutoff, fs, order)
    ax.plot(filtered_data, label="sinal filtrado (m.móvel[14] + mediana[14] + bw-passa-baixas)")

    # Advance the signal by 25 samples to reduce the delay added by the filters
    n_days_shift = 25
    filtered_data = filtered_data[n_days_shift:]
    ax.plot(filtered_data, label=f"sinal filtrado e adiantado({n_days_shift} dias)")

    ax.legend()
    plt.show()

    return filtered_data

# Make new_wave_filt use this filter
new_wave_filt.filter_data = filter_data

In [None]:
# Relative RMSE lists collected in this run, keyed by prefecture; 'Interval'
# holds the labels of the five cumulative wave ranges.
rel_rmse_by_test = {
    'Interval': ['Wave 1', 'Waves 1 to 2', 'Waves 1 to 3', 'Waves 1 to 4', 'Waves 1 to 5'],
}

# Most populous prefectures in Japan
japan_pref = [
    'Tokyo', 'Kanagawa', 'Osaka', 'Aichi', 'Saitama',
    'Chiba', 'Hyōgo', 'Hokkaido', 'Fukuoka', 'Shizuoka',
]

filtered_data_by_pref, filtered_daily_data_by_pref = {}, {}

for city_name in japan_pref:
    indicator = 'cases'
    scaling_factor = 500

    # Load this prefecture's dataset
    data = pd.read_csv(f"../Datasets/{city_name.lower()}.csv")

    # Cumulative series: max-normalised list copy, time axis, then the scaled series
    acc_data = data['cumulative_confirmed']  # (Japan)
    normalized_acc_data = (acc_data / max(acc_data)).tolist()
    t = np.linspace(0, len(acc_data)-1, len(acc_data))
    acc_data = acc_data / scaling_factor

    # Daily series, scaled by the same factor
    daily_data = list(data['new_confirmed'] / scaling_factor)

    # Run (and plot) the filter under test on both series
    filtered_data_by_pref[city_name] = filter_data(acc_data)
    filtered_daily_data_by_pref[city_name] = filter_data(daily_data)

    # Transition points; when more than five are found, entries 1..5 are kept
    x_nw = get_transition_points(
        acc_data,
        visual=False,
        city_name=city_name,
        threshold=2e-6,
        indicator=indicator,
    )
    if len(x_nw) > 5:
        x_nw = x_nw[1:6]
    print('x_nw:', x_nw)

    # Fit the sigmoids and keep the relative RMSE list for this prefecture
    sig_params, rel_rmse_list = optimize_tg.fit_data(
        acc_data,
        daily_data,
        city_name,
        x_nw,
        indicator=indicator,
        n_weeks_pred=0,
        scaling_factor=scaling_factor,
        visual=False,
    )
    rel_rmse_by_test[city_name] = rel_rmse_list

    

In [None]:
# Keep the MA+ME+BW results under their own name and show them as a table.
# NOTE(review): this rebinds `ma_me_bw`, which is also the name of the MA+ME+BW
# filter function that the batch-comparison cell looks up through globals();
# re-running that cell after this one would find this dict instead of the function.
ma_me_bw = rel_rmse_by_test
pd.DataFrame(ma_me_bw)

In [None]:
# Relative RMSE per wave for two prefectures, one bar per filter variant.
pref_plot =  ['Hokkaido','Fukuoka']

# The default chain (MA+BW+ME) and the Butterworth-only results come from the
# batch run stored in filter_result; defining them here keeps the cell runnable
# on a fresh kernel. The other three series are the dicts stored by the
# per-filter sections above.
standard_filt = filter_result['ma_bw_me']
bw_filt = filter_result['bw']

labels = ['Wave 1', 'Wave 2', 'Wave 3', 'Wave 4', 'Wave 5']
x = np.arange(len(labels))
width = 0.8  # total width reserved for one group of bars

# (bar offset, legend name, results by prefecture). The MA+BW series was
# disabled in the original cell; its slot at x - width/6 is kept free so the
# remaining bars stay where they were.
series = [
    (-width/2, 'MA+BW+ME', standard_filt),
    (-width/3, 'BW', bw_filt),
    (0, 'BW+ME', bw_me),
    (width/6, 'BW+MA+ME', bw_ma_me),
    (width/3, 'MA+ME+BW', ma_me_bw),
]

fig, axs = plt.subplots(1, 2, figsize=(25,10))

for i, pref in enumerate(pref_plot):
    ax = axs[i]
    ax.set_title(pref)
    for offset, name, results in series:
        values = percent_fmt(results[pref])
        ax.bar(x + offset, values, width/6, label=f'{name} (média: {str(round(values.mean()*100, 3)) + "%"})')
    ax.set_ylabel('Relative RMSE')
    # The values are fractions; show the tick labels as percentages
    ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.2%}'.format(y)))
    ax.legend()
    ax.set_xticks(x)
    ax.set_xticklabels(labels)
plt.tight_layout()
#plt.savefig('Figuras/10-pref-filter-test.png', dpi=50, facecolor='white')
plt.show()