In [1]:
import pandas as pd
from datetime import datetime
import numpy as np


df_columns = [ "DATE", "MVAR1", "MVAR2", "MVAR3", "MVAR4","ISO3", "OVAR1", "OVAR3", "OVAR5", "OVAR7", "OVAR11", "OVAR13", "OVAR15", "OVAR21", "OVAR24", "OVAR40"]
# Source: https://sites.google.com/view/snuaric/data/covid-19-data/covid-19-data?authuser=0
# ISO3 is a country variable

# data gotten from https://www.worldometers.info/world-population/south-korea-population/
korean_population=51343545   # Friday, March 11, 2022

#MVAR1= Cum Confirmed cases
#MVAR2= Cum Deaths Cases
#MVAR3= Cum Recovered Cases
#MVAR4= Quarantined Cases

df = pd.read_csv("D:\Research_work\covid_parameters_2\dataset_prepare\covid_de_world.csv", header=0, usecols=df_columns)

df2 = df[df["ISO3"] == "KOR"]
def parse(x):
	return datetime.strptime(x, '%d%b%y')

df2["DATE"] = df2["DATE"].apply(parse)
# target is AVAR1 which is newly confirmed cases
df_new = df.set_index('DATE')
df2=df2.sort_values(by=['DATE'], ascending=True)

df2.drop(df2.head(25+2).index,inplace=True) # drop last n rows
df2.drop(df2.tail(499).index,inplace=True) # drop last n rows
dates=df2["DATE"][0:-1].tolist()

len(df2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["DATE"] = df2["DATE"].apply(parse)


343

In [2]:
df_new = df.set_index('DATE')

#Cummulative numbers
infected_cum=df2[['MVAR1']].to_numpy()
quarantined_cum=df2[['MVAR4']].to_numpy()
deceased_cum=df2[['MVAR2']].to_numpy()
recovered_cum=df2[['MVAR3']].to_numpy()

pol1=df2[['OVAR1']][0:-1].to_numpy()
pol2=df2[['OVAR3']][0:-1].to_numpy()
pol3=df2[['OVAR5']][0:-1].to_numpy()
pol4=df2[['OVAR7']][0:-1].to_numpy()
pol5=df2[['OVAR11']][0:-1].to_numpy()
pol6=df2[['OVAR13']][0:-1].to_numpy()
pol7=df2[['OVAR15']][0:-1].to_numpy()
pol8=df2[['OVAR21']][0:-1].to_numpy()
pol9=df2[['OVAR24']][0:-1].to_numpy()
pol10=df2[['OVAR40']][0:-1].to_numpy()



#Daily numbers
infected=infected_cum[1:len(infected_cum)]-infected_cum[0:len(infected_cum)-1]
quarantined=quarantined_cum[0:len(infected_cum)-1]
deceased=deceased_cum[1:len(deceased_cum)]-deceased_cum[0:len(deceased_cum)-1]
recovered=recovered_cum[1:len(recovered_cum)]-recovered_cum[0:len(recovered_cum)-1]


oner=np.ones((len(infected),1))
residual=infected+quarantined+deceased+recovered
susceptible=korean_population*oner-residual
days=np.arange(len(infected))

In [3]:
from scipy.signal import savgol_filter
from scipy.interpolate import CubicSpline


sampling_fact=10;  # defining factor

######defining factors##########
window_len=11
degree_poly=2
sampling_fact=3;
################


#SIQRD model states:

#Susceptible
sig=np.squeeze(susceptible)
smooth_sig=savgol_filter(sig,window_len,degree_poly)
cp_sig = CubicSpline(days[0:len(days):sampling_fact],smooth_sig[0:len(days):sampling_fact])
cubic_sig_p=cp_sig(days)
if np.min(cubic_sig_p)<0:
    cubic_sig_p=cubic_sig_p+np.abs(np.min(cubic_sig_p)) 
S=cubic_sig_p


#Infected
sig=np.squeeze(infected)
smooth_sig=savgol_filter(sig,window_len,degree_poly)
cp_sig = CubicSpline(days[0:len(days):sampling_fact],smooth_sig[0:len(days):sampling_fact])
cubic_sig_p=cp_sig(days)
if np.min(cubic_sig_p)<0:
    cubic_sig_p=cubic_sig_p+np.abs(np.min(cubic_sig_p))
I=cubic_sig_p

#Quarantined
sig=np.squeeze(quarantined)
smooth_sig=savgol_filter(sig,window_len,degree_poly)
cp_sig = CubicSpline(days[0:len(days):sampling_fact],smooth_sig[0:len(days):sampling_fact])
cubic_sig_p=cp_sig(days)
if np.min(cubic_sig_p)<0:
    cubic_sig_p=cubic_sig_p+np.abs(np.min(cubic_sig_p))
Q=cubic_sig_p

#Recovered
sig=np.squeeze(recovered)
smooth_sig=savgol_filter(sig,window_len,degree_poly)
cp_sig = CubicSpline(days[0:len(days):sampling_fact],smooth_sig[0:len(days):sampling_fact])
cubic_sig_p=cp_sig(days)
if np.min(cubic_sig_p)<0:
    cubic_sig_p=cubic_sig_p+np.abs(np.min(cubic_sig_p))
R=cubic_sig_p

#Deceased
sig=np.squeeze(deceased)
smooth_sig=savgol_filter(sig,window_len,degree_poly)
cp_sig = CubicSpline(days[0:len(days):sampling_fact],smooth_sig[0:len(days):sampling_fact])
cubic_sig_p=cp_sig(days)
if np.min(cubic_sig_p)<0:
    cubic_sig_p=cubic_sig_p+np.abs(np.min(cubic_sig_p))
D=cubic_sig_p

data=[days,S,I,Q,R,D]
data=np.array(np.transpose(data))

print(np.min(I))
print(np.min(Q))
print(np.min(R))
print(np.min(D))
print(np.min(S))

dataset = pd.DataFrame({ 'S': np.squeeze(S), 'I': np.squeeze(I), 'Q': np.squeeze(Q), 'R': np.squeeze(R), 'D': np.squeeze(D),
                            'Pol1': np.squeeze(pol1), 'Pol2': np.squeeze(pol2), 'Pol3': np.squeeze(pol3), 'Pol4': np.squeeze(pol4), 'Pol5': np.squeeze(pol5),
                            'Pol6': np.squeeze(pol6), 'Pol7': np.squeeze(pol7), 'Pol8': np.squeeze(pol8), 'Pol9': np.squeeze(pol9), 'Pol10': np.squeeze(pol10)})
dataset.to_csv("states_data.csv")


0.0
0.0
0.0
0.0
51323977.82377621


In [4]:
dataset = pd.DataFrame({'DATE': dates, 'S': np.squeeze(S), 'I': np.squeeze(I), 'Q': np.squeeze(Q), 'R': np.squeeze(R), 'D': np.squeeze(D),
                            'Pol1': np.squeeze(pol1), 'Pol2': np.squeeze(pol2), 'Pol3': np.squeeze(pol3), 'Pol4': np.squeeze(pol4), 'Pol5': np.squeeze(pol5),
                            'Pol6': np.squeeze(pol6), 'Pol7': np.squeeze(pol7), 'Pol8': np.squeeze(pol8), 'Pol9': np.squeeze(pol9), 'Pol10': np.squeeze(pol10)})
dataset.to_csv("states_data_with_dates.csv")