# Multivariate LSTM Time Series Forecasting

In [81]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings('ignore')

In [82]:
# Load the dataset; the relative path is resolved against the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='covid_clean_imputted.csv')

In [83]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,date,state,critical_staffing_shortage_today_yes,critical_staffing_shortage_today_no,critical_staffing_shortage_today_not_reported,critical_staffing_shortage_anticipated_within_week_yes,critical_staffing_shortage_anticipated_within_week_no,critical_staffing_shortage_anticipated_within_week_not_reported,hospital_onset_covid,inpatient_beds,...,previous_day_admission_adult_covid_suspected_50-59,previous_day_admission_adult_covid_suspected_60-69,previous_day_admission_adult_covid_suspected_70-79,previous_day_admission_adult_covid_suspected_80+,previous_day_admission_adult_covid_suspected_unknown,deaths_covid,all_pediatric_inpatient_bed_occupied,all_pediatric_inpatient_beds,staffed_pediatric_icu_bed_occupancy,total_staffed_pediatric_icu_beds
0,2020-01-01,TX,0.0,0.0,3.0,0.0,0.0,3.0,0.0,790.0,...,0.0,0.0,0.0,0.0,2.2,0.0,5.2,59.8,0.0,0.2
1,2020-01-01,HI,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.2,0.0,12.6,27.6,0.0,0.0
2,2020-01-01,LA,0.0,0.0,1.0,0.0,0.0,1.0,0.0,41.2,...,0.0,0.0,0.0,0.0,0.0,0.0,136.4,150.6,40.6,46.0
3,2020-01-01,NC,0.0,0.0,1.0,0.0,0.0,1.0,0.0,72.0,...,0.0,0.0,0.0,0.0,0.0,0.0,14.0,45.6,0.0,0.0
4,2020-01-01,MN,0.0,0.0,1.0,0.0,0.0,1.0,0.0,46.0,...,0.0,0.0,0.0,0.0,0.0,0.0,14.0,45.6,0.0,0.0


In [84]:
# Convert the 'date' column to datetimes and promote it to the row index,
# replacing the default integer row numbers.
df = df.assign(date=pd.to_datetime(df['date'])).set_index('date')

In [85]:
# Order the rows chronologically by the date index (ascending).
# No secondary sort key (e.g. state) is applied here.
df = df.sort_index()

In [86]:
# sort again by state
# df = df.sort_values(by=['state'])

In [87]:
# Preview the first five rows now that the frame is indexed and sorted by date.
df.head(n=5)

Unnamed: 0_level_0,state,critical_staffing_shortage_today_yes,critical_staffing_shortage_today_no,critical_staffing_shortage_today_not_reported,critical_staffing_shortage_anticipated_within_week_yes,critical_staffing_shortage_anticipated_within_week_no,critical_staffing_shortage_anticipated_within_week_not_reported,hospital_onset_covid,inpatient_beds,inpatient_beds_used,...,previous_day_admission_adult_covid_suspected_50-59,previous_day_admission_adult_covid_suspected_60-69,previous_day_admission_adult_covid_suspected_70-79,previous_day_admission_adult_covid_suspected_80+,previous_day_admission_adult_covid_suspected_unknown,deaths_covid,all_pediatric_inpatient_bed_occupied,all_pediatric_inpatient_beds,staffed_pediatric_icu_bed_occupancy,total_staffed_pediatric_icu_beds
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-01,TX,0.0,0.0,3.0,0.0,0.0,3.0,0.0,790.0,42.0,...,0.0,0.0,0.0,0.0,2.2,0.0,5.2,59.8,0.0,0.2
2020-01-01,HI,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.2,0.0,12.6,27.6,0.0,0.0
2020-01-01,LA,0.0,0.0,1.0,0.0,0.0,1.0,0.0,41.2,31.2,...,0.0,0.0,0.0,0.0,0.0,0.0,136.4,150.6,40.6,46.0
2020-01-01,NC,0.0,0.0,1.0,0.0,0.0,1.0,0.0,72.0,53.2,...,0.0,0.0,0.0,0.0,0.0,0.0,14.0,45.6,0.0,0.0
2020-01-01,MN,0.0,0.0,1.0,0.0,0.0,1.0,0.0,46.0,25.0,...,0.0,0.0,0.0,0.0,0.0,0.0,14.0,45.6,0.0,0.0


In [25]:
# (number of rows, number of columns) of the date-indexed frame.
df.shape

(50088, 62)

## KPSS Test

In [27]:
from statsmodels.tsa.stattools import kpss
def kpss_test(series, **kw):
    """Run the KPSS stationarity test on ``series`` and print a summary.

    Parameters
    ----------
    series : array-like
        Observations handed straight to ``kpss``.
    **kw
        Extra keyword arguments forwarded unchanged to ``kpss``.

    Returns
    -------
    None
        The results are printed, not returned.

    Notes
    -----
    The KPSS null hypothesis is that the series is stationary, so a
    p-value below 0.05 is reported as "not stationary".

    The unpacking below expects ``kpss`` to return
    ``(statistic, p_value, n_lags, critical_values)``.
    NOTE(review): with ``store=True`` statsmodels returns a different
    4-tuple, so that option is presumably unsupported here - confirm.
    """
    statistic, p_value, n_lags, critical_values = kpss(series, **kw)
    # Format Output
    print(f'KPSS Statistic: {statistic}')
    print(f'p-value: {p_value}')
    print(f'num lags: {n_lags}')
    print('Critical Values:')  # label spelling fixed (was "Critial")
    for key, value in critical_values.items():
        print(f'   {key} : {value}')
    # Rejecting the null (p < 0.05) means evidence against stationarity.
    print(f'Result: The series is {"not " if p_value < 0.05 else ""}stationary')

## Multivariate Time Series Forecasting with LSTMs in Keras

In [62]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
# Make plotly's white theme the default for figures created after this point.
pio.templates.default = "plotly_white"

# Shared layout overrides: base font size plus larger x/y axis-title fonts.
plot_template = {
    "layout": go.Layout(
        font_size=18,
        xaxis_title_font_size=24,
        yaxis_title_font_size=24,
    ),
}

In [63]:
# Line chart of 'deaths_covid' against the date index, one coloured trace per state,
# with the shared font-size layout applied.
fig = px.line(
    data_frame=df,
    x=df.index,
    y='deaths_covid',
    color='state',
    title='Covid Deaths by State',
).update_layout(plot_template['layout'])
fig.show()

In [64]:
# Redraw the existing figure with the y-axis limited to the range 0 to 520.
# NOTE(review): 520 is presumably chosen to crop a few extreme values - confirm.
fig.update_yaxes(range=[0, 520]).show()