In [65]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import statsmodels.api as sm

import plotly.graph_objects as go

from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error
from math import inf

In [66]:
# Load the full COVID data CSV once; later cells filter it by `location`
# and read its `new_cases` column.
world_dataset_path = "../../../data/stage_II/owid-covid-data.csv"
world_dataset = pd.read_csv(world_dataset_path)

# United States

In [67]:
# Linear baseline: regress daily new cases on the number of days elapsed
# since the first row with a positive case count.
country = "United States"
country_dataset = world_dataset.loc[world_dataset.location == country].fillna(0)
first_case_label = country_dataset.loc[country_dataset.new_cases > 0].index[0]
temp = country_dataset.loc[first_case_label:, "new_cases"]
new_cases = pd.DataFrame({"days": np.arange(1, len(temp) + 1), "new_cases": temp})
lr_model = smf.ols(formula='new_cases ~ days', data=new_cases).fit()
new_cases['cases_pred'] = lr_model.predict(new_cases.days)

observed_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['new_cases'],
    mode='markers',
    name='cases',
    marker=dict(color='blue'),
)
linear_fit_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['cases_pred'],
    mode='lines',
    marker=dict(size=10),
    name='best-fit',
    line=dict(color='red'),
)
fig = go.Figure()
fig.add_trace(observed_trace)
# add_trace returns the figure, so this final expression is the cell's output.
fig.add_trace(linear_fit_trace)

In [68]:
# In-sample RMSE of the linear fit (observed values first, fitted values second).
np.sqrt(mean_squared_error(new_cases.new_cases, new_cases.cases_pred))

45050.93133249329

In [69]:
def check_rmse(new_cases, max_degree=19):
    """Return the polynomial degree with the lowest in-sample RMSE.

    For every degree from 0 to ``max_degree`` (inclusive) an OLS model is
    fitted on polynomial features of ``new_cases.days``. Its fitted values are
    rounded up with ``np.ceil`` and scored against ``new_cases.new_cases``.

    Parameters
    ----------
    new_cases : pandas.DataFrame
        Frame with a ``days`` column (regressor) and a ``new_cases`` column
        (target). It is only read; no column is added or modified.
    max_degree : int, default 19
        Highest degree tried. The default reproduces the original
        ``range(20)`` search.

    Returns
    -------
    int
        The best-scoring degree. On ties the lowest degree wins because the
        comparison is strict. Returns 0 if no degree scores below infinity.

    Notes
    -----
    The score is computed on the same rows used for fitting, so it measures
    goodness of fit, not predictive accuracy.
    """
    days_column = new_cases.days.values.reshape(-1, 1)
    observed = new_cases.new_cases.values

    best_rmse = inf
    best_degree = 0
    for degree in range(max_degree + 1):
        design = PolynomialFeatures(degree=degree).fit_transform(days_column)
        fitted = sm.OLS(observed.reshape(-1, 1), design).fit()
        # Keep predictions local rather than writing a 'poly_pred' column into
        # the caller's frame as a side effect of model selection.
        rounded_pred = np.ceil(fitted.predict(design))
        candidate_rmse = np.sqrt(mean_squared_error(observed, rounded_pred))
        if candidate_rmse < best_rmse:
            best_rmse = candidate_rmse
            best_degree = degree
    return best_degree

In [70]:
# Polynomial fit on the frame currently held in `new_cases`, using the degree
# that check_rmse selects.
best_degree = check_rmse(new_cases)
polynomial_features = PolynomialFeatures(degree=best_degree)
y = new_cases.new_cases
days_column = new_cases.days.values.reshape(-1, 1)
xp = polynomial_features.fit_transform(days_column)
pm = sm.OLS(y.values.reshape(-1, 1), xp).fit()
# Fitted values are rounded up before being stored and scored.
new_cases['poly_pred'] = np.ceil(pm.predict(xp))
temp_rmse = np.sqrt(mean_squared_error(new_cases.new_cases, new_cases.poly_pred))

observed_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['new_cases'],
    mode='markers',
    name='cases',
    marker=dict(color='blue'),
)
poly_fit_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['poly_pred'],
    mode='lines',
    marker=dict(size=10),
    name='best-fit',
    line=dict(color='red'),
)
fig = go.Figure()
fig.add_trace(observed_trace)
# add_trace returns the figure, so this final expression is the cell's output.
fig.add_trace(poly_fit_trace)

In [71]:
# In-sample RMSE of the polynomial fit (observed values first, fitted values second).
np.sqrt(mean_squared_error(new_cases.new_cases, new_cases.poly_pred))

18316.767615542143

In [72]:
def trends(country, poly_degree: int):
    """Fit a polynomial trend to one country's daily new cases and plot it.

    Parameters
    ----------
    country : str
        Value matched against the ``location`` column of the notebook-level
        ``world_dataset`` frame.
    poly_degree : int
        Degree of the polynomial features built from the day counter.

    Returns
    -------
    tuple
        ``(new_cases, fig, reg_model)``: the frame with ``days``,
        ``new_cases`` and ``poly_pred`` columns, the plotly figure with the
        observed points and the fitted curve, and the fitted OLS results.

    Raises
    ------
    IndexError
        If no row for ``country`` has ``new_cases > 0``.
    """
    country_dataset = world_dataset[world_dataset.location == country].fillna(0)
    # Start the series at the first row with a positive case count.
    first_case_label = country_dataset[country_dataset.new_cases > 0].index[0]
    temp = country_dataset.loc[first_case_label:].new_cases
    new_cases = pd.DataFrame({"days": np.arange(1, temp.shape[0] + 1), "new_cases": temp})

    polynomial_features = PolynomialFeatures(degree=poly_degree)
    y = new_cases.new_cases
    xp = polynomial_features.fit_transform(new_cases.days.values.reshape(new_cases.days.shape[0], 1))
    reg_model = sm.OLS(y.values.reshape(y.shape[0], 1), xp).fit()
    # Predict with the model fitted above. The previous code used the
    # notebook-global `pm`, which belongs to whatever cell last assigned it
    # and may have a different degree or have been fitted on another country.
    new_cases['poly_pred'] = np.ceil(reg_model.predict(xp))

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(x=new_cases['days'], y=new_cases['new_cases'], mode='markers', name='cases', marker=dict(color='blue')),
    )
    fig.add_trace(
        go.Scatter(x=new_cases['days'], y=new_cases['poly_pred'], mode='lines', marker=dict(size=10), name='best-fit',
                  line=dict(color='red'))
    )
    return new_cases, fig, reg_model

In [73]:
# Fit the United States series with a degree-9 polynomial; `data` and `model`
# are read by the forecasting cells further down.
data, fig, model = trends(country="United States", poly_degree=9)

In [74]:
# Render the figure returned by the trends(...) call in the previous cell.
fig.show()

In [75]:
def prediction(input_df, days, reg_model=None):
    """Extrapolate a fitted polynomial model beyond the last observed day.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Frame with a ``days`` column; its last row gives the last fitted day.
    days : int
        Number of consecutive future days to predict.
    reg_model : fitted statsmodels results, optional
        Model used for prediction. Defaults to the notebook-global ``model``
        (the original behaviour).

    Returns
    -------
    numpy.ndarray
        One predicted value per future day, in chronological order.
    """
    fitted = model if reg_model is None else reg_model
    # The design matrix must have exactly one column per model coefficient.
    # Derive the degree from the model instead of relying on the global
    # `polynomial_features`, which may have been built for another degree.
    degree = len(fitted.params) - 1
    future_days = input_df.iloc[-1].days + 1 + np.arange(days)
    xp = PolynomialFeatures(degree=degree).fit_transform(future_days.reshape(-1, 1))
    return fitted.predict(xp)

In [76]:
# Forecast the next 7 days and wrap the values in a one-column frame.
# NOTE(review): this rebinds the name `prediction` from the helper function to
# a DataFrame, so re-running this cell without first re-running the cell that
# defines the function fails ('DataFrame' object is not callable).
prediction = pd.DataFrame(prediction(data, 7), columns=["cases"])

# United States 7 Day Prediction

In [77]:
# Plot the forecast values against the frame's index.
forecast_trace = go.Scatter(x=prediction.index, y=prediction['cases'])
fig = go.Figure(data=forecast_trace)
fig.show()

# Bangladesh

In [78]:
# Linear baseline: regress daily new cases on the number of days elapsed
# since the first row with a positive case count.
country = "Bangladesh"
country_dataset = world_dataset.loc[world_dataset.location == country].fillna(0)
first_case_label = country_dataset.loc[country_dataset.new_cases > 0].index[0]
temp = country_dataset.loc[first_case_label:, "new_cases"]
new_cases = pd.DataFrame({"days": np.arange(1, len(temp) + 1), "new_cases": temp})
lr_model = smf.ols(formula='new_cases ~ days', data=new_cases).fit()
new_cases['cases_pred'] = lr_model.predict(new_cases.days)

observed_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['new_cases'],
    mode='markers',
    name='cases',
    marker=dict(color='blue'),
)
linear_fit_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['cases_pred'],
    mode='lines',
    marker=dict(size=10),
    name='best-fit',
    line=dict(color='red'),
)
fig = go.Figure()
fig.add_trace(observed_trace)
# add_trace returns the figure, so this final expression is the cell's output.
fig.add_trace(linear_fit_trace)

In [79]:
# In-sample RMSE of the linear fit (observed values first, fitted values second).
np.sqrt(mean_squared_error(new_cases.new_cases, new_cases.cases_pred))

1045.5759428403053

In [80]:
# Polynomial fit on the frame currently held in `new_cases`, using the degree
# that check_rmse selects.
best_degree = check_rmse(new_cases)
polynomial_features = PolynomialFeatures(degree=best_degree)
y = new_cases.new_cases
days_column = new_cases.days.values.reshape(-1, 1)
xp = polynomial_features.fit_transform(days_column)
pm = sm.OLS(y.values.reshape(-1, 1), xp).fit()
# Fitted values are rounded up before being stored and scored.
new_cases['poly_pred'] = np.ceil(pm.predict(xp))
temp_rmse = np.sqrt(mean_squared_error(new_cases.new_cases, new_cases.poly_pred))

observed_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['new_cases'],
    mode='markers',
    name='cases',
    marker=dict(color='blue'),
)
poly_fit_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['poly_pred'],
    mode='lines',
    marker=dict(size=10),
    name='best-fit',
    line=dict(color='red'),
)
fig = go.Figure()
fig.add_trace(observed_trace)
# add_trace returns the figure, so this final expression is the cell's output.
fig.add_trace(poly_fit_trace)

In [81]:
# In-sample RMSE of the polynomial fit (observed values first, fitted values second).
np.sqrt(mean_squared_error(new_cases.new_cases, new_cases.poly_pred))

367.58070861793914

In [82]:
# Refit Bangladesh through trends() with a degree-7 polynomial and render the chart.
data, fig, model = trends(country="Bangladesh", poly_degree=7)
fig.show()

# Indonesia

In [83]:
# Linear baseline: regress daily new cases on the number of days elapsed
# since the first row with a positive case count.
country = "Indonesia"
country_dataset = world_dataset.loc[world_dataset.location == country].fillna(0)
first_case_label = country_dataset.loc[country_dataset.new_cases > 0].index[0]
temp = country_dataset.loc[first_case_label:, "new_cases"]
new_cases = pd.DataFrame({"days": np.arange(1, len(temp) + 1), "new_cases": temp})
lr_model = smf.ols(formula='new_cases ~ days', data=new_cases).fit()
new_cases['cases_pred'] = lr_model.predict(new_cases.days)

observed_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['new_cases'],
    mode='markers',
    name='cases',
    marker=dict(color='blue'),
)
linear_fit_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['cases_pred'],
    mode='lines',
    marker=dict(size=10),
    name='best-fit',
    line=dict(color='red'),
)
fig = go.Figure()
fig.add_trace(observed_trace)
# add_trace returns the figure, so this final expression is the cell's output.
fig.add_trace(linear_fit_trace)

In [84]:
# In-sample RMSE of the linear fit (observed values first, fitted values second).
np.sqrt(mean_squared_error(new_cases.new_cases, new_cases.cases_pred))

1523.3393009238978

In [85]:
# Polynomial fit on the frame currently held in `new_cases`, using the degree
# that check_rmse selects.
best_degree = check_rmse(new_cases)
polynomial_features = PolynomialFeatures(degree=best_degree)
y = new_cases.new_cases
days_column = new_cases.days.values.reshape(-1, 1)
xp = polynomial_features.fit_transform(days_column)
pm = sm.OLS(y.values.reshape(-1, 1), xp).fit()
# Fitted values are rounded up before being stored and scored.
new_cases['poly_pred'] = np.ceil(pm.predict(xp))
temp_rmse = np.sqrt(mean_squared_error(new_cases.new_cases, new_cases.poly_pred))

observed_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['new_cases'],
    mode='markers',
    name='cases',
    marker=dict(color='blue'),
)
poly_fit_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['poly_pred'],
    mode='lines',
    marker=dict(size=10),
    name='best-fit',
    line=dict(color='red'),
)
fig = go.Figure()
fig.add_trace(observed_trace)
# add_trace returns the figure, so this final expression is the cell's output.
fig.add_trace(poly_fit_trace)

In [86]:
# In-sample RMSE of the polynomial fit (observed values first, fitted values second).
np.sqrt(mean_squared_error(new_cases.new_cases, new_cases.poly_pred))

797.6242611383644

In [87]:
# Refit Indonesia through trends() with a degree-11 polynomial and render the chart.
data, fig, model = trends(country="Indonesia", poly_degree=11)
fig.show()

# Pakistan

In [88]:
# Linear baseline: regress daily new cases on the number of days elapsed
# since the first row with a positive case count.
country = "Pakistan"
country_dataset = world_dataset.loc[world_dataset.location == country].fillna(0)
first_case_label = country_dataset.loc[country_dataset.new_cases > 0].index[0]
temp = country_dataset.loc[first_case_label:, "new_cases"]
new_cases = pd.DataFrame({"days": np.arange(1, len(temp) + 1), "new_cases": temp})
lr_model = smf.ols(formula='new_cases ~ days', data=new_cases).fit()
new_cases['cases_pred'] = lr_model.predict(new_cases.days)

observed_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['new_cases'],
    mode='markers',
    name='cases',
    marker=dict(color='blue'),
)
linear_fit_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['cases_pred'],
    mode='lines',
    marker=dict(size=10),
    name='best-fit',
    line=dict(color='red'),
)
fig = go.Figure()
fig.add_trace(observed_trace)
# add_trace returns the figure, so this final expression is the cell's output.
fig.add_trace(linear_fit_trace)

In [89]:
# In-sample RMSE of the linear fit (observed values first, fitted values second).
np.sqrt(mean_squared_error(new_cases.new_cases, new_cases.cases_pred))

1430.9345107443057

In [90]:
# Polynomial fit on the frame currently held in `new_cases`, using the degree
# that check_rmse selects.
best_degree = check_rmse(new_cases)
polynomial_features = PolynomialFeatures(degree=best_degree)
y = new_cases.new_cases
days_column = new_cases.days.values.reshape(-1, 1)
xp = polynomial_features.fit_transform(days_column)
pm = sm.OLS(y.values.reshape(-1, 1), xp).fit()
# Fitted values are rounded up before being stored and scored.
new_cases['poly_pred'] = np.ceil(pm.predict(xp))
temp_rmse = np.sqrt(mean_squared_error(new_cases.new_cases, new_cases.poly_pred))

observed_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['new_cases'],
    mode='markers',
    name='cases',
    marker=dict(color='blue'),
)
poly_fit_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['poly_pred'],
    mode='lines',
    marker=dict(size=10),
    name='best-fit',
    line=dict(color='red'),
)
fig = go.Figure()
fig.add_trace(observed_trace)
# add_trace returns the figure, so this final expression is the cell's output.
fig.add_trace(poly_fit_trace)

In [97]:
# Refit Pakistan through trends() with a degree-7 polynomial and render the chart.
data, fig, model = trends(country="Pakistan", poly_degree=7)
fig.show()

# Nigeria

In [None]:
# Linear baseline: regress daily new cases on the number of days elapsed
# since the first row with a positive case count.
country = "Nigeria"
country_dataset = world_dataset.loc[world_dataset.location == country].fillna(0)
first_case_label = country_dataset.loc[country_dataset.new_cases > 0].index[0]
temp = country_dataset.loc[first_case_label:, "new_cases"]
new_cases = pd.DataFrame({"days": np.arange(1, len(temp) + 1), "new_cases": temp})
lr_model = smf.ols(formula='new_cases ~ days', data=new_cases).fit()
new_cases['cases_pred'] = lr_model.predict(new_cases.days)

observed_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['new_cases'],
    mode='markers',
    name='cases',
    marker=dict(color='blue'),
)
linear_fit_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['cases_pred'],
    mode='lines',
    marker=dict(size=10),
    name='best-fit',
    line=dict(color='red'),
)
fig = go.Figure()
fig.add_trace(observed_trace)
# add_trace returns the figure, so this final expression is the cell's output.
fig.add_trace(linear_fit_trace)

In [None]:
# In-sample RMSE of the linear fit (observed values first, fitted values second).
np.sqrt(mean_squared_error(new_cases.new_cases, new_cases.cases_pred))

In [None]:
# Polynomial fit on the frame currently held in `new_cases`, using the degree
# that check_rmse selects.
best_degree = check_rmse(new_cases)
polynomial_features = PolynomialFeatures(degree=best_degree)
y = new_cases.new_cases
days_column = new_cases.days.values.reshape(-1, 1)
xp = polynomial_features.fit_transform(days_column)
pm = sm.OLS(y.values.reshape(-1, 1), xp).fit()
# Fitted values are rounded up before being stored and scored.
new_cases['poly_pred'] = np.ceil(pm.predict(xp))
temp_rmse = np.sqrt(mean_squared_error(new_cases.new_cases, new_cases.poly_pred))

observed_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['new_cases'],
    mode='markers',
    name='cases',
    marker=dict(color='blue'),
)
poly_fit_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['poly_pred'],
    mode='lines',
    marker=dict(size=10),
    name='best-fit',
    line=dict(color='red'),
)
fig = go.Figure()
fig.add_trace(observed_trace)
# add_trace returns the figure, so this final expression is the cell's output.
fig.add_trace(poly_fit_trace)

In [None]:
# Refit Nigeria through trends() with a degree-8 polynomial and render the chart.
data, fig, model = trends(country="Nigeria", poly_degree=8)
fig.show()

# Brazil

In [None]:
# Linear baseline: regress daily new cases on the number of days elapsed
# since the first row with a positive case count.
country = "Brazil"
country_dataset = world_dataset.loc[world_dataset.location == country].fillna(0)
first_case_label = country_dataset.loc[country_dataset.new_cases > 0].index[0]
temp = country_dataset.loc[first_case_label:, "new_cases"]
new_cases = pd.DataFrame({"days": np.arange(1, len(temp) + 1), "new_cases": temp})
lr_model = smf.ols(formula='new_cases ~ days', data=new_cases).fit()
new_cases['cases_pred'] = lr_model.predict(new_cases.days)

observed_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['new_cases'],
    mode='markers',
    name='cases',
    marker=dict(color='blue'),
)
linear_fit_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['cases_pred'],
    mode='lines',
    marker=dict(size=10),
    name='best-fit',
    line=dict(color='red'),
)
fig = go.Figure()
fig.add_trace(observed_trace)
# add_trace returns the figure, so this final expression is the cell's output.
fig.add_trace(linear_fit_trace)

In [None]:
# In-sample RMSE of the linear fit (observed values first, fitted values second).
np.sqrt(mean_squared_error(new_cases.new_cases, new_cases.cases_pred))

In [None]:
# Polynomial fit on the frame currently held in `new_cases`, using the degree
# that check_rmse selects.
best_degree = check_rmse(new_cases)
polynomial_features = PolynomialFeatures(degree=best_degree)
y = new_cases.new_cases
days_column = new_cases.days.values.reshape(-1, 1)
xp = polynomial_features.fit_transform(days_column)
pm = sm.OLS(y.values.reshape(-1, 1), xp).fit()
# Fitted values are rounded up before being stored and scored.
new_cases['poly_pred'] = np.ceil(pm.predict(xp))
temp_rmse = np.sqrt(mean_squared_error(new_cases.new_cases, new_cases.poly_pred))

observed_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['new_cases'],
    mode='markers',
    name='cases',
    marker=dict(color='blue'),
)
poly_fit_trace = go.Scatter(
    x=new_cases['days'],
    y=new_cases['poly_pred'],
    mode='lines',
    marker=dict(size=10),
    name='best-fit',
    line=dict(color='red'),
)
fig = go.Figure()
fig.add_trace(observed_trace)
# add_trace returns the figure, so this final expression is the cell's output.
fig.add_trace(poly_fit_trace)

In [None]:
# Refit Brazil through trends() with a degree-8 polynomial and render the chart.
data, fig, model = trends(country="Brazil", poly_degree=8)
fig.show()