In [1]:
import pandas as pd
import numpy as np 

# Load the raw case table and shorten the two column names used throughout.
# Only 'Last Update' is parsed as a datetime; the observation date column is
# left exactly as read from the CSV.
df = pd.read_csv(
    'data/novel-corona-virus-2019-dataset/covid_19_data.csv',
    parse_dates=['Last Update'],
).rename(columns={'ObservationDate': 'Date', 'Country/Region': 'Country'})

# "Eradicated" = cases that are no longer open (deaths + recoveries);
# "Active" = confirmed cases that are still open.
df["Eradicated"] = df["Deaths"] + df["Recovered"]
df["Active"] = df["Confirmed"] - df["Eradicated"]

In [44]:
def growth_rate(data=None):
    """Return the element-over-element relative change of a 1-D sequence.

    The first entry is always 0 (there is no previous value to compare with);
    entry ``k`` (k >= 1) is ``(data[k] - data[k-1]) / data[k-1]``.

    Parameters
    ----------
    data : pandas.Series, numpy.ndarray, list or None
        Numeric values in the order they should be compared. ``None`` is
        treated like an empty sequence.

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``data``, so it can be assigned
        straight back as a DataFrame column. A zero previous value yields
        ``inf`` (or ``nan`` for 0/0), as plain NumPy division does.
    """
    if data is None:
        return np.array([], dtype=float)

    values = np.asarray(data, dtype=float)
    if values.size == 0:
        # Keep the output aligned with the input instead of returning [0].
        return np.array([], dtype=float)

    rates = np.zeros(values.shape[0], dtype=float)
    # Division by a zero baseline is expected in this data; silence the
    # warning but keep the inf/nan markers so callers can still detect it.
    with np.errstate(divide="ignore", invalid="ignore"):
        rates[1:] = np.diff(values) / values[:-1]
    return rates

In [45]:
# df["Active"].head()

# Store the row-over-row relative change of "Active" for the whole frame.
# NOTE(review): growth_rate compares each row with the row directly before it,
# and df is neither grouped nor sorted here, so consecutive rows presumably
# belong to different countries/regions — confirm this is the intended baseline.
df["Growth Rate"] = growth_rate(df["Active"])

# df.groupby("Country")[['Growth Rate']].sum().sort_values(by=['Growth Rate'], ascending=False).reset_index().tail()


In [56]:
# Top 10 countries ranked by total "Eradicated" (deaths + recoveries).
# 'Date' is deliberately not part of the selection: it is not a numeric
# column, so summing it either concatenates strings or raises, depending on
# the pandas version.
# NOTE(review): the sums run over every row of a country, i.e. across all
# dates; if the source counts are cumulative per date these totals overstate
# the real figures — confirm.
eradicated = (
    df.groupby("Country")[['Confirmed', 'Deaths', 'Recovered', 'Active', 'Eradicated']]
    .sum()
    .sort_values(by=['Eradicated'], ascending=False)
    .reset_index()
    .head(10)
)

eradicated

Unnamed: 0,Country,Confirmed,Deaths,Recovered,Active,Eradicated
0,Mainland China,5321801.0,192276.0,3462768.0,1666757.0,3655044.0
1,Spain,2746558.0,253417.0,748696.0,1744445.0,1002113.0
2,Italy,3139708.0,358875.0,515773.0,2265060.0,874648.0
3,Germany,2117675.0,34001.0,656844.0,1426830.0,690845.0
4,Iran,1404759.0,88961.0,591248.0,724550.0,680209.0
5,US,6886003.0,219107.0,353340.0,6313556.0,572447.0
6,France,1878998.0,167567.0,340945.0,1370486.0,508512.0
7,South Korea,401301.0,5404.0,148481.0,247416.0,153885.0
8,Switzerland,470881.0,14447.0,134183.0,322251.0,148630.0
9,UK,1019040.0,106910.0,6943.0,905187.0,113853.0


In [47]:
# for country in recovered["Country"]:
# Per-date totals for each of the top countries, keyed by country name.
# Only numeric columns are aggregated: "Date" is the grouping key (it comes
# back as a column via reset_index) and "Country" is constant within each
# frame, so neither belongs in the summed selection.
numeric_columns = ["Confirmed", "Deaths", "Recovered", "Eradicated", "Active", "Growth Rate"]
eradicators = {
    country: (
        df[df['Country'] == country]
        .groupby("Date")[numeric_columns]
        .sum()
        .reset_index()
    )
    for country in eradicated["Country"]
}

#.sort_values(by=["Eradicated"], ascending=False).head()
# df.groupby("Country")sum().sort_values(ascending=False).head()

# recoverors = df[df['Country'] == '']
# china = df[df['Country'] == 'Mainland China'].groupby("Date")[["Date", "Confirmed", "Deaths", "Recovered"]].sum().reset_index()
# spain = df[df['Country'] == 'Spain'].groupby("Date")[["Confirmed", "Deaths", "Recovered"]].sum().reset_index()
# germany = df[df['Country'] == 'Germany'].groupby("Date")[["Confirmed", "Deaths", "Recovered"]].sum().reset_index()

In [48]:
import plotly.graph_objects as go

# One bar trace per top country showing its active cases per date.
fig = go.Figure()

for country, country_totals in eradicators.items():
    fig.add_trace(go.Bar(
        x=country_totals['Date'],
        # The title, y-axis label and output file all describe *active*
        # cases, so plot "Active" rather than "Confirmed".
        y=country_totals["Active"],
        name=country
    ))

fig.update_layout(
    title="Active Cases of COVID19",
    title_x = 0.5,
    xaxis_title="Date",
    xaxis= {
        # Abbreviated month names on the tick labels.
        'tickformat': '%b',
    },
    yaxis_title="Number of Active Cases",
    font={
        "family":"Courier New, monospace",
    },
)

fig.write_image("images/active_countries.png")
fig.show()

In [None]:
import plotly.graph_objects as go

# Bar chart of active cases per date for Mainland China.
# `china` is looked up here so the cell works on a fresh kernel run from top
# to bottom; previously it was only assigned in a later cell.
china = eradicators["Mainland China"]

fig = go.Figure()
fig.add_trace(go.Bar(
    x=china['Date'],
    y=china["Active"],
))

fig.update_layout(
    title="Active Cases in Mainland China",
    title_x = 0.5,
    xaxis_title="Date",
    xaxis= {
        # Abbreviated month names on the tick labels.
        'tickformat': '%b',
    },
    yaxis_title="Number of Active Cases",
    font={
        "family":"Courier New, monospace",
    },
)

fig.write_image("images/china_active.png")
fig.show()

In [None]:
from sklearn.model_selection import train_test_split

# Per-date totals for Mainland China; "Days" is the row position, used as the
# single regression feature.
china = eradicators["Mainland China"]
china["Days"] = china.index

# scikit-learn expects 2-D arrays, hence the column-vector reshape.
day_index = china["Days"].values.reshape(-1, 1)
active_cases = china["Active"].values.reshape(-1, 1)

# 70/30 split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    day_index,
    active_cases,
    test_size=0.30,
    random_state=42,
)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Degree-6 polynomial regression of active cases on the day index.
poly_reg = PolynomialFeatures(degree=6)
# Fit the feature expansion on the training data only.
x_poly = poly_reg.fit_transform(X_train)

lr = LinearRegression()
lr.fit(x_poly, y_train)

# Intercept of the fitted polynomial.
print(lr.intercept_)
# Coefficients of the polynomial terms (one per expanded feature).
print(lr.coef_)

# Reuse the already-fitted transformer: `transform`, not `fit_transform`, so
# the held-out and full data are never used to (re)fit the preprocessing step.
y_pred_test = lr.predict(poly_reg.transform(X_test))
y_pred = lr.predict(poly_reg.transform(china["Days"].values.reshape(-1,1)))


In [None]:
# from sklearn.preprocessing import PolynomialFeatures
# from sklearn import linear_model

# poly = PolynomialFeatures(degree=4)
# X = poly.fit_transform(X_train)
# X_test = poly.fit_transform(X_test)

# clf = linear_model.LinearRegression()
# clf.fit(X, y_train)

# y_pred = clf.predict(poly.fit_transform(china["Days"]))

In [None]:
# from sklearn.linear_model import BayesianRidge

# br = BayesianRidge(tol=1e-6, fit_intercept=False, compute_score=True)
# br.fit(X_train, y_train)

# y_pred = br.predict(china["Days"].values.reshape(-1,1))

In [None]:
# from scipy.optimize import curve_fit

In [None]:
# Observed active cases (bars) with the fitted polynomial overlaid (markers).
model_traces = [
    go.Bar(x=china['Date'], y=china["Active"]),
    go.Scatter(x=china['Date'], y=y_pred.flatten(), mode="markers"),
]
fig = go.Figure(data=model_traces)

layout_options = {
    "title": "Active Cases in Mainland China",
    "title_x": 0.5,
    "xaxis_title": "Date",
    "yaxis_title": "Number of Active Cases",
    "font": {"family": "Courier New, monospace"},
}
fig.update_layout(**layout_options)

fig.write_image("images/china_model.png")
fig.show()

In [None]:
from scipy.optimize import curve_fit 
# Total active cases per date over all rows of df.
# Select 'Active' before summing: aggregating the whole frame would also try
# to sum the datetime 'Last Update' column and the string columns, which is
# wasteful and raises on recent pandas versions.
active = df.groupby('Date', as_index=False)['Active'].sum()
# Row position doubles as the day index used by the model.
active["Days"] = active.index

def fit_funct(x, a):
    '''
    Evaluate a fixed degree-6 polynomial at ``x``.

    The intercept and coefficients are hard-coded below, lowest-order
    coefficient first (``coef[k]`` multiplies ``x**k``).
    NOTE(review): the values look like a printed ``lr.intercept_`` /
    ``lr.coef_`` pair from the polynomial regression — confirm the source.

    Parameters
    ----------
    x : float or numpy.ndarray
        Point(s) at which to evaluate the polynomial.
    a : float
        Present so the function has the ``f(x, *params)`` signature that
        ``scipy.optimize.curve_fit`` expects; it does not influence the
        result. NOTE(review): decide what ``a`` should scale or shift.

    Returns
    -------
    float or numpy.ndarray
        ``intercept + sum(coef[k] * x**k)``, same shape as ``x``.
    '''
    intercept = 10229.28660973
    coef = [0.00000000e+00, -5.97847366e+03,  9.70360124e+02, -4.28570157e+01,
            8.18184961e-01, -7.23095037e-03,  2.43486762e-05]

    # Horner's method: fold from the highest-order coefficient down.
    total = 0.0
    for c in reversed(coef):
        total = total * x + c
    return intercept + total


# Fit against the regression predictions. `eval_polynomial` is not defined
# anywhere in this notebook; `fit_funct`, defined above in this cell, is the
# model function with the (x, a) signature curve_fit expects.
param, param_cov = curve_fit(fit_funct, active["Days"], y_pred.flatten())

# TODO: `active_world` was left without a right-hand side (a bare
# `active_world =` is a syntax error that prevents the whole cell from
# running). Assign it once the intended value is known.

# y_pred = lr.predict(poly_reg.fit_transform(active["Days"].values.reshape(-1,1)))

# fig = go.Figure()
# fig.add_trace(go.Bar(
#     x=active['Date'],
#     y=active["Active"]
# ))

# fig.add_trace(go.Scatter(
#     x=active['Date'],
#     y=y_pred
# ))

# fig.update_layout(
#     title="Active Cases in Mainland China",
#     title_x = 0.5,
#     xaxis_title="Date",
#     # xaxis= {
#     #     'tickformat': '%b',
#     #     # 'tickvals': pd.date_range('2020-1', '2020-4', freq='MS')
#     # },
#     yaxis_title="Number of Active Cases",
#     font={
#         "family":"Courier New, monospace",
#         # size=18,
#         # color="#7f7f7f"
#     },
#     # legend={
#     #     "x": 0,
#     #     "y": 1
#     # },
# )

# fig.write_image("images/china_model.png")
# fig.show()

In [None]:
# Per-date active totals from `active` (bars) with the regression predictions
# `y_pred` overlaid.
fig = go.Figure()
fig.add_trace(go.Bar(
    x=active['Date'],
    y=active["Active"]
))

fig.add_trace(go.Scatter(
    x=active['Date'],
    # y_pred comes out of the regression as a column vector; plotly needs a
    # 1-D sequence, as in the earlier China model plot.
    y=y_pred.flatten()
))

# NOTE(review): the bars are totals over all of df, yet the title and the
# output file name still say Mainland China, and writing to
# "images/china_model.png" overwrites the earlier model figure — confirm the
# intended title and file name.
fig.update_layout(
    title="Active Cases in Mainland China",
    title_x = 0.5,
    xaxis_title="Date",
    yaxis_title="Number of Active Cases",
    font={
        "family":"Courier New, monospace",
    },
)

fig.write_image("images/china_model.png")
fig.show()