In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.graph_objects as go # data viz
from plotly.subplots import make_subplots # create subplots
import datetime # work with date format
from scipy.optimize import curve_fit # modelling

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# Any results you write to the current directory are saved as output.

In [None]:
# Load the coronavirus CSV; its first row (header=0) supplies the column names.
coronavirus_csv_path = "/kaggle/input/coronavirus-data-set-free-api-for-realtime-data/coronavirus.csv"
raw_data = pd.read_csv(coronavirus_csv_path, header=0)
# Show the first rows as the cell output.
raw_data.head()

In [None]:
# Columns from position 8 onward are handled as per-date counts (their labels
# are parsed as "%m/%d/%Y" dates further down); missing counts are set to 0
# before the cast to int.
date_columns = raw_data.columns[8:]
transformed_data_1 = (
    raw_data[date_columns]
    .fillna(0)
    .astype(int)
    .assign(
        CountryOrRegion=raw_data["CountryOrRegion"],
        Type=raw_data["Type"],
    )
)

# Sum the counts of all rows sharing the same (country, type) pair, then turn
# the two group keys back into ordinary columns.
transformed_data_2 = (
    transformed_data_1
    .groupby(["CountryOrRegion", "Type"])
    .sum()
    .reset_index()
)

# Replace the date-string column labels (everything after the two key columns)
# with datetime objects.
parsed_date_labels = {
    label: datetime.datetime.strptime(label, "%m/%d/%Y")
    for label in transformed_data_2.columns[2:]
}
transformed_data_2 = transformed_data_2.rename(columns=parsed_date_labels)
transformed_data_2

In [None]:
def _cases_of_type(data, case_type):
    """Return the rows of ``data`` whose "Type" equals ``case_type``, indexed by country.

    Parameters:
        data: frame holding "CountryOrRegion" and "Type" columns plus the count columns.
        case_type: value of the "Type" column to keep (e.g. "Confirmed").

    Returns:
        A new frame indexed by "CountryOrRegion" with the "Type" column removed.
        Nothing is modified in place, so no operation is applied to a slice of
        ``data`` (which is what triggers pandas' SettingWithCopyWarning).
    """
    return (
        data.loc[data["Type"] == case_type]
        .set_index("CountryOrRegion")
        .drop(columns="Type")
    )


last_date = transformed_data_2.columns[-1]
# Keep only the rows whose count on the last date reaches 100. The frame holds
# one row per (country, type), so the threshold is applied to each type
# separately: a country can keep its "Confirmed" row while losing its
# "Deaths" or "Recovered" row.
transformed_data_2 = transformed_data_2.loc[transformed_data_2[last_date] >= 100]

confirmed_cases = _cases_of_type(transformed_data_2, "Confirmed")
deceased = _cases_of_type(transformed_data_2, "Deaths")
recovered = _cases_of_type(transformed_data_2, "Recovered")

# One bar series per case type, showing the counts on the last available date.
fig = go.Figure()
for cases, trace_name, bar_color in [
    (confirmed_cases, "Confirmed", "rgb(55, 83, 109)"),
    (deceased, "Deaths", "rgb(26, 118, 255)"),
    (recovered, "Recovered", "rgb(255, 105, 180)"),
]:
    fig.add_trace(
        go.Bar(
            x=cases.index,
            y=cases[last_date],
            name=trace_name,
            marker_color=bar_color
        )
    )
fig.update_layout(title=f"Count of confirmed cases the {last_date} (>= 100 cases)",
                  plot_bgcolor="rgb(255, 255, 255)")
fig.update_xaxes(title_text="Country")
fig.update_yaxes(title_text="Confirmed cases count",
                 gridcolor="rgb(200, 200, 200)",
                 gridwidth=1)
fig.show()

In [None]:
# Case fatality (%) on the last date: deaths divided by closed cases
# (deaths + recovered). Countries present in only one of the two frames come
# out as NaN after index alignment; those become 0 and are then dropped
# together with genuine zero ratios.
deaths_on_last_date = deceased[last_date]
closed_cases_on_last_date = deaths_on_last_date + recovered[last_date]
case_fatality = (deaths_on_last_date / closed_cases_on_last_date * 100).fillna(0)
case_fatality = case_fatality[case_fatality != 0]

countries = case_fatality.index
# The recovery share is the complement of the fatality share.
case_recovery = list(100 - case_fatality.values)

percent_label = "{:.1f}%".format
recovered_bar = go.Bar(
    name='Recovered',
    x=countries,
    y=case_recovery,
    marker_color="rgb(0,128,0)",
    text=list(map(percent_label, case_recovery)),
    textposition="inside"
)
passed_bar = go.Bar(
    name='Passed',
    x=countries,
    y=case_fatality.values,
    marker_color="rgb(139,0,0)",
    text=list(map(percent_label, case_fatality.values)),
    textposition="outside"
)

# Stack both shares so that every country's bar adds up to 100%.
fig = go.Figure(data=[recovered_bar, passed_bar])
fig.update_layout(
    title="Recovered/passed cases by country",
    barmode="stack",
    width=700,
    height=500,
    plot_bgcolor="rgb(255, 255, 255)"
)
fig.update_xaxes(title_text="Country")
fig.update_yaxes(
    title_text="Ratio",
    gridcolor="rgb(200, 200, 200)",
    gridwidth=1
)
fig.show()

In [None]:
# One line per country showing its confirmed-case count over time.
# The loop walks the rows of `confirmed_cases` itself instead of taking the
# country list from `transformed_data_2`: that frame can contain countries
# that only kept a "Deaths" or "Recovered" row, for which the lookup in
# `confirmed_cases` would find no row and `.values[0]` would raise IndexError.
# It also avoids re-scanning the whole frame with a boolean mask per country.
fig = go.Figure()
for country, country_cases in confirmed_cases.iterrows():
    fig.add_trace(
        go.Scatter(
            x=confirmed_cases.columns,
            y=country_cases.values,
            name=country,
            mode="lines+markers"
        )
    )
fig.update_layout(title="Count of confirmed case (cases > 100)",
                  xaxis_tickformat="%d %B (%a)<br>%Y",
                  height=500,
                  plot_bgcolor="rgb(255, 255, 255)")
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Confirmed cases count",
                 gridcolor="rgb(200, 200, 200)",
                 gridwidth=1)
fig.show()


In [None]:
# Number of cases from which a country's curve starts.
case_threshold = 100
# Headroom (in cases) above the highest curve where the guide labels are placed.
label_margin = 10000

day_count = 0   # longest number of days any country has spent since reaching the threshold
max_cases = 0   # highest value reached by any plotted curve

# Confirmed minus recovered; countries missing from one frame count as 0 there.
current_cases = confirmed_cases.subtract(recovered, fill_value=0)

fig = go.Figure()
for country, country_cases in current_cases.iterrows():
    case_values = country_cases.values
    reached_threshold = case_values >= case_threshold
    if not reached_threshold.any():
        # This country never reaches the threshold: nothing to plot, and
        # taking max() of an empty sequence would raise ValueError.
        continue
    # Start at the first day on/above the threshold and keep every later day,
    # even if the count falls back under it, so the day axis is not compressed.
    first_day = int(np.argmax(reached_threshold))
    confirmed_cases_for_current_country = case_values[first_day:]
    # Count the days AFTER trimming so that x and y have the same length and
    # `day_count` really measures days since the threshold was reached.
    day_count_for_current_country = len(confirmed_cases_for_current_country)
    day_count = max(day_count, day_count_for_current_country)
    max_cases = max(max_cases, confirmed_cases_for_current_country.max())
    fig.add_trace(
        go.Scatter(
            x=np.arange(1, day_count_for_current_country + 1),
            y=confirmed_cases_for_current_country,
            name=country,
            mode="lines+markers"
        )
    )

# Dashed guide lines showing a count that doubles every `r` days.
day_array = np.arange(1, day_count)
label_ceiling = max_cases + label_margin
# With fewer than two days there is no guide to draw (and trace[-1] would fail).
doubling_periods = [1, 2, 3, 5, 7] if day_array.size else []
for r in doubling_periods:
    trace = case_threshold * 2 ** (day_array / r)
    fig.add_trace(
        go.Scatter(
            x=day_array,
            y=trace,
            mode="lines",
            line={"dash": "dash", "color": "MediumPurple", "width": 1},
            showlegend=False
        )
    )
    if r == 1:
        text_value = "Double every day"
    else:
        text_value = f"Double every {r} days"

    if trace[-1] >= label_ceiling:
        # The guide leaves the visible area: label it where it crosses the
        # ceiling. argmax gives a position in `trace`; map it through
        # `day_array` to get the matching day on the x axis.
        text_x = day_array[np.argmax(trace >= label_ceiling)]
        text_y = label_ceiling
    else:
        # The guide stays inside the plot: label it near its right-hand end.
        text_x = day_count - 4
        text_y = trace[-1]
    fig.add_trace(
        go.Scatter(
            x=[text_x],
            y=[text_y],
            text=text_value,
            mode="text",
            showlegend=False
        )
    )
fig.update_layout(
    title="Flattening the curve: when the 100th case is reached",
    xaxis={"range": (0, day_count + 4)},
    yaxis={"range": (-1000, max_cases + 15000)},
    height=500,
    plot_bgcolor="rgb(255, 255, 255)")
fig.update_xaxes(title_text="Days since the 100th case",
                 gridcolor="rgb(200, 200, 200)",
                 gridwidth=1)
fig.update_yaxes(title_text="Confirmed cases count",
                 gridcolor="rgb(200, 200, 200)",
                 gridwidth=1)
fig.show()

In [None]:
# Keep only the row of the confirmed-cases table whose index label is "France"
# (the result stays a frame, and is empty if there is no such row).
is_france = confirmed_cases.index == "France"
confirmed_cases_france = confirmed_cases.loc[is_france]
confirmed_cases_france

In [None]:
# Daily new cases: difference between consecutive cumulative counts, with 0
# for the first date since it has no previous day to compare against.
cumulative_counts = confirmed_cases_france.values[0].tolist()
daily_increments = [
    today - previous
    for previous, today in zip(cumulative_counts, cumulative_counts[1:])
]
new_cases = ([0] if cumulative_counts else []) + daily_increments

# Cumulative count on the primary y axis, daily increments on the secondary one.
fig = make_subplots(specs=[[{"secondary_y": True}]])
france_series = [
    (confirmed_cases_france.values[0], "Confirmed cases", "rgb(87, 46, 228)", False),
    (new_cases, "New confirmed cases", "rgb(231, 46, 1)", True),
]
for series_values, series_name, series_color, on_secondary_axis in france_series:
    fig.add_trace(
        go.Scatter(
            x=confirmed_cases_france.columns,
            y=series_values,
            name=series_name,
            mode="lines+markers",
            marker={"color": series_color}
        ),
        secondary_y=on_secondary_axis
    )

fig.update_layout(
    title="Confirmed cases evolution in France",
    showlegend=False,
    xaxis_tickformat="%d %B (%a)<br>%Y",
    height=500,
    plot_bgcolor="rgb(255, 255, 255)"
)
fig.update_xaxes(
    title_text="Date",
    gridcolor="rgb(200, 200, 200)",
    gridwidth=1
)
# Each y axis is coloured like the series it belongs to.
fig.update_yaxes(
    title_text="Confirmed cases count",
    secondary_y=False,
    color="rgb(87, 46, 228)",
    gridcolor="rgb(200, 200, 200)",
    gridwidth=1
)
fig.update_yaxes(
    title_text="New confirmed cases count",
    secondary_y=True,
    color="rgb(231, 46, 1)"
)
fig.show()

In [None]:
# Step (in days) between two consecutive date columns.
dx = 1

# Only countries with at least 500 confirmed cases on the last date.
large_outbreaks = confirmed_cases.loc[confirmed_cases[confirmed_cases.columns[-1]] >= 500]

# Daily increments as a regular 2-D table (countries x dates). A difference
# between day i and day i+1 belongs to day i+1, so the result is labelled with
# the dates starting from the second one; using all dates would draw every
# increment one day too early and leave x one entry longer than z.
new_confirmed_cases = pd.DataFrame(
    np.diff(large_outbreaks.values, axis=1) / dx,
    index=large_outbreaks.index,
    columns=confirmed_cases.columns[1:],
)

fig = go.Figure()
fig.add_trace(
    go.Heatmap(
        z=new_confirmed_cases.values,
        x=new_confirmed_cases.columns,
        y=new_confirmed_cases.index,
        # NOTE(review): colorscale stops are fractions of the plotted z range,
        # so the value annotations on the right only hold if that range is
        # 0..100000 - confirm whether zmin/zmax should be pinned.
        colorscale=[
            [0, 'rgb(250, 250, 250)'],        #0
            [1./10000, 'rgb(200, 200, 200)'], #10
            [1./1000, 'rgb(150, 150, 150)'],  #100
            [1./100, 'rgb(100, 100, 100)'],   #1000
            [1./10, 'rgb(50, 50, 50)'],       #10000
            [1., 'rgb(0, 0, 0)'],             #100000
        ],
        colorbar={"tick0": 0, "tickmode": "array", "tickvals": [0, 1000, 10000, 100000]}
    )
)
fig.update_layout(
    title="New confirmed cases evolution",
    xaxis_tickformat="%d %B (%a)<br>%Y",
    height=800,
    plot_bgcolor="rgb(255, 255, 255)")
fig.update_xaxes(title_text="Date",
                 gridcolor="rgb(200, 200, 200)",
                 gridwidth=1)
# The y axis lists the countries; the case counts are encoded by the colour.
fig.update_yaxes(title_text="Country",
                 gridcolor="rgb(200, 200, 200)",
                 gridwidth=1)
fig.show()

In [None]:
# curve_fit needs 1-D x and y arrays of equal length. Passing the (1, n)
# arrays produced by `.values` / `reshape(1, n)` makes it see a single data
# point and fail with "Improper input: ... N=3 must not exceed ... M=1".
y = np.asarray(current_cases.loc[current_cases.index == "China"].values[0], dtype=float)
x = np.arange(len(y))


def test(x, a, b, c):
    """Cubic polynomial without constant term: a*x + b*x**2 + c*x**3.

    Parameters:
        x: value(s) at which the polynomial is evaluated (scalar or array).
        a, b, c: coefficients of the linear, quadratic and cubic terms.

    Returns:
        The polynomial evaluated at ``x``.
    """
    return (a * x) + (b * x**2) + (c * x**3)


# curve_fit() takes the model function together with the x and y data and
# returns the fitted coefficients a, b and c in `param` and the estimated
# covariance of those coefficients in `param_cov`.
param, param_cov = curve_fit(test, x, y)

print("Cubic polynomial coefficients:")
print(param)
print("Covariance of coefficients:")
print(param_cov)