In [10]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import requests
import io

import matplotlib.pyplot as plt
import seaborn as sns
from countryinfo import CountryInfo

In [2]:
def load_dataset(tipo):
    url_confirmed=f"https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-{tipo}.csv"
    s=requests.get(url_confirmed).content
    df=pd.read_csv(io.StringIO(s.decode('utf-8')))
    df = df.drop(["Province/State", "Lat", "Long"], axis=1)
    df = pd.DataFrame(df.set_index("Country/Region").stack()).reset_index()
    df = df.rename(columns={"Country/Region":"Country","level_1":"Date", 0:tipo})
    df["Date"] = pd.to_datetime(df["Date"])
    df = df.groupby(["Date", "Country"])[tipo].max().reset_index()
    return df


In [3]:
df_confirmed = load_dataset("Confirmed")
df_recovered = load_dataset("Recovered")
df_deaths = load_dataset("Deaths")

new_df = pd.merge(df_confirmed, df_recovered,  how='left', left_on=["Date", "Country"], right_on = ["Date", "Country"])
df_data = pd.merge(new_df, df_deaths,  how='left', left_on=["Date", "Country"], right_on = ["Date", "Country"])


In [4]:
df_data.groupby("Country")["Confirmed"].max().sum()
df_data.groupby("Country")["Deaths"].max().sum()
df_data.groupby("Country")["Recovered"].max().sum()

85396.0

In [39]:
importantes = ["Colombia", "Ecuador", "Italy", "Spain", "France", "US", "Germany", "South Korea", "Brazil", "Mexico", "Chile" , "Panama", "Peru"]
df_prin = df_data[df_data["Country"].isin(importantes)]
df_prin = df_prin[df_prin["Confirmed"] > 0]
df_prin['start_date'] = df_prin.groupby('Country')['Date'].transform('min')
df_prin["days_since_start"] = (df_prin["Date"] - df_prin['start_date']).dt.days
df_prin = df_prin.replace("US", "United States")

In [40]:
df_prin["population"] = df_prin.apply(lambda x: CountryInfo(x["Country"]).info()["population"], axis=1)

In [41]:
df_prin["percentage"] = df_prin["Confirmed"]/df_prin["population"]

In [42]:
df_prin

Unnamed: 0,Date,Country,Confirmed,Recovered,Deaths,start_date,days_since_start,population,percentage
172,2020-01-22,United States,1.0,0.0,0.0,2020-01-22,0,319259000,3.132253e-09
355,2020-01-23,United States,1.0,0.0,0.0,2020-01-22,1,319259000,3.132253e-09
425,2020-01-24,France,2.0,0.0,0.0,2020-01-24,0,66078000,3.026726e-08
538,2020-01-24,United States,1.0,0.0,0.0,2020-01-22,2,319259000,3.132253e-09
608,2020-01-25,France,3.0,0.0,0.0,2020-01-24,1,66078000,4.540089e-08
...,...,...,...,...,...,...,...,...,...
11274,2020-03-23,Mexico,251.0,4.0,2.0,2020-02-28,24,119713203,2.096678e-06
11292,2020-03-23,Panama,245.0,0.0,3.0,2020-03-10,13,3713312,6.597884e-05
11295,2020-03-23,Peru,363.0,1.0,5.0,2020-03-06,17,30814175,1.178029e-05
11318,2020-03-23,Spain,28768.0,2575.0,1772.0,2020-02-01,51,46507760,6.185634e-04


In [45]:
fig = px.line(df_prin, x="days_since_start", y="percentage", color='Country')
fig.update_layout(title='Country percentage of confirmed',
                   xaxis_title='days since first confirmed',
                   yaxis_title='percentage confirmed',
                 )
fig.show()