In [46]:
# Select the interactive nbagg matplotlib backend for this notebook session.
%matplotlib nbagg
# NOTE(review): %pylab populates the interactive namespace from numpy and
# matplotlib (see the message printed below), which hides where names come
# from. Explicit imports are easier to trace — confirm which names later
# cells rely on before replacing it.
%pylab

Using matplotlib backend: nbAgg
Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


# Load the data

In [1]:
import pandas as pd

In [47]:
import csv
# newline='' hands newline handling to the csv module itself, which is how
# its documentation says files given to csv readers must be opened (otherwise
# newlines embedded in quoted fields can be misinterpreted).
with open('csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv', newline='') as f:
    # Materialize every row as a dict keyed by the CSV header while f is open.
    docs = list(csv.DictReader(f))

## Reformat it so it is pandas-friendly

In [49]:
# Imported in this cell so it runs on a fresh kernel: no other cell shown in
# the notebook brings the `datetime` class into scope.
from datetime import datetime

# Reshape the wide CSV rows (one column per date) into long records:
# one dict per (location, date) pair, carrying the location fields along.
new_docs = []
for doc in docs:
    # Location fields copied verbatim into every record built from this row.
    meta = {k: doc[k] for k in ['Province/State', 'Country/Region', 'Lat', 'Long']}
    for k, v in doc.items():
        if k in meta:
            continue
        # Every remaining header is parsed as a date in month/day/2-digit-year
        # form; int(v) raises ValueError if a cell is blank or not an integer.
        new_docs.append({
            **meta,
            'date': datetime.strptime(k, '%m/%d/%y'),
            'cnt': int(v),
        })
        

In [50]:
# Turn the list of per-date records into a DataFrame.
df = pd.DataFrame(new_docs)
# Swap the CSV header names for short lowercase ones; rename() returns a new
# frame, and columns not listed here keep their names.
df = df.rename(columns={
    'Province/State': 'province',
    'Country/Region': 'country',
    'Lat': 'lat',
    'Long': 'long',
})

# Start plotting

## Total number of reported cases per country

In [97]:
import plotly.express as px
import plotly.graph_objects as go

# One line per country: total reported cases per date, on a log y-axis.
fig = go.Figure()

# A single groupby pass replaces a full-frame boolean filter per country;
# sort=False keeps countries in order of first appearance, as unique() did.
for country, country_rows in df.groupby('country', sort=False):
    # Sum all rows of the country (e.g. its provinces) per date. groupby
    # returns its keys in sorted order, so the rows come back ordered by date.
    c_df = (
        country_rows
          .groupby('date')
          .cnt.sum()
          .reset_index()
    )
    # Drop dates with no cases: zero has no position on the log axis below.
    c_df = c_df[c_df.cnt > 0]

    fig.add_trace(
        go.Scatter(
            x=c_df.date,
            y=c_df.cnt,
            name=country,
            opacity=0.8
        )
    )

fig.update_layout(
    title='Total number of reported cases per country',
    xaxis_title='Date',
    yaxis_title='Reported cases (log scale)',
    yaxis_type="log",
)
fig.show()

In [141]:
def get_rate(df, min_cnt=100, window=7):
    """Return the step-over-step growth ratio of the smoothed case total.

    Parameters
    ----------
    df : DataFrame
        Must have a ``date`` column and a numeric ``cnt`` column; rows that
        share a date are summed.
    min_cnt : number, default 100
        Smoothed totals below this value are dropped before the ratio is
        taken.
    window : int, default 7
        Number of rows in the centered rolling-mean window used to smooth
        the per-date totals.

    Returns
    -------
    Series
        Indexed by date. Each value is the smoothed total divided by the
        previous retained smoothed total; the first value is NaN because it
        has no predecessor.
    """
    totals = df.groupby('date').cnt.sum().sort_index()
    # Centered window: the first and last window // 2 dates have no full
    # window, come out as NaN, and are removed by the threshold filter below
    # (NaN >= min_cnt is False).
    smoothed = totals.rolling(window, center=True).mean()
    smoothed = smoothed[smoothed >= min_cnt]
    # shift(1) is positional: each value is divided by the previous row that
    # survived the filter.
    return smoothed / smoothed.shift(1)

def _add_rate_trace(fig, rate, name, **scatter_kwargs):
    """Append one line to fig from a growth-rate Series (index on x, values on y)."""
    fig.add_trace(
        go.Scatter(
            x=rate.index,
            y=rate.values,
            name=name,
            **scatter_kwargs
        )
    )


# Growth rate over all rows, drawn as a thick reference line.
total_rate = get_rate(df)
fig = go.Figure()
_add_rate_trace(fig, total_rate, 'Global trend', line=dict(width=8))

rest_of_the_countries = []  # not read by any cell shown; kept in case another cell uses it
# Select `cnt` before summing: summing the whole frame also aggregates the
# string and datetime columns, which is wasted work and raises TypeError on
# pandas versions where sum() no longer skips non-numeric columns.
# NOTE(review): this adds up cnt over every date. If cnt is a cumulative
# series, the value is not the country's case total — confirm that T is
# meant to be compared against this sum.
cnt_by_country = df.groupby('country').cnt.sum().to_dict()
T = 1500

# One thin line for each country at or above the threshold.
for country in df.country.unique():
    if cnt_by_country[country] < T:
        continue
    rate = get_rate(df[df.country == country])
    _add_rate_trace(fig, rate, country, opacity=0.8)

# Every country below the threshold is pooled into one thick line.
small_countries = [c for c, cnt in cnt_by_country.items() if cnt < T]
rate = get_rate(df[df.country.isin(small_countries)])
_add_rate_trace(fig, rate, 'Rest of the countries', line=dict(width=8))

fig.update_layout(
    title='Growth rate of reported cases',
    xaxis_title='Date',
    yaxis_title='Smoothed cases / previous value',
)
fig.show()

# Conclusion

It seems that the countries with a high number of cases have taken measures