In [1]:
%%time
import pandas as pd
import numpy as np
import plotly.offline as pyo
import plotly.graph_objs as go
from datetime import date
today = date.today()
def load_data():
    import time
    start = time.time()
    print('Fetching data')
    df = pd.read_csv('https://covid.ourworldindata.org/data/ecdc/full_data.csv')
    print(f"Fetching complete in {round(time.time()-start, 2)}secs")
    filename = 'data/backups/COVID-19 '+str(today)+' backup.csv'
    df.to_csv(filename, index=False)
    print(f"{filename}, has been saved")
    return df
df = load_data()

Fetching data
Fetching complete in 9.46secs
data/backups/COVID-19 2020-07-19 backup.csv, has been saved
CPU times: user 605 ms, sys: 111 ms, total: 716 ms
Wall time: 10.3 s


In [2]:
df['date'] = pd.to_datetime(df['date'])
df['week'] = df['date'].dt.week

In [3]:
total_cases = df.groupby('location')['total_cases'].max().to_frame().reset_index().sort_values(by='total_cases', ascending=False)[1:26].reset_index(drop=True)
print('Generating Top 25 countries.html')
data = [go.Bar(x=total_cases['location'],
               y=total_cases['total_cases'])]
layout = go.Layout(title=f'Top 25 countries with COVID-19 confirmed cases on {today}',
                  xaxis=dict(title='Countries'),
                  yaxis=dict(title='Confirmed number of Cases'))
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='data/graphs/Top 25 countries.html')

Generating Top 25 countries.html


'data/graphs/Top 25 countries.html'

### Confirmed Cases

In [4]:
print('Generating Confirmed Cases scatter plot.html')
countries = ['United States', 'Brazil', 'India']
data = [go.Scatter(x=df['date'],
                   y=df[(df['location'] == country)]['total_cases'],
                   name=country,
                   mode='lines+markers') for country in countries]
layout=go.Layout(title=f'Confirmed Cases on {today}',
                 xaxis=dict(title='Countries'),
                 yaxis=dict(title='Confirmed Cases'))
fig=go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='data/graphs/Confirmed Cases scatter plot.html')

Generating Confirmed Cases scatter plot.html


'data/graphs/Confirmed Cases scatter plot.html'

### New Cases

In [5]:
print('Generating New Cases scatter plot.html')
countries = ['United States', 'Brazil', 'India']
data = [go.Scatter(x=df['date'],
                   y=df[(df['location'] == country)]['new_cases'],
                   name=country,
                   mode='lines+markers') for country in countries]
layout=go.Layout(title=f'New Cases on {today}',
                 xaxis=dict(title='Countries'),
                 yaxis=dict(title='Confirmed Cases'))
fig=go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='data/graphs/New Cases scatter plot.html')

Generating New Cases scatter plot.html


'data/graphs/New Cases scatter plot.html'

### Monthly Cases

In [6]:
print('Generating Monthly Cases.html')
df['Month'] = df['date'].dt.month_name()
def monthly_cases():
    countries = ['United States', 'Brazil', 'India']
    data = []
    for country in countries:
        cases = df[(df['location'] == country)].groupby('Month')['total_cases'].max().sort_values().to_frame().reset_index()
        scatter = go.Scatter(x=cases['Month'],
                           y=cases['total_cases'],
                           mode='markers+lines',
                            name=country)
        data.append(scatter)
    layout = go.Layout(title=f'Monthly Cases on {today}')
    fig = go.Figure(data=data, layout=layout)
    return pyo.plot(fig, filename='data/graphs/Monthly Cases.html')
data = monthly_cases()    

Generating Monthly Cases.html


### Total deaths

In [8]:
total_cases = df.groupby('location')['total_deaths'].max().to_frame().reset_index().sort_values(by='total_deaths', ascending=False)[1:16].reset_index(drop=True)
print('Generating Countries with highest deaths.html')
data = [go.Bar(x=total_cases['location'],
               y=total_cases['total_deaths'])]
layout = go.Layout(title=f'Countries with highest deaths from COVID-19 on {today}',
                  xaxis=dict(title='Countries'),
                  yaxis=dict(title='Total number of deaths'))
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='data/graphs/Countries with highest deaths.html')

Generating Countries with highest deaths.html


'data/graphs/Countries with highest deaths.html'

In [33]:
death_perc = df.groupby('location')[['total_cases', 'total_deaths']].max().reset_index().sort_values(by='total_cases', ascending=False)
death_perc['death%'] = round(death_perc['total_deaths']/death_perc['total_cases'], 3)*100
death_perc.sort_values(by='death%', ascending=False).reset_index(drop=True)[:20]

Unnamed: 0,location,total_cases,total_deaths,death%
0,Yemen,1581.0,443.0,28.0
1,Sint Maarten (Dutch part),79.0,15.0,19.0
2,France,174674.0,30152.0,17.3
3,Belgium,63706.0,9800.0,15.4
4,United Kingdom,313483.0,45273.0,14.4
5,Italy,244216.0,35042.0,14.3
6,Hungary,4315.0,596.0,13.8
7,British Virgin Islands,8.0,1.0,12.5
8,Netherlands,51526.0,6129.0,11.9
9,Mexico,338913.0,38888.0,11.5


In [42]:
print('Qatar.html')
var = ['total_cases', 'new_cases']
data = [go.Scatter(x=df['date'],
                   y=df[(df['location'] == 'Qatar')][x],
                   name=x,
                   mode='lines+markers') for x in var]
layout=go.Layout(title=f'Confirmed Cases on {today}',
                 xaxis=dict(title='Qatar'),
                 yaxis=dict(title='Confirmed Cases'))
fig=go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='data/graphs/Qatar.html')

Qatar.html


'data/graphs/Qatar.html'

In [38]:
df

Unnamed: 0,date,location,new_cases,new_deaths,total_cases,total_deaths,weekly_cases,weekly_deaths,biweekly_cases,biweekly_deaths,week,Month
0,2019-12-31,Afghanistan,0.0,0.0,0.0,0.0,,,,,1,December
1,2020-01-01,Afghanistan,0.0,0.0,0.0,0.0,,,,,1,January
2,2020-01-02,Afghanistan,0.0,0.0,0.0,0.0,,,,,1,January
3,2020-01-03,Afghanistan,0.0,0.0,0.0,0.0,,,,,1,January
4,2020-01-04,Afghanistan,0.0,0.0,0.0,0.0,,,,,1,January
...,...,...,...,...,...,...,...,...,...,...,...,...
31283,2020-07-15,Zimbabwe,0.0,0.0,1034.0,19.0,247.0,10.0,443.0,12.0,29,July
31284,2020-07-16,Zimbabwe,55.0,1.0,1089.0,20.0,204.0,11.0,484.0,13.0,29,July
31285,2020-07-17,Zimbabwe,273.0,3.0,1362.0,23.0,436.0,11.0,745.0,16.0,29,July
31286,2020-07-18,Zimbabwe,58.0,1.0,1420.0,24.0,478.0,11.0,795.0,17.0,29,July


### Confirmed Cases vs New Cases

In [96]:
import plotly.graph_objects as go
cntryLst = ['South Korea', 'India']
for x in cntryLst:
    def Plotter():
        # Create traces
        country = x
        data = df[(df['location'] == country)]
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=data['date'], y=data['total_cases'], mode='lines+markers', name='Confirmed Cases'))
        fig.add_trace(go.Scatter(x=data['date'], y=data['new_cases'], mode='lines+markers', name='New Cases',
                                text=[x for x in data.new_cases], textposition='top left'))
        # Edit the layout
        fig.update_layout(title=f"COVID-19 Cases in {country}", xaxis_title='Date', yaxis_title='Count of Cases')
        fig.show()
Plotter()

### Confirmed Cases Country comparision

In [81]:
def Plotter():
    # Create traces
    cntryLst = []
    for x in totalCases['Country'][1:7]:
        cntryLst.append(x)
    fig = go.Figure()
    for country in cntryLst:
        data = df[(df['location'] == country)]
        fig.add_trace(go.Scatter(x=data['date'], y=data['total_cases'], mode='lines+markers', name=country))
    # Edit the layout
    fig.update_layout(title=f"COVID-19 Confirmed Cases", xaxis_title='Date', yaxis_title='Count of Cases')
    fig.show()
Plotter()

### Total Deaths vs New Deaths

In [82]:
import plotly.graph_objects as go
cntryLst = ['World']
for x in cntryLst:
    def Plotter():
        # Create traces
        country = x
        data = df[(df['location'] == country)]
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=data['date'], y=data['total_deaths'], mode='lines+markers', name='Total Deaths'))
        fig.add_trace(go.Scatter(x=data['date'], y=data['new_deaths'], mode='lines+markers', name='New Deaths'))
        # Edit the layout
        fig.update_layout(title=f"COVID-19 Deaths in {country}", xaxis_title='Date', yaxis_title='Count of Deaths')
        fig.show()
Plotter()

In [83]:
df.groupby('location')['new_deaths'].sum().sort_values(ascending=False).reset_index().rename(columns={'location':'Country', 'new_deaths':'Total Deaths'})

Unnamed: 0,Country,Total Deaths
0,World,589688.0
1,United States,138358.0
2,Brazil,76688.0
3,United Kingdom,45119.0
4,Mexico,37574.0
...,...,...
206,Mongolia,0.0
207,Cambodia,0.0
208,Uganda,0.0
209,Bonaire Sint Eustatius and Saba,0.0


In [84]:
dfPivot = df.pivot(values='total_cases', index='date', columns='location')

In [85]:
dfPivot[['India', 'China', 'Italy', 'South Korea', 'Spain', 'Qatar']]

location,India,China,Italy,South Korea,Spain,Qatar
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-12-31,0.0,27.0,0.0,0.0,0.0,0.0
2020-01-01,0.0,27.0,0.0,0.0,0.0,0.0
2020-01-02,0.0,27.0,0.0,0.0,0.0,0.0
2020-01-03,0.0,44.0,0.0,0.0,0.0,0.0
2020-01-04,0.0,44.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...
2020-07-13,878254.0,85117.0,243061.0,13479.0,255953.0,103598.0
2020-07-14,906752.0,85172.0,243230.0,13512.0,256619.0,104016.0
2020-07-15,936181.0,85226.0,243344.0,13551.0,257494.0,104533.0
2020-07-16,968876.0,85246.0,243506.0,13612.0,258855.0,104983.0


In [86]:
cnt = dfPivot.loc[:, ['India', 'China', 'Italy', 'South Korea', 'Spain', 'Qatar']].style.background_gradient(cmap='Reds')
cnt


invalid value encountered in less



location,India,China,Italy,South Korea,Spain,Qatar
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-12-31 00:00:00,0.0,27.0,0.0,0.0,0.0,0.0
2020-01-01 00:00:00,0.0,27.0,0.0,0.0,0.0,0.0
2020-01-02 00:00:00,0.0,27.0,0.0,0.0,0.0,0.0
2020-01-03 00:00:00,0.0,44.0,0.0,0.0,0.0,0.0
2020-01-04 00:00:00,0.0,44.0,0.0,0.0,0.0,0.0
2020-01-05 00:00:00,0.0,59.0,0.0,0.0,0.0,0.0
2020-01-06 00:00:00,0.0,59.0,0.0,0.0,0.0,0.0
2020-01-07 00:00:00,0.0,59.0,0.0,0.0,0.0,0.0
2020-01-08 00:00:00,0.0,59.0,0.0,0.0,0.0,0.0
2020-01-09 00:00:00,0.0,59.0,0.0,0.0,0.0,0.0


### Weekly Cases

In [88]:
cntryLst = ['World']
def WeeklyCases():
    # Create traces
    fig = go.Figure()
    for x in cntryLst:
        data = df[(df['location'] == x)].groupby('week')['total_cases'].max().reset_index()
        fig.add_trace(go.Scatter(x=data['week'], y=data['total_cases'], mode='lines+markers'))
    # Edit the layout
    fig.update_layout(title=f"COVID-19 Cases on Weekly basis", xaxis_title='Week', yaxis_title='Count of Cases')
    fig.show()
WeeklyCases()    

In [91]:
pv1 = pd.pivot_table(data=df, values='total_cases', index='week', columns='location', aggfunc='max')

In [92]:
list20 = df.groupby('location')['total_cases'].max().sort_values(ascending=False)[:20].index.to_list()
list20.append('India')
pv1[list20]

location,World,United States,Brazil,India,Russia,Peru,South Africa,Mexico,Chile,United Kingdom,...,Pakistan,Spain,Italy,Saudi Arabia,Turkey,Germany,Bangladesh,France,Colombia,India
week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,59.0,0.0,0.0,0.0,0.0,,,0.0,,0.0,...,0.0,0.0,0.0,,,0.0,,0.0,,0.0
2,59.0,0.0,0.0,0.0,0.0,,,0.0,,0.0,...,0.0,0.0,0.0,,,0.0,,0.0,,0.0
3,219.0,0.0,0.0,0.0,0.0,,,0.0,,0.0,...,0.0,0.0,0.0,,,0.0,,0.0,,0.0
4,2023.0,2.0,0.0,0.0,0.0,,,0.0,,0.0,...,0.0,0.0,0.0,,,0.0,,3.0,,0.0
5,14554.0,8.0,0.0,2.0,2.0,,,0.0,,2.0,...,0.0,1.0,3.0,,,8.0,,6.0,,2.0
6,37552.0,12.0,0.0,3.0,2.0,,,0.0,,4.0,...,0.0,1.0,3.0,,,13.0,,11.0,,3.0
7,69265.0,15.0,0.0,3.0,2.0,,,0.0,,9.0,...,0.0,2.0,3.0,,,15.0,,11.0,,3.0
8,78812.0,35.0,0.0,3.0,2.0,,,0.0,,9.0,...,0.0,2.0,79.0,,,15.0,,12.0,,3.0
9,87042.0,69.0,2.0,3.0,2.0,,,4.0,,23.0,...,4.0,82.0,1128.0,,,111.0,,100.0,,3.0
10,106666.0,433.0,13.0,34.0,10.0,1.0,2.0,5.0,5.0,206.0,...,6.0,1094.0,5883.0,7.0,,847.0,,716.0,1.0,34.0


In [93]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=pv1.index, y=pv1['United States'], mode='lines+markers+text', textposition="top left"))
fig.update_layout(title=f"COVID-19 Cases on Weekly basis", xaxis_title='Week number in 2020', yaxis_title='Confirmed Cases')

In [95]:
def HundredDays():
    days = [x for x in range(1,200)]
    cntryLst = ['United States', 'China', 'India', 'Brazil']
    #cntryLst = ['World']
    new = pd.DataFrame(days, columns=['Days'])
    for country in cntryLst:
            Dlist = dfPivot[(dfPivot[country] > 100)][country].values.astype('int')
            new_l = len(new)
            Dlist_l = len(Dlist)
            to_append = new_l - Dlist_l
            for x in range(to_append):
                Dlist = np.append(Dlist, np.nan)
            new[country] = Dlist
    fig = go.Figure()
    for country in cntryLst:
        fig.add_trace(go.Scatter(x=new['Days'], y=new[country], mode='lines', name=country))
    # Edit the layout
    fig.update_layout(title=f"Number of Days after 100th Case", xaxis_title='No of days after 100th Case', yaxis_title='Count of Cases')
    fig.show()
    return new
HundredDF = HundredDays()

In [43]:
dfPivot

location,Afghanistan,Albania,Algeria,Andorra,Angola,Anguilla,Antigua and Barbuda,Argentina,Armenia,Aruba,...,Uruguay,Uzbekistan,Vatican,Venezuela,Vietnam,Western Sahara,World,Yemen,Zambia,Zimbabwe
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-12-31,0.0,,0.0,,,,,,0.0,,...,,,,,0.0,,27.0,,,
2020-01-01,0.0,,0.0,,,,,,0.0,,...,,,,,0.0,,27.0,,,
2020-01-02,0.0,,0.0,,,,,,0.0,,...,,,,,0.0,,27.0,,,
2020-01-03,0.0,,0.0,,,,,,0.0,,...,,,,,0.0,,44.0,,,
2020-01-04,0.0,,0.0,,,,,,0.0,,...,,,,,0.0,,44.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-07-10,33908.0,3188.0,17808.0,855.0,396.0,3.0,73.0,90680.0,30346.0,105.0,...,977.0,11723.0,12.0,8372.0,369.0,519.0,12246269.0,1356.0,1895.0,926.0
2020-07-11,34366.0,3278.0,17808.0,855.0,458.0,3.0,74.0,94047.0,30903.0,105.0,...,985.0,12206.0,12.0,8803.0,370.0,519.0,12476320.0,1380.0,1895.0,942.0
2020-07-12,34451.0,3371.0,18712.0,855.0,483.0,3.0,74.0,97509.0,31392.0,105.0,...,986.0,12513.0,12.0,9178.0,370.0,519.0,12694498.0,1389.0,1895.0,982.0
2020-07-13,34451.0,3454.0,19195.0,855.0,506.0,3.0,74.0,100153.0,31969.0,105.0,...,987.0,13193.0,12.0,9465.0,372.0,519.0,12889793.0,1465.0,1895.0,985.0


In [49]:
df

Unnamed: 0,date,location,new_cases,new_deaths,total_cases,total_deaths,weekly_cases,weekly_deaths,biweekly_cases,biweekly_deaths,Week
0,2019-12-31,Afghanistan,0.0,0.0,0.0,0.0,,,,,1
1,2020-01-01,Afghanistan,0.0,0.0,0.0,0.0,,,,,1
2,2020-01-02,Afghanistan,0.0,0.0,0.0,0.0,,,,,1
3,2020-01-03,Afghanistan,0.0,0.0,0.0,0.0,,,,,1
4,2020-01-04,Afghanistan,0.0,0.0,0.0,0.0,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...
30233,2020-07-10,Zimbabwe,41.0,3.0,926.0,12.0,309.0,5.0,375.0,6.0,28
30234,2020-07-11,Zimbabwe,16.0,1.0,942.0,13.0,317.0,6.0,381.0,7.0,28
30235,2020-07-12,Zimbabwe,40.0,5.0,982.0,18.0,284.0,10.0,415.0,12.0,28
30236,2020-07-13,Zimbabwe,3.0,0.0,985.0,18.0,269.0,10.0,418.0,12.0,29
