In [1]:
import pandas as pd
import requests
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as po
import plotly.graph_objs as pg
import datapane as dp 

In [2]:
# Initialise plotly's offline notebook mode before any figure is drawn.
po.init_notebook_mode(connected=True)

In [3]:
#connecting to datapane
!datapane login --server=https://datapane.com/ --token=<jjfhhhcggjfjhgj>

Connected successfully to https://datapane.com as victor6


In [4]:
#Getting covid data from the API
url = 'https://api.covid19api.com/summary'
# A timeout stops the cell from hanging indefinitely on an unresponsive server,
# and raise_for_status() fails here on an HTTP error instead of later, when the
# error body would be parsed as if it were the expected payload.
url_data = requests.get(url, timeout=30)
url_data.raise_for_status()

In [5]:
# Display the response object; its repr shows the HTTP status code.
url_data

<Response [200]>

In [6]:
#exploring the data
# Decode the response body as JSON.
json_data = url_data.json()

# Show the top-level keys of the decoded payload.
json_data.keys()

dict_keys(['ID', 'Message', 'Global', 'Countries', 'Date'])

In [7]:
json_data['Global'] # look up the value stored under the 'Global' key

{'NewConfirmed': 389345,
 'TotalConfirmed': 266069035,
 'NewDeaths': 3504,
 'TotalDeaths': 5258626,
 'NewRecovered': 0,
 'TotalRecovered': 0,
 'Date': '2021-12-07T10:07:18.16Z'}

In [8]:
# Check the Python type of the value stored under each top-level key, starting with 'ID'.
type(json_data['ID'])

str

In [9]:
# Type of the value under 'Message'.
type(json_data['Message'])

str

In [10]:
# Type of the value under 'Countries'.
type(json_data['Countries'])

list

In [11]:
# Type of the value under 'Date'.
type(json_data['Date'])

str

In [12]:
# Type of the value under 'Global'.
type(json_data['Global'])

dict

In [13]:
type(json_data['Countries'][0]) # type of a single entry of the 'Countries' list

dict

In [14]:
# Turn the entries stored under 'Countries' into a DataFrame.
countries_data = json_data['Countries']
df = pd.DataFrame(data=countries_data)

In [15]:
# Preview the first rows of the freshly built frame.
df.head()

Unnamed: 0,ID,Country,CountryCode,Slug,NewConfirmed,TotalConfirmed,NewDeaths,TotalDeaths,NewRecovered,TotalRecovered,Date,Premium
0,1d75214a-34b2-4a87-ba36-f347b6ea8f74,Afghanistan,AF,afghanistan,0,157499,0,7316,0,0,2021-12-07T10:07:18.16Z,{}
1,17fa3b73-9bee-4d7a-b4ed-20b6d6175359,Albania,AL,albania,0,201902,0,3115,0,0,2021-12-07T10:07:18.16Z,{}
2,1f876f36-9010-44e1-ae2c-b4a0bd4756f8,Algeria,DZ,algeria,0,211662,0,6111,0,0,2021-12-07T10:07:18.16Z,{}
3,b49ec246-a2f7-44e6-94f3-369dd1a49bc5,Andorra,AD,andorra,0,18631,0,133,0,0,2021-12-07T10:07:18.16Z,{}
4,90e575ab-dbe4-4af3-806b-87bff7362d0e,Angola,AO,angola,0,65259,0,1735,0,0,2021-12-07T10:07:18.16Z,{}


In [16]:
#clean data
# Count the missing values in each column.
df.isna().sum()

ID                0
Country           0
CountryCode       0
Slug              0
NewConfirmed      0
TotalConfirmed    0
NewDeaths         0
TotalDeaths       0
NewRecovered      0
TotalRecovered    0
Date              0
Premium           0
dtype: int64

In [17]:
# Summarise column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 193 entries, 0 to 192
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   ID              193 non-null    object
 1   Country         193 non-null    object
 2   CountryCode     193 non-null    object
 3   Slug            193 non-null    object
 4   NewConfirmed    193 non-null    int64 
 5   TotalConfirmed  193 non-null    int64 
 6   NewDeaths       193 non-null    int64 
 7   TotalDeaths     193 non-null    int64 
 8   NewRecovered    193 non-null    int64 
 9   TotalRecovered  193 non-null    int64 
 10  Date            193 non-null    object
 11  Premium         193 non-null    object
dtypes: int64(6), object(6)
memory usage: 18.2+ KB


In [18]:
#convert to ideal data types
# Map each of these four columns to the 'string' dtype.
string_types = dict.fromkeys(['ID', 'Country', 'CountryCode', 'Slug'], 'string')

df = df.astype(dtype=string_types)

In [19]:
# The timestamps are ISO-8601 strings with a trailing 'Z', so parse them
# explicitly as UTC and then drop the timezone to keep a naive datetime column.
# Re-running the cell on the already-converted column gives the same result.
df['Date'] = pd.to_datetime(df['Date'], utc=True).dt.tz_localize(None)

In [20]:
#drop the empty premium column
# Reassign rather than mutate in place, and tolerate the column already being
# gone so the cell can be re-run without raising KeyError.
df = df.drop(columns=['Premium'], errors='ignore')

In [21]:
# Preview the frame again to inspect the current columns and values.
df.head()

Unnamed: 0,ID,Country,CountryCode,Slug,NewConfirmed,TotalConfirmed,NewDeaths,TotalDeaths,NewRecovered,TotalRecovered,Date
0,1d75214a-34b2-4a87-ba36-f347b6ea8f74,Afghanistan,AF,afghanistan,0,157499,0,7316,0,0,2021-12-07 10:07:18.160
1,17fa3b73-9bee-4d7a-b4ed-20b6d6175359,Albania,AL,albania,0,201902,0,3115,0,0,2021-12-07 10:07:18.160
2,1f876f36-9010-44e1-ae2c-b4a0bd4756f8,Algeria,DZ,algeria,0,211662,0,6111,0,0,2021-12-07 10:07:18.160
3,b49ec246-a2f7-44e6-94f3-369dd1a49bc5,Andorra,AD,andorra,0,18631,0,133,0,0,2021-12-07 10:07:18.160
4,90e575ab-dbe4-4af3-806b-87bff7362d0e,Angola,AO,angola,0,65259,0,1735,0,0,2021-12-07 10:07:18.160


In [22]:
#check for duplicates
# Count the fully duplicated rows first so the check is visible, then drop
# them by reassignment (no in-place mutation).
duplicate_count = int(df.duplicated().sum())
df = df.drop_duplicates()

# Number of duplicate rows that were found and removed.
duplicate_count

#### Analytical visualization

In [23]:
# Collects the figures that are later passed to the Datapane report.
figs = []

In [24]:
# Sum the numeric columns per country. numeric_only=True keeps the text and
# datetime columns out of the aggregation explicitly instead of relying on
# pandas silently discarding them. Rows are ordered by country name, Z to A.
total_confirmed = (
    df.groupby(by='Country')
      .sum(numeric_only=True)
      .reset_index()
      .sort_values(by='Country', ascending=False)
      .reset_index(drop=True)
)

print(total_confirmed)

                             Country  NewConfirmed  TotalConfirmed  NewDeaths  \
0                           Zimbabwe             0          139046          0   
1                             Zambia             0          210327          0   
2                              Yemen             0           10034          0   
3                           Viet Nam             0         1323683          0   
4    Venezuela (Bolivarian Republic)             0          434918          0   
..                               ...           ...             ...        ...   
188                           Angola             0           65259          0   
189                          Andorra             0           18631          0   
190                          Algeria             0          211662          0   
191                          Albania             0          201902          0   
192                      Afghanistan             0          157499          0   

     TotalDeaths  NewRecove

In [25]:
# Bar chart with one bar per country; bar height is the TotalConfirmed value.
fig1 = px.bar(
    data_frame=total_confirmed,
    x='Country',
    y='TotalConfirmed',
    labels={'Country': 'Countries'},
    title='Total Confirmed Cases by Country',
)

fig1

In [26]:
# Queue the bar chart for the report. Re-running this cell appends it again.
figs.append(fig1)

In [38]:
# Plotly's choropleth trace matches `locations` against ISO-3 codes by default,
# but CountryCode holds two-letter codes (e.g. 'AF'), which never match.
# Matching on the country name is used instead.
# NOTE(review): names Plotly does not recognise are left undrawn - verify coverage.
data = dict(type='choropleth',
            locations = df['Country'],
            locationmode = 'country names',
            z = df['TotalConfirmed'],
            text = df['Country'],
            colorbar = dict(title = 'Total Confirmed'))

layout = dict(title = 'Global Covid Cases',
              geo = dict( projection = {'type':'robinson'},
                         showframe = False,
                         showlakes = True,
                         lakecolor = 'rgb(0,191,255)'))

# `x` is the finished map figure; a later cell appends it to `figs`.
x = pg.Figure(data = [data],
              layout = layout)
po.iplot(x)

In [40]:
# Queue the choropleth map for the report. Re-running this cell appends whatever `x` is bound to at that moment.
figs.append(x)

In [45]:
# Histogram of the TotalDeaths column across all rows of df.
fig3 = px.histogram(
    data_frame=df,
    x='TotalDeaths',
    title='Distribution of Total Deaths',
)
fig3.show()

In [46]:
# Queue the histogram for the report. Re-running this cell appends it again.
figs.append(fig3)

In [61]:
# Aggregate the numeric columns over rows that share the same TotalDeaths value.
# A dedicated name is used so the map figure bound to `x` is not overwritten
# by a DataFrame.
deaths_grouped = (
    df.groupby(by='TotalDeaths')
      .sum(numeric_only=True)
      .reset_index()
)

# `labels` must be keyed by column name for the axis titles to be replaced.
fig4 = px.line(deaths_grouped, x='TotalDeaths', y='NewConfirmed',
               labels={'TotalDeaths': 'Total Deaths', 'NewConfirmed': 'New Confirmed'},
               hover_data=['TotalConfirmed'], title='Total Death Cases by Newly Confirmed')
fig4.show()


In [62]:
# Queue the line chart for the report. Re-running this cell appends it again.
figs.append(fig4)

In [63]:
# Bundle every figure collected in `figs` into one Datapane report and upload it under the name "Covid".
dp.Report(*figs).upload(name="Covid")

Bokeh version 2.3.2 is not supported, these plots may not display correctly, please install version ~=2.2.0


Uploading report and associated data - *please wait...*

Report successfully uploaded, click [here](https://datapane.com/u/victor6/reports/63Owvn3/covid/) to view and share your report.