In [45]:
#importing relevant packages

import pandas as pd
import requests
import json
from datetime import date
import matplotlib.pyplot as plt
import plotly as pt
import plotly.express as px
import numpy as np
import plotly.graph_objects as go

In [5]:
#Requesting data from JSON API
url = "https://covid-api.mmediagroup.fr/v1/cases?continent=Europe"    
req = requests.get(url)

In [44]:
with open("covid_hist.json", "w") as outfile: 

    json.dump(df, outfile)

In [6]:
#Creating a dataframe with the request

df = json.loads(req.text)

In [7]:
#Initializing, and making a list of required variables that I'll use later on

date_list = []
today_date = date.today()
country_list = []
confirmed_list = []
deaths_list = []
pop_list = []
area_list = []

In [8]:
#Iterating through the data with FOR loop

# Obtain and save dates of countries
for country_name in df.keys():
    date_list.append(today_date)
    
# Obtain and save names of countries
for country_name in df.keys():
    country_list.append(country_name)
    
# Obtain and save no of population in all countries
for country_name in df.keys():
    pop_list.append(df[country_name]['All']['population'])

# Obtain and save no of confirmed cases in all countries
for country_name in df.keys():
    confirmed_list.append(df[country_name]['All']['confirmed'])
    
# Obtain and save no of deaths in all countries
for country_name in df.keys():
    deaths_list.append(df[country_name]['All']['deaths'])
    
# Obtain and save dates of countries
for country_name in df.keys():
    area_list.append(df[country_name]['All']['sq_km_area'])

In [9]:
# Creating a dictionary of pandas dataframe. 
# Covid_Today is the pandas dataframe which includes (updated) total values until today.

covid_today = pd.DataFrame(
    {'country': country_list,
     'date': date_list,
     'population': pop_list,
     'confirmed':confirmed_list,
     'deaths': deaths_list,
     'sq_km_area': area_list
    })

covid_today.head(10)

# For example, this is the total population of each respective country. Total confirmed cases that have occured
# from 2020 to today, and so on..

Unnamed: 0,country,date,population,confirmed,deaths,sq_km_area
0,Albania,2021-12-15,2930187,204301,3144,28748.0
1,Andorra,2021-12-15,76965,20136,134,468.0
2,Austria,2021-12-15,8735453,1235063,13282,83859.0
3,Belarus,2021-12-15,9468338,676512,5305,207600.0
4,Belgium,2021-12-15,11429336,1968269,27685,30518.0
5,Bosnia and Herzegovina,2021-12-15,3507017,282587,13028,51197.0
6,Bulgaria,2021-12-15,7084571,718651,29764,110994.0
7,Croatia,2021-12-15,4189353,657726,11722,56538.0
8,Czechia,2021-12-15,10618303,2350338,34697,78866.0
9,Denmark,2021-12-15,5733551,570502,3030,43094.0


## Conducting Analysis of the Data

### TOP 10 DEATHS DEFINING VARIABLE

In [10]:
# Creating a variable of Top 10 Deaths, and sorting values by descending order.
top_10_deaths = covid_today.sort_values(by='deaths',ascending=False).head(10)

#Resetting index for decending values 
top_10_deaths.reset_index(drop=True, inplace=True)

#Sorting values with "total death"
top_10_deaths=top_10_deaths.sort_values(by='deaths')

x = top_10_deaths['country']
y = top_10_deaths['deaths']

top_10_deaths

Unnamed: 0,country,date,population,confirmed,deaths,sq_km_area
9,Hungary,2021-12-15,9721559,1202514,37079,93030.0
8,Romania,2021-12-15,19679306,1794589,57847,238391.0
7,Spain,2021-12-15,46354321,5366128,88542,505992.0
6,Poland,2021-12-15,38170712,3857085,89045,323250.0
5,Ukraine,2021-12-15,44222947,3746106,97328,603700.0
4,Germany,2021-12-15,82114224,6633666,106685,357022.0
3,France,2021-12-15,64979548,8136637,118357,551500.0
2,Italy,2021-12-15,59359900,5258886,135049,301316.0
1,United Kingdom,2021-12-15,66181585,10995311,147085,242900.0
0,Russia,2021-12-15,143989754,9899139,286023,17075400.0


### TOP 10 CASES DEFINING VARIABLE

In [11]:
top_10_total_cases = covid_today.sort_values(by='confirmed',ascending=False).head(10)
top_10_total_cases.reset_index(drop=True, inplace=True)
top_10_total_cases=top_10_total_cases.sort_values(by='confirmed')
x = top_10_total_cases['country']
y = top_10_total_cases['confirmed']

top_10_total_cases

Unnamed: 0,country,date,population,confirmed,deaths,sq_km_area
9,Czechia,2021-12-15,10618303,2350338,34697,78866.0
8,Netherlands,2021-12-15,17035938,2949045,20671,41526.0
7,Ukraine,2021-12-15,44222947,3746106,97328,603700.0
6,Poland,2021-12-15,38170712,3857085,89045,323250.0
5,Italy,2021-12-15,59359900,5258886,135049,301316.0
4,Spain,2021-12-15,46354321,5366128,88542,505992.0
3,Germany,2021-12-15,82114224,6633666,106685,357022.0
2,France,2021-12-15,64979548,8136637,118357,551500.0
1,Russia,2021-12-15,143989754,9899139,286023,17075400.0
0,United Kingdom,2021-12-15,66181585,10995311,147085,242900.0


### DEFINING LOWEST DEATHS IN EUROPE

In [12]:
# Creating a variable of Top 10 Deaths, and sorting values by descending order.
lowest_deaths = covid_today.sort_values(by='deaths',ascending=False).tail(10)

#Resetting index for decending values 
lowest_deaths.reset_index(drop=True, inplace=True)

#Sorting values with "total death"
lowest_deaths=lowest_deaths.sort_values(by='deaths')

x = lowest_deaths['country']
y = lowest_deaths['deaths']

lowest_deaths

Unnamed: 0,country,date,population,confirmed,deaths,sq_km_area
9,Holy See,2021-12-15,1000,27,0,0.4
7,Monaco,2021-12-15,38695,4114,36,1.5
8,Iceland,2021-12-15,335025,19708,36,103000.0
6,Liechtenstein,2021-12-15,37922,5512,68,160.0
5,San Marino,2021-12-15,33400,6719,94,61.0
4,Andorra,2021-12-15,76965,20136,134,468.0
3,Malta,2021-12-15,430835,40724,471,316.0
2,Luxembourg,2021-12-15,583455,94511,896,2586.0
1,Norway,2021-12-15,5305383,328747,1141,323877.0
0,Finland,2021-12-15,5523231,207033,1444,338145.0


### DEFINING LOWEST CASES IN EUROPE

In [13]:
lowest_cases = covid_today.sort_values(by='confirmed',ascending=False).tail(10)
lowest_cases.reset_index(drop=True, inplace=True)
lowest_cases=lowest_cases.sort_values(by='confirmed')
x = lowest_cases['country']
y = lowest_cases['confirmed']

lowest_cases

Unnamed: 0,country,date,population,confirmed,deaths,sq_km_area
9,Holy See,2021-12-15,1000,27,0,0.4
8,Monaco,2021-12-15,38695,4114,36,1.5
7,Liechtenstein,2021-12-15,37922,5512,68,160.0
6,San Marino,2021-12-15,33400,6719,94,61.0
5,Iceland,2021-12-15,335025,19708,36,103000.0
4,Andorra,2021-12-15,76965,20136,134,468.0
3,Malta,2021-12-15,430835,40724,471,316.0
2,Luxembourg,2021-12-15,583455,94511,896,2586.0
1,Albania,2021-12-15,2930187,204301,3144,28748.0
0,Finland,2021-12-15,5523231,207033,1444,338145.0


## Visualizing Data

# Total Deaths in Europe Plotting 

In [46]:
fig = px.bar(top_10_deaths, y="country", x="deaths", orientation='h')
fig.update_layout(plot_bgcolor = "white",
                  xaxis = dict(title = "No. of Deaths", linecolor = "white"),
                  yaxis = dict(title = " ", linecolor = "white"),
                  title={ 'text': "Highest Deaths in Europe",
                    'y':0.95,
                    'x':0.5})

fig.show()

In [49]:
fig = px.bar(top_10_deaths, y="country", x="deaths", orientation='h', color = "confirmed")
fig.update_layout(plot_bgcolor = "white",
                  xaxis = dict(title = "No. of Deaths", linecolor = "white"),
                  yaxis = dict(title = " ", linecolor = "white"),
                  title={ 'text': "Highest Deaths in Europe",
                    'y':0.95,
                    'x':0.5})

fig.show();


#getting help from https://plotly.com/python/bar-charts/

In [16]:
fig = px.bar(top_10_total_cases, y="country", x="confirmed", orientation='h')

fig.update_layout(plot_bgcolor = "white",
                  xaxis = dict(title = "Total COVID Cases", linecolor = "white"),
                  yaxis = dict(title = " ", linecolor = "white"),
                  title={ 'text': "Top 10 COVID Cases in Europe",
                    'y':0.95,
                    'x':0.5})

fig.show()

In [17]:
#trying shit

fig = px.histogram(top_10_total_cases, y="country", x="confirmed", orientation='h', color="deaths")

fig.update_layout(plot_bgcolor = "white",
                  xaxis = dict(title = "Total COVID Cases (in millions)", linecolor = "white"),
                  yaxis = dict(title = " ", linecolor = "white"),
                  title={ 'text': "Top 10 COVID Cases in Europe",
                    'y':0.95,
                    'x':0.5},
                  showlegend=False)

fig.show()

In [18]:
fig = px.bar(lowest_cases, y="country", x="confirmed", orientation='h', color = "country")

fig.update_layout(plot_bgcolor = "white",
                  showlegend=False,
                  xaxis = dict(title = "Total COVID Cases", linecolor = "white"),
                  yaxis = dict(title = " ", linecolor = "white"),
                  title={ 'text': "Lowest COVID Cases in Europe",
                    'y':0.95,
                    'x':0.5})

fig.show()

In [19]:
fig = px.bar(lowest_deaths, y="country", x="deaths", orientation='h', color="country")
fig.update_layout(plot_bgcolor = "white",
                  showlegend=False,
                  xaxis = dict(title = "Total No. of Deaths", linecolor = "white"),
                  yaxis = dict(title = " ", linecolor = "white"),
                  title={ 'text': "Lowest Deaths in Europe",
                    'y':0.95,
                    'x':0.5})

fig.show()

## Filtering Data for 2021

In [20]:
url1 = "https://covid-api.mmediagroup.fr/v1/history?continent=europe&status=deaths"
req1 = requests.get(url1)

df1 = json.loads(req1.text)
df1

df_test = pd.DataFrame()

In [21]:
with open("covid_death_cum.json", "w") as outfile: 

    json.dump(df1, outfile)

pandas.core.frame.DataFrame

In [22]:
# Obtain and save names of countries

#Changing the format of the dates
for country_name in df1.keys():
    for x,y in df1[country_name]['All']['dates'].items():
        new_row = {'country':country_name,'date': x, 'deaths':y}
        df_test = df_test.append(new_row,ignore_index = True)
        
df_test['date'] =  pd.to_datetime(df_test['date'], format = '%Y-%m-%d')

In [23]:
#Setting the start date as January 1, 2021

start_date = "2021-01-01"
after_start_date = df_test["date"] >= start_date
df_filter = df_test.loc[after_start_date]

In [24]:
test5 = df_filter.sort_values(by='deaths',ascending=True)

In [25]:
fig = px.area(test5, x="date", y="deaths", color="country")
fig.update_layout(plot_bgcolor = "white",
                  xaxis = dict(title = "Time", linecolor = "white"),
                  yaxis = dict(title = "Total COVID Deaths (in millions)", linecolor = "white"),
                  title={ 'text': "COVID Deaths in Europe in 2021",
                        'y':0.95,
                        'x':0.5})
fig.show()

## Past Cumulative Covid Data

In [26]:
covid_cum = json.loads(req1.text)

df_cum = pd.DataFrame()

# Obtain and save names of countries
for country_name in covid_cum.keys():
    for x,y in covid_cum[country_name]['All']['dates'].items():
        new_row = {'country':country_name,'date': x, 'deaths':y}
        df_cum = df_cum.append(new_row,ignore_index = True)
        
df_cum['date'] =  pd.to_datetime(df_cum['date'], format = '%Y-%m-%d')


In [27]:
backup_df_cum = df_cum
#df_cum = backup_df_cum

#creating a backup just in case data gets messed up

In [28]:
df_cum = df_cum.groupby(['country']).apply(lambda x: x.sort_values(["date"])).reset_index(drop=True)

#Grouping by country, sorting values through date

In [29]:
df_cum

Unnamed: 0,country,date,deaths
0,Albania,2020-01-22,0.0
1,Albania,2020-01-23,0.0
2,Albania,2020-01-24,0.0
3,Albania,2020-01-25,0.0
4,Albania,2020-01-26,0.0
...,...,...,...
29101,United Kingdom,2021-12-10,146255.0
29102,United Kingdom,2021-12-11,146387.0
29103,United Kingdom,2021-12-12,146439.0
29104,United Kingdom,2021-12-13,146477.0


In [30]:
df_cum['daily_deaths']=df_cum.groupby('country')['deaths'].diff().fillna(0)

#Grouping by country, taking a difference to reflect the faily death, and filling the null values with zero

In [31]:
#If we want to look at UK's results, we can use the following code to reflect that:

#df_uk = df_cum[(df_cum.country == 'United Kingdom')]

In [32]:
daily = df_cum['daily_deaths']

In [33]:
daily_total_death = df_cum.groupby(['date']).sum().reset_index()

In [34]:
daily_total_death.drop(columns = ['deaths'], inplace = True)

# Moving Average

In [35]:
#creating a list for moving average
moving_avg_list = []
 
#Initializing variables    
i = 0
    
#Creating a for loop to go through the prices for moving average for 5 days
for p in range(0,len(daily_total_death)):
    if i >= 5:
        moving_average = (daily_total_death['daily_deaths'][i-1] + daily_total_death['daily_deaths'][i-2] + daily_total_death['daily_deaths'][i-3] + daily_total_death['daily_deaths'][i-4] + daily_total_death['daily_deaths'][i-5]) / 5
        
    else:
        moving_average = np.nan
             
    moving_avg_list.append(moving_average)        
    i += 1        #Iterating through each price

In [36]:
#df.concat([df_cum, moving_avg_list], axis =1)
daily_total_death['roll_mean'] = moving_avg_list

In [37]:
pd.set_option('display.max_rows', None)

In [38]:
daily_total_death.head(300)


daily_total_death['daily_deaths'] = daily_total_death['daily_deaths'].abs()

In [39]:
fig = go.Figure()

fig = px.bar(daily_total_death, x="date", y="daily_deaths")

fig.update_layout(plot_bgcolor = "white",
                  showlegend=False,
                  xaxis = dict(title = "Total No. of Deaths", linecolor = "black"),
                  yaxis = dict(title = " ", linecolor = "black"),
                  title={ 'text': "Daily Deaths in Europe",
                    'y':0.95,
                    'x':0.5})


fig.show()

In [40]:
fig = go.Figure()

fig.add_trace(
    go.Bar(
        name = 'Total Daily Deaths',
        x= daily_total_death['date'],
        y= daily_total_death['daily_deaths']
    ))

fig.add_trace(
    go.Line(
        name = 'Rolling 5-day Mean',
        x= daily_total_death['date'],
        y= daily_total_death['roll_mean'],
        line_color = 'pink'
    ))

fig.update_layout(title={ 'text': "COVID Deaths in Europe in 2021",
                        'y':0.95,
                        'x':0.5},
    xaxis_title="Time",
    yaxis_title="Deaths",
    plot_bgcolor = "white",
    legend_title=" ")

fig.show()


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




In [41]:
fig = px.bar(daily_total_death, x="date", y="roll_mean")

fig.update_layout(plot_bgcolor = "white",
                  showlegend=False,
                  xaxis = dict(title = "Total No. of Deaths", linecolor = "black"),
                  yaxis = dict(title = " ", linecolor = "black"),
                  title={ 'text': "Moving Average Deaths in Europe",
                    'y':0.95,
                    'x':0.5})

fig.show()