In [8]:
'''
Notebook created by Helbert Arenas.
The purpose of the notebook is to explore data I found at
https://ourworldindata.org/grapher/full-list-total-tests-for-covid-19
The data has been collated from official sources by Our World in Data.
The notebook downloads the data source from GitHub.
Then:
1) it stores the data into a pandas dataframe
2) Proceeds to clean the date column
3) Creates a timestamp for each record
4) Creates a list of the geographic features for which the data exist, and prints the result
5) Allows the user to create a list of countries to compare and to choose the variable to compare.
6) Displays a chart presenting the evolution of the selected variable for the selected countries.
'''
import pandas as pd
import psycopg2
import datetime
import numpy as np
import plotly.graph_objects as go
import requests
import io
import time

def clean_date_format(st_date):
    """Return `st_date` with every '-' separator swapped for '/'.

    Example: '2020-03-13' becomes '2020/03/13'.
    """
    # Splitting on '-' and re-joining with '/' replaces every occurrence.
    return '/'.join(st_date.split('-'))

# Download the Our World in Data COVID-19 dataset as raw CSV bytes.
url="https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv"
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of letting an error
# page be parsed as if it were the CSV.
response = requests.get(url, timeout=60)
response.raise_for_status()
s=response.content
df_covid=pd.read_csv(io.StringIO(s.decode('utf-8')))


def date_to_timestamp(x):
    """Convert a 'YYYY/MM/DD' string into a POSIX timestamp (float seconds).

    time.mktime interprets the parsed date as midnight in the local time zone
    of the machine running the notebook, so the value is not the same on
    machines configured with different time zones.
    """
    return time.mktime(datetime.datetime.strptime(x, "%Y/%m/%d").timetuple())


# Alias kept so that existing references to `l_clean_date_format` keep working.
l_clean_date_format = clean_date_format

# `date_2` holds the date with '/' separators; `timestamp` is derived from it.
df_covid['date_2'] = df_covid['date'].apply(l_clean_date_format)
df_covid['timestamp'] = df_covid['date_2'].apply(date_to_timestamp)





In [9]:
# Size of the dataset as a (number of rows, number of columns) tuple.
df_covid.shape

(15756, 18)

In [10]:
# Preview the first five rows to see what the data looks like.
df_covid.head(5)

Unnamed: 0,iso_code,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,total_deaths_per_million,new_deaths_per_million,total_tests,new_tests,total_tests_per_thousand,new_tests_per_thousand,tests_units,date_2,timestamp
0,ABW,Aruba,2020-03-13,2,2,0,0,18.733,18.733,0.0,0.0,,,,,,2020/03/13,1584054000.0
1,ABW,Aruba,2020-03-20,4,2,0,0,37.465,18.733,0.0,0.0,,,,,,2020/03/20,1584659000.0
2,ABW,Aruba,2020-03-24,12,8,0,0,112.395,74.93,0.0,0.0,,,,,,2020/03/24,1585004000.0
3,ABW,Aruba,2020-03-25,17,5,0,0,159.227,46.831,0.0,0.0,,,,,,2020/03/25,1585091000.0
4,ABW,Aruba,2020-03-26,19,2,0,0,177.959,18.733,0.0,0.0,,,,,,2020/03/26,1585177000.0


In [11]:
# Collect the distinct values of the `location` column and print one per line,
# so the exact spelling to use when selecting countries can be looked up.
ls_location = df_covid['location'].unique().tolist()
for location_name in ls_location:
    print(location_name)

Aruba
Afghanistan
Angola
Anguilla
Albania
Andorra
United Arab Emirates
Argentina
Armenia
Antigua and Barbuda
Australia
Austria
Azerbaijan
Burundi
Belgium
Benin
Bonaire Sint Eustatius and Saba
Burkina Faso
Bangladesh
Bulgaria
Bahrain
Bahamas
Bosnia and Herzegovina
Belarus
Belize
Bermuda
Bolivia
Brazil
Barbados
Brunei
Bhutan
Botswana
Central African Republic
Canada
Switzerland
Chile
China
Cote d'Ivoire
Cameroon
Democratic Republic of Congo
Congo
Colombia
Comoros
Cape Verde
Costa Rica
Cuba
Curacao
Cayman Islands
Cyprus
Czech Republic
Germany
Djibouti
Dominica
Denmark
Dominican Republic
Algeria
Ecuador
Egypt
Eritrea
Western Sahara
Spain
Estonia
Ethiopia
Finland
Fiji
Falkland Islands
France
Faeroe Islands
Gabon
United Kingdom
Georgia
Guernsey
Ghana
Gibraltar
Guinea
Gambia
Guinea-Bissau
Equatorial Guinea
Greece
Grenada
Greenland
Guatemala
Guam
Guyana
Hong Kong
Honduras
Croatia
Haiti
Hungary
Indonesia
Isle of Man
India
Ireland
Iran
Iraq
Iceland
Israel
Italy
Jamaica
Jersey
Jordan
Japan
Kazakhs

In [13]:
# display comparison
import plotly.graph_objects as go
import plotly.express as px
import random

# Countries to compare; each name must match a value of the `location` column.
comparison={
    "countries":[ "Chile","Peru","Colombia","France", "Italy", "Spain"]
}
#"Panama", "Colombia", "France", "Argentina", "Italy"
# One colour per trace. The palette has as many entries as the default country
# list so no two of those countries share a colour; a longer country list
# wraps around and reuses colours from the start.
ls_colors=['black', 'blue', 'red', 'green','crimson', 'darkorange']

# Columns that can be assigned to `variable_to_visualize`.
ls_variables = ['total_deaths_per_million', 'total_cases_per_million', 'new_cases_per_million', 'new_deaths_per_million', 'total_tests_per_thousand', 'new_tests_per_thousand']

variable_to_visualize = 'total_tests_per_thousand'
ls_scatters = []  # not used by this cell; kept so the name stays defined
fig = go.Figure()

# Loop-invariant: a row is plottable when the variable is present and non-zero.
# The explicit notna() matters because `NaN != 0` evaluates to True, so the
# zero filter alone would keep rows with missing values.
has_value = df_covid[variable_to_visualize].notna() & (df_covid[variable_to_visualize] != 0)

for i, c in enumerate(comparison["countries"]):

    df_subset = df_covid[(df_covid['location']==c) & has_value]

    # A misspelt country, or one with no data for this variable, would
    # otherwise add an empty trace that only shows up as a legend entry.
    if df_subset.empty:
        print("No '{}' data for {}; skipping.".format(variable_to_visualize, c))
        continue

    # Use parsed datetimes for x so Plotly builds a chronological date axis;
    # the slash-separated strings in `date_2` are not guaranteed to be
    # recognised as dates and could be laid out as unordered categories.
    fig.add_trace(go.Scatter(x=pd.to_datetime(df_subset['date']), y=df_subset[variable_to_visualize],
                            marker_color=ls_colors[i % len(ls_colors)],
                            opacity=0.6,
                            mode="markers",
                            name= c , connectgaps = False))


fig.update_layout (title = variable_to_visualize,
                   xaxis_title = 'date',
                   yaxis_title = variable_to_visualize)
fig.show()