In [None]:
###########
# IMPORTS #
###########
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
from IPython.display import Javascript
from datetime import datetime
from pandas.api.types import is_numeric_dtype
from google.colab import files
import os
import time
import getpass
import statistics
import math

In [None]:
# %%
######################
# IMPORT RAW DATASET #
######################
# (Raw dataset summary displayed at end of notebook)
# Location of the Our World in Data COVID-19 CSV that the whole notebook reads.
OWID_data_url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv'
# on_bad_lines='skip' replaces error_bad_lines=False, which was deprecated in
# pandas 1.3 and removed in pandas 2.0 (where it raises a TypeError).
OWID_dataRaw_df = pd.read_csv(OWID_data_url, on_bad_lines='skip')

# RAW DATA PARAMETERS
OWID_dataRaw_df_dimensions = OWID_dataRaw_df.shape
OWID_dataRaw_locations = OWID_dataRaw_df.location.unique()
OWID_dataRaw_df['date'] = pd.to_datetime(OWID_dataRaw_df['date'])
OWID_dates = OWID_dataRaw_df['date'].to_list()
# Take the maximum date instead of the date of the last row: the last row is
# only the newest one if the whole file happens to be sorted by date.
OWID_latest_date = OWID_dataRaw_df['date'].max().strftime('%b %d, %Y')


In [None]:
#%%
########################
# PRE-PROCESS RAW DATA #
########################
# Population threshold for the optional "remove small locations" filter below.
min_population = 1000000

# Build the pre-processed frame from the raw one in a single chain:
#   1. make sure 'date' is a datetime column,
#   2. use 'location' as the row index,
#   3. re-scale the two per-hundred vaccination columns to per-million
#      (x 10000) and rename them accordingly,
#   4. add 'real_population' as a copy of 'population'.
# To remove small locations, filter the result with
#   OWID_preProcessed_data_df[OWID_preProcessed_data_df.population >= min_population]
OWID_preProcessed_data_df = (
    OWID_dataRaw_df
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('location')
    .assign(
        total_vaccinations_per_hundred=lambda frame: frame['total_vaccinations_per_hundred'] * 10000,
        people_fully_vaccinated_per_hundred=lambda frame: frame['people_fully_vaccinated_per_hundred'] * 10000,
    )
    .rename(columns={
        "total_vaccinations_per_hundred": "total_vaccinations_per_million",
        "people_fully_vaccinated_per_hundred": "people_fully_vaccinated_per_million",
    })
    .assign(real_population=lambda frame: frame['population'])
)


def changepopulation(location, newpopulation):
  """Override a location's population and rescale its per-capita columns.

  Each per-capita column listed below is multiplied by
  old_population / newpopulation, and then both 'population' and
  'real_population' are set to newpopulation on every row of the location.
  OWID_preProcessed_data_df is modified in place.

  Parameters
  ----------
  location : str
      Index label ('location') of the rows to update.
  newpopulation : int or float
      Replacement population; must be positive.

  Raises
  ------
  ValueError
      If newpopulation is not positive.
  KeyError
      If location is not in the index, or a listed column is missing.
  """
  if newpopulation <= 0:
    raise ValueError("newpopulation must be positive, got %r" % (newpopulation,))

  # A boolean row mask plus a single .loc[rows, cols] assignment writes into
  # the frame itself. The previous chained form, df.loc[location][col] = ...,
  # assigned into a temporary copy and left the frame unchanged.
  rows = OWID_preProcessed_data_df.index == location
  if not rows.any():
    raise KeyError(location)

  per_capita_columns = [
    'total_cases_per_million',
    'new_cases_per_million',
    'new_cases_smoothed_per_million',
    'total_deaths_per_million',
    'new_deaths_per_million',
    'new_deaths_smoothed_per_million',
    'icu_patients_per_million',
    'hosp_patients_per_million',
    'weekly_icu_admissions_per_million',
    'weekly_hosp_admissions_per_million',
    'total_tests_per_thousand',
    'new_tests_per_thousand',
    'new_tests_smoothed_per_thousand',
    'total_vaccinations_per_million',
    'people_vaccinated_per_hundred',
    'people_fully_vaccinated_per_million',
    'total_boosters_per_hundred',
    'new_vaccinations_smoothed_per_million',
    'hospital_beds_per_thousand',
  ]

  old_population = OWID_preProcessed_data_df.loc[rows, 'population'].iloc[0]
  changeratio = old_population / newpopulation

  # .to_numpy() avoids label alignment on assignment; the 'location' index is
  # not unique (a location spans several rows), which makes alignment fragile.
  OWID_preProcessed_data_df.loc[rows, per_capita_columns] = (
      OWID_preProcessed_data_df.loc[rows, per_capita_columns].to_numpy() * changeratio)
  OWID_preProcessed_data_df.loc[rows, ['population', 'real_population']] = newpopulation


In [None]:
#%%
################################
# CREATE DATASETS TO BE USED #
################################
# Columns that make up the daily (smoothed) per-capita view.
perCapita_daily_columns = [
    "date",
    "new_cases_smoothed_per_million",
    "new_deaths_smoothed_per_million",
    "new_vaccinations_smoothed_per_million",
]
# Columns that make up the cumulative per-capita view.
perCapita_cumulative_columns = [
    "date",
    "total_cases_per_million",
    "total_deaths_per_million",
    "total_vaccinations_per_million",
    "people_fully_vaccinated_per_million",
]

# CREATE DATAFRAMES (column subsets of the pre-processed frame)
OWID_perCapita_daily_df = OWID_preProcessed_data_df.loc[:, perCapita_daily_columns]
OWID_perCapita_cumulative_df = OWID_preProcessed_data_df.loc[:, perCapita_cumulative_columns]

In [None]:
#%%
##############
# SAVE PLOTS #
##############

# Ask whether plots should be saved/downloaded. Any answer starting with "y"
# (case-insensitive, surrounding whitespace ignored) means yes. startswith()
# also copes with an empty answer, which used to raise IndexError via [0].
save_files = input("The latest date in the dataset is %s. Save/download plots? y/n: "
                   % OWID_latest_date).strip().lower().startswith('y')
if save_files:
  print("\nPlots will be saved/downloaded.")
else:
  print("Plots will not be saved/downloaded.")


In [None]:
#%%
#####################
# DATASET SUMMARIES #
#####################

# Format the size of this cell's output: send a JavaScript snippet to the
# notebook front end that calls google.colab.output.setIframeHeight with a
# maxHeight of 5000. NOTE(review): Colab-specific; presumably lets the output
# area grow up to 5000 px before scrolling -- confirm against the Colab docs.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 5000})'''))
# RAW DATASET
# Dimensions
def getdatasetdimensions():
  """Print the number of rows and columns of the raw dataset."""
  row_count, column_count = OWID_dataRaw_df_dimensions
  print("\nRAW DATASET DIMENSIONS:\n\n  Rows: %d\n  Columns: %d\n" % (row_count, column_count))
# latest date
def getlatestdate():
  """Print the latest date recorded for the raw dataset."""
  message = "RAW DATASET LATEST DATE:\n\n  %s\n" % OWID_latest_date
  print(message)
# Sample
def getsample():
  """Display the first 10 rows (all columns) of the raw dataset."""
  print("RAW DATASET SAMPLE: First 10 rows, all columns:\n\n")
  first_rows = OWID_dataRaw_df.head(n=10)
  display(first_rows)
# Locations
def getlocations():
  """Print every location of the raw dataset, comma-separated, 10 per group."""
  print("\n\n\nLOCATIONS INCLUDED IN RAW DATASET:")
  final_position = len(OWID_dataRaw_locations) - 1
  for position, location_name in enumerate(OWID_dataRaw_locations):
    # Blank lines before every group of 10 names.
    if position % 10 == 0:
      print("\n")
    # The last name ends the line; every other name is followed by ", ".
    print(location_name, end="\n" if position == final_position else ", ")

def getfields():
  """Print every column name of the raw dataset, comma-separated, 5 per group."""
  print("\n\nFIELDS INCLUDED IN RAW DATASET:")
  column_names = OWID_dataRaw_df.columns
  final_position = len(column_names) - 1
  for position, column_name in enumerate(column_names):
    # Blank lines before every group of 5 names.
    if position % 5 == 0:
      print("\n")
    # The last name ends the line; every other name is followed by ", ".
    print(column_name, end="\n" if position == final_position else ", ")

In [None]:
#%%
#################################
# Compare 2 states 1 field      #
#################################

def compare2states1field(state1, state2, field1, days):
  """Plot one field for two locations over their most recent rows.

  Parameters
  ----------
  state1, state2 : str
      Location names as found in the index of OWID_preProcessed_data_df.
  field1 : str
      Numeric column to plot.
  days : int
      Number of most recent rows to plot per location. A location with fewer
      rows is plotted in full.

  Raises
  ------
  ValueError
      If days is smaller than 1.
  KeyError
      If a location or the field is not present in the dataset.

  When the module-level flag save_files is true, the figure is written to
  plots/<state1>_<state2>_<field1>.png and passed to files.download.
  """
  if days < 1:
    raise ValueError("days must be at least 1, got %r" % (days,))

  curves = []
  for location in (state1, state2):
    # A list selector keeps the result a DataFrame even for a one-row location.
    recent = OWID_preProcessed_data_df.loc[[location]].tail(days)
    # Missing values repeat the previous value inside the window; gaps at the
    # very start of the window become 0.
    values = recent[field1].ffill().fillna(0).tolist()
    curves.append((location, recent['date'].tolist(), values))

  # plot
  fig, ax1 = plt.subplots(figsize=(15, 10))
  ax2 = ax1.twinx()
  # NOTE(review): the original pad and rotation values were lost to text
  # corruption; 20 and 45 are reconstructed.
  ax1.set_title('%s vs %s: %s' % (state1, state2, field1), fontsize=17, pad=20)
  ax1.set_ylabel("%s" % field1, fontsize=15, rotation='horizontal', ha='right', va='center')

  y_top = max(max(values) for _, _, values in curves) * 1.3
  ax1.set_ylim([0, y_top])
  ax2.set_ylim([0, y_top])  # the twin axis only mirrors the scale on the right
  ax1.set_xlim([min(dates[0] for _, dates, _ in curves),
                max(dates[-1] for _, dates, _ in curves)])

  # Major ticks every 1 months.
  for axis in (ax1, ax2):
    axis.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
    axis.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    axis.format_xdata = mdates.DateFormatter('%Y-%m')
  fig.autofmt_xdate(rotation=45)

  # Each location is drawn against its own dates so that the curves stay
  # aligned even when the two locations do not end on the same day.
  for (location, dates, values), color in zip(curves, ('limegreen', 'red')):
    ax1.plot(dates, values, color=color, label='%s, %s' % (field1, location))

  fig.legend()

  # save file
  if save_files:
    os.makedirs('plots', exist_ok=True)
    filepath = os.path.join('plots', '%s_%s_%s.png' % (state1, state2, field1))
    fig.savefig(filepath, bbox_inches='tight')
    files.download(filepath)

In [None]:
#%%
#################################
# Compare 2 states 2 fields     #
#################################

def compare2states2fields(state1, state2, field1, field2, days):
  """Plot two fields for two locations over their most recent rows.

  field1 is drawn on the left y-axis and field2 on the right y-axis.

  Parameters
  ----------
  state1, state2 : str
      Location names as found in the index of OWID_preProcessed_data_df.
  field1, field2 : str
      Numeric columns to plot.
  days : int
      Number of most recent rows to plot per location. A location with fewer
      rows is plotted in full.

  Raises
  ------
  ValueError
      If days is smaller than 1.
  KeyError
      If a location or a field is not present in the dataset.

  When the module-level flag save_files is true, the figure is written to
  plots/<state1>_<state2>_<field1>_<field2>.png and passed to files.download.
  """
  if days < 1:
    raise ValueError("days must be at least 1, got %r" % (days,))

  curves = []
  for location in (state1, state2):
    # A list selector keeps the result a DataFrame even for a one-row location.
    recent = OWID_preProcessed_data_df.loc[[location]].tail(days)
    # Missing values repeat the previous value inside the window; gaps at the
    # very start of the window become 0.
    curves.append({
      'location': location,
      'dates': recent['date'].tolist(),
      'field1': recent[field1].ffill().fillna(0).tolist(),
      'field2': recent[field2].ffill().fillna(0).tolist(),
    })

  # plot
  fig, ax1 = plt.subplots(figsize=(15, 10))
  ax2 = ax1.twinx()
  # NOTE(review): the original pad and rotation values were lost to text
  # corruption; 20 and 45 are reconstructed.
  ax1.set_title('%s vs %s: %s and %s' % (state1, state2, field1, field2), fontsize=17, pad=20)

  ax1.set_ylabel("%s" % field1, fontsize=15, rotation='horizontal', ha='right', va='center')
  ax1.set_ylim([0, max(max(curve['field1']) for curve in curves) * 1.3])
  ax1.set_xlim([min(curve['dates'][0] for curve in curves),
                max(curve['dates'][-1] for curve in curves)])

  # Major ticks every 1 months.
  for axis in (ax1, ax2):
    axis.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
    axis.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    axis.format_xdata = mdates.DateFormatter('%Y-%m')
  fig.autofmt_xdate(rotation=45)

  ax2.set_ylim([0, max(max(curve['field2']) for curve in curves) * 1.3])
  ax2.set_ylabel("%s" % field2, fontsize=15, rotation='horizontal', ha='left', va='center')

  # Each location is drawn against its own dates so that the curves stay
  # aligned even when the two locations do not end on the same day.
  # NOTE(review): the second field1 colour was unreadable in the original;
  # 'orange' is a reconstruction.
  for curve, color in zip(curves, ('limegreen', 'orange')):
    ax1.plot(curve['dates'], curve['field1'], color=color,
             label='%s, %s' % (field1, curve['location']))
  for curve, color in zip(curves, ('red', 'blue')):
    ax2.plot(curve['dates'], curve['field2'], color=color,
             label='%s, %s' % (field2, curve['location']))

  fig.legend()

  # save file
  if save_files:
    os.makedirs('plots', exist_ok=True)
    filepath = os.path.join('plots', '%s_%s_%s_%s.png' % (state1, state2, field1, field2))
    fig.savefig(filepath, bbox_inches='tight')
    files.download(filepath)

In [None]:
#%%
#################################
# Plot 1 state 2 fields         #
#################################

def onestate2fields(state1, field1, field2, days):
  """Plot two fields for one location over its most recent rows.

  field1 is drawn on the left y-axis and field2 on the right y-axis.

  Parameters
  ----------
  state1 : str
      Location name as found in the index of OWID_preProcessed_data_df.
  field1, field2 : str
      Numeric columns to plot.
  days : int
      Number of most recent rows to plot. A location with fewer rows is
      plotted in full.

  Raises
  ------
  ValueError
      If days is smaller than 1.
  KeyError
      If the location or a field is not present in the dataset.

  When the module-level flag save_files is true, the figure is written to
  plots/<state1>_<field1>_<field2>.png and passed to files.download.
  """
  if days < 1:
    raise ValueError("days must be at least 1, got %r" % (days,))

  # A list selector keeps the result a DataFrame even for a one-row location.
  recent = OWID_preProcessed_data_df.loc[[state1]].tail(days)
  dates = recent['date'].tolist()
  # Missing values repeat the previous value inside the window; gaps at the
  # very start of the window become 0.
  field1_values = recent[field1].ffill().fillna(0).tolist()
  field2_values = recent[field2].ffill().fillna(0).tolist()

  # plot
  fig, ax1 = plt.subplots(figsize=(15, 10))
  ax2 = ax1.twinx()
  # NOTE(review): the original pad and rotation values were lost to text
  # corruption; 20 and 45 are reconstructed.
  ax1.set_title('%s: %s and %s' % (state1, field1, field2), fontsize=17, pad=20)

  # ax1: first field (left axis)
  ax1.set_ylabel("%s" % field1, fontsize=15, rotation='horizontal', ha='right', va='center')
  ax1.set_ylim([0, max(field1_values) * 1.3])
  ax1.set_xlim([dates[0], dates[-1]])

  # Major ticks every 1 months.
  for axis in (ax1, ax2):
    axis.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
    axis.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    axis.format_xdata = mdates.DateFormatter('%Y-%m')
  fig.autofmt_xdate(rotation=45)

  # ax2: second field (right axis)
  ax2.set_ylim([0, max(field2_values) * 1.1])
  ax2.set_ylabel("%s" % field2, fontsize=15, rotation='horizontal', ha='left', va='center')

  ax1.plot(dates, field1_values, color='limegreen', label='%s, %s' % (field1, state1))
  ax2.plot(dates, field2_values, color='red', label='%s, %s' % (field2, state1))

  fig.legend()

  # save file
  if save_files:
    os.makedirs('plots', exist_ok=True)
    filepath = os.path.join('plots', '%s_%s_%s.png' % (state1, field1, field2))
    fig.savefig(filepath, bbox_inches='tight')
    files.download(filepath)

In [None]:
#%%
#################################
# Plot 1 state 1 field          #
#################################

def onestate1field(state1, field1, days):
  """Plot one field for one location over its most recent rows.

  Parameters
  ----------
  state1 : str
      Location name as found in the index of OWID_preProcessed_data_df.
  field1 : str
      Numeric column to plot.
  days : int
      Number of most recent rows to plot. A location with fewer rows is
      plotted in full.

  Raises
  ------
  ValueError
      If days is smaller than 1.
  KeyError
      If the location or the field is not present in the dataset.

  When the module-level flag save_files is true, the figure is written to
  plots/<state1>_<field1>.png and passed to files.download.
  """
  if days < 1:
    raise ValueError("days must be at least 1, got %r" % (days,))

  # A list selector keeps the result a DataFrame even for a one-row location.
  recent = OWID_preProcessed_data_df.loc[[state1]].tail(days)
  dates = recent['date'].tolist()
  # Missing values repeat the previous value inside the window; gaps at the
  # very start of the window become 0.
  field1_values = recent[field1].ffill().fillna(0).tolist()

  # plot
  fig, ax1 = plt.subplots(figsize=(15, 10))
  ax2 = ax1.twinx()
  # NOTE(review): the original pad and rotation values were lost to text
  # corruption; 20 and 45 are reconstructed.
  ax1.set_title('%s: %s' % (state1, field1), fontsize=17, pad=20)

  ax1.set_ylabel("%s" % field1, fontsize=15, rotation='horizontal', ha='right', va='center')
  y_top = max(field1_values) * 1.3
  ax1.set_ylim([0, y_top])
  ax2.set_ylim([0, y_top])  # the twin axis only mirrors the scale on the right

  ax1.set_xlim([dates[0], dates[-1]])
  # Major ticks every 1 months.
  ax1.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
  ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
  ax1.format_xdata = mdates.DateFormatter('%Y-%m')
  fig.autofmt_xdate(rotation=45)

  ax1.plot(dates, field1_values, color='limegreen', label='%s, %s' % (field1, state1))

  fig.legend()

  # save file
  if save_files:
    os.makedirs('plots', exist_ok=True)
    filepath = os.path.join('plots', '%s_%s.png' % (state1, field1))
    fig.savefig(filepath, bbox_inches='tight')
    files.download(filepath)

In [None]:
#%%
#################################
#         Instructions          #
#################################
# in order to get the list of fields, run:
# getfields()

# in order to get the list of countries, run:
#getlocations()

# in order to get the dataset dimensions, run:
# getdatasetdimensions()

# in order to get the latest date, run:
#getlatestdate()

# in order to get a sample of the dataset, run:
#getsample()


# You can change the population of a country by running:
# changepopulation(location, newpopulation)
# where:
# - location: the country name, in quotes, spelled exactly as in the output of "getlocations()" (see above). Example: "Moldova" (in quotes as here).
# - newpopulation: the new population, WITHOUT quotes. Example: 3500000 (without quotes)

# Working example:
#changepopulation("Moldova", 2597100)



# in order to plot two countries, one dimension, run:
# compare2states1field(state1, state2, field1, days)
# where:
# - state1: the first state, in quotes, spelled exactly as in the output of "getlocations()" (see above). Example: "United States" (in quotes as here).
# - state2: the second state, in quotes, spelled exactly as in the output of "getlocations()" (see above). Example: "Moldova" (in quotes).
# - field1: the field, in quotes, spelled exactly as in the output of "getfields()" (see above). Example: "new_cases_smoothed_per_million" (in quotes).
# - days: the number of most recent days to plot.

# Working example:
# compare2states1field("Moldova", "Germany", "new_cases_smoothed_per_million", 200)


# in order to plot two countries, two dimensions, run:
# compare2states2fields(state1, state2, field1, field2, days)
# where:
# - state1: the first state, in quotes, spelled exactly as in the output of "getlocations()" (see above). Example: "United States" (in quotes as here).
# - state2: the second state, in quotes, spelled exactly as in the output of "getlocations()" (see above). Example: "Moldova" (in quotes).
# - field1: the first field, in quotes, spelled exactly as in the output of "getfields()" (see above). Example: "new_cases_smoothed_per_million" (in quotes).
# - field2: the second field, in quotes, spelled exactly as in the output of "getfields()" (see above). Example: "new_cases_smoothed_per_million" (in quotes).
# - days: the number of most recent days to plot.

# Working example:
#compare2states2fields("Moldova", "Germany", "new_cases_smoothed_per_million", "positive_rate", 200)



# in order to plot one country, two dimensions, run:
# onestate2fields(state1, field1, field2, days)
# where:
# - state1: the state, in quotes, spelled exactly as in the output of "getlocations()" (see above). Example: "United States" (in quotes as here).
# - field1: the first field, in quotes, spelled exactly as in the output of "getfields()" (see above). Example: "new_cases_smoothed_per_million" (in quotes).
# - field2: the second field, in quotes, spelled exactly as in the output of "getfields()" (see above). Example: "new_cases_smoothed_per_million" (in quotes).
# - days: the number of most recent days to plot.

# Working example:
# onestate2fields("Moldova", "people_vaccinated_per_hundred", "new_cases_smoothed_per_million", 150)



# in order to plot one country, one field, run:
# onestate1field(state1, field1, days)
# where:
# - state1: the state, in quotes, spelled exactly as in the output of "getlocations()" (see above). Example: "United States" (in quotes as here).
# - field1: the field, in quotes, spelled exactly as in the output of "getfields()" (see above). Example: "new_cases_smoothed_per_million" (in quotes).
# - days: the number of most recent days to plot.

# Working example:
# onestate1field("Moldova", "people_vaccinated_per_hundred", 150)





In [None]:
#%%
#################################
#         Your own code         #
#################################

# changepopulation("Moldova", 2597100)
# onestate2fields("Moldova", "new_cases_smoothed_per_million", "people_vaccinated_per_hundred", 150)