# COVID API Data Retrieval (https://covid-api.com/)
## Imports
### Importing necessary libraries & modules


In [1]:
import requests
import datetime
from datetime import timedelta
import pandas as pd
from pandas import json_normalize

# API connectors
## Here I define the functions I will be using to connect to the COVID API
### Creating a function to call the COVID API when passed in parameters

In [2]:
def get_data(url, params=None, timeout=30):
    """Send a GET request and return the decoded JSON body.

    Args:
        url: Full URL of the endpoint to call.
        params: Optional dict of query-string parameters.
        timeout: Seconds to wait for the server before giving up. It is
            forwarded to ``requests.get`` so that one stalled connection
            cannot block a long series of requests indefinitely.

    Returns:
        The parsed JSON payload when the server answers with HTTP 200,
        otherwise a two-item list ``[status_code, response_text]``.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    response = requests.get(url, params=params, timeout=timeout)
    if response.status_code != 200:
        # Callers receive the raw status and body so they can inspect the failure.
        return [response.status_code, response.text]
    return response.json()

### Setting the base url to be used for all requests:

In [3]:
# Root of every endpoint URL; the request helpers append the endpoint path to it.
base_url = 'https://covid-api.com/api/'

### Getting Regions Data

In [4]:
def get_all_regions():
    """Fetch the region list from the API's ``regions`` endpoint.

    Returns:
        The value stored under the ``'data'`` key of the response payload.
    """
    regions_url = f"{base_url}regions"
    payload = get_data(regions_url)
    return payload['data']

### Getting Provinces Data

In [5]:
def get_all_provinces_by_iso(iso):
    """Request the provinces of one region from the ``provinces/<iso>`` endpoint.

    Args:
        iso: ISO code of the region, inserted into the request URL.

    Returns:
        Whatever ``get_data`` returns for the request. Note that the payload
        is passed back whole; its ``'data'`` key is not extracted here.
    """
    query = {'per_page': 250}
    return get_data(f"{base_url}provinces/{iso}", params=query)

### Get Reports Total Worldwide for ISO and/or date

In [6]:
def get_reports_total(params=None):
    """Query the ``reports/total`` endpoint.

    Args:
        params: Optional dict of query-string parameters for the request.

    Returns:
        The value stored under the ``'data'`` key of the response payload.
    """
    payload = get_data(f"{base_url}reports/total", params=params)
    return payload['data']

In [7]:
def get_reports_total_for_iso(iso):
    """Return the ``reports/total`` data filtered by ISO code.

    Args:
        iso: Value sent as the ``iso`` query parameter.
    """
    return get_reports_total({'iso': iso})

In [8]:
def get_reports_total_for_date(date):
    """Return the ``reports/total`` data filtered by date.

    Args:
        date: Value sent as the ``date`` query parameter.
    """
    return get_reports_total({'date': date})

In [9]:
def get_reports_total_for_date_and_iso(date, iso):
    """Return the ``reports/total`` data filtered by both date and ISO code.

    Args:
        date: Value sent as the ``date`` query parameter.
        iso: Value sent as the ``iso`` query parameter.
    """
    query = {'date': date, 'iso': iso}
    return get_reports_total(query)

In [10]:
def get_reports_total_worldwide():
    """Return the ``reports/total`` data with no filter parameters applied."""
    worldwide_totals = get_reports_total()
    return worldwide_totals

### Get Reports for Country/Region by Date


In [11]:
def get_reports_total_for_province(date, iso, region_name=None, region_province=None):
    """Query the ``reports`` endpoint for one date and ISO code.

    Args:
        date: Value sent as the ``date`` query parameter.
        iso: Value sent as the ``iso`` query parameter.
        region_name: Optional region name filter; omitted from the request
            when falsy.
        region_province: Optional province filter; omitted from the request
            when falsy.

    Returns:
        The value stored under the ``'data'`` key of the response payload.
    """
    params = {'date': date, 'iso': iso}

    if region_name:
        # The reports endpoint names this filter `region_name`; sending it as
        # `region` meant the filter was not applied.
        params['region_name'] = region_name
    if region_province:
        params['region_province'] = region_province

    payload = get_data(f"{base_url}reports", params=params)
    return payload['data']

# User selectors 
## Now that the functions used to call the API are defined, I build a selector to help us retrieve the necessary data before saving it for analysis:

In [12]:
def display_sorted_regions(regions):
    """Print a numbered, alphabetically ordered list of region names.

    Args:
        regions: Iterable of mappings that each have a ``'name'`` key.

    The numbering starts at 1.
    """
    names = sorted(entry['name'] for entry in regions)
    print("Available countries:")
    for position, name in enumerate(names, start=1):
        print(f"{position}. {name}")

In [13]:
def validate_selected_regions(regions, selected_countries_numbers):
    """Check that every selected number is a valid position in ``regions``.

    Args:
        regions: Sequence of available regions; only its length is used.
        selected_countries_numbers: Iterable of 1-based positions to check.

    Returns:
        ``selected_countries_numbers`` unchanged when every entry lies between
        1 and ``len(regions)``; otherwise ``None`` after printing an error.
    """
    region_count = len(regions)
    try:
        in_range = all(
            1 <= number <= region_count for number in selected_countries_numbers
        )
    except (TypeError, ValueError):
        # Comparing a non-numeric entry with an int raises TypeError, so it
        # has to be caught here for the message below to ever be shown.
        print("Error: Please enter only numbers separated by commas.")
        return None
    except AttributeError:
        print("Error: Please check your list format.")
        return None

    if not in_range:
        print(f"Error: Please enter numbers between 1 and {region_count}.")
        return None
    return selected_countries_numbers

In [14]:
def get_selected_regions_iso_list(regions, selected_numbers):
    """Translate 1-based menu positions into ISO codes.

    Args:
        regions: Iterable of mappings with ``'name'`` and ``'iso'`` keys.
        selected_numbers: Iterable of 1-based positions in the list of
            regions sorted by name.

    Returns:
        A list holding the ``'iso'`` value of each selected region, in the
        order the positions were given.
    """
    ordered = sorted(regions, key=lambda entry: entry['name'])
    return [ordered[position - 1]['iso'] for position in selected_numbers]

# COVID-19 API Request Selections
### Displaying list of countries sorted alphabetically so you can select the countries to be included in your report


In [15]:
# Fetch the region list once; the selection cells below reuse `regions`.
regions = get_all_regions()

In [16]:
# Print the numbered, name-sorted menu that the selection below refers to.
display_sorted_regions(regions)

Available countries:
1. Afghanistan
2. Albania
3. Algeria
4. Andorra
5. Angola
6. Antigua and Barbuda
7. Argentina
8. Armenia
9. Aruba
10. Australia
11. Austria
12. Azerbaijan
13. Bahamas
14. Bahrain
15. Bangladesh
16. Barbados
17. Belarus
18. Belgium
19. Belize
20. Benin
21. Bhutan
22. Bolivia
23. Bosnia and Herzegovina
24. Botswana
25. Brazil
26. Brunei
27. Bulgaria
28. Burkina Faso
29. Burma
30. Burundi
31. Cabo Verde
32. Cambodia
33. Cameroon
34. Canada
35. Cayman Islands
36. Central African Republic
37. Chad
38. Channel Islands
39. Chile
40. China
41. Colombia
42. Comoros
43. Congo (Brazzaville)
44. Congo (Kinshasa)
45. Costa Rica
46. Cote d'Ivoire
47. Croatia
48. Cruise Ship
49. Cuba
50. Curacao
51. Cyprus
52. Czechia
53. Denmark
54. Diamond Princess
55. Djibouti
56. Dominica
57. Dominican Republic
58. Ecuador
59. Egypt
60. El Salvador
61. Equatorial Guinea
62. Eritrea
63. Estonia
64. Eswatini
65. Ethiopia
66. Faroe Islands
67. Fiji
68. Finland
69. France
70. French Guiana
71. Ga

### Space for you to write the comma separated list of numbers of your selection
#### Please note: this must be a list of integers - replace the numbers below with your selection. If you want to select only one country, it must still be in list form, e.g. for England: selected_countries = [209]
#### In the example below I have selected a single country, number 203 (Turkey, as shown in the output further down)

In [17]:
# 1-based positions taken from the numbered list printed above.
selected_countries = [203]

### Run this function to ensure that your list is a valid set of values before proceeding

In [18]:
# validate_selected_regions hands the list back when every number is in range;
# when it prints an error instead, `selected_regions` ends up as None.
selected_regions = validate_selected_regions(regions, selected_countries)

### Finally, run this to generate your list of ISO codes, this will be used for data retrieval once you have selected your desired dates

In [19]:
# Map the validated menu positions to ISO codes for the report requests below.
selected_isos = get_selected_regions_iso_list(regions, selected_regions)

## Now we need to select the dates for us to retrieve information from - the API accepts requests in the format yyyy-mm-dd

In [20]:
def convert_start_end_to_datetime(start_day, start_month, start_year, end_day, end_month, end_year):
    """Build the start and end ``datetime.date`` objects from their parts.

    Args:
        start_day, start_month, start_year: Components of the start date.
        end_day, end_month, end_year: Components of the end date.

    Returns:
        A ``(start_date, end_date)`` tuple, or ``None`` (after printing a
        message) when either set of components is not a valid calendar date.
    """
    try:
        start_date = datetime.date(year=start_year, month=start_month, day=start_day)
        end_date = datetime.date(year=end_year, month=end_month, day=end_day)
    except ValueError:
        print("Start date and end date are not valid.")
        return None
    return start_date, end_date

In [21]:
def get_dates_list_between_start_and_end(start_date, end_date):
    """List every day from ``start_date`` to ``end_date``, both inclusive.

    Args:
        start_date: First date of the range.
        end_date: Last date of the range.

    Returns:
        A list of consecutive dates, one per day.

    Raises:
        ValueError: If ``start_date`` is later than ``end_date``.
    """
    if start_date > end_date:
        raise ValueError("start_date must be before or equal to end_date")

    day_count = (end_date - start_date).days + 1
    one_day = timedelta(days=1)

    dates = []
    current = start_date
    for _ in range(day_count):
        dates.append(current)
        current = current + one_day
    return dates

In [22]:
def date_generator(start_date, end_date):
    """Return the list of dates in the range, printing any range error.

    Args:
        start_date: First date of the range.
        end_date: Last date of the range.

    Returns:
        The list produced by ``get_dates_list_between_start_and_end``, or
        ``None`` when that call raises ``ValueError`` (whose message is
        printed).
    """
    try:
        dates_list = get_dates_list_between_start_and_end(start_date, end_date)
    except ValueError as error:
        print(error)
        return None
    return dates_list

### Insert your start and end dates for the desired data
#### The COVID-19 API data starts on March 17, 2020

In [23]:
# Components of the first date to request; combined into a date object below.
start_day = 1
start_month = 1
start_year = 2020

In [24]:
# Components of the last date to request (the range is inclusive).
end_day = 1
end_month = 1
end_year = 2021

In [25]:
# NOTE: convert_start_end_to_datetime returns None for an invalid date, in
# which case this tuple unpacking raises TypeError.
start_date, end_date = convert_start_end_to_datetime(start_day, start_month, start_year, end_day, end_month, end_year)

In [26]:
# One entry per day in the range; None if start_date is after end_date.
dates = date_generator(start_date, end_date)

## Performing the API requests for the selected parameters

In [27]:
# One API request per (ISO code, day) pair; each result is stored as its own
# entry in `reports`, grouped by ISO code and ordered by date within a group.
reports = []

for iso in selected_isos:
    reports.extend(
        get_reports_total_for_province(day.strftime("%Y-%m-%d"), iso)
        for day in dates
    )

In [28]:
# Turn every report into a flat DataFrame: the nested 'region' records are
# expanded into their own columns and placed next to the remaining fields.
dataframes = []

for report in reports:
    dataframe = pd.DataFrame(report)
    # A report without a 'region' column (e.g. an empty one) has nothing to
    # flatten. Skipping it explicitly, rather than swallowing KeyError, keeps
    # any unrelated KeyError visible.
    if 'region' not in dataframe.columns:
        continue
    region_dataframe = json_normalize(dataframe['region'])
    data = pd.concat([dataframe.drop('region', axis=1), region_dataframe], axis=1)
    dataframes.append(data)

In [29]:
# Stack the per-report frames into one table with a fresh 0..n-1 index.
combined_df = pd.concat(dataframes, ignore_index=True)
# Bare expression so the notebook renders the resulting table.
combined_df

Unnamed: 0,date,confirmed,deaths,recovered,confirmed_diff,deaths_diff,recovered_diff,last_update,active,active_diff,fatality_rate,iso,name,province,lat,long,cities
0,2020-03-11,1,0,0,0,0,0,2020-03-11 03:33:05,1,0,0.0000,TUR,Turkey,,38.9637,35.2433,[]
1,2020-03-12,1,0,0,0,0,0,2020-03-11 03:33:05,1,0,0.0000,TUR,Turkey,,38.9637,35.2433,[]
2,2020-03-13,5,0,0,4,0,0,2020-03-11 20:00:00,5,4,0.0000,TUR,Turkey,,38.9637,35.2433,[]
3,2020-03-14,5,0,0,0,0,0,2020-03-13 23:33:06,5,0,0.0000,TUR,Turkey,,38.9637,35.2433,[]
4,2020-03-15,6,0,0,1,0,0,2020-03-15 18:20:18,6,1,0.0000,TUR,Turkey,,38.9637,35.2433,[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,2020-12-28,2162775,20135,2037433,15197,257,22203,2020-12-29 05:22:37,105207,-7263,0.0093,TUR,Turkey,,38.9637,35.2433,[]
293,2020-12-29,2178580,20388,2058437,15805,253,21004,2020-12-30 05:22:34,99755,-5452,0.0094,TUR,Turkey,,38.9637,35.2433,[]
294,2020-12-30,2194272,20642,2078629,15692,254,20192,2020-12-31 05:22:49,95001,-4754,0.0094,TUR,Turkey,,38.9637,35.2433,[]
295,2020-12-31,2208652,20881,2100650,14380,239,22021,2021-01-01 05:23:07,87121,-7880,0.0095,TUR,Turkey,,38.9637,35.2433,[]


## Saving the data to csv

In [30]:
def generate_file_name(start_date, end_date, selected_isos):
    """Build the base name (without extension) for the exported file.

    Args:
        start_date: First date of the range; formatted as ``dd_mm_YYYY``.
        end_date: Last date of the range; formatted as ``dd_mm_YYYY``.
        selected_isos: Iterable of ISO code strings.

    Returns:
        The start date, the end date and the ISO codes, all joined with
        underscores.
    """
    date_format = "%d_%m_%Y"
    start_part = start_date.strftime(date_format)
    end_part = end_date.strftime(date_format)
    iso_part = '_'.join(selected_isos)
    return '_'.join((start_part, end_part, iso_part))

In [31]:
# Base name built from the date range and the ISO codes; '.csv' is appended on save.
file_name = generate_file_name(start_date, end_date, selected_isos)

In [32]:
# Write the combined table without the index column; the path is relative to
# the notebook's working directory.
combined_df.to_csv(rf'../4_integrated_csv_files/covid_19_api_data/{file_name}.csv', index=False)