In [31]:
import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
from pprint import pprint
import json
from scipy.stats import linregress

# Load the COVID-19 dataset from the project's Data_Files folder.
covid_csv_path = 'Data_Files/owid-covid-data.csv'
data = pd.read_csv(covid_csv_path)

# Preview the first rows to confirm the file was parsed as expected.
data.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,AFG,Asia,Afghanistan,2020-01-03,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
1,AFG,Asia,Afghanistan,2020-01-04,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
2,AFG,Asia,Afghanistan,2020-01-05,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
3,AFG,Asia,Afghanistan,2020-01-06,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
4,AFG,Asia,Afghanistan,2020-01-07,,0.0,,,0.0,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,


In [32]:
# Summarise the raw frame: row count, column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 336078 entries, 0 to 336077
Data columns (total 67 columns):
 #   Column                                      Non-Null Count   Dtype  
---  ------                                      --------------   -----  
 0   iso_code                                    336078 non-null  object 
 1   continent                                   320099 non-null  object 
 2   location                                    336078 non-null  object 
 3   date                                        336078 non-null  object 
 4   total_cases                                 298424 non-null  float64
 5   new_cases                                   326839 non-null  float64
 6   new_cases_smoothed                          325576 non-null  float64
 7   total_deaths                                277172 non-null  float64
 8   new_deaths                                  326894 non-null  float64
 9   new_deaths_smoothed                         325664 non-null  float64
 

In [34]:
# Columns retained for the analysis, in the order they should appear in the working frame.
selected_columns = [
    'location',
    'date',
    'total_cases',
    'new_cases',
    'total_deaths',
    'new_deaths',
    'total_cases_per_million',
    'new_cases_per_million',
    'total_deaths_per_million',
    'new_deaths_per_million',
    'icu_patients_per_million',
    'hosp_patients_per_million',
    'total_vaccinations_per_hundred',
    'people_vaccinated_per_hundred',
    'people_fully_vaccinated_per_hundred',
    'total_boosters_per_hundred',
    'median_age',
    'aged_65_older',
    'aged_70_older',
    'female_smokers',
    'male_smokers',
    'human_development_index',
]

# Narrow the raw frame down to the selected columns only.
covid19_data = data[selected_columns]

covid19_data.head()

Unnamed: 0,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,total_deaths_per_million,new_deaths_per_million,...,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,median_age,aged_65_older,aged_70_older,female_smokers,male_smokers,human_development_index
0,Afghanistan,2020-01-03,,0.0,,0.0,,0.0,,0.0,...,,,,,18.6,2.581,1.337,,,0.511
1,Afghanistan,2020-01-04,,0.0,,0.0,,0.0,,0.0,...,,,,,18.6,2.581,1.337,,,0.511
2,Afghanistan,2020-01-05,,0.0,,0.0,,0.0,,0.0,...,,,,,18.6,2.581,1.337,,,0.511
3,Afghanistan,2020-01-06,,0.0,,0.0,,0.0,,0.0,...,,,,,18.6,2.581,1.337,,,0.511
4,Afghanistan,2020-01-07,,0.0,,0.0,,0.0,,0.0,...,,,,,18.6,2.581,1.337,,,0.511


In [35]:
# Map the original snake_case column names to the names used in the rest of the notebook.
new_column_names = {
    # identifiers
    'location': 'Location',
    'date': 'Date',
    # absolute counts
    'total_cases': 'totalCases',
    'new_cases': 'newCases',
    'total_deaths': 'totalDeaths',
    'new_deaths': 'newDeaths',
    # per-million figures
    'total_cases_per_million': 'totalCasesPerMillion',
    'new_cases_per_million': 'newCasesPerMillion',
    'total_deaths_per_million': 'totalDeathsPerMillion',
    'new_deaths_per_million': 'newDeathsPerMillion',
    'icu_patients_per_million': 'icuPatientsPerMillion',
    'hosp_patients_per_million': 'hospitalPatientsPerMillion',
    # vaccination figures
    'total_vaccinations_per_hundred': 'totalVaccinationsPerHundred',
    'people_vaccinated_per_hundred': 'peopleVaccinatedPerHundred',
    'people_fully_vaccinated_per_hundred': 'peopleFullyVaccinatedPerHundred',
    'total_boosters_per_hundred': 'totalBoostersPerHundred',
    # remaining attributes
    'median_age': 'medianAge',
    'aged_65_older': 'aged65+',
    'aged_70_older': 'aged70+',
    'female_smokers': 'femaleSmokers',
    'male_smokers': 'maleSmokers',
    'human_development_index': 'humanDevelopmentIndex',
}

# rename() returns a new frame; labels missing from the mapping are left unchanged.
covid19_data = covid19_data.rename(new_column_names, axis='columns')
covid19_data.head()

Unnamed: 0,Location,Date,totalCases,newCases,totalDeaths,newDeaths,totalCasesPerMillion,newCasesPerMillion,totalDeathsPerMillion,newDeathsPerMillion,...,totalVaccinationsPerHundred,peopleVaccinatedPerHundred,peopleFullyVaccinatedPerHundred,totalBoostersPerHundred,medianAge,aged65+,aged70+,femaleSmokers,maleSmokers,humanDevelopmentIndex
0,Afghanistan,2020-01-03,,0.0,,0.0,,0.0,,0.0,...,,,,,18.6,2.581,1.337,,,0.511
1,Afghanistan,2020-01-04,,0.0,,0.0,,0.0,,0.0,...,,,,,18.6,2.581,1.337,,,0.511
2,Afghanistan,2020-01-05,,0.0,,0.0,,0.0,,0.0,...,,,,,18.6,2.581,1.337,,,0.511
3,Afghanistan,2020-01-06,,0.0,,0.0,,0.0,,0.0,...,,,,,18.6,2.581,1.337,,,0.511
4,Afghanistan,2020-01-07,,0.0,,0.0,,0.0,,0.0,...,,,,,18.6,2.581,1.337,,,0.511


In [36]:
# Parse 'Date' into datetime64 values. The monthly grouping in process_covid19_data
# uses the .dt accessor, which is only available on datetime-like columns, so this
# conversion has to run on a fresh kernel (it must not stay commented out).
# assign() builds a new frame, which keeps the cell safe to re-run.
covid19_data = covid19_data.assign(Date=pd.to_datetime(covid19_data['Date']))
covid19_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 336078 entries, 0 to 336077
Data columns (total 22 columns):
 #   Column                           Non-Null Count   Dtype         
---  ------                           --------------   -----         
 0   Location                         336078 non-null  object        
 1   Date                             336078 non-null  datetime64[ns]
 2   totalCases                       298424 non-null  float64       
 3   newCases                         326839 non-null  float64       
 4   totalDeaths                      277172 non-null  float64       
 5   newDeaths                        326894 non-null  float64       
 6   totalCasesPerMillion             298424 non-null  float64       
 7   newCasesPerMillion               326839 non-null  float64       
 8   totalDeathsPerMillion            277172 non-null  float64       
 9   newDeathsPerMillion              326894 non-null  float64       
 10  icuPatientsPerMillion            37235 non-n

In [62]:
def process_covid19_data(covid19_data, country):
    """Aggregate the COVID-19 metrics of the selected location(s) per calendar month.

    Parameters
    ----------
    covid19_data : pandas.DataFrame
        Frame with a 'Location' column, a 'Date' column (datetime values or
        date strings) and the metric columns listed in the aggregation below.
    country : str or iterable of str
        Location name(s) to keep. A single name may be passed as a plain string.

    Returns
    -------
    pandas.DataFrame
        One row per month, indexed by a 'YYYY-MM' string (index name 'Date'),
        holding the aggregated value of every metric column.
    """
    # Series.isin rejects a bare string, so wrap a single name into a list.
    locations = [country] if isinstance(country, str) else list(country)

    # Keep only the rows whose 'Location' is one of the requested names.
    filtered_covid19_data = covid19_data[covid19_data['Location'].isin(locations)]

    # The .dt accessor only exists on datetime-like data. Converting here keeps the
    # function usable when 'Date' still holds strings (as read from the CSV);
    # values that are already datetimes pass through unchanged.
    month_key = pd.to_datetime(filtered_covid19_data['Date']).dt.strftime('%Y-%m')

    # NOTE(review): 'sum' adds up every row that falls into the month. For columns
    # that look like running totals (totalCases, totalDeaths, the *PerMillion /
    # *PerHundred totals) or per-location constants (aged65+, aged70+, smokers)
    # this scales with the number of rows rather than giving a month-end value.
    # Confirm this is intended before relying on those columns.
    grouped = filtered_covid19_data.groupby(month_key).agg({
        'totalCases': 'sum',
        'newCases': 'sum',
        'totalDeaths': 'sum',
        'newDeaths': 'sum',
        'totalCasesPerMillion': 'sum',
        'newCasesPerMillion': 'sum',
        'totalDeathsPerMillion': 'sum',
        'newDeathsPerMillion': 'sum',
        'icuPatientsPerMillion': 'sum',
        'hospitalPatientsPerMillion': 'sum',
        'totalVaccinationsPerHundred': 'sum',
        'peopleVaccinatedPerHundred': 'sum',
        'peopleFullyVaccinatedPerHundred': 'sum',
        'totalBoostersPerHundred': 'sum',
        'medianAge': 'mean',
        'aged65+': 'sum',
        'aged70+': 'sum',
        'femaleSmokers': 'sum',
        'maleSmokers': 'sum',
        'humanDevelopmentIndex': 'mean'
    })

    return grouped


# Build the monthly summary for Australia.
#
# To produce the same summary for another country, put its name in `country`
# and store the result under a matching variable, for example:
#   country = ['Zimbabwe']
#   Zimbabwe_grouped_data = process_covid19_data(covid19_data, country)
#   Zimbabwe_grouped_data.head()
country = ['Australia']
Australia_grouped_data = process_covid19_data(covid19_data, country)

# Australia_grouped_data.info()
Australia_grouped_data.head()

Unnamed: 0_level_0,totalCases,newCases,totalDeaths,newDeaths,totalCasesPerMillion,newCasesPerMillion,totalDeathsPerMillion,newDeathsPerMillion,icuPatientsPerMillion,hospitalPatientsPerMillion,totalVaccinationsPerHundred,peopleVaccinatedPerHundred,peopleFullyVaccinatedPerHundred,totalBoostersPerHundred,medianAge,aged65+,aged70+,femaleSmokers,maleSmokers,humanDevelopmentIndex
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2020-01,35.0,9.0,0.0,0.0,1.337,0.343,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,37.9,449.616,293.741,377.0,478.5,0.944
2020-02,478.0,16.0,0.0,0.0,18.259,0.61,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,37.9,449.616,293.741,377.0,478.5,0.944
2020-03,31988.0,4334.0,231.0,21.0,1221.969,165.562,8.824,0.8,3.132,16.274,0.0,0.0,0.0,0.0,37.9,480.624,313.999,403.0,511.5,0.944
2020-04,186937.0,2387.0,1895.0,74.0,7141.157,91.184,72.391,2.824,73.689,258.241,0.0,0.0,0.0,0.0,37.9,465.12,303.87,390.0,495.0,0.944
2020-05,216814.0,439.0,3204.0,12.0,8282.483,16.769,122.391,0.457,16.349,53.098,0.0,0.0,0.0,0.0,37.9,480.624,313.999,403.0,511.5,0.944


In [38]:
# Accessing the WorldBank API for GDP
# The endpoint is addressed over HTTPS so the requests are not sent in clear text.
url = "https://api.worldbank.org/v2/country/"
country_code = "AU"
gdp_indicator = "NY.GDP.MKTP.CD"
start_year = 2020
last_year = 2023

# One parsed JSON response per requested year, in chronological order.
gdp_data = []

for year in range(start_year, last_year + 1):

    gdp_query_url = f"{url}{country_code}/indicator/{gdp_indicator}?date={year}&format=json"

    # requests has no default timeout; without one a stalled connection would
    # block the notebook indefinitely.
    gdp_response = requests.get(gdp_query_url, timeout=30)
    # Fail loudly on an HTTP error status instead of trying to parse an error page.
    gdp_response.raise_for_status()

    gdp_request = gdp_response.json()

    gdp_data.append(gdp_request)

    pprint(gdp_request)

[{'lastupdated': '2023-07-25',
  'page': 1,
  'pages': 1,
  'per_page': 50,
  'sourceid': '2',
  'total': 1},
 [{'country': {'id': 'AU', 'value': 'Australia'},
   'countryiso3code': 'AUS',
   'date': '2020',
   'decimal': 0,
   'indicator': {'id': 'NY.GDP.MKTP.CD', 'value': 'GDP (current US$)'},
   'obs_status': '',
   'unit': '',
   'value': 1326944627876.87}]]
[{'lastupdated': '2023-07-25',
  'page': 1,
  'pages': 1,
  'per_page': 50,
  'sourceid': '2',
  'total': 1},
 [{'country': {'id': 'AU', 'value': 'Australia'},
   'countryiso3code': 'AUS',
   'date': '2021',
   'decimal': 0,
   'indicator': {'id': 'NY.GDP.MKTP.CD', 'value': 'GDP (current US$)'},
   'obs_status': '',
   'unit': '',
   'value': 1552703151616.01}]]
[{'lastupdated': '2023-07-25',
  'page': 1,
  'pages': 1,
  'per_page': 50,
  'sourceid': '2',
  'total': 1},
 [{'country': {'id': 'AU', 'value': 'Australia'},
   'countryiso3code': 'AUS',
   'date': '2022',
   'decimal': 0,
   'indicator': {'id': 'NY.GDP.MKTP.CD', 'val

In [39]:
# Flatten the per-year responses into a list of {"Year", "GDP"} records.
# As the printed responses show, each one is a two-element list: paging metadata
# first, then the list of observations.
gdp_values = []

# The loop variable is deliberately NOT called `data`: that name holds the
# COVID-19 DataFrame loaded at the top of the notebook, and rebinding it here
# would break any re-run of the cells that read from it.
for gdp_response in gdp_data:
    # Guard against a response that lacks the observations element (for example
    # an error payload) as well as an empty/None observations list.
    gdp_records = gdp_response[1] if len(gdp_response) > 1 else None
    if gdp_records:
        gdp_values.append({
            "Year": gdp_records[0]["date"],
            "GDP": gdp_records[0]["value"]
        })

gdp_values

[{'Year': '2020', 'GDP': 1326944627876.87},
 {'Year': '2021', 'GDP': 1552703151616.01},
 {'Year': '2022', 'GDP': 1675418665067.09}]

In [40]:
# Accessing the WorldBank API for total population
# Reuses url, country_code, start_year and last_year from the GDP request cell.
pop_indicator = "SP.POP.TOTL"

# One parsed JSON response per requested year, in chronological order.
pop_data = []

for year in range(start_year, last_year + 1):

    pop_query_url = f"{url}{country_code}/indicator/{pop_indicator}?date={year}&format=json"

    # requests has no default timeout; without one a stalled connection would
    # block the notebook indefinitely.
    pop_response = requests.get(pop_query_url, timeout=30)
    # Fail loudly on an HTTP error status instead of trying to parse an error page.
    pop_response.raise_for_status()

    pop_request = pop_response.json()

    pop_data.append(pop_request)

    pprint(pop_request)

[{'lastupdated': '2023-07-25',
  'page': 1,
  'pages': 1,
  'per_page': 50,
  'sourceid': '2',
  'total': 1},
 [{'country': {'id': 'AU', 'value': 'Australia'},
   'countryiso3code': 'AUS',
   'date': '2020',
   'decimal': 0,
   'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'},
   'obs_status': '',
   'unit': '',
   'value': 25655289}]]
[{'lastupdated': '2023-07-25',
  'page': 1,
  'pages': 1,
  'per_page': 50,
  'sourceid': '2',
  'total': 1},
 [{'country': {'id': 'AU', 'value': 'Australia'},
   'countryiso3code': 'AUS',
   'date': '2021',
   'decimal': 0,
   'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'},
   'obs_status': '',
   'unit': '',
   'value': 25688079}]]
[{'lastupdated': '2023-07-25',
  'page': 1,
  'pages': 1,
  'per_page': 50,
  'sourceid': '2',
  'total': 1},
 [{'country': {'id': 'AU', 'value': 'Australia'},
   'countryiso3code': 'AUS',
   'date': '2022',
   'decimal': 0,
   'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}

In [41]:
# Flatten the per-year responses into a list of {"Date", "Population Total"} records.
# As the printed responses show, each one is a two-element list: paging metadata
# first, then the list of observations.
pop_values = []

# The loop variable is deliberately NOT called `data`: that name holds the
# COVID-19 DataFrame loaded at the top of the notebook, and rebinding it here
# would break any re-run of the cells that read from it.
for pop_response in pop_data:
    # Guard against a response that lacks the observations element (for example
    # an error payload) as well as an empty/None observations list.
    pop_records = pop_response[1] if len(pop_response) > 1 else None
    if pop_records:
        pop_values.append({
            "Date": pop_records[0]["date"],
            "Population Total": pop_records[0]["value"]
        })

pop_values

[{'Date': '2020', 'Population Total': 25655289},
 {'Date': '2021', 'Population Total': 25688079},
 {'Date': '2022', 'Population Total': 25978935}]

In [64]:
# Tabulate the yearly population records: one row per record, one column per key.
aus_pop_df = pd.DataFrame(data=pop_values)

aus_pop_df

Unnamed: 0,Date,Population Total
0,2020,25655289
1,2021,25688079
2,2022,25978935
