# Covid-19 data

The covid data on active cases is requested using the Covid-19 API: https://covid19api.com/

In [1]:
# Import dependencies
import requests
import csv
import json
import pandas as pd
from pprint import pprint
import datetime
from datetime import datetime
from datetime import timedelta, date
import matplotlib.pyplot as plt
import scipy.stats as sts

In [2]:
# Make request to retrieve the country slug for Australia, for per country requests
url = "https://api.covid19api.com/countries"
response_json = requests.get(url).json()

for response in range(len(response_json)):
    if response_json[response]['Country'] == "Australia":
        aus_response = response_json[response]
    
print(aus_response)    

{'Country': 'Australia', 'Slug': 'australia', 'ISO2': 'AU'}


In [3]:
# Covid-19 API request for Australia, case status includes active, confirmed, deaths, recovered
# Build URL
url = "https://api.covid19api.com/country/australia?from=2020-03-01T00:00:00Z&to=2020-09-05T00:00:00Z"

# Make request
response = requests.get(url)

# Convert to json
cases_response = response.json()

# Use list comprehension to get case numbers
state = [case_number["Province"] for case_number in cases_response]
date = [case_number["Date"] for case_number in cases_response]
confirmed = [case_number["Confirmed"] for case_number in cases_response]
active = [case_number["Active"] for case_number in cases_response]
deaths = [case_number["Deaths"] for case_number in cases_response]

# Create dataframe and include month column and set to the month specified in url 
covid_df = pd.DataFrame({
         "State": state,
         "Date": date, 
         "Confirmed (cumulative)": confirmed,
         "Active": active,
         "Deaths": deaths      
 }) 

# Write to csv
covid_df.to_csv("covid_data.csv", index=False, header=True)

In [4]:
# Read in csv
covid_df = pd.read_csv("covid_data.csv") 

# Display dataframe
covid_df.head()

Unnamed: 0,State,Date,Confirmed (cumulative),Active,Deaths
0,Western Australia,2020-03-01T00:00:00Z,2,1,1
1,Australian Capital Territory,2020-03-01T00:00:00Z,0,0,0
2,New South Wales,2020-03-01T00:00:00Z,6,2,0
3,Northern Territory,2020-03-01T00:00:00Z,0,0,0
4,Queensland,2020-03-01T00:00:00Z,9,8,0


In [5]:
# Check that we have all the States for Australia
covid_df["State"].unique()

array(['Western Australia', 'Australian Capital Territory',
       'New South Wales', 'Northern Territory', 'Queensland',
       'South Australia', 'Tasmania', 'Victoria'], dtype=object)

In [6]:
# Convert 'Date' column to datetime format
covid_df["datetime"] = pd.to_datetime(covid_df["Date"])

# Use dt to get the date part only
covid_df["date"] = covid_df["datetime"].dt.date

In [7]:
# Drop date columns
covid_df.drop(columns=["Date", "datetime"],inplace=True)
covid_df.head()

Unnamed: 0,State,Confirmed (cumulative),Active,Deaths,date
0,Western Australia,2,1,1,2020-03-01
1,Australian Capital Territory,0,0,0,2020-03-01
2,New South Wales,6,2,0,2020-03-01
3,Northern Territory,0,0,0,2020-03-01
4,Queensland,9,8,0,2020-03-01


### Western Australia

In [8]:
# Loop through dates and get the active cases for each week
# Set up list to hold active cases                                   
wa_active_cases = []
week_ending = []

# Set end dates for while loop
end_date = datetime(2020,9,8).date()
week_start_date = datetime(2020,3,1).date()
week_end_date = datetime(2020,3,7).date()

while week_end_date < end_date:
    
    # Set conditions on df
    week_rows_wa = covid_df[(covid_df["date"] >= week_start_date) &
                             (covid_df["date"] <= week_end_date) & 
                             (covid_df["State"] == "Western Australia")] 
    
    # Get the active cases and append to list
    wa_active_cases.append(week_rows_wa["Active"].mean())
    week_ending.append(week_end_date)
        
    # Add 7 days to start and end date range
    week_start_date = week_start_date + timedelta(days=7)
    week_end_date = week_end_date + timedelta(days=7) 

# Create df
wa_df = pd.DataFrame({"WA Active": wa_active_cases, 
                       "Week ending": week_ending})

In [9]:
# Calculate percentage change in active cases and add as new column to df
wa_df["diff"] = wa_df["WA Active"].diff()
wa_df["percentage"] = (wa_df["diff"]/wa_df["WA Active"])*100
wa_df = wa_df.dropna()

# Write to csv
wa_df.to_csv("wa_covid_data.csv", index=False, header=True)

### Queensland

In [11]:
# Loop through dates and get the active cases for each week
# Set up list to hold active cases                                   
qld_active_cases = []
week_ending = []

# Set end dates for while loop
end_date = datetime(2020,9,8).date()
week_start_date = datetime(2020,3,1).date()
week_end_date = datetime(2020,3,7).date()

while week_end_date < end_date:
    
    # Set conditions on df
    week_rows_qld = covid_df[(covid_df["date"] >= week_start_date) &
                             (covid_df["date"] <= week_end_date) & 
                             (covid_df["State"] == "Queensland")] 
    
    # Get the active cases and append to list
    qld_active_cases.append(week_rows_qld["Active"].mean())
    week_ending.append(week_end_date)
        
    # Add 7 days to start and end date range
    week_start_date = week_start_date + timedelta(days=7)
    week_end_date = week_end_date + timedelta(days=7)  
    
# Create df
qld_df = pd.DataFrame({"Qld Active": qld_active_cases, 
                       "Week ending": week_ending})

In [12]:
# Calculate percentage change in active cases and add as new column to df
qld_df["diff"] = qld_df["Qld Active"].diff()
qld_df["percentage"] = (qld_df["diff"]/qld_df["Qld Active"])*100
qld_df["percentage"] = (qld_df["diff"]/qld_df["Qld Active"])*100
qld_df = qld_df.dropna()

# Write to csv
qld_df.to_csv("qld_covid_data.csv", index=False, header=True)

## Victoria

In [14]:
# Loop through dates and get the active cases for each week
# Set up list to hold active cases                                   
vic_active_cases = []
week_ending = []

# Set end dates for while loop
end_date = datetime(2020,9,8).date()
week_start_date = datetime(2020,3,1).date()
week_end_date = datetime(2020,3,7).date()

while week_end_date < end_date:
    
    # Set conditions on df
    week_rows_vic = covid_df[(covid_df["date"] >= week_start_date) &
                             (covid_df["date"] <= week_end_date) & 
                             (covid_df["State"] == "Victoria")] 
    
    # Get the active cases and append to list
    vic_active_cases.append(week_rows_vic["Active"].mean())
    week_ending.append(week_end_date)
        
    # Add 7 days to start and end date range
    week_start_date = week_start_date + timedelta(days=7)
    week_end_date = week_end_date + timedelta(days=7)   

# Create df
vic_df = pd.DataFrame({"Vic Active": vic_active_cases, 
                       "Week ending": week_ending})

In [15]:
# Calculate percentage change in active cases and add as new column to df
vic_df["diff"] = vic_df["Vic Active"].diff()
vic_df["percentage"] = (vic_df["diff"]/vic_df["Vic Active"])*100
vic_df = vic_df.dropna()

# Write to csv
vic_df.to_csv("vic_covid_data.csv", index=False, header=True)

## Victoria - new cases

Data on new cases was downloaded from DHHS: https://www.dhhs.vic.gov.au/victorian-coronavirus-covid-19-data

In [17]:
# Read in csv
vic_new_cases_df = pd.read_csv("vic_newcases_data.csv") 

# Display dataframe
vic_new_cases_df.head()

Unnamed: 0,Day of Date,Cases
0,"October 7, 2020",11.0
1,"October 6, 2020",5.0
2,"October 5, 2020",15.0
3,"October 4, 2020",8.0
4,"October 3, 2020",10.0


In [18]:
# Determine date type
vic_new_cases_df.dtypes

Day of Date     object
Cases          float64
dtype: object

In [19]:
# Convert 'Date' column to datetime format
vic_new_cases_df["dateformat"] = pd.to_datetime(vic_new_cases_df["Day of Date"])

# Use dt to get the date part only
vic_new_cases_df["date"] = vic_new_cases_df["dateformat"].dt.date

In [20]:
# Drop date columns
vic_new_cases_df.drop(columns=["dateformat"],inplace=True)

In [21]:
# Loop through dates and get the active cases for each week
# Set up list to hold active cases                                   
vic_new_cases = []
week_ending = []

# Set end dates for while loop
end_date = datetime(2020,9,8).date()
week_start_date = datetime(2020,3,1).date()
week_end_date = datetime(2020,3,7).date()

while week_end_date < end_date:
    
    # Set conditions on df
    week_rows = vic_new_cases_df[(vic_new_cases_df["date"] >= week_start_date) &
                             (vic_new_cases_df["date"] <= week_end_date)] 
    
    # Get the active cases and append to list
    vic_new_cases.append(week_rows["Cases"].sum())
    week_ending.append(week_end_date)
        
    # Add 7 days to start and end date range
    week_start_date = week_start_date + timedelta(days=7)
    week_end_date = week_end_date + timedelta(days=7)  
    
# Create df
vic_new_df = pd.DataFrame({"New cases": vic_new_cases, 
                       "Week ending": week_ending})

vic_new_df.head()

Unnamed: 0,New cases,Week ending
0,5.0,2020-03-07
1,42.0,2020-03-14
2,213.0,2020-03-21
3,489.0,2020-03-28
4,373.0,2020-04-04


In [25]:
# Calculate percentage change in new cases and add as new column to df
vic_new_df["diff"] = vic_new_df["New cases"].diff()
vic_new_df["percentage"] = (vic_new_df["diff"]/vic_new_df["New cases"])*100

vic_new_df

Unnamed: 0,New cases,Week ending,diff,percentage
1,42.0,2020-03-14,,
2,213.0,2020-03-21,171.0,80.28169
3,489.0,2020-03-28,276.0,56.441718
4,373.0,2020-04-04,-116.0,-31.099196
5,138.0,2020-04-11,-235.0,-170.289855
6,60.0,2020-04-18,-78.0,-130.0
7,19.0,2020-04-25,-41.0,-215.789474
8,40.0,2020-05-02,21.0,52.5
9,99.0,2020-05-09,59.0,59.59596
10,74.0,2020-05-16,-25.0,-33.783784


In [23]:
# Drop the row with no values
vic_new_df = vic_new_df.dropna()

In [24]:
# Write to csv
vic_new_df.to_csv("vic_new_covid_data.csv", index=False, header=True)