## 1. Importing libraries

In [1]:
#importing required libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import requests
from pandas.io.json import json_normalize

## 2. Importing datasets

In [2]:
#Reading in data 
## Testing data

#Total tests conducted
df4 = pd.read_csv('./../data/raw/full-list-total-tests-for-covid-19.csv')
sa_tot=df4[df4['Entity'] == 'South Africa']

#Confirmed provincial cummulative  tests         
df5 = pd.read_csv('./../data/raw/owid-covid-data.csv', sep= ',')
sa_cumm=df5[df5['location'] == 'South Africa']

#Confirmed national cummulative confirmed positive tests         
df6 = pd.read_csv('./../data/raw/time_series_covid19_confirmed_global.csv', sep= ',')
sa_con=df6[df6['Country/Region'] == 'South Africa']

#Confirmed national cummulative confirmed deaths         
df7 = pd.read_csv('./../data/raw/time_series_covid19_deaths_global.csv', sep= ',')
sa_death=df7[df7['Country/Region'] == 'South Africa']

#Confirmed national cummulative confirmed revovered tests         
df8 = pd.read_csv('./../data/raw/time_series_covid19_recovered_global.csv', sep= ',')
sa_recovd=df8[df8['Country/Region'] == 'South Africa']

#National density
sa_dens = pd.read_csv('./../data/raw/data.csv')
sa_dens=sa_dens[sa_dens['name'] == 'South Africa']

#Number of hospital beds per province
hosp_beds = pd.read_csv('./../data/raw/hospital_beds.csv', sep = ';')

#List of hospitals in each povince
hosp_list=pd.read_csv("./../data/raw/health_system_za_public_hospitals.csv")

#Government measures dataset
excel_file = './../data/raw/acaps_covid19_government_measures_dataset.xlsx'
df9 = pd.read_excel(excel_file, sheet_name='Database', index_col=0)
sa_recovd=df9[df9['COUNTRY'] == 'South Africa']

## 3.EDA

In [3]:
#Dropping unwanted columns

#Confirmed national cummulative tests conducted
sa_tot = sa_tot.drop(['Code'], axis=1)

#Confirmed national cummulative tests conducted
sa_dens = sa_dens.drop(['rank'], axis=1)

#List of hospitals
hosp_list = hosp_list.drop(['ID','Lat', 'Long','district','district_estimated_population','subdistrict', 'service_offered_by_hospital', 'size_hospital','number_of_beds','number_of_practitioners','webpage','geo_subdivision'], axis=1)

#Confirmed national cummulative tests conducted
sa_cumm = sa_cumm.drop(['iso_code'], axis=1)

#Confirmed national cummulative confirmed positive tests 
sa_con = sa_con.drop(['Lat', 'Long','Province/State'], axis=1)

#Confirmed national cummulative confirmed deaths 
sa_death = sa_death.drop(['Lat', 'Long','Province/State'], axis=1)

#Confirmed national cummulative confirmed recovered
sa_recovd = sa_recovd.drop(['Lat', 'Long','Province/State'], axis=1)


# Extracting relevant columns
sa_cumm= sa_cumm[['date','new_cases','new_deaths']]

KeyError: "['Lat' 'Long' 'Province/State'] not found in axis"

In [None]:
#Cleaning up columns

#Creating date column for sa_con dataframe
dfc1 = sa_con.melt(id_vars=["Country/Region"], 
                    var_name="Date",
                    value_name="cases")
dfc1 = dfc1.rename({'Country/Region': 'country', 'Date': 'date'}, axis=1)
sa_con = dfc1
sa_con['date'] = pd.to_datetime(sa_con['date'])

#Creating date column for sa_death dataframe
dfc2 = sa_death.melt(id_vars=["Country/Region"], 
                    var_name="Date",
                    value_name="cases")
dfc2 = dfc2.rename({'Country/Region': 'country', 'Date': 'date'}, axis=1)
sa_death = dfc2
sa_death['date'] = pd.to_datetime(sa_death['date'])

#Creating date column for sa_recovd dataframe
dfc3 = sa_recovd.melt(id_vars=["Country/Region"], 
                    var_name="Date",
                    value_name="cases")
dfc3 = dfc3.rename({'Country/Region': 'country', 'Date': 'date'}, axis=1)
sa_recovd = dfc3

#converting to datetime
sa_recovd['date'] = pd.to_datetime(sa_recovd['date'])
sa_cumm['date'] = pd.to_datetime(sa_cumm['date'])
sa_tot['Date'] = pd.to_datetime(sa_tot['Date'])


#converting all nan values to 0
sa_cumm = sa_cumm.fillna(0)
sa_tot = sa_tot.fillna(0)

In [None]:
#Renaming testing columns 

sa_tot.rename(columns={'Total tests':' Total_cases'}, inplace=True)
sa_con.rename(columns={'cases':'confirmed_cases'}, inplace=True)
sa_death.rename(columns={'cases':'death_cases'}, inplace=True)
sa_recovd.rename(columns={'cases':'recovered_cases'}, inplace=True)

In [None]:
#Merging South Africa's confirmed, death, recovered cases and new cases for each

sa_testing=pd.merge(sa_con, sa_death, how='left')
sa_testing=pd.merge(sa_testing, sa_recovd, how='left')
sa_testing=pd.merge(sa_testing, sa_cumm, how='left')
sa_testing=pd.merge(sa_testing, sa_tot, how='left')

# Removing nans in dataframe
sa_testing= sa_testing.fillna(0)

In [None]:
#Dropping days whereby they have 0 confirmed cases
sa_testing = sa_testing[sa_testing.confirmed_cases != 0]

In [None]:
#resetting index
sa_testing = sa_testing.reset_index(drop=True)

##  4.  Statistical analysis

### 4.1 Testing dataset

In [None]:
#Looking closely into the national testing dataframe

# display of latest figures
totl_test = sa_testing['Total_cases'].iloc[-1]
totl_con = sa_testing['confirmed_cases'].iloc[-1]
totl_death = sa_testing['death_cases'].iloc[-1]
totl_recovd = sa_testing['recovered_cases'].iloc[-1]
new_case = sa_testing['new_cases'].iloc[-1]
new_deaths = sa_testing['new_deaths'].iloc[-1]
date = sa_testing['date'].iloc[-1]

print('Date:{} TOTAL TESTED:{} TOTAL CASES:{} TOTAL DEATH:{} TOTAL RECOVERED:{} NEW CASES:{}' .format(date,totl_test,totl_con,totl_death,totl_recovd,new_case), sep='nl')


In [None]:
#Moving averages for total tests cases
sa_testing['testing_MA'] = sa_testing.Total_cases.rolling(3).mean()

#Moving averages for confirmed cases
sa_testing['confirmed_MA'] = sa_testing.confirmed_cases.rolling(3).mean()

#Moving averages for death cases
sa_testing['death_MA'] = sa_testing.death_cases.rolling(3).mean()

#Moving averages for recovered cases
sa_testing['recoverd_MA'] = sa_testing.recovered_cases.rolling(3).mean()

# Removing nans in dataframe
sa_testing= sa_testing.fillna(0)
sa_testing.head()

In [None]:
#Rate calculating confirmed cases
sa_testing['positive_rate'] = sa_testing['confirmed_cases'] / sa_testing['Total_cases']

ax = sa_testing.plot(y='confirmed_cases', grid=True, figsize=(12, 6), marker='o', title='Positive case Rate within South Africa')
plt.gca().set_yticklabels(['{:.1f}%'.format(x*100) for x in plt.gca().get_yticks()]) 
plt.show()

#Rate calculating death cases
sa_testing['mortality_rate'] = sa_testing['death_cases'] / sa_testing['Total_cases']

ax = sa_testing.plot(y='death_cases', grid=True, figsize=(12, 6), marker='o', title='Mortality Rate within South Africa')
plt.gca().set_yticklabels(['{:.1f}%'.format(x*100) for x in plt.gca().get_yticks()]) 
plt.show()

#Rate calculating recovered cases
sa_testing['recovery_rate'] = sa_testing['recovered_cases'] / sa_testing['Total_cases']

ax = sa_testing.plot(y='recovered_cases', grid=True, figsize=(12, 6), marker='o', title='Recovery Rate within South Africa')
plt.gca().set_yticklabels(['{:.1f}%'.format(x*100) for x in plt.gca().get_yticks()]) 
plt.show()

In [None]:
#Confirmed cases
fig = go.Figure(data=go.Scatter(x=sa_testing['date'],
                                y=sa_testing['confirmed_MA'],
                                mode='markers',
                                marker_color=sa_testing['confirmed_MA'],
                                text=sa_testing['country'])) # hover text goes here

fig.update_layout(title_text="National confirmed cases",
                 xaxis_title="Days",
                 yaxis_title="Number of cases",
                 )
fig.show()

In [None]:
#Confirmed cases
fig = go.Figure(data=go.Scatter(x=sa_testing['date'],
                                y=sa_testing['death_MA'],
                                mode='markers',
                                marker_color=sa_testing['death_MA'],
                                text=sa_testing['country'])) # hover text goes here

fig.update_layout(title_text="National death cases",
                 xaxis_title="Days",
                 yaxis_title="Number of cases",
                 )
fig.show()

In [None]:
#Confirmed cases
fig = go.Figure(data=go.Scatter(x=sa_testing['date'],
                                y=sa_testing['recoverd_MA'],
                                mode='markers',
                                marker_color=sa_testing['recoverd_MA'],
                                text=sa_testing['country'])) # hover text goes here

fig.update_layout(title_text="National recovered cases",
                 xaxis_title="Days",
                 yaxis_title="Number of cases",
                 )
fig.show()

In [None]:
#Looking closely into the national hospital dataframe

data = px.data.gapminder()

fig = px.bar(hosp_beds, x='Province', y='Total no of beds',
             hover_data=['No of public hospitals', 'No of private hospitals'],
             labels={'Number of hospital beds nationally'}, height=400)
fig.show()

In [None]:
#Looking closely into the national density dataframe

# display of latest figures
density = sa_dens['density'].iloc[-1]
dens_mi = sa_dens['densityMi'].iloc[-1]
pop_2020 = sa_dens['pop2020'].iloc[-1]
area = sa_dens['area'].iloc[-1]

print('Population density:{} Population:{} Area(km2):{} ' .format(density,pop_2020,area), sep='nl')
