# Import Libraries

In [None]:
import requests
import numpy as np
import urllib.request
import pandas as pd
import csv
from bs4 import BeautifulSoup
import seaborn as sns
sns.set_style("darkgrid")
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('fivethirtyeight')
import warnings
warnings.filterwarnings('ignore')

# Data

## A -  Cases from Nigeria

In [None]:
# Location of the Nigerian state-level COVID-19 table (CSV).
NigCovidurl = 'https://raw.githubusercontent.com/Ustacky-dev/Nigeria-COVID-19-Data-Analysis-Using-Python/main/covidnig.csv'

# thousands=',' makes the parser read values such as "1,234" as numbers.
NigCovid = pd.read_csv(filepath_or_buffer=NigCovidurl, thousands=',')

## B - Cases from Johns Hopkins Data Repository

In [None]:
# Source URLs for the three global time-series CSVs (confirmed, recovered, deaths).
globalconfirmedurl = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
recoveredurl = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'
globaldeathurl = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'

# Read each CSV into its own DataFrame with the default parser settings.
globalconfirmed = pd.read_csv(filepath_or_buffer=globalconfirmedurl)
recovered = pd.read_csv(filepath_or_buffer=recoveredurl)
globaldeath = pd.read_csv(filepath_or_buffer=globaldeathurl)

#### Restructure date columns in global confirmed cases

In [None]:
# Reshape the confirmed-cases table from one column per date to one row per
# (location, date) pair.
globalconfirmedheaders = np.array(globalconfirmed.columns)[4:]  # every column after the first four (the date columns)

globalconfirmedheadersMain = np.array(globalconfirmed.columns)[:4]  # the first four (non-date) columns

# dropna=False is essential: groupby discards any row whose key contains NaN
# by default, which silently removes every location that has no
# 'Province/State' value (or no coordinates) from the result.
globalconfirmed_new = (
    pd.melt(
        globalconfirmed,
        id_vars=globalconfirmedheadersMain,
        value_vars=globalconfirmedheaders,
        var_name='Date',
        value_name='cases',
    )
    .groupby(by=['Province/State', 'Country/Region', 'Lat', 'Long', 'Date'], dropna=False)
    .sum()
    .reset_index()
)



#### Restructure date columns in global recovered cases

In [None]:
# Reshape the recovered-cases table from one column per date to one row per
# (location, date) pair.
recoveredheaders = np.array(recovered.columns)[4:]  # every column after the first four (the date columns)

recoveredheadersheadersMain = np.array(recovered.columns)[:4]  # the first four (non-date) columns

# dropna=False is essential: groupby discards any row whose key contains NaN
# by default, which silently removes every location that has no
# 'Province/State' value (or no coordinates) from the result.
recovered_new = (
    pd.melt(
        recovered,
        id_vars=recoveredheadersheadersMain,
        value_vars=recoveredheaders,
        var_name='Date',
        value_name='cases',
    )
    .groupby(by=['Province/State', 'Country/Region', 'Lat', 'Long', 'Date'], dropna=False)
    .sum()
    .reset_index()
)


#### Restructure date columns in global death cases

In [None]:
# Reshape the deaths table from one column per date to one row per
# (location, date) pair.
globaldeathheaders = np.array(globaldeath.columns)[4:]  # every column after the first four (the date columns)

globaldeathheadersMain = np.array(globaldeath.columns)[:4]  # the first four (non-date) columns

# dropna=False is essential: groupby discards any row whose key contains NaN
# by default, which silently removes every location that has no
# 'Province/State' value (or no coordinates) from the result.
globaldeath_new = (
    pd.melt(
        globaldeath,
        id_vars=globaldeathheadersMain,
        value_vars=globaldeathheaders,
        var_name='Date',
        value_name='cases',
    )
    .groupby(by=['Province/State', 'Country/Region', 'Lat', 'Long', 'Date'], dropna=False)
    .sum()
    .reset_index()
)



## C - External Data

In [None]:
# Each external dataset is declared and loaded as a URL/DataFrame pair.
Budgetdataurl = 'https://raw.githubusercontent.com/Ustacky-dev/Nigeria-COVID-19-Data-Analysis-Using-Python/main/Budget%20data.csv'
Budgetdata = pd.read_csv(filepath_or_buffer=Budgetdataurl)

RealGDPurl = 'https://raw.githubusercontent.com/Ustacky-dev/Nigeria-COVID-19-Data-Analysis-Using-Python/main/RealGDP.csv'
RealGDP = pd.read_csv(filepath_or_buffer=RealGDPurl)

externalFileurl = 'https://raw.githubusercontent.com/Ustacky-dev/Nigeria-COVID-19-Data-Analysis-Using-Python/main/covid_external.csv'
externalFile = pd.read_csv(filepath_or_buffer=externalFileurl)

## Task 2 -  View Data Properties

#### Reviewing First Five Data Points

In [None]:
# Show the first five rows of every loaded dataset, each followed by a
# 50-asterisk separator line.
for _frame in (
    NigCovid,
    globalconfirmed_new,
    recovered_new,
    globaldeath_new,
    Budgetdata,
    RealGDP,
    externalFile,
):
    print(_frame.head())
    print('*' * 50, '\n')

#### Reviewing Data info

In [None]:
# Print the column/dtype/non-null summary of every loaded dataset, each
# followed by a 50-asterisk separator line.
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" after each report.
for _frame in (
    NigCovid,
    globalconfirmed_new,
    recovered_new,
    globaldeath_new,
    Budgetdata,
    RealGDP,
    externalFile,
):
    _frame.info()
    print('*' * 50, '\n')

## Task 3 - Data Cleaning and Preparation

### Convert to appropriate data type

#### Convert to date types

In [None]:
# Parse the melted 'Date' column of each global table into datetime values,
# overwriting the column in place on the same DataFrame objects.
for _frame in (globalconfirmed_new, recovered_new, globaldeath_new):
    _frame['Date'] = pd.to_datetime(_frame['Date'])

#### Remove commas from scraped data

In [None]:
# No manual comma stripping is needed here: the table was read with
# thousands=',' so comma-grouped numbers are parsed at load time.
# info() prints its own report and returns None, so it is not wrapped in print().
NigCovid.info()

#### Rename columns in the Nigeria COVID-19 Data

In [None]:
# Snapshot of the current column labels, used to map positions to new names.
Nigcovidheaders = np.array(NigCovid.columns)

# Rename the first five columns by position, modifying NigCovid in place.
NigCovid.rename(
    columns={
        Nigcovidheaders[position]: label
        for position, label in enumerate(
            ('States', 'Confirmed', 'Admissions', 'Discharged', 'Deaths')
        )
    },
    inplace=True,
)

#### Remove comma(,) in numerical data
#### Look into this

In [None]:
#NigCovid.astype({'Confirmed':'float','Admissions':'float','Discharged':'str','Deaths':'str'})
# list(Nigcovidheaders[1:])
#NigCovid[list(Nigcovidheaders[1:])]= NigCovid.Confirmed.astype(int)
#NigCovid['Confirmed'].replace(',',regex=True,inplace=True)
##NigCovid.Confirmed = NigCovid.Confirmed.astype(float)
##print(NigCovid.info())


#### Extract daily data for Nigeria from the Global daily cases data
#### look into countries present in the data set

In [None]:
# List every distinct country/region label present in the reshaped table.
print(globalconfirmed_new['Country/Region'].unique())
# NOTE(review): the original author observed that no Nigeria rows show up
# here; worth confirming whether they were lost earlier in the pipeline.
country = 'Nigeria'
_is_selected_country = globalconfirmed_new['Country/Region'] == country
Nigeria = globalconfirmed_new.loc[_is_selected_country]
print(Nigeria)

# Task 4 - Analysis
### Confirmed Covid cases by Laboratory test

In [None]:
# Ten states with the highest confirmed-case counts, labelled by state name.
# set_index() returns a new DataFrame rather than modifying NigCovid, so its
# result must be used directly; calling it as a bare statement has no effect.
# NigCovid itself is left untouched, which keeps this cell safe to re-run.
Top_10_Confirmed = (
    NigCovid.set_index('States')
    .nlargest(10, columns=['Confirmed'])[['Confirmed']]
)
print(Top_10_Confirmed)