# COVID-19 Exploratory Data Analysis

## COVID-19 Dataset Understanding

Dataset Link: https://github.com/laxmimerit/Covid-19-Preprocessed-Dataset

In [None]:
# import packages
import plotly as py
py.offline.init_notebook_mode(connected=True)

import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

import folium

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

import math
import random
from datetime import timedelta

import warnings
warnings.filterwarnings('ignore')

import os

# Color palette shared by the plots below (hex RGB strings).
cnf = '#393e46'  # confirmed cases
dth = '#ff2e63'  # deaths
rec = '#21bf73'  # recovered cases
act = '#fe9801'  # active cases

## Dataset Preparation

In [None]:
import shutil

# Remove any previous clone of the dataset so the `git clone` below starts
# from a clean directory. The original shelled out to `rm -rf` inside a
# try/except, but os.system() reports failure through its return code rather
# than an exception, so the except branch could never run; it was also not
# portable to systems without `rm`.
dataset_dir = 'Covid-19-Preprocessed-Dataset'
if os.path.isdir(dataset_dir):
    shutil.rmtree(dataset_dir)
else:
    print('File does not exist')

In [None]:
# IPython shell escape: clone the preprocessed dataset repository into the
# current working directory (requires git and network access).
!git clone https://github.com/laxmimerit/Covid-19-Preprocessed-Dataset.git

In [None]:
# Load the four preprocessed tables from the cloned repository.
# The 'Date' column is parsed into datetimes for every table that is read
# with parse_dates; countrywise is read without date parsing.
daywise = pd.read_csv(
    'Covid-19-Preprocessed-Dataset/preprocessed/daywise.csv',
    parse_dates=['Date'],
)
countrywise = pd.read_csv(
    'Covid-19-Preprocessed-Dataset/preprocessed/countrywise.csv',
)
country_daywise = pd.read_csv(
    'Covid-19-Preprocessed-Dataset/preprocessed/country_daywise.csv',
    parse_dates=['Date'],
)

df = pd.read_csv(
    'Covid-19-Preprocessed-Dataset/preprocessed/covid_19_data_cleaned.csv',
    parse_dates=['Date'],
)


In [None]:
# Replace missing Province/State values with an empty string, then display
# the frame as the cell output.
df['Province/State'] = df['Province/State'].fillna("")
df

In [None]:
# Worldwide totals per date: one two-column frame (Date + metric) per case type.
# The metric column is selected *before* summing so that only that column is
# aggregated; the original summed every column of df (including the string
# columns) and then threw all but one away.
by_date = df.groupby('Date')
confirmed = by_date['Confirmed'].sum().reset_index()
recovered = by_date['Recovered'].sum().reset_index()
deaths = by_date['Deaths'].sum().reset_index()
deaths.head()

In [None]:
# Count missing values per column of df.
df.isnull().sum()

In [None]:
# Print df's column dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Display the rows of df whose Country value is exactly "US".
df.query('Country == "US"')

## Worldwide Total Confirmed, Recovered, and Deaths

In [None]:
# Show the last rows (latest dates) of the worldwide confirmed totals.
confirmed.tail()

In [None]:
# Show the last rows (latest dates) of the worldwide recovered totals.
recovered.tail()

In [None]:
# Show the last rows (latest dates) of the worldwide death totals.
deaths.tail()

In [None]:
# Plot the worldwide confirmed, recovered and death totals per date as three
# line traces on a single figure.
fig = go.Figure()

# (frame, value column / legend label, line color) for each series. Every
# frame carries a 'Date' column plus the single value column named here.
series = [
    (confirmed, 'Confirmed', 'Orange'),
    (recovered, 'Recovered', 'Green'),
    (deaths, 'Deaths', 'Red'),
]

for frame, label, color in series:
    fig.add_trace(go.Scatter(x=frame['Date'], y=frame[label],
                             mode='lines+markers', name=label,
                             line=dict(color=color, width=1)))

# The title previously read 'Worlwide' (typo in the user-facing text).
fig.update_layout(title='Worldwide Covid-19 Cases', xaxis_tickfont_size=14,
                  yaxis=dict(title='Number of Cases'))

fig.show()

## Cases Density Animation on World Map

In [None]:
# Print df's column dtypes before the Date column is cast to string.
df.info()

In [None]:
# Cast Date to str; the density-map cell below uses this column as its
# animation frame key (presumably so frame labels render as plain date
# strings — confirm). The column is converted back to datetime afterwards.
df['Date'] = df['Date'].astype('str')

In [None]:
# Print df's column dtypes again to confirm the Date cast.
df.info()

In [None]:
# Animated density map of the reported locations: points are placed by
# Lat/Long, one animation frame is built per distinct 'Date' value, and the
# hover box shows the country with its Confirmed / Recovered / Deaths counts.
fig = px.density_mapbox(df, lat='Lat', lon='Long', hover_name='Country',
                        hover_data=['Confirmed', 'Recovered', 'Deaths'],
                        animation_frame='Date', color_continuous_scale='Portland',
                        radius=7, zoom=0, height=700)

# The title previously read 'Time Laps' (typo in the user-facing text).
fig.update_layout(title='Worldwide COVID-19 Cases with Time Lapse')
# open-street-map tiles need no Mapbox access token.
fig.update_layout(mapbox_style='open-street-map', mapbox_center_lon=0)

fig.show()

## COVID-19 Cases on Ships

In [None]:
# Parse the Date column back into datetimes and print the frame summary to
# confirm the dtype.
df['Date'] = pd.to_datetime(df['Date'])
df.info()

In [None]:
# Separate cruise-ship records from the country data.
# A row counts as a ship row when its Province/State mentions one of the two
# Princess ships, or its Country mentions one of those or 'MS Zaandam'.
state_col = df['Province/State']
country_col = df['Country']

ship_rows = (
    state_col.str.contains('Grand Princess')
    | state_col.str.contains('Diamond Princess')
    | country_col.str.contains('Grand Princess')
    | country_col.str.contains('Diamond Princess')
    | country_col.str.contains('MS Zaandam')
)

# Keep the ship rows in their own frame and drop them from df.
ship = df[ship_rows]
df = df[~ship_rows]

In [None]:
# Keep only the ship rows for the most recent date.
# Series.max() replaces the builtin max(), which iterates the column element
# by element in Python and raises ValueError when `ship` is empty.
ship_latest = ship[ship['Date'] == ship['Date'].max()]

In [None]:
# Display the latest-date ship records.
ship_latest

In [None]:
# Display the same table with a background gradient using the 'Pastel1_r'
# colormap.
ship_latest.style.background_gradient(cmap = 'Pastel1_r')

## Cases Over Time

In [None]:
# Treemap of the latest worldwide Active / Deaths / Recovered totals.
# Columns are selected with a list: tuple-style selection on a groupby
# (`gb['a', 'b']`) was deprecated in pandas 1.0 and raises in pandas 2.0.
temp = df.groupby('Date')[['Confirmed', 'Deaths', 'Recovered', 'Active']].sum().reset_index()
# Keep only the row for the most recent date.
temp = temp[temp['Date'] == temp['Date'].max()].reset_index(drop=True)

# Long format: one row per case type, with columns 'variable' and 'value'.
tm = temp.melt(id_vars='Date', value_vars=['Active', 'Deaths', 'Recovered'])

fig = px.treemap(tm, path=['variable'], values='value', height=250, width=800,
                 color_discrete_sequence=[act, rec, dth])

# Show the label and the numeric value inside each tile.
fig.data[0].textinfo = 'label+text+value'

fig.show()

In [None]:
# Stacked area chart of Recovered / Deaths / Active totals over time.
# Columns are selected with a list: tuple-style selection on a groupby
# (`gb['a', 'b']`) was deprecated in pandas 1.0 and raises in pandas 2.0.
temp = df.groupby('Date')[['Recovered', 'Deaths', 'Active']].sum().reset_index()
# Long format: one row per (Date, Case) with the count in 'value'.
temp = temp.melt(id_vars='Date', value_vars=['Recovered', 'Deaths', 'Active'], var_name='Case')

fig = px.area(temp, x='Date', y='value', color='Case', height=400,
              title='Cases over time', color_discrete_sequence=[rec, dth, act])

# update_layout returns the figure, so as the last expression of the cell it
# also renders the chart.
fig.update_layout(xaxis_rangeslider_visible=True)

In [None]:
# Display the current contents of the scratch frame `temp`.
temp

## Folium Maps

Worldwide Cases on Folium Maps

In [None]:
# Worldwide map with one circle per location for the most recent date in df.
temp = df[df['Date'] == df['Date'].max()]

# `max_zoom` was previously misspelled `max_xoom`, so the intended zoom cap
# was never applied to the map.
m = folium.Map(location=[0, 0], tiles='cartodbpositron', min_zoom=1, max_zoom=4,
               zoom_start=1)

# Iterate the needed columns in lockstep instead of indexing temp.iloc[i]
# several times per row.
for country, state, lat, lon, n_confirmed, n_deaths in zip(
        temp['Country'], temp['Province/State'], temp['Lat'], temp['Long'],
        temp['Confirmed'], temp['Deaths']):
    tooltip = ('<li><bold> Country: ' + str(country) +
               '<li><bold> State: ' + str(state) +
               '<li><bold> Confirmed: ' + str(n_confirmed) +
               '<li><bold> Deaths: ' + str(n_deaths))
    # Circle radius is the square root of the confirmed count.
    folium.Circle(location=[lat, lon], color='crimson', fill='crimson',
                  tooltip=tooltip,
                  radius=int(n_confirmed)**0.5).add_to(m)

m

## Confirmed Cases with Choropleth Map

In [None]:
# Animated world choropleth: countries are colored by the natural log of
# their confirmed count, with one animation frame per date.
frame_labels = country_daywise['Date'].dt.strftime('%Y-%m-%d')
log_confirmed = np.log(country_daywise['Confirmed'])

fig = px.choropleth(
    country_daywise,
    locations='Country',
    locationmode='country names',
    color=log_confirmed,
    animation_frame=frame_labels,
    title='Cases over time',
    color_continuous_scale=px.colors.sequential.Inferno,
)

# Keep the color scale legend visible.
fig.update(layout_coloraxis_showscale=True)

fig.show()

## Deaths and Recoveries per 100 Cases

In [None]:
# Preview the first rows of the day-wise table.
daywise.head()

In [None]:
# Side-by-side daily bar charts: confirmed cases (left) and deaths (right).
fig_c = px.bar(daywise, x='Date', y='Confirmed', color_discrete_sequence=[act])
# The right-hand panel is titled 'Deaths' but previously plotted 'Confirmed'
# again (copy-paste error); it now plots the Deaths column.
fig_d = px.bar(daywise, x='Date', y='Deaths', color_discrete_sequence=[dth])

fig = make_subplots(rows=1, cols=2, shared_xaxes=False, horizontal_spacing=0.1,
                    subplot_titles=('Confirmed Cases', 'Deaths'))

# Each px figure holds a single bar trace; move it into its subplot cell.
fig.add_trace(fig_c['data'][0], row=1, col=1)
fig.add_trace(fig_d['data'][0], row=1, col=2)

fig.update_layout(height=400)

fig.show()

## Confirmed Cases and Deaths with Static Colormap

In [None]:
# Two static world maps side by side: log of confirmed cases (left) and log
# of deaths (right).
log_confirmed = np.log(countrywise['Confirmed'])
fig_c = px.choropleth(
    countrywise,
    locations='Country',
    locationmode='country names',
    color=log_confirmed,
    hover_name='Country',
    hover_data=['Confirmed'],
)

# Only countries with at least one death are drawn on the deaths map.
temp = countrywise[countrywise['Deaths']>0]
log_deaths = np.log(temp['Deaths'])
fig_d = px.choropleth(
    temp,
    locations='Country',
    locationmode='country names',
    color=log_deaths,
    hover_name='Country',
    hover_data=['Deaths'],
)

fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=['Confirmed', 'Deaths'],
    specs=[[{'type': 'choropleth'}, {'type': 'choropleth'}]],
)

# Move the single trace of each map into its subplot column.
for column_index, source_fig in enumerate((fig_c, fig_d), start=1):
    fig.add_trace(source_fig['data'][0], row=1, col=column_index)

# Hide the color scale legend.
fig.update(layout_coloraxis_showscale=False)

fig.show()

In [None]:
# List the column names available in the day-wise table.
daywise.columns

In [None]:
# Three line charts in one row, one per ratio column of the day-wise table.
# Each entry pairs the column (also used as the subplot title) with its color.
ratio_colors = [
    ('Deaths / 100 Cases', dth),
    ('Recovered / 100 Cases', rec),
    ('Deaths / 100 Recovered', 'aqua'),
]

fig1, fig2, fig3 = (
    px.line(daywise, x='Date', y=column, color_discrete_sequence=[color])
    for column, color in ratio_colors
)

fig = make_subplots(rows=1, cols=3, shared_xaxes=False,
                    subplot_titles=tuple(column for column, _ in ratio_colors))

# Move the single line trace of each figure into its subplot column.
for position, line_fig in enumerate((fig1, fig2, fig3), start=1):
    fig.add_trace(line_fig['data'][0], row=1, col=position)

fig.update_layout(height=400)
fig.show()

## New Cases and No. of Countries

In [None]:
# Daily new cases (left) and number of affected countries (right).
# The left panel is titled 'No. of New Cases per Day' but previously plotted
# the 'Confirmed' column; it now plots 'New Cases'.
# NOTE(review): assumes daywise has a 'New Cases' column, as countrywise
# does — confirm against daywise.columns.
fig_c = px.bar(daywise, x='Date', y='New Cases', color_discrete_sequence=[act])
fig_d = px.bar(daywise, x='Date', y='No. of Countries', color_discrete_sequence=[dth])

fig = make_subplots(rows=1, cols=2, shared_xaxes=False, horizontal_spacing=0.1,
                    subplot_titles=('No. of New Cases per Day', 'No. of Countries'))

# Each px figure holds a single bar trace; move it into its subplot cell.
fig.add_trace(fig_c['data'][0], row=1, col=1)
fig.add_trace(fig_d['data'][0], row=1, col=2)

fig.show()

## Top 15 Countries Case Analysis

In [None]:
# List the column names available in the country-wise table.
countrywise.columns

In [None]:
# Number of countries shown in every ranking panel.
top = 15


def _top_bar(frame, column, color):
    """Return a horizontal bar chart of the `top` rows of `frame` ranked by `column`.

    Rows are sorted ascending by `column` and the last `top` rows are kept.
    Bars are labelled with the `column` value, keyed by 'Country' on the
    y axis, and drawn in `color`.
    """
    return px.bar(frame.sort_values(column).tail(top), x=column, y='Country',
                  text=column, orientation='h', color_discrete_sequence=[color])


fig_c = _top_bar(countrywise, 'Confirmed', cnf)
fig_d = _top_bar(countrywise, 'Deaths', dth)
fig_a = _top_bar(countrywise, 'Active', act)
fig_r = _top_bar(countrywise, 'Recovered', rec)

# The death-rate panel previously reused the "recovered" color, making it
# identical in color to the recovery-rate panel beside it; it now uses the
# deaths color.
fig_dc = _top_bar(countrywise, 'Deaths / 100 Cases', dth)
fig_rc = _top_bar(countrywise, 'Recovered / 100 Cases', rec)

fig_nc = _top_bar(countrywise, 'New Cases', '#f04341')

# Per-capita ranking only considers countries with more than a million people.
temp = countrywise[countrywise['Population']>1000000]
fig_p = _top_bar(temp, 'Cases / Million People', '#b40398')

fig_wc = _top_bar(countrywise, '1 week change', '#c04041')

# Percentage growth only considers countries with more than 100 confirmed cases.
temp = countrywise[countrywise['Confirmed']>100]
fig_wi = _top_bar(temp, '1 week % increase', '#f40598')

fig = make_subplots(rows=5, cols=2, shared_xaxes=False, horizontal_spacing=0.2,
                    vertical_spacing=0.02,
                    subplot_titles=('Confirmed Cases', 'Deaths Reported', 'Active Cases', 'Recovered Cases', 'Deaths / 100 Cases',
                                    'Recovered / 100 Cases', 'New Cases', 'Cases / Million People',
                                    '1 week change', '1 week % increase'))

# Panels in subplot_titles order; they fill the 5x2 grid row by row.
panels = [fig_c, fig_d, fig_a, fig_r, fig_dc, fig_rc, fig_nc, fig_p, fig_wc, fig_wi]
for index, panel in enumerate(panels):
    fig.add_trace(panel['data'][0], row=index // 2 + 1, col=index % 2 + 1)

fig.update_layout(height=3000)

fig.show()