# COVID-19 Data Analysis & Visualization

## What is COVID-19?

> Coronavirus disease (COVID-19) is an infectious disease caused by a newly discovered coronavirus.
Most people infected with the COVID-19 virus will experience mild to moderate respiratory illness and recover without requiring special treatment.  Older people, and those with underlying medical problems like cardiovascular disease, diabetes, chronic respiratory disease, and cancer are more likely to develop serious illness.
The best way to prevent and slow down transmission is to be well informed about the COVID-19 virus, the disease it causes and how it spreads. Protect yourself and others from infection by washing your hands or using an alcohol based rub frequently and not touching your face. 
The COVID-19 virus spreads primarily through droplets of saliva or discharge from the nose when an infected person coughs or sneezes, so it’s important that you also practice respiratory etiquette (for example, by coughing into a flexed elbow).

![Coronavirus Image](https://cdn.pixabay.com/photo/2020/03/16/16/29/virus-4937553_960_720.jpg)

In [1]:
# Importing the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import plotly.express as px
import plotly.graph_objects as go

from IPython.core.display import display, HTML

from ipywidgets import interact
import ipywidgets as widgets
import folium

In [2]:
# loading data right from the source:
death_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
confirmed_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
recovered_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
country_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv')

In [12]:
# Converting the columns names to lower case
death_df.columns = map(str.lower,death_df.columns)
confirmed_df.columns = map(str.lower,confirmed_df.columns)
recovered_df.columns = map(str.lower,recovered_df.columns)
country_df.columns = map(str.lower,country_df.columns)

In [13]:
# Renaming some of the columns for easy handling
death_df = death_df.rename(columns={'province/state': 'state', 'country/region': 'country'})
confirmed_df = confirmed_df.rename(columns={'province/state': 'state', 'country/region': 'country'})
recovered_df = recovered_df.rename(columns={'province/state': 'state', 'country/region': 'country'})
country_df = country_df.rename(columns={'country_region': 'country'})

In [14]:
# Creating a new feature "NewCases" to capture the difference between the last/latest day count and 2nd last day count
confirmed_df.insert(4,'NewCases',0)
confirmed_df['NewCases'] = confirmed_df.iloc[:,-1] - confirmed_df.iloc[:,-2]

In [15]:
# Creating a new feature "NewCases" to capture the difference between the last/latest day count and 2nd last day count
recovered_df.insert(4,'NewCases',0)
recovered_df['NewCases'] = recovered_df.iloc[:,-1] - recovered_df.iloc[:,-2]

In [16]:
# Creating a new feature "NewCases" to capture the difference between the last/latest day count and 2nd last day count
death_df.insert(4,'NewCases',0)
death_df['NewCases'] = death_df.iloc[:,-1] - death_df.iloc[:,-2]

## Overal Worldwide Counts

In [17]:
# Summing up the total confirmed cases across countries
confirmed_total = country_df['confirmed'].sum()

155338408.0

In [18]:
# Summing up the total deaths cases across countries
deaths_total = country_df['deaths'].sum()

3245153.0

In [19]:
# Summing up the total recovered cases across countries
recovered_total = country_df['recovered'].sum()

91761644.0

In [20]:
# Summing up the total active cases across countries
active_total = country_df['active'].sum()

28351256.0

In [21]:
# displaying the current total stats

display(HTML("<div style = 'background-color: #504e4e; padding: 32px '>" +
             "<span style='color: #fff; font-size:32px;'> Confirmed: "  + str(confirmed_total) +"</span>" +
             "<span style='color: red; font-size:32px;margin-left:22px;'> Deaths: " + str(deaths_total) + "</span>"+
             "<span style='color: lightgreen; font-size:32px; margin-left:22px;'> Recovered: " + str(recovered_total) + "</span>"+
             "<span style='color: #fff; font-size:32px; margin-left:22px;'> Active: "  + str(active_total) +"</span>" +
             "</div>")
       )

# Data Visualization through Bubble Charts

### Latest Count of Confirmed New Cases

In [24]:
# Aggregating the confirmed new cases against each country
country_confirmed_newcases = confirmed_df.groupby("country")['NewCases'].sum().reset_index(name ='TotalNewCases')
country_confirmed_newcases = country_confirmed_newcases.sort_values(by='TotalNewCases', ascending=False)

Unnamed: 0,country,TotalNewCases
79,India,412431
23,Brazil,73295
178,US,44510
177,Turkey,26476
62,France,26004


In [25]:
# Visualizing the new confirmed cases against each country using plotly
def bubble_chart(n):
    fig = px.scatter(country_confirmed_newcases.head(n), x="country", y="TotalNewCases", size="TotalNewCases", color="country",
               hover_name="country", size_max=60)
    fig.update_layout(
    title=str(n) +" countries with highest number of confirmed new cases. <br> (Last updated on "+(confirmed_df.columns)[-1]+")",
    xaxis_title="Countries",
    yaxis_title="Confirmed New Cases",
    width = 900
    )
    fig.show();
    

interact(bubble_chart, n=10)
ipywLayout = widgets.Layout()
ipywLayout.display='none'

interactive(children=(IntSlider(value=10, description='n', max=30, min=-10), Output()), _dom_classes=('widget-…

### Latest Count of New Death Cases

In [26]:
# Aggregating the new death cases against each country
country_death_newcases = death_df.groupby("country")['NewCases'].sum().reset_index(name ='TotalNewCases')
country_death_newcases = country_death_newcases.sort_values(by='TotalNewCases', ascending=False)

Unnamed: 0,country,TotalNewCases
79,India,3980
23,Brazil,2811
178,US,776
6,Argentina,663
37,Colombia,388


In [27]:
# Visualizing the new death cases against each country using plotly
def bubble_chart(n):
    fig = px.scatter(country_death_newcases.head(n), x="country", y="TotalNewCases", size="TotalNewCases", color="country",
               hover_name="country", size_max=60)
    fig.update_layout(
    title=str(n) +" countries with highest number of new death cases. <br> (Last updated on "+(death_df.columns)[-1]+")",
    xaxis_title="Countries",
    yaxis_title="New Death Cases",
    width = 900
    )
    fig.show();

    
interact(bubble_chart, n=10)
ipywLayout = widgets.Layout()
ipywLayout.display='none'

interactive(children=(IntSlider(value=10, description='n', max=30, min=-10), Output()), _dom_classes=('widget-…

### Latest Count of New Recovered Cases

In [28]:
# Aggregating the recovered new cases against each country
country_recovered_newcases = recovered_df.groupby("country")['NewCases'].sum().reset_index(name ='TotalNewCases')
country_recovered_newcases = country_recovered_newcases.sort_values(by='TotalNewCases', ascending=False)

Unnamed: 0,country,TotalNewCases
79,India,329323
23,Brazil,84099
66,Germany,43430
177,Turkey,35464
37,Colombia,17183


In [29]:
# Visualizing the recovered new cases against each country using plotly
def bubble_chart(n):
    fig = px.scatter(country_recovered_newcases.head(n), x="country", y="TotalNewCases", size="TotalNewCases", color="country",
               hover_name="country", size_max=60)
    fig.update_layout(
    title=str(n) +" countries with highest number of new recovered cases. <br> (Last updated on "+(recovered_df.columns)[-1]+")",
    xaxis_title="Countries",
    yaxis_title="New Recovered Cases",
    width = 900
    )
    fig.show();

    
interact(bubble_chart, n=10)
ipywLayout = widgets.Layout()
ipywLayout.display='none'

interactive(children=(IntSlider(value=10, description='n', max=30, min=-10), Output()), _dom_classes=('widget-…

# Data Visualization through Line Charts

### Trend of Confirmed Case

In [32]:
# Visualizing the trend of confirmed cases over time using Plotly
def confirmedCases_trend(name):
    x_data = confirmed_df.iloc[:, 5:].columns
    y_data = confirmed_df[confirmed_df['country'] == name].iloc[:,5:].sum(axis=0)
    
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=x_data, y=y_data,
                    mode='markers',
                    name='markers'))

    fig.update_layout(
        title=str(name) +"'s trend on confirmed casess. <br> (Last updated on "+(confirmed_df.columns)[-1]+")",
        xaxis_title="Date",
        yaxis_title="Confirmed Cases",
        width = 800
        )

    fig.show()
    
    

interact(confirmedCases_trend, name='India')
ipywLayout = widgets.Layout()
ipywLayout.display='none'

interactive(children=(Text(value='India', description='name'), Output()), _dom_classes=('widget-interact',))

### Trend of Death Cases

In [33]:
# Visualizing the trend of death cases over time using Plotly
def deathCases_trend(name):
    x_data = death_df.iloc[:, 5:].columns
    y_data = death_df[death_df['country'] == name].iloc[:,5:].sum(axis=0)
    
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=x_data, y=y_data,
                    mode='markers',
                    name='markers'))

    fig.update_layout(
        title=str(name) +"'s trend on death cases. <br> (Last updated on "+(death_df.columns)[-1]+")",
        xaxis_title="Date",
        yaxis_title="Death Cases",
        width = 800
        )

    fig.show()
    

interact(deathCases_trend, name='India')
ipywLayout = widgets.Layout()
ipywLayout.display='none'

interactive(children=(Text(value='India', description='name'), Output()), _dom_classes=('widget-interact',))

### Trend of Recovered Case

In [34]:
# Visualizing the trend of recovered cases over time using Plotly
def recoveredCases_trend(name):
    x_data = recovered_df.iloc[:, 5:].columns
    y_data = recovered_df[recovered_df['country'] == name].iloc[:,5:].sum(axis=0)
    
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=x_data, y=y_data,
                    mode='markers',
                    name='markers'))

    fig.update_layout(
        title=str(name) +"'s trend on recovered casess. <br> (Last updated on "+(recovered_df.columns)[-1]+")",
        xaxis_title="Date",
        yaxis_title="Recovered Cases",
        width = 800
        )

    fig.show()
    
    
interact(recoveredCases_trend, name='India')
ipywLayout = widgets.Layout()
ipywLayout.display='none'

interactive(children=(Text(value='India', description='name'), Output()), _dom_classes=('widget-interact',))

# Interactive Table Chart

### COVID-19 Confirmed/Death/Recovered/Active cases - Sorted by Confirmed Cases in Descending order

In [35]:
# Printing the top n countries sorted by Confirmed Cases in Descending order
def show_latest_cases(n):
    n = int(n)
    df1 = country_df[['country','last_update','confirmed','deaths','recovered','active','incident_rate','mortality_rate']]
    df1 = df1.sort_values(by ='confirmed', ascending=False)
    return df1.head(n)


interact(show_latest_cases, n='10')
ipywLayout = widgets.Layout()
ipywLayout.display='none'

interactive(children=(Text(value='10', description='n'), Output()), _dom_classes=('widget-interact',))

### COVID-19 Confirmed/Death/Recovered/Active cases - Country specific

In [36]:
# Country specific search to see the count details
def country_specific(name):
    df1 = country_df[['country','last_update','confirmed','deaths','recovered','active','incident_rate','mortality_rate']]
    country_specific = df1.loc[df1['country'] == name]
    return country_specific


interact(country_specific, name='India')
ipywLayout = widgets.Layout()
ipywLayout.display='none'

interactive(children=(Text(value='India', description='name'), Output()), _dom_classes=('widget-interact',))

# Data Visualization through Bar Charts

### Worst Hit Countries - Confirmed Cases

In [38]:
# Visualizing the top n countries with respect to confirmed cases using Plotly
def confirmedCases_bar_chart(n):
    df1 = country_df.sort_values(by ='confirmed', ascending=False)
    fig = px.bar(df1.head(n), x="country", y="confirmed")
    
    fig.update_layout(
    title=str(n) +" countries with highest number of confirmed cases. <br> (Last updated on "+ df1.last_update[1] +")",
    xaxis_title="Countries",
    yaxis_title="Confirmed Cases",
    width = 800
    )
    
    fig.show();

    
interact(confirmedCases_bar_chart, n=10)
ipywLayout = widgets.Layout()
ipywLayout.display='none'

interactive(children=(IntSlider(value=10, description='n', max=30, min=-10), Output()), _dom_classes=('widget-…

### Worst Hit Countries - Death Cases

In [39]:
# Visualizing the top n countries with respect to death cases using Plotly
def deathCases_bar_chart(n):
    df1 = country_df.sort_values(by ='deaths', ascending=False)
    fig = px.bar(df1.head(n), x="country", y="deaths")
    
    fig.update_layout(
    title=str(n) +" countries with highest number of death cases. <br> (Last updated on "+ df1.last_update[1] +")",
    xaxis_title="Countries",
    yaxis_title="Death Cases",
    width = 800
    )
    
    fig.show();

    
interact(deathCases_bar_chart, n=10)
ipywLayout = widgets.Layout()
ipywLayout.display='none'

interactive(children=(IntSlider(value=10, description='n', max=30, min=-10), Output()), _dom_classes=('widget-…

### Countries - Recovered Cases

In [40]:
# Visualizing the top n countries with respect to recovered cases using Plotly
def recoveredCases_bar_chart(n):
    df1 = country_df.sort_values(by ='recovered', ascending=False)
    fig = px.bar(df1.head(n), x="country", y="recovered")
    
    fig.update_layout(
    title=str(n) +" countries with highest number of recovered cases. <br> (Last updated on "+ df1.last_update[1] +")",
    xaxis_title="Countries",
    yaxis_title="Recovered Cases",
    width = 800
    )
    
    fig.show();

    
interact(recoveredCases_bar_chart, n=10)
ipywLayout = widgets.Layout()
ipywLayout.display='none'

interactive(children=(IntSlider(value=10, description='n', max=30, min=-10), Output()), _dom_classes=('widget-…

### Countries - Active Cases

In [41]:
# Visualizing the top n countries with respect to active cases using Plotly
def activeCases_bar_chart(n):
    df1 = country_df.sort_values(by ='active', ascending=False)
    fig = px.bar(df1.head(n), x="country", y="recovered")
    
    fig.update_layout(
    title=str(n) +" countries with highest number of active cases. <br> (Last updated on "+ df1.last_update[1] +")",
    xaxis_title="Countries",
    yaxis_title="Active Cases",
    width = 800
    )
    
    fig.show();

    
interact(recoveredCases_bar_chart, n=10)
ipywLayout = widgets.Layout()
ipywLayout.display='none'

interactive(children=(IntSlider(value=10, description='n', max=30, min=-10), Output()), _dom_classes=('widget-…

# Data Visualization on Maps

### Global spread of COVID-19 using Folium

In [44]:
# Removing the null values from confirmed_df
confirmed_df = confirmed_df[~confirmed_df['lat'].isnull()]

In [45]:
# Using Folium to spread of COVID19 cases over the world
world_map = folium.Map(location=[11,0], tiles="cartodbpositron", zoom_start=2, max_zoom = 6, min_zoom = 2)


for i in range(0,len(confirmed_df)):
    folium.Circle(
        location=[confirmed_df.iloc[i]['lat'], confirmed_df.iloc[i]['long']],
        fill=True,
        radius=(int((np.log(confirmed_df.iloc[i,-1]+1.00001)))+0.2)*5000,
        color='red',
        fill_color='indigo',
        tooltip = "<div style='margin: 0; background-color: black; color: white;'>"+
                    "<h4 style='text-align:center;font-weight: bold'>"+confirmed_df.iloc[i]['country'] + "</h4>"
                    "<hr style='margin:10px;color: white;'>"+
                    "<ul style='color: white;;list-style-type:circle;align-item:left;padding-left:20px;padding-right:20px'>"+
                        "<li>Confirmed: "+str(confirmed_df.iloc[i,-1])+"</li>"+
                        "<li>Deaths:   "+str(death_df.iloc[i,-1])+"</li>"+
                        "<li>Death Rate: "+ str(np.round(death_df.iloc[i,-1]/(confirmed_df.iloc[i,-1]+1.00001)*100,2))+ "</li>"+
                    "</ul></div>",
        ).add_to(world_map)

world_map


## Global spread of COVID-19 using Plotly

In [46]:
fig = px.scatter_mapbox(confirmed_df, lat="lat", lon="long", color="country",
                  color_continuous_scale=px.colors.cyclical.IceFire, size_max=15, zoom=0,
                  mapbox_style="carto-positron")

fig.update_layout(
    title="Global spread of COVID-19. (Last updated on "+ confirmed_df.columns[-1] +")"
    )
    
fig.show();