<a href="https://colab.research.google.com/github/eispat28/COVID-19-Data-Analysis/blob/master/COVID_19_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Load and Prepare Dataset**


In [0]:
import pandas as pd

# Source CSV: the OWID COVID-19 dataset hosted on GitHub.
url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv'

# Read the CSV and, in the same expression, remove the irrelevant rows:
# every row whose location is "World" is filtered out.
dataset = pd.read_csv(url).loc[lambda frame: frame["location"].ne("World")]


**Setup for Plotly**

In [0]:
def configure_plotly_browser_state():
  """Display the RequireJS bootstrap that Plotly output in this notebook uses.

  Emits an HTML snippet into the current cell's output that loads
  ``require.js`` from the notebook's static files and registers the ``base``
  and ``plotly`` module paths (the Plotly script URL is hard-coded in the
  snippet). The notebook calls this at the start of every plotting cell,
  right before ``init_notebook_mode``.

  Takes no arguments and returns ``None``; its only effect is the displayed
  HTML.
  """
  # Import the public display helpers explicitly instead of depending on the
  # `display` name IPython injects into notebook globals and on attribute
  # access through `IPython.core.display`.
  from IPython.display import HTML, display

  display(HTML('''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-1.5.1.min.js?noext',
            },
          });
        </script>
        '''))
  

In [16]:
# Optional one-time runtime setup, kept commented out: pins plotly to 4.0.0,
# downloads the orca AppImage to /usr/local/bin/orca and makes it executable,
# and installs xvfb plus the GTK/GConf system libraries.
# NOTE(review): presumably what the fig.write_image(...) calls further down
# rely on — confirm before removing or uncommenting.
# !pip install plotly==4.0.0
# !wget https://github.com/plotly/orca/releases/download/v1.2.1/orca-1.2.1-x86_64.AppImage -O /usr/local/bin/orca
# !chmod +x /usr/local/bin/orca
# !apt-get install xvfb libgtk2.0-0 libgconf-2-4

Collecting plotly==4.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/58/f3/a49d3281cc7275164ecf89ad3497556b11d9661faa119becdf7f9d3b2125/plotly-4.0.0-py2.py3-none-any.whl (6.8MB)
[K     |████████████████████████████████| 6.8MB 2.7MB/s 
[31mERROR: cufflinks 0.17.3 has requirement plotly>=4.1.1, but you'll have plotly 4.0.0 which is incompatible.[0m
Installing collected packages: plotly
  Found existing installation: plotly 4.4.1
    Uninstalling plotly-4.4.1:
      Successfully uninstalled plotly-4.4.1
Successfully installed plotly-4.0.0


--2020-05-04 17:30:35--  https://github.com/plotly/orca/releases/download/v1.2.1/orca-1.2.1-x86_64.AppImage
Resolving github.com (github.com)... 140.82.113.3
Connecting to github.com (github.com)|140.82.113.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github-production-release-asset-2e65be.s3.amazonaws.com/99037241/9dc3a580-286a-11e9-8a21-4312b7c8a512?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20200504%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20200504T173035Z&X-Amz-Expires=300&X-Amz-Signature=b930f836d952eed16533c8321918f2e15d813f0f90a2c8106db376652e600195&X-Amz-SignedHeaders=host&actor_id=0&repo_id=99037241&response-content-disposition=attachment%3B%20filename%3Dorca-1.2.1-x86_64.AppImage&response-content-type=application%2Foctet-stream [following]
--2020-05-04 17:30:35--  https://github-production-release-asset-2e65be.s3.amazonaws.com/99037241/9dc3a580-286a-11e9-8a21-4312b7c8a512?X-Amz-Algorithm=AWS4-HMAC-SHA

**World Map of Coronavirus**

In [0]:
# Build the table behind the world map in one chain: remove the per-capita
# and testing columns the map does not use, then sort the rows by `date`.
world_data = (
    dataset
    .drop(columns=[
        'total_cases_per_million',
        'new_cases_per_million',
        'total_deaths_per_million',
        'new_deaths_per_million',
        'total_tests',
        'new_tests',
        'total_tests_per_thousand',
        'new_tests_per_thousand',
        'tests_units',
    ])
    .sort_values('date')
)


In [29]:
# Details on how to plot a Choropleth Map
# https://plotly.com/python/choropleth-maps/

import plotly.offline as py
import plotly.express as px

# Prepare this cell's output for Plotly before any figure is created.
configure_plotly_browser_state()
py.init_notebook_mode(connected=True)

# Animated choropleth of `total_cases`: locations are matched by country name,
# one animation frame is produced per `date` value, and the colour scale is
# limited to the 1,000-300,000 range.
fig = px.choropleth(
    world_data,
    locations="location",
    locationmode='country names',
    color="total_cases",
    color_continuous_scale='Blues',
    range_color=[1000, 300000],
    hover_name="location",
    hover_data=[world_data["total_cases"], world_data["total_deaths"]],
    animation_frame="date",
    projection="equirectangular",
    width=1000,
    height=700,
    title='World Map of Coronavirus',
)

# Make sure the colour bar is displayed next to the map.
fig.update_layout(coloraxis_showscale=True)

# Save an interactive HTML copy and a static PNG, then render the map inline.
fig.write_html("choropleth_map_interactive.html")
fig.write_image("choropleth_map.png")
py.iplot(fig, filename='filename.html')


**Pie Chart of Coronavirus**


In [0]:
# Build the pie-chart table in one chain:
#   1. drop every column the chart does not use,
#   2. keep only the rows carrying the latest `date` value,
#   3. order by `total_cases`, largest first,
#   4. keep the first ten rows.
pie_data = (
    dataset
    .drop(columns=[
        'new_cases', 'total_deaths', 'new_deaths',
        'total_cases_per_million', 'new_cases_per_million',
        'total_deaths_per_million', 'new_deaths_per_million',
        'total_tests', 'new_tests', 'total_tests_per_thousand',
        'new_tests_per_thousand', 'tests_units',
    ])
    .loc[lambda frame: frame['date'] == frame['date'].max()]
    .sort_values('total_cases', ascending=False)
    .head(10)
)


In [18]:
# Details on how to plot pie charts
# https://plotly.com/python/pie-charts/

import plotly.graph_objects as go
import plotly.offline as py

configure_plotly_browser_state()
py.init_notebook_mode(connected=True)

# Slice names, slice sizes, and the date appended to the chart title.
labels, values = pie_data['location'], pie_data['total_cases']
date = pie_data['date'].max()

# A non-zero `hole` turns the pie into a donut-style chart.
fig = go.Figure(data=[go.Pie(hole=.3, values=values, labels=labels)])
fig.update_layout(
    title=dict(text="Top 10 Countries with Coronavirus Cases " + date),
)

# Save a static PNG first, then render the chart inline.
fig.write_image("pie_chart.png")
py.iplot(fig)


**Line Graph of Total Cases Over Time**


In [0]:
# Top 10 countries with the highest number of cases to date, taken from the
# table prepared for the pie chart.
top_10 = pie_data['location']

# Drop the columns the total-cases line graph does not use, then keep only
# the rows that belong to one of those top 10 countries.
line_data = (
    dataset
    .drop(columns=[
        'new_cases', 'total_deaths', 'new_deaths',
        'total_cases_per_million', 'new_cases_per_million',
        'total_deaths_per_million', 'new_deaths_per_million',
        'total_tests', 'new_tests', 'total_tests_per_thousand',
        'new_tests_per_thousand', 'tests_units',
    ])
    .loc[lambda frame: frame['location'].isin(top_10)]
)


In [19]:
# Details on how to plot line charts
# https://plotly.com/python/line-charts/

import plotly.express as px
import plotly.offline as py

# Prepare this cell's output for Plotly before any figure is created.
configure_plotly_browser_state()
py.init_notebook_mode(connected=True)

# One line per location, plotting `total_cases` against `date`.
fig = px.line(line_data, color='location', x="date", y="total_cases")

# Chart title and axis captions.
fig.update_layout(
    title=dict(text='Total Cases over Time in Top 10 Countries'),
    xaxis=dict(title=dict(text='Date')),
    yaxis=dict(title=dict(text='Total Cases')),
)

# Save a static PNG first, then render the chart inline.
fig.write_image("total_cases.png")
py.iplot(fig)


**Line Graph of Total Deaths over Time**


In [0]:
# Top 10 countries with the highest number of cases to date, taken from the
# table prepared for the pie chart.
top_10 = pie_data['location']

# Drop the columns the total-deaths line graph does not use, then keep only
# the rows that belong to one of those top 10 countries.
line2_data = (
    dataset
    .drop(columns=[
        'new_cases', 'total_cases', 'new_deaths',
        'total_cases_per_million', 'new_cases_per_million',
        'total_deaths_per_million', 'new_deaths_per_million',
        'total_tests', 'new_tests', 'total_tests_per_thousand',
        'new_tests_per_thousand', 'tests_units',
    ])
    .loc[lambda frame: frame['location'].isin(top_10)]
)


In [20]:
# Details on how to plot line charts
# https://plotly.com/python/line-charts/

import plotly.express as px
import plotly.offline as py

# Prepare this cell's output for Plotly before any figure is created.
configure_plotly_browser_state()
py.init_notebook_mode(connected=True)

# One line per location, plotting `total_deaths` against `date`.
fig = px.line(line2_data, color='location', x="date", y="total_deaths")

# Chart title and axis captions.
fig.update_layout(
    title=dict(text='Total Deaths over Time in Top 10 Countries'),
    xaxis=dict(title=dict(text='Date')),
    yaxis=dict(title=dict(text='Total Deaths')),
)

# Save a static PNG first, then render the chart inline.
fig.write_image("total_deaths.png")
py.iplot(fig)


**Bar Chart of Total Cases and Deaths in Top 10 Countries**

In [0]:
# Build the bar-chart table in one chain:
#   1. drop the per-capita and testing columns the chart does not use,
#   2. keep only the rows carrying the latest `date` value,
#   3. order by `total_cases`, largest first,
#   4. keep the first ten rows.
bar_data = (
    dataset
    .drop(columns=[
        'total_cases_per_million', 'new_cases_per_million',
        'total_deaths_per_million', 'new_deaths_per_million',
        'total_tests', 'new_tests', 'total_tests_per_thousand',
        'new_tests_per_thousand', 'tests_units',
    ])
    .loc[lambda frame: frame['date'] == frame['date'].max()]
    .sort_values('total_cases', ascending=False)
    .head(10)
)


In [21]:
# Details on how to plot bar charts
# https://plotly.com/python/bar-charts/

import plotly.graph_objects as go
import plotly.offline as py

# Prepare this cell's output for Plotly before any figure is created.
configure_plotly_browser_state()
py.init_notebook_mode(connected=True)

# Plain Python lists for the x axis and the two bar series, plus the date
# appended to the chart title.
countries, cases, deaths = (
    list(bar_data[column])
    for column in ('location', 'total_cases', 'total_deaths')
)
date = bar_data['date'].max()

# Two bar traces over the same countries: cases first, deaths second.
fig = go.Figure()
fig.add_trace(go.Bar(name='Total Cases', x=countries, y=cases))
fig.add_trace(go.Bar(name='Total Deaths', x=countries, y=deaths))

# NOTE(review): 'stack' draws the deaths bar on top of the cases bar, so the
# combined height is cases + deaths — confirm stacking (not grouping) is intended.
fig.update_layout(
    title_text='Total Deaths and Cases in Top 10 Countries ' + date,
    barmode='stack',
)

# Save a static PNG first, then show the chart.
fig.write_image("bar_chart.png")
fig.show()


**Save All Graphics**

In [0]:
# Colab-only helper module; the later download cells reuse this `files` import.
from google.colab import files
# Download the interactive choropleth HTML written by the world-map cell.
files.download('choropleth_map_interactive.html')

In [0]:
# Download the PNG written by the pie-chart cell
# (uses `files` imported from google.colab in an earlier cell).
files.download("pie_chart.png")

In [0]:
# Download the PNG written by the total-cases line-graph cell
# (uses `files` imported from google.colab in an earlier cell).
files.download("total_cases.png")

In [0]:
# Download the PNG written by the total-deaths line-graph cell
# (uses `files` imported from google.colab in an earlier cell).
files.download("total_deaths.png")

In [0]:
# Download the PNG written by the bar-chart cell
# (uses `files` imported from google.colab in an earlier cell).
files.download("bar_chart.png")