In [1]:
#Importing required libraries
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
import plotly.graph_objects as go
from fbprophet import Prophet
import pycountry
import plotly.express as px

In [2]:
#Importing required data
COVID19_line_list_data = pd.read_csv("../input/novel-corona-virus-2019-dataset/COVID19_line_list_data.csv")
COVID19_open_line_list = pd.read_csv("../input/novel-corona-virus-2019-dataset/COVID19_open_line_list.csv")
df = pd.read_csv("../input/novel-corona-virus-2019-dataset/covid_19_data.csv", parse_dates = ['Last Update'])
df_confirmed = pd.read_csv("../input/novel-corona-virus-2019-dataset/time_series_covid_19_confirmed.csv")
df_deaths = pd.read_csv("../input/novel-corona-virus-2019-dataset/time_series_covid_19_deaths.csv")
df_recovered = pd.read_csv("../input/novel-corona-virus-2019-dataset/time_series_covid_19_recovered.csv")

In [3]:
#Converting all float values to integers
cols = ['Confirmed', 'Deaths', 'Recovered']
df[cols] = df[cols].applymap(np.int64)
df

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,2020-01-22 17:00:00,1,0,0
1,2,01/22/2020,Beijing,Mainland China,2020-01-22 17:00:00,14,0,0
2,3,01/22/2020,Chongqing,Mainland China,2020-01-22 17:00:00,6,0,0
3,4,01/22/2020,Fujian,Mainland China,2020-01-22 17:00:00,1,0,0
4,5,01/22/2020,Gansu,Mainland China,2020-01-22 17:00:00,0,0,0
...,...,...,...,...,...,...,...,...
11336,11337,03/23/2020,,Uzbekistan,2020-03-23 23:19:21,46,0,0
11337,11338,03/23/2020,,Venezuela,2020-03-23 23:19:21,77,0,15
11338,11339,03/23/2020,,Vietnam,2020-03-23 23:19:21,123,0,17
11339,11340,03/23/2020,,Zambia,2020-03-23 23:19:21,3,0,0


In [4]:
#Renaming ObservationDate to Date and Country/Region to Country in df dataframe
df.rename(columns = {'ObservationDate': 'Date', "Country/Region": 'Country'}, inplace = True)
df

Unnamed: 0,SNo,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,2020-01-22 17:00:00,1,0,0
1,2,01/22/2020,Beijing,Mainland China,2020-01-22 17:00:00,14,0,0
2,3,01/22/2020,Chongqing,Mainland China,2020-01-22 17:00:00,6,0,0
3,4,01/22/2020,Fujian,Mainland China,2020-01-22 17:00:00,1,0,0
4,5,01/22/2020,Gansu,Mainland China,2020-01-22 17:00:00,0,0,0
...,...,...,...,...,...,...,...,...
11336,11337,03/23/2020,,Uzbekistan,2020-03-23 23:19:21,46,0,0
11337,11338,03/23/2020,,Venezuela,2020-03-23 23:19:21,77,0,15
11338,11339,03/23/2020,,Vietnam,2020-03-23 23:19:21,123,0,17
11339,11340,03/23/2020,,Zambia,2020-03-23 23:19:21,3,0,0


In [5]:
#Viewing the info of the columns in dataframe 
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11341 entries, 0 to 11340
Data columns (total 8 columns):
SNo               11341 non-null int64
Date              11341 non-null object
Province/State    7746 non-null object
Country           11341 non-null object
Last Update       11341 non-null datetime64[ns]
Confirmed         11341 non-null int64
Deaths            11341 non-null int64
Recovered         11341 non-null int64
dtypes: datetime64[ns](1), int64(4), object(3)
memory usage: 708.9+ KB


In [6]:
#Changing the name of Mainland China to China 
df['Country'].replace({'Mainland China': 'China'}, inplace = True)
df

Unnamed: 0,SNo,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,China,2020-01-22 17:00:00,1,0,0
1,2,01/22/2020,Beijing,China,2020-01-22 17:00:00,14,0,0
2,3,01/22/2020,Chongqing,China,2020-01-22 17:00:00,6,0,0
3,4,01/22/2020,Fujian,China,2020-01-22 17:00:00,1,0,0
4,5,01/22/2020,Gansu,China,2020-01-22 17:00:00,0,0,0
...,...,...,...,...,...,...,...,...
11336,11337,03/23/2020,,Uzbekistan,2020-03-23 23:19:21,46,0,0
11337,11338,03/23/2020,,Venezuela,2020-03-23 23:19:21,77,0,15
11338,11339,03/23/2020,,Vietnam,2020-03-23 23:19:21,123,0,17
11339,11340,03/23/2020,,Zambia,2020-03-23 23:19:21,3,0,0


* Creating new column named "Active Cases"
* Active Cases = Confirmed - Deaths - Recovered

In [7]:
df['Active Cases'] = df['Confirmed'] - df['Deaths'] - df['Recovered']
df.tail(10)

Unnamed: 0,SNo,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered,Active Cases
11331,11332,03/23/2020,,Uganda,2020-03-23 23:19:21,9,0,0,9
11332,11333,03/23/2020,,Ukraine,2020-03-23 23:19:21,73,3,1,69
11333,11334,03/23/2020,,United Arab Emirates,2020-03-23 23:19:21,198,2,41,155
11334,11335,03/23/2020,,UK,2020-03-23 23:19:21,6650,335,135,6180
11335,11336,03/23/2020,,Uruguay,2020-03-23 23:19:21,158,0,0,158
11336,11337,03/23/2020,,Uzbekistan,2020-03-23 23:19:21,46,0,0,46
11337,11338,03/23/2020,,Venezuela,2020-03-23 23:19:21,77,0,15,62
11338,11339,03/23/2020,,Vietnam,2020-03-23 23:19:21,123,0,17,106
11339,11340,03/23/2020,,Zambia,2020-03-23 23:19:21,3,0,0,3
11340,11341,03/23/2020,,Zimbabwe,2020-03-23 23:19:21,3,1,0,2


Latest/Updated Numbers

In [8]:
data_latest = df[df['Date'] == max(df['Date'])].reset_index()
data_latest.head()

Unnamed: 0,index,SNo,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered,Active Cases
0,7926,7927,03/23/2020,South Carolina,US,2020-03-23 23:19:34,1,0,0,1
1,7927,7928,03/23/2020,Louisiana,US,2020-03-23 23:19:34,1,0,0,1
2,7928,7929,03/23/2020,Virginia,US,2020-03-23 23:19:34,1,0,0,1
3,7929,7930,03/23/2020,Idaho,US,2020-03-23 23:19:34,13,0,0,13
4,7930,7931,03/23/2020,Iowa,US,2020-03-23 23:19:34,1,0,0,1


Current Numbers Worldwide

In [9]:
data_world = data_latest.groupby('Date')['Confirmed', 'Deaths', 'Recovered', 'Active Cases'].sum().reset_index()
data_world

Unnamed: 0,Date,Confirmed,Deaths,Recovered,Active Cases
0,03/23/2020,378287,16497,100958,260832


In [10]:
cols = ['Active_Cases', 'Recovered', 'Deaths']
values = data_world.loc[0, ['Active Cases', 'Recovered', 'Deaths']]
fig = px.pie(data_world, values = values, names = cols, color_discrete_sequence = ['lightblue', 'blue', 'darkblue'])
fig.update_layout(title = 'Total Cases: ' + str(data_world['Confirmed'][0]))
fig.show()

Confirmed Cases in each country

In [11]:
conf_country = data_latest.groupby(['Country'])['Confirmed'].sum().reset_index().sort_values('Confirmed',ascending = False).reset_index(drop=True)

In [12]:
# Default value of display.max_rows is 10 i.e. at max 10 rows will be printed.
# Set it None to display all rows in the dataframe
pd.set_option('display.max_rows', None)
conf_country

Unnamed: 0,Country,Confirmed
0,China,81116
1,Italy,63927
2,US,43667
3,Spain,35136
4,Germany,29056
5,Iran,23049
6,France,20123
7,South Korea,8961
8,Switzerland,8795
9,UK,6726


What's the total number of confirmed cases in India?

This could be used as a framework to find out the total number of confirmed cases for any country

In [13]:
conf_country.query('Country == "India"')

Unnamed: 0,Country,Confirmed
41,India,499


**Choropleth Map**

In [14]:
fig = px.choropleth(conf_country, locations = 'Country', color = 'Confirmed', 
                    locationmode = 'country names', hover_name = 'Country', 
                    template = 'ggplot2', color_continuous_scale = 'Reds')
fig.update_layout(title = 'Confirmed Cases in the World')
fig.show()

**Bubble Map**

In [15]:
all_countries = conf_country['Country'].unique()
print('Number of countries/regions with cases: '+ str(len(all_countries)))
print("Countries/Regions with cases: ")
for i in all_countries:
    print(" " + str(i))

Number of countries/regions with cases: 170
Countries/Regions with cases: 
 China
 Italy
 US
 Spain
 Germany
 Iran
 France
 South Korea
 Switzerland
 UK
 Netherlands
 Austria
 Belgium
 Norway
 Canada
 Portugal
 Sweden
 Brazil
 Australia
 Denmark
 Turkey
 Malaysia
 Israel
 Czech Republic
 Japan
 Ireland
 Ecuador
 Luxembourg
 Pakistan
 Poland
 Chile
 Thailand
 Others
 Finland
 Greece
 Iceland
 Indonesia
 Romania
 Saudi Arabia
 Singapore
 Qatar
 India
 Philippines
 Slovenia
 Russia
 South Africa
 Peru
 Bahrain
 Egypt
 Hong Kong
 Estonia
 Mexico
 Croatia
 Panama
 Colombia
 Lebanon
 Argentina
 Iraq
 Serbia
 Dominican Republic
 Armenia
 Algeria
 Bulgaria
 United Arab Emirates
 Taiwan
 Kuwait
 San Marino
 Slovakia
 Latvia
 Lithuania
 Hungary
 Costa Rica
 Uruguay
 Morocco
 North Macedonia
 Andorra
 Bosnia and Herzegovina
 Jordan
 Vietnam
 Cyprus
 Moldova
 Malta
 Albania
 New Zealand
 Burkina Faso
 Sri Lanka
 Brunei
 Tunisia
 Cambodia
 Belarus
 Senegal
 Venezuela
 Ukraine
 Azerbaijan
 Oman
 Kaz

In [16]:
countries = {}
for country in pycountry.countries:
    countries[country.name] = country.alpha_3
    
countries

{'Aruba': 'ABW',
 'Afghanistan': 'AFG',
 'Angola': 'AGO',
 'Anguilla': 'AIA',
 'Åland Islands': 'ALA',
 'Albania': 'ALB',
 'Andorra': 'AND',
 'United Arab Emirates': 'ARE',
 'Argentina': 'ARG',
 'Armenia': 'ARM',
 'American Samoa': 'ASM',
 'Antarctica': 'ATA',
 'French Southern Territories': 'ATF',
 'Antigua and Barbuda': 'ATG',
 'Australia': 'AUS',
 'Austria': 'AUT',
 'Azerbaijan': 'AZE',
 'Burundi': 'BDI',
 'Belgium': 'BEL',
 'Benin': 'BEN',
 'Bonaire, Sint Eustatius and Saba': 'BES',
 'Burkina Faso': 'BFA',
 'Bangladesh': 'BGD',
 'Bulgaria': 'BGR',
 'Bahrain': 'BHR',
 'Bahamas': 'BHS',
 'Bosnia and Herzegovina': 'BIH',
 'Saint Barthélemy': 'BLM',
 'Belarus': 'BLR',
 'Belize': 'BLZ',
 'Bermuda': 'BMU',
 'Bolivia, Plurinational State of': 'BOL',
 'Brazil': 'BRA',
 'Barbados': 'BRB',
 'Brunei Darussalam': 'BRN',
 'Bhutan': 'BTN',
 'Bouvet Island': 'BVT',
 'Botswana': 'BWA',
 'Central African Republic': 'CAF',
 'Canada': 'CAN',
 'Cocos (Keeling) Islands': 'CCK',
 'Switzerland': 'CHE',
 

In [17]:
conf_country['iso_alpha'] = conf_country['Country'].map(countries.get)
conf_country

Unnamed: 0,Country,Confirmed,iso_alpha
0,China,81116,CHN
1,Italy,63927,ITA
2,US,43667,
3,Spain,35136,ESP
4,Germany,29056,DEU
5,Iran,23049,
6,France,20123,FRA
7,South Korea,8961,
8,Switzerland,8795,CHE
9,UK,6726,


In [18]:
#conf_country.query('iso_alpha == ')
index = conf_country['iso_alpha'].index[conf_country['iso_alpha'].apply(pd.isnull)]
a = list(conf_country['Country'].loc[index[:]])
a

['US',
 'Iran',
 'South Korea',
 'UK',
 'Czech Republic',
 'Others',
 'Russia',
 'Taiwan',
 'Vietnam',
 'Moldova',
 'Brunei',
 'Venezuela',
 'Congo (Kinshasa)',
 'Bolivia',
 'Ivory Coast',
 'Macau',
 'Tanzania',
 'Congo (Brazzaville)',
 'Syria',
 'Holy See']

In [19]:
conf_country['Country'].replace(a, ['United States', 'Iran, Islamic Republic of', "Korea, Democratic People's Republic of", "United Kingdom", "Czechia", "Others", "Russian Federation", "Taiwan, Province of China", "Viet Nam", "Moldova, Republic of",
                                       "Brunei Darussalam", "Venezuela, Bolivarian Republic of", "Congo", "Bolivia, Plurinational State of", "Côte d'Ivoire", "Macao", "Tanzania, United Republic of", "Congo, The Democratic Republic of the",
                                       "Syrian Arab Republic", "Holy See (Vatican City State)"], inplace = True)
conf_country

Unnamed: 0,Country,Confirmed,iso_alpha
0,China,81116,CHN
1,Italy,63927,ITA
2,United States,43667,
3,Spain,35136,ESP
4,Germany,29056,DEU
5,"Iran, Islamic Republic of",23049,
6,France,20123,FRA
7,"Korea, Democratic People's Republic of",8961,
8,Switzerland,8795,CHE
9,United Kingdom,6726,


In [20]:
conf_country['iso_alpha'] = conf_country['Country'].map(countries.get)
conf_country

Unnamed: 0,Country,Confirmed,iso_alpha
0,China,81116,CHN
1,Italy,63927,ITA
2,United States,43667,USA
3,Spain,35136,ESP
4,Germany,29056,DEU
5,"Iran, Islamic Republic of",23049,IRN
6,France,20123,FRA
7,"Korea, Democratic People's Republic of",8961,PRK
8,Switzerland,8795,CHE
9,United Kingdom,6726,GBR


In [21]:
fig = px.scatter_geo(conf_country, locations='iso_alpha', color='Country', 
                     hover_name='iso_alpha', size='Confirmed', template = 'plotly_white',
                     projection="natural earth")
fig.update_layout(title = 'Worlwide Confirmed Cases')
fig.show()
#px.scatter_geo(plot_data_confirmed, locations = 'iso_alpha', color = 'Country', hover_name = 'iso_alpha', size = "Confirmed", projection = "natural earth", title = 'Worldwide Confirmed Cases')

**Bar Chart**

In [22]:
fig = go.Figure()
fig.add_trace(go.Bar(x = conf_country['Country'][0:10],
                              y = conf_country['Confirmed'][0:10],
                              text = conf_country['Confirmed'][0:10],
                              textposition='auto',
                              marker_color = 'white'))
fig.update_layout(title = "10 Most Number of Confirmed Cases",
                 xaxis_title = 'Countries', 
                 yaxis_title = 'Number of Confirmed Cases', 
                 template = 'plotly_dark')
fig.show()

**Top 30 Countries where Confirmed Cases are maximum**

In [23]:
fig = go.Figure()
fig.add_trace(go.Bar(x = conf_country['Confirmed'][0:20],
                              y = conf_country['Country'][0:20],
                              text = conf_country['Confirmed'][0:20],
                              textposition='auto',
                              marker_color = 'white', orientation = 'h'))
fig.update_layout(title = "20 Most Number of Confirmed Cases",
                 xaxis_title = 'Number of Confirmed Cases', 
                 yaxis_title = 'Countries',
                 yaxis = dict(autorange = 'reversed'),
                 template = 'plotly_dark')
fig.show()

In [27]:
fig = go.Figure()
fig = px.scatter(conf_country, x=conf_country['Country'][0:2], y=conf_country['Confirmed'][0:2], color=conf_country['Confirmed'][0:2], facet_col=conf_country['Country'][0:2],
           color_continuous_scale='Reds', render_mode="webgl")
fig.show()

#This kind of graph can be used to see the difference in cases between two different period like Cases of confirmed in China and Italy in Feb and March (or first two week and last two weeks in March)

In [None]:
#Changing the name of columns "Country/Region" to "Country" in respective dataframes
df_confirmed.rename(columns = {'Country/Region': 'Country'}, inplace = True)
df_deaths.rename(columns = {"Country/Region": 'Country'}, inplace = True)
df_recovered.rename(columns = {'Country/Region': 'Country'}, inplace = True)

In [None]:
#All unique countries affected by COVID-19
df_confirmed['Country'].unique()

In [None]:
#Confirmed dataframe
df_confirmed.head()

In [None]:
#Recovered dataframe
df_recovered.head()

In [None]:
#Deaths dataframe
df_deaths.head()

In [None]:
#Latest Cases

df.tail()

In [None]:
#Grouping by Date, Country and Province to find the total number of Confirmed, Deaths and recovered cases
df2 = df.groupby(['Date', 'Country', 'Province/State'])[['SNo', 'Date', 'Province/State', 'Country', 'Confirmed', 'Deaths', 'Recovered', 'Active Cases']].sum().reset_index()
df2

In [None]:
#Number of cases viewed by Country (China)

df.query('Country == "China"').groupby("Last Update")[['Confirmed', 'Deaths', 'Recovered', 'Active Cases']].sum().reset_index()

In [None]:
#Number of cases everyday  in China
df.query('Country == "China"').groupby("Date")[['Confirmed', 'Deaths', 'Recovered', 'Active Cases']].sum().reset_index()

Based on above table, you can plot density map just for China

In [None]:
#Cases by country
cases_by_country = df.groupby(['Date', 'Country'])['Confirmed', 'Deaths', 'Recovered'].sum().reset_index()
cases_by_country

In [None]:
cases_by_country['Country'].unique()

In [None]:
cases_by_country[cases_by_country['Country'] == 'China'].index.tolist()

In [None]:
cases_by_country['Country'][90]

In [None]:
#Number of cases recoreded everyday in the USA
cases_by_country.query('Country == "US"').groupby("Date")[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()

In [None]:
#Number of cases recoreded everyday in Poland
cases_by_country.query('Country == "Poland"').groupby("Date")[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()

In [None]:
#Writing a function to find out new number and percent changes of confirmed, deaths, and recovered cases 
def summary_cases(Country):
    '''
    Argument: Country should be a condition. 
    For ex: 'Country == US'
    '''
    cols = ['New_Confirmed_Cases', 'New_Deaths_Cases', 'New_Recovered_Cases']
    query = cases_by_country.query(Country).groupby("Date")[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()
    query['New_Confirmed_Cases'] = query['Confirmed'].diff()
    query['New_Deaths_Cases'] = query['Deaths'].diff()
    query['New_Recovered_Cases'] = query['Recovered'].diff()
    query['Percent_Change_Confirmed'] = query['Confirmed'].pct_change()
    query['Percent_Change_Confirmed'] = query['Percent_Change_Confirmed'] * 100
    query['Percent_Change_Deaths'] = query['Deaths'].pct_change()
    query['Percent_Change_Deaths'] = query['Percent_Change_Deaths'] * 100
    query['Percent_Change_Recovered'] = query['Recovered'].pct_change()
    query['Percent_Change_Recovered'] = query['Percent_Change_Recovered'] * 100
    query.round({'Percent_Change_Confirmed': 2, 'Percent_Change_Deaths': 2, 'Percent_Change_Recovered': 2})
    cols = ['New_Confirmed_Cases', 'New_Deaths_Cases', 'New_Recovered_Cases']
    query[cols] = query[cols].fillna(0).applymap(np.int64)
    return query

In [None]:
cases_by_country['Country'].unique()

In [None]:
#Summary of cases and percent changes in the USA
US_cases = summary_cases(Country = 'Country == "US"')
US_cases

In [None]:
def bar_chart(x, y, title):    
    fig = go.Figure()
    fig.add_trace(go.Bar(x = x, 
                        y = y,
                        name = 'Confirmed', 
                        marker_color = 'blue'
                        ))

    fig.update_layout(
        title=title,
        xaxis_tickfont_size=14,
        yaxis=dict(
            title='Number of Confirmed Cases',
            titlefont_size=16,
            tickfont_size=14,
        ),
        legend = dict(
            x = 0, 
            y = 1.0,
            bgcolor = 'rgba(255, 255, 255, 0)',
            bordercolor = 'rgba(255, 255, 255, 0)'
        ),
            barmode = 'group',
            bargap = 0.15,
            bargroupgap = 0.1)
    fig.show()

In [None]:
bar_chart(x = US_cases['Date'], y = US_cases['Confirmed'], title = 'Corona Virus Cases - Confirmed in the USA (Bar Chart)')

In [None]:
#Summary of cases and percent changes in China
China_cases = summary_cases(Country = 'Country == "China"')
China_cases

In [None]:
bar_chart(x = China_cases['Date'], y = China_cases['Confirmed'], title = 'Corona Virus Cases - Confirmed in China (Bar Chart)')

We can use the following code to find out on what day the maximum new number of confirmed cases were recorded for any country. Similarly, we could use it as a framework to find out what day the maximum/minimum new number of recovered/deaths cases were recorded for any country.

In [None]:
#Maximum number of confirmed cases recored on what day in China

China_cases.loc[China_cases['New_Confirmed_Cases'].idxmax()] #All columns of that particular row
China_cases.iloc[China_cases.New_Confirmed_Cases.idxmax(), 0:1] #You can decide how many columns of particular row to be printed

China_cases.loc[China_cases.New_Confirmed_Cases.idxmax(), ['Date', 'New_Confirmed_Cases']]

In [None]:
#Summary of cases for Italy
Italy_cases = summary_cases(Country = 'Country == "Italy"')
Italy_cases

In [None]:
#Latest cases in Italy
Italy_cases.tail(10)

In [None]:
bar_chart(x = Italy_cases['Date'], y = Italy_cases['Confirmed'], title = "Corona Virus Cases - Confirmed in Italy (Bar Chart)")

In [None]:
bar_chart(x = Italy_cases['Date'], y = Italy_cases['Deaths'], title = "Corona Virus Cases - Deaths in Italy (Bar Chart)")

In [None]:
bar_chart(x = Italy_cases['Date'], y = Italy_cases['New_Confirmed_Cases'], title = "Corona Virus New Confirmed Confirmed Cases each day in Italy (Bar Chart)")

In [None]:
#Comparing new Confirmed cases between Italy and the USA
fig = go.Figure()
fig.add_trace(go.Bar(x = US_cases['Date'], 
                    y = US_cases['New_Confirmed_Cases'],
                    name = 'New Confirmed cases in the USA', 
                    marker_color = 'blue'
                    ))
fig.add_trace(go.Bar(x = Italy_cases['Date'], 
                    y = Italy_cases['New_Confirmed_Cases'], 
                    name = "New Confirmed cases in the Italy", 
                    marker_color = 'red'
                    ))

fig.update_layout(
    title='Corona Virus Cases - Comparison of Confirmed cases in the USA and Italy (Bar Chart)',
    xaxis_tickfont_size=14,
    yaxis=dict(
        title='Number of Cases',
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend = dict(
        x = 0, 
        y = 1.0,
        bgcolor = 'rgba(255, 255, 255, 0)',
        bordercolor = 'rgba(255, 255, 255, 0)'
    ),
        barmode = 'group',
        bargap = 0.15,
        bargroupgap = 0.1)
fig.show()

In [None]:
#Summary of cases in India
India_cases = summary_cases(Country = 'Country == "India"')
India_cases

In [None]:
India_cases.tail(10)

In [None]:
bar_chart(x = India_cases['Date'], y = India_cases['Confirmed'], title = "Corona Virus Cases - Confirmed in India (Bar Chart)")

In [None]:
bar_chart(x = India_cases['Date'], y = India_cases['New_Confirmed_Cases'], title = "Corona Virus Cases - Confirmed in India (Bar Chart)")

**Visualizations:**

In [None]:
df.groupby('Date').sum()

In [None]:
confirmed = df.groupby('Date').sum()['Confirmed'].reset_index()
deaths = df.groupby('Date').sum()['Deaths'].reset_index()
recovered = df.groupby('Date').sum()['Recovered'].reset_index()

In [None]:
#Number of confirmed cases each day
confirmed

In [None]:
confirmed['New Cases'] = confirmed['Confirmed'].diff()
confirmed

Comparing how number of cases have increased significantly in last few days compared to earlier cases

In [None]:
print("Total number of Confirmed Cases from 22nd Jan to 10th March (47 days): ", confirmed['New Cases'][0:49].sum())
print("Total number of Confirmed Cases from 11th March to Today: ", confirmed['New Cases'][49:].sum())

In [None]:
fig = go.Figure()
fig.add_trace(go.Bar(x = confirmed['Date'], 
                    y = confirmed['New Cases'],
                    name = 'Confirmed', 
                    marker_color = 'blue'
                    ))
fig.update_layout(
    title='New Confirmed Cases Everyday (Bar Chart)',
    xaxis_tickfont_size=14,
    yaxis=dict(
        title='Number of Cases',
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend = dict(
        x = 0, 
        y = 1.0,
        bgcolor = 'rgba(255, 255, 255, 0)',
        bordercolor = 'rgba(255, 255, 255, 0)'
    ))
fig.show()

Comparing Number of Confirmed, Deaths and Recovered cases

In [None]:
fig = go.Figure()
fig.add_trace(go.Bar(x = confirmed['Date'], 
                    y = confirmed['Confirmed'],
                    name = 'Confirmed', 
                    marker_color = 'blue'
                    ))
fig.add_trace(go.Bar(x = deaths['Date'], 
                    y = deaths['Deaths'], 
                    name = "Deaths", 
                    marker_color = 'red'
                    ))
fig.add_trace(go.Bar(x = recovered['Date'], 
                    y = recovered['Recovered'], 
                    name = 'Recovored',
                    marker_color = 'green'
                    ))

fig.update_layout(
    title='Worldwide Corona Virus Cases - Confirmed, Deaths, Recovered (Bar Chart)',
    xaxis_tickfont_size=14,
    yaxis=dict(
        title='Number of Cases',
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend = dict(
        x = 0, 
        y = 1.0,
        bgcolor = 'rgba(255, 255, 255, 0)',
        bordercolor = 'rgba(255, 255, 255, 0)'
    ),
        barmode = 'group',
        bargap = 0.15,
        bargroupgap = 0.1)
fig.show()

In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x = confirmed['Date'],
                        y = confirmed['Confirmed'],
                        mode = 'lines+markers',
                        name = 'Confirmed',
                        line = dict(color = 'blue', width = 2)))
fig.add_trace(go.Scatter(x = deaths['Date'],
                        y = deaths['Deaths'],
                        mode = 'lines+markers',
                        name = 'Deaths',
                        line = dict(color = 'red', width = 2)))
fig.add_trace(go.Scatter(x = recovered['Date'],
                        y = recovered['Recovered'],
                        mode = 'lines+markers',
                        name = 'Recovered',
                        line = dict(color = 'green', width = 2)))

fig.update_layout(
    title = "WorldWide Corona Virus Cases - Confirmed, Deaths, Recovered (Line Chart)",
    xaxis_tickfont_size = 14,
    yaxis = dict(
        title = 'Number of Cases',
        titlefont_size = 16,
        tickfont_size = 14,
    ),
    legend = dict(
        x = 0,
        y = 1,
        bgcolor = 'rgba(255, 255, 255, 0)',
        bordercolor = 'rgba(255, 255, 255, 0)'
    )
)
fig.show()

In [None]:
df_confirmed = df_confirmed[["Province/State","Lat","Long","Country"]]
df_latlong = pd.merge(df, df_confirmed, on = ['Province/State', 'Country'])
#df_latlong.sort_values(by = ['Date', 'Last Update'], inplace = True)

In [None]:
fig = px.density_mapbox(df_latlong,
                       lat = 'Lat', 
                       lon = 'Long',
                       hover_name="Province/State", 
                        hover_data=["Confirmed","Deaths","Recovered"], 
                        animation_frame="Date",
                        color_continuous_scale="Portland",
                        radius=7, 
                        zoom=0,height=700)

fig.update_layout(title = 'Worldwide Corona Virus Cases Time Lapse - Confirmed, Deaths, Recovered',
                 font = dict(family = "Courier New, monospace",
                            size = 12,
                            color = "#7f7f7f"))

fig.update_layout(mapbox_style = 'open-street-map', mapbox_center_lon = 0)
fig.update_layout(margin = {'r':0,'t':0,"l":0,"b":0})

fig.show()

In [None]:
confirmed = df2.groupby(['Date', 'Country']).sum()[['Confirmed']].reset_index()
confirmed

In [None]:
deaths = df2.groupby(['Date', 'Country']).sum()[['Deaths']].reset_index()
deaths

In [None]:
recovered = df2.groupby(['Date', 'Country']).sum()[['Recovered']].reset_index()
recovered

In [None]:
latest_date = recovered['Date'].max()
latest_date

In [None]:
confirmed = confirmed[(confirmed['Date'] == latest_date)][['Country', 'Confirmed']]
confirmed

In [None]:
deaths = deaths[(deaths['Date'] == latest_date)][['Country', 'Deaths']]
deaths

In [None]:
recovered = recovered[(recovered['Date'] == latest_date)][['Country', 'Recovered']]
recovered

In [None]:
all_countries = confirmed['Country'].unique()
print('Number of countries/regions with cases: '+ str(len(all_countries)))
print("Countries/Regions with cases: ")
for i in all_countries:
    print(" " + str(i))

In [None]:
countries = {}
for country in pycountry.countries:
    countries[country.name] = country.alpha_3
    
countries

In [None]:
confirmed['iso_alpha'] = confirmed['Country'].map(countries.get)
deaths['iso_alpha'] = deaths['Country'].map(countries.get)
recovered['iso_alpha'] = recovered['Country'].map(countries.get)
recovered

In [None]:
plot_data_confirmed = confirmed[['iso_alpha', 'Confirmed', 'Country']]
plot_data_deaths = deaths[['iso_alpha', 'Deaths', 'Country']]
plot_data_recovered = recovered[['iso_alpha', 'Recovered', 'Country']]

In [None]:
fig = px.scatter_geo(plot_data_confirmed, locations = 'iso_alpha', color = 'Country',
                    hover_name = 'iso_alpha', size = "Confirmed", projection = "natural earth", 
                    title = 'Worldwide Confirmed Cases')
fig.show()

**Forecasting**

In [None]:
#Transforming data for forecasting

confirmed = df.groupby('Date').sum()['Confirmed'].reset_index()
deaths = df.groupby('Date').sum()['Deaths'].reset_index()
recovered = df.groupby('Date').sum()['Recovered'].reset_index()

In [None]:
confirmed.columns = ['ds', 'y']
confirmed['ds'] = pd.to_datetime(confirmed['ds'])

In [None]:
confirmed.head(15)

**Forecastiing Confirmed Cases Worldwide with Prophet (Baseline)**

In [None]:
m = Prophet(interval_width = 0.97)
m.fit(confirmed.tail(15))
future = m.make_future_dataframe(periods = 9)
future_confirmed = future.copy() #For non-baseline predictions later on
future

In [None]:
forecast = m.predict(future)
cols = ['yhat', 'yhat_lower', 'yhat_upper']
forecast[cols] = forecast[cols].applymap(np.int64)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(15)

In [None]:
confirmed_forecast_plot = m.plot(forecast)

**Forecasting Deaths Worldwide with Prophet (Baseline)**

In [None]:
deaths.columns = ['ds', 'y']
deaths['ds'] = pd.to_datetime(deaths['ds'])

In [None]:
m = Prophet(interval_width = 0.95)
m.fit(deaths)
future = m.make_future_dataframe(periods = 9)
future_deaths = future.copy() 
future.tail()

In [None]:
forecast = m.predict(future)
cols = ['yhat', 'yhat_lower', 'yhat_upper']
forecast[cols] = forecast[cols].applymap(np.int64)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(15)

In [None]:
deaths_forecast_plot = m.plot(forecast)

**Forecasting Recovered Cases Worldwide with Prophet (Worldwide)**

In [None]:
recovered.columns = ['ds', 'y']
recovered['ds'] = pd.to_datetime(recovered['ds'])

In [None]:
m = Prophet(interval_width = 0.98)
m.fit(recovered)
future = m.make_future_dataframe(periods = 9)
future_recovered = future.copy()
future.tail()

In [None]:
forecast = m.predict(future)
cols = ['yhat', 'yhat_lower', 'yhat_upper']
forecast[cols] = forecast[cols].applymap(np.int64)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(8)

In [None]:
recovered_forecast_plot = m.plot(forecast)