In [1]:
get_ipython().system(' pip install PyGithub')
get_ipython().run_line_magic('matplotlib', 'notebook')



In [2]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import datetime
from config import git_key

In [3]:
def corona_df(git_key, branch):
    """Fetch one CSV file from the CSSEGISandData/COVID-19 GitHub repository.

    Parameters
    ----------
    git_key : str
        Access token handed to ``github.Github`` for authentication.
    branch : str
        Despite its name, this is the path of the file inside the repository;
        it is forwarded unchanged to ``repo.get_contents``.

    Returns
    -------
    pandas.DataFrame
        The UTF-8 decoded file contents parsed with ``pandas.read_csv``.
    """
    # Local imports: neither name is imported by the notebook's import cell.
    import io
    from github import Github

    repo = Github(git_key).get_repo("CSSEGISandData/COVID-19")
    contents = repo.get_contents(branch)
    # decoded_content is bytes; decode to text before handing it to pandas.
    csv_text = contents.decoded_content.decode('utf-8')
    return pd.read_csv(io.StringIO(csv_text))

In [4]:
# Download the confirmed / deaths / recovered time-series tables with corona_df.
# NOTE(review): the file names are hard-coded; confirm they still exist upstream.
confirmed_df, deaths_df, recovered_df = (
    corona_df(git_key, csv_path)
    for csv_path in (
        "/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv",
        "/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv",
        "/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv",
    )
)

In [5]:
# Preview the first rows of the raw confirmed-cases table (one row per
# province/state, one column per date) before any cleaning is applied.
confirmed_df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20
0,Anhui,Mainland China,31.82571,117.2264,1,9,15,39,60,70,...,830,860,889,910,934,950,962,973,982,986
1,Beijing,Mainland China,40.18238,116.4142,14,22,36,41,68,80,...,337,342,352,366,372,375,380,381,387,393
2,Chongqing,Mainland China,30.05718,107.874,6,9,27,57,75,110,...,486,505,518,529,537,544,551,553,555,560
3,Fujian,Mainland China,26.07783,117.9895,1,5,10,18,35,59,...,261,267,272,279,281,285,287,290,292,293
4,Gansu,Mainland China,36.0611,103.8343,0,2,2,4,7,14,...,83,86,87,90,90,90,90,91,91,91


In [6]:
# Define function for cleaning the data
def clean_df(df):
    """Aggregate a raw time-series table into per-country integer totals.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``Country/Region`` column, optional ``Province/State``,
        ``Lat`` and ``Long`` columns, and one numeric column per date.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Country/Region``, one integer column per date, sorted in
        descending order of the last (most recent) date column.
    """
    totals = (
        df.fillna(value=0)  # missing counts become zero
        # Province/State is text and Lat/Long are coordinates: none of them may
        # take part in the sum. Province/State used to be discarded implicitly
        # by the aggregation, which newer pandas versions no longer do.
        .drop(columns=["Province/State", "Lat", "Long"], errors="ignore")
        .groupby("Country/Region")  # group key becomes the index
        .sum()
        .astype(int)
    )
    # Rank countries by the value in the most recent date column.
    return totals.sort_values(by=totals.columns[-1], ascending=False)

In [7]:
# Run clean_df over each raw table to obtain the per-country frames used by the plots
df_confirmed_clean, df_deaths_clean, df_recovered_clean = map(
    clean_df, (confirmed_df, deaths_df, recovered_df)
)

In [8]:
# Draw a single row of a frame as one line on the given axes
def myplot(ax, x_axis, country, df):
    """Plot row number ``country`` of ``df`` against ``x_axis`` on ``ax``.

    ``country`` is a positional row index (it is used with ``df.iloc`` and
    ``df.index[...]``); the row's index label becomes the legend label. The
    marker is picked at random from a fixed list, so it can differ between
    runs.
    """
    import random
    marker_pool = ["o", "1", "2", "3", "4", "s", "p", "h", "x", "+", "d"]
    row_values = df.iloc[country, :]
    row_label = df.index[country]
    ax.plot(
        x_axis,
        row_values,
        marker=random.choice(marker_pool),
        linestyle="-",
        label=row_label,
    )

# Define function to plot full plot with top 10 lines in a given df
def myplot_full(fig, ax, df, title, first=1, count=10):
    """Plot a run of consecutive rows of ``df`` as lines on ``ax`` and decorate the axes.

    Parameters
    ----------
    fig : object
        Accepted for call compatibility; not used by this function.
    ax : matplotlib axes
        Axes that receives the lines, labels, ticks, grid and legend.
    df : pandas.DataFrame
        One row per line to draw; the column labels are used as x values.
    title : str
        Axes title.
    first : int, optional
        Position of the first row to draw. Defaults to 1, i.e. row 0 is
        skipped (the notebook's titles say "Excluding Mainland China", so row 0
        is presumably that entry after sorting -- confirm against the data).
    count : int, optional
        Maximum number of rows to draw. Fewer are drawn when ``df`` is shorter,
        instead of failing with an out-of-range row position.
    """
    last = min(first + count, len(df))
    for row in range(first, last):
        myplot(ax, df.columns, row, df)

    ax.set(xlabel="Days (MM/DD/YY)", ylabel="People", title=title)

    # Configure the ticks on the axes that was passed in rather than on
    # whichever axes pyplot currently considers "current".
    ax.set_xticks(df.columns)
    ax.set_xticklabels(df.columns, rotation=90)
    ax.grid()
    ax.legend()

# Define fig saving function
def fig_save(fig, file_name):
    """Save ``fig`` as ``images/<file_name>``, creating ``images`` if needed.

    Parameters
    ----------
    fig : object
        Anything with a ``savefig(path)`` method (a matplotlib figure here).
    file_name : str
        File name, joined onto the ``images`` directory, which is resolved
        against the current working directory.
    """
    # Create the output directory first so savefig does not fail on a fresh checkout.
    os.makedirs("images", exist_ok=True)
    fig.savefig(os.path.join("images", file_name))

In [13]:
# Plot confirmed, deaths and recovered counts for Mainland China over time.
# The date labels of df_deaths_clean are used as the x values for all three
# series, which assumes the three frames share the same date columns.
fig, ax = plt.subplots(figsize=(10,8))
ax.plot(df_deaths_clean.columns, df_confirmed_clean.loc["Mainland China",:],
        label="Mainland China - Confirmed", marker="o", color='b')
ax.plot(df_deaths_clean.columns, df_deaths_clean.loc["Mainland China",:],
        label="Mainland China - Deaths", marker="x", color='r')
ax.plot(df_deaths_clean.columns, df_recovered_clean.loc["Mainland China",:],
        label="Mainland China - Recovered", marker="^", color='g')
# Set the ticks on this axes object directly instead of via pyplot's current axes.
ax.set_xticks(df_deaths_clean.columns)
ax.set_xticklabels(df_deaths_clean.columns, rotation=90)
ax.legend()
ax.set(xlabel="Days (MM/DD/YY)", ylabel="People", title="Coronavirus Data in Mainland China")
# The `b=` keyword is rejected by newer Matplotlib releases (it was renamed to
# `visible`). Omitting it keeps the original behaviour: with no explicit
# visibility the call toggles the y grid lines.
ax.grid(axis="y")
fig_save(fig, "coronavirusdata_mainlandchina")

<IPython.core.display.Javascript object>

In [10]:
# Confirmed cases for the rows that myplot_full draws (row 0 of the sorted
# frame is skipped by default).
fig2, ax2 = plt.subplots(figsize=(10,8))
myplot_full(fig2, ax2, df_confirmed_clean, "Confirmed Cases of Corona Virus (Excluding Mainland China)")
# The `b=` keyword is rejected by newer Matplotlib releases (renamed to
# `visible`). Without an explicit visibility this call toggles the x grid
# lines, exactly as the original `b=None` did.
ax2.grid(axis="x")
fig_save(fig2, "confirmed_othercountries")

<IPython.core.display.Javascript object>

In [11]:
# Deaths for the rows that myplot_full draws (row 0 of the sorted frame is
# skipped by default).
fig3, ax3 = plt.subplots(figsize=(10,8))
myplot_full(fig3, ax3, df_deaths_clean, "Confirmed Deaths from Corona Virus (Excluding Mainland China)")
# The `b=` keyword is rejected by newer Matplotlib releases (renamed to
# `visible`). Without an explicit visibility this call toggles the x grid
# lines, exactly as the original `b=None` did.
ax3.grid(axis="x")
# One y tick per unit from 0 to 10.
# NOTE(review): the upper bound is hard-coded; revisit if any plotted
# country exceeds 10.
ax3.set_yticks(np.arange(0, 10+1, 1.0))

fig_save(fig3, "deaths_othercountries")

<IPython.core.display.Javascript object>

In [12]:
# Recovered cases for the rows that myplot_full draws (row 0 of the sorted
# frame is skipped by default).
fig4, ax4 = plt.subplots(figsize=(10,8))
myplot_full(fig4, ax4, df_recovered_clean, "Recovered Cases of Corona Virus (Excluding Mainland China)")
# The `b=` keyword is rejected by newer Matplotlib releases (renamed to
# `visible`). Without an explicit visibility this call toggles the x grid
# lines, exactly as the original `b=None` did.
ax4.grid(axis="x")
fig_save(fig4, "recovered_othercountries")

<IPython.core.display.Javascript object>