In [None]:
# Install PyGithub with the %pip magic so the package is installed into the
# environment of the running kernel (a "!pip" shell call can hit another Python).
get_ipython().run_line_magic('pip', 'install PyGithub')
# Select the interactive "notebook" matplotlib backend for the figures below.
get_ipython().run_line_magic('matplotlib', 'notebook')

In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import datetime
from config import git_key

In [None]:
def corona_df(git_key, branch):
    """Download one CSV file from the CSSEGISandData/COVID-19 repo as a DataFrame.

    Parameters
    ----------
    git_key : str
        GitHub access token handed to ``Github``.
    branch : str
        Despite its name, this is the path of the file inside the repository;
        it is passed to ``repo.get_contents`` unchanged.

    Returns
    -------
    pandas.DataFrame
        The file content parsed by ``pd.read_csv``.

    Raises
    ------
    ValueError
        If ``branch`` points at a directory instead of a single file.
    """
    import io
    from github import Github

    g = Github(git_key)
    repo = g.get_repo("CSSEGISandData/COVID-19")
    contents = repo.get_contents(branch)
    # For a directory path get_contents returns a list of entries, which has no
    # decoded_content; fail with a clear message instead of an AttributeError.
    if isinstance(contents, list):
        raise ValueError("%r is a directory, expected the path of a CSV file" % (branch,))
    csv_text = contents.decoded_content.decode('utf-8')
    return pd.read_csv(io.StringIO(csv_text))

In [None]:
# Download the confirmed, deaths and recovered time-series CSV files (in that
# order) from the repository through corona_df.
confirmed_df, deaths_df, recovered_df = [
    corona_df(git_key, csv_path)
    for csv_path in (
        "/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv",
        "/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv",
        "/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv",
    )
]

In [None]:
# Preview the first five rows of the raw confirmed-cases table.
confirmed_df.head(n=5)

In [None]:
#Define function for cleaning the data
def clean_df(df):
    """Aggregate a raw time-series table into integer totals per country/region.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a "Country/Region" column, optional "Province/State", "Lat"
        and "Long" columns, and one count column per date. Not modified.

    Returns
    -------
    pandas.DataFrame
        One row per "Country/Region" (used as the index), integer counts in
        every remaining column, rows sorted in descending order of the last
        column.
    """
    # Keep only the grouping key and the count columns. "Province/State" holds
    # text: pandas >= 2.0 no longer silently drops such columns when summing
    # groups, which made the aggregation / integer cast below fail.
    counts = df.drop(columns=["Province/State", "Lat", "Long"], errors="ignore")
    counts = counts.fillna(value=0)                       # missing counts become zero
    totals = counts.groupby("Country/Region").sum()       # one row per country/region
    totals = totals.astype(int)                           # counts are whole numbers
    # Highest value on the most recent (last) column comes first
    return totals.sort_values(by=totals.columns[-1], ascending=False)

In [None]:
# Run clean_df on each raw table and keep the per-country results.
df_deaths_clean, df_confirmed_clean, df_recovered_clean = map(
    clean_df, (deaths_df, confirmed_df, recovered_df)
)

In [None]:
# Define function to plot each line on a fig
def myplot(ax, x_axis, country, df):
    """Draw one row of ``df`` as a line on ``ax`` using a randomly chosen marker.

    ``country`` is the integer row position in ``df``; the index label of that
    row becomes the legend label and ``x_axis`` supplies the x values.
    """
    import random

    row_values = df.iloc[country, :]
    marker_styles = ["o", "1", "2", "3", "4", "s", "p", "h", "x", "+", "d"]
    picked_marker = random.choice(marker_styles)
    row_label = df.index[country]
    ax.plot(
        x_axis,
        row_values,
        marker=picked_marker,
        linestyle="-",
        label=row_label,
    )

# Define function to plot full plot with top 10 lines in a given df
def myplot_full(fig, ax, df, title):
    """Plot rows 1-10 of ``df`` as lines on ``ax`` and decorate the axes.

    Parameters
    ----------
    fig : matplotlib figure
        Unused; kept so existing callers keep working.
    ax : matplotlib axes
        Axes that receive the lines, labels, ticks, grid and legend.
    df : pandas.DataFrame
        One row per line to draw; the columns supply the x values and tick labels.
    title : str
        Axes title.
    """
    # Row 0 is skipped and rows 1..10 are drawn -- presumably because callers
    # pass frames sorted so that row 0 is Mainland China (their titles say
    # "Outside of Mainland China"); confirm. The upper bound is clamped so a
    # frame with fewer than 11 rows no longer raises IndexError.
    for position in range(1, min(len(df), 11)):
        myplot(ax, df.columns, position, df)

    ax.set(xlabel="Date (MM/DD/YY)", ylabel="People", title=title)

    # Set ticks on the axes that was passed in; plt.xticks would act on whatever
    # axes happens to be current, which need not be ``ax``.
    ax.set_xticks(df.columns)
    ax.set_xticklabels(df.columns, rotation=90)
    # Explicitly enable the grid; a bare grid() call toggles the current state.
    ax.grid(True)
    ax.legend()

# Define fig saving function
def fig_save(fig, file_name):
    """Save ``fig`` as ``images/<file_name>``, creating ``images`` if needed.

    Parameters
    ----------
    fig : object with a ``savefig(path)`` method
        Figure to write.
    file_name : str
        File name (joined onto the "images" directory, relative to the current
        working directory).
    """
    out_dir = "images"
    # savefig does not create missing directories, so make sure it exists first
    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, file_name)
    fig.savefig(path)

In [None]:
# Plot confirmed, deaths and recovered counts for Mainland China over time
fig, ax = plt.subplots(figsize=(10,8))
ax.plot(df_deaths_clean.columns, df_confirmed_clean.loc["Mainland China",:],
        label="Mainland China - Confirmed", marker="o", color='b');
ax.plot(df_deaths_clean.columns, df_deaths_clean.loc["Mainland China",:],
        label="Mainland China - Deaths", marker="x", color='r');
ax.plot(df_deaths_clean.columns, df_recovered_clean.loc["Mainland China",:],
        label="Mainland China - Recovered", marker="^", color='g');
plt.xticks(df_deaths_clean.columns, df_deaths_clean.columns, rotation=90);
ax.legend();
ax.set(xlabel="Date (MM/DD/YY)", ylabel="People", title="Coronavirus Data in Mainland China")
# Show horizontal grid lines only. The flag is passed positionally because the
# old "b" keyword was renamed to "visible" and is rejected by current matplotlib.
ax.grid(True, axis="y")
plt.tight_layout()
fig_save(fig, "coronavirusdata_mainlandchina")

In [None]:
# Confirmed-case curves for the rows drawn by myplot_full
fig2, ax2 = plt.subplots(figsize=(10,8))
myplot_full(fig2, ax2, df_confirmed_clean, "Confirmed Cases of Coronavirus Outside of Mainland China (ROW)")
# myplot_full switches the grid on; hide the vertical lines again. The flag is
# positional because the old "b" keyword is rejected by current matplotlib.
ax2.grid(False, axis="x")
plt.tight_layout()
fig_save(fig2, "confirmed_ROW")

In [None]:
# Death-count curves for the rows drawn by myplot_full
fig3, ax3 = plt.subplots(figsize=(10,8))
myplot_full(fig3, ax3, df_deaths_clean, "Confirmed Deaths from Coronavirus Outside of Mainland China (ROW)")
# myplot_full switches the grid on; hide the vertical lines again. The flag is
# positional because the old "b" keyword is rejected by current matplotlib.
ax3.grid(False, axis="x")
# One y tick per whole number from 0 to 10.
# NOTE(review): the upper bound of 10 is hard-coded -- confirm it still covers the data.
ax3.set_yticks(np.arange(0, 10+1, 1.0))
plt.tight_layout()

fig_save(fig3, "deaths_ROW")

In [None]:
# Recovered-case curves for the rows drawn by myplot_full
fig4, ax4 = plt.subplots(figsize=(10,8))
myplot_full(fig4, ax4, df_recovered_clean, "Recovered Cases of Coronavirus Outside of Mainland China (ROW)")
# myplot_full switches the grid on; hide the vertical lines again. The flag is
# positional because the old "b" keyword is rejected by current matplotlib.
ax4.grid(False, axis="x")
plt.tight_layout()

fig_save(fig4, "recovered_ROW")