In [1]:
# Install PyGithub with the `pip` line magic rather than a shell call: a bare
# shell `pip` can resolve to a different Python installation than the one
# backing this kernel, in which case the later `from github import Github` fails.
get_ipython().run_line_magic('pip', 'install PyGithub')
# Select matplotlib's interactive "notebook" backend for the figures below.
get_ipython().run_line_magic('matplotlib', 'notebook')



In [2]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import datetime

In [3]:
def corona_df(git_key, branch):
    """Download one CSV file from the CSSEGISandData/2019-nCoV repository.

    Parameters
    ----------
    git_key : str
        GitHub credential passed to ``github.Github`` (an access token).
    branch : str
        Despite its name, this is the path of the file inside the repository,
        e.g. ``"/time_series/time_series_2019-ncov-Confirmed.csv"``. It is
        passed unchanged to ``Repository.get_contents``.

    Returns
    -------
    pandas.DataFrame
        The file content parsed with ``pandas.read_csv``.

    Raises
    ------
    ValueError
        If ``branch`` resolves to a directory instead of a single file.
    """
    # Local imports: nothing else in the notebook needs these names.
    import io
    from github import Github

    repo = Github(git_key).get_repo("CSSEGISandData/2019-nCoV")
    contents = repo.get_contents(branch)

    # For a directory path get_contents returns a list of entries, which has no
    # decoded_content; fail with a clear message instead of an AttributeError.
    if isinstance(contents, list):
        raise ValueError(
            "expected the path of a single CSV file, got a directory: %r" % (branch,)
        )

    csv_text = contents.decoded_content.decode('utf-8')
    return pd.read_csv(io.StringIO(csv_text))

In [4]:
# Download the three raw time-series tables with corona_df (defined above).
# The GitHub token is read from the GITHUB_TOKEN environment variable instead of
# being hardcoded in the notebook, where it would leak to every reader.
# When the variable is unset, None is passed and PyGithub makes unauthenticated
# (rate-limited) requests -- presumably sufficient for this repository; confirm.
# NOTE(review): the token that used to be committed here should be revoked.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")

confirmed_df = corona_df(GITHUB_TOKEN, "/time_series/time_series_2019-ncov-Confirmed.csv")
deaths_df = corona_df(GITHUB_TOKEN, "/time_series/time_series_2019-ncov-Deaths.csv")
recovered_df = corona_df(GITHUB_TOKEN, "/time_series/time_series_2019-ncov-Recovered.csv")

In [5]:
# Build the x-axis labels from the column headers of deaths_df.
# The first four headers are not dates and are skipped; every remaining header
# is shortened to its first 7 characters.
columns = list(deaths_df)[4:]
dates = [header[:7] for header in columns]


In [6]:
#Define function for cleaning the data
def clean_df(df):
    """Collapse a raw time-series table into integer totals per country/region.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table containing a "Country/Region" column, "Lat" and "Long"
        columns, and numeric count columns. Other text columns (such as
        "Province/State") are allowed and are left out of the result.

    Returns
    -------
    pandas.DataFrame
        One row per "Country/Region" (used as the index), integer counts,
        sorted in descending order of the last column. Rows whose
        "Country/Region" is missing are not included.

    Raises
    ------
    KeyError
        If "Lat", "Long" or "Country/Region" is absent from ``df``.
    """
    per_country = (
        df.drop(columns=["Lat", "Long"])     # coordinates must not be summed
          .groupby("Country/Region")
          # numeric_only=True drops text columns explicitly; newer pandas no
          # longer discards them silently, which would break astype(int).
          # sum() skips NaN, so missing counts contribute 0 without a fillna.
          .sum(numeric_only=True)
          .astype(int)
    )
    # Largest value in the last column first.
    return per_country.sort_values(by=per_country.columns[-1], ascending=False)

In [7]:
# Run clean_df over each raw table, producing one cleaned frame per data set
df_deaths_clean, df_confirmed_clean, df_recovered_clean = [
    clean_df(raw_frame) for raw_frame in (deaths_df, confirmed_df, recovered_df)
]

In [8]:
# Define function to plot each line on a fig
def myplot(ax, x_axis, country, df):
    """Draw one row of ``df`` as a dashed, labelled line on ``ax``.

    Parameters
    ----------
    ax : matplotlib axes (any object with a compatible ``plot`` method)
        Target the line is drawn on.
    x_axis : sequence
        X values, one per column of ``df``.
    country : int
        Positional row number in ``df`` (used with ``iloc``), not a name.
    df : pandas.DataFrame
        Frame whose index supplies the legend label for the row.
    """
    markers = ["o", "1", "2", "3", "4", "s", "p", "h", "x", "+", "d"]
    # Derive the marker from the row position instead of picking at random:
    # re-running the notebook reproduces the same figure, and consecutive rows
    # never share a marker (up to len(markers) lines).
    marker = markers[country % len(markers)]
    ax.plot(x_axis, df.iloc[country, :], marker=marker, linestyle="--", label=df.index[country])

# Define function to plot full plot with top 10 lines in a given df
def myplot_full(fig, ax, df, title, top_n=10):
    """Plot rows 1..``top_n`` of ``df`` on ``ax`` and decorate the axes.

    Row 0 is deliberately skipped. The notebook's titles say "Excluding
    Mainland China", so presumably the frame is sorted with Mainland China
    first -- confirm against the caller.

    Parameters
    ----------
    fig : matplotlib figure
        Not used by this function; kept so existing calls keep working.
    ax : matplotlib axes
        Axes that receive the lines, labels, grid and legend.
    df : pandas.DataFrame
        One row per line to draw, one column per entry of the global ``dates``.
    title : str
        Title of the axes.
    top_n : int, optional
        Number of rows to plot, starting at row 1 (default 10). When ``df`` has
        fewer rows, only the available ones are plotted.
    """
    # Clamp to the rows that exist so a short frame does not raise IndexError.
    last_row = min(top_n + 1, len(df))
    for row in range(1, last_row):
        myplot(ax, dates, row, df)

    ax.set(xlabel="Days (MM/DD/YY)", ylabel="People", title=title)

    # Configure the ticks on `ax` itself: plt.xticks would act on whichever
    # axes happens to be current, which is not necessarily the one passed in.
    ax.set_xticks(dates)
    ax.set_xticklabels(dates, rotation=90)
    ax.grid()
    ax.legend()

# Define fig saving function
def fig_save(fig, file_name):
    """Save ``fig`` to ``images/<file_name>`` relative to the working directory.

    Parameters
    ----------
    fig : matplotlib figure (any object with a ``savefig(path)`` method)
        Figure to write.
    file_name : str
        File name inside the ``images`` directory; it is passed to ``savefig``
        unchanged.
    """
    out_dir = "images"
    # savefig does not create missing directories; make sure the target exists
    # so that a fresh checkout can run the notebook.
    os.makedirs(out_dir, exist_ok=True)
    fig.savefig(os.path.join(out_dir, file_name))

In [9]:
# Mainland China over time: confirmed, deaths and recovered on a single figure
fig, ax = plt.subplots(figsize=(10, 8))

# (cleaned frame, legend label, marker) for each line, in drawing order
china_series = (
    (df_confirmed_clean, "Mainland China - Confirmed", "o"),
    (df_deaths_clean, "Mainland China - Deaths", "+"),
    (df_recovered_clean, "Mainland China - Recovered", ">"),
)
for frame, line_label, line_marker in china_series:
    ax.plot(dates, frame.loc["Mainland China", :], label=line_label, marker=line_marker)

plt.xticks(dates, dates, rotation=90)
ax.legend()
ax.set(
    xlabel="Days (MM/DD/YY)",
    ylabel="People",
    title="Coronavirus Data in Mainland China",
)

fig_save(fig, "coronavirusdata_mainlandchina")

<IPython.core.display.Javascript object>

In [10]:
# Confirmed cases; myplot_full decides which rows of the frame are drawn
fig2, ax2 = plt.subplots(figsize=(10, 8))
myplot_full(
    fig=fig2,
    ax=ax2,
    df=df_confirmed_clean,
    title="Confirmed Cases of Corona Virus (Excluding Mainland China)",
)
fig_save(fig=fig2, file_name="confirmed_othercountries")

<IPython.core.display.Javascript object>

In [11]:
# Deaths; myplot_full decides which rows of the frame are drawn
fig3, ax3 = plt.subplots(figsize=(10, 8))
myplot_full(
    fig=fig3,
    ax=ax3,
    df=df_deaths_clean,
    title="Confirmed Deaths from Corona Virus (Excluding Mainland China)",
)
fig_save(fig=fig3, file_name="deaths_othercountries")

<IPython.core.display.Javascript object>

In [12]:
# Recovered cases; myplot_full decides which rows of the frame are drawn
fig4, ax4 = plt.subplots(figsize=(10, 8))
myplot_full(
    fig=fig4,
    ax=ax4,
    df=df_recovered_clean,
    title="Recovered Cases of Corona Virus (Excluding Mainland China)",
)
fig_save(fig=fig4, file_name="recovered_othercountries")

<IPython.core.display.Javascript object>