# Importing Packages and Loading JHU Global Data

In [201]:
import pandas as pd
import numpy as np
from os import __file__
import matplotlib.pyplot as plt
from matplotlib.pyplot import *
from lmfit.models import LorentzianModel

# Read the three JHU COVID-19 global time-series CSV files (confirmed,
# deaths, recovered) from the current working directory, in that order.
confirmedTS_df, deathsTS_df, recoveredTS_df = map(
    pd.read_csv,
    (
        'time_series_covid19_confirmed_global.csv',
        'time_series_covid19_deaths_global.csv',
        'time_series_covid19_recovered_global.csv',
    ),
)

# Compiling Country Data and Estimating Stabilization Times

In [204]:
# Countries of interest, spelled as they appear in the data files
myCC_li = [
    'China',
    'Russia',
    'Iran',
    'Korea, South',
]

def CompileCountryData(myCC_li, confirmed_df=None, deaths_df=None, recovered_df=None):
    """
    Collect the COVID-19 time series for each requested country.

    Input:
        myCC_li: list of country names, matched exactly against the
            'Country/Region' column.
        confirmed_df, deaths_df, recovered_df: optional DataFrames whose
            first four columns are treated as metadata and whose remaining
            columns hold one value per date. When omitted, the module-level
            confirmedTS_df, deathsTS_df and recoveredTS_df are used.
    Output: a dictionary mapping each country name to a list of four
    DataFrames holding the rows of that country:
        [0] Confirmed Cases
        [1] Deaths
        [2] Recovered Cases
        [3] New Cases (derived from the confirmed cases)
    Raises: ValueError if a country has no rows in the confirmed data.
    """
    # Fall back to the frames loaded at module level; resolved at call time
    # so the globals are only needed when no explicit frame is supplied.
    if confirmed_df is None:
        confirmed_df = confirmedTS_df
    if deaths_df is None:
        deaths_df = deathsTS_df
    if recovered_df is None:
        recovered_df = recoveredTS_df

    myCCdata_dict = {}
    for CC in myCC_li:
        CCc_DF = confirmed_df[confirmed_df['Country/Region'] == CC]
        if CCc_DF.shape[0] == 0:
            # Without this guard np.gradient fails with an unrelated-looking
            # "shape of array too small" error; name the real problem.
            raise ValueError(
                f"No rows found for country {CC!r} in the confirmed-cases data"
            )

        # Calculating the number of new cases per day based on the confirmed cases
        meta_slice_df = CCc_DF.iloc[:, :4]
        counts_df = CCc_DF.iloc[:, 4:]
        num_slice = counts_df.values
        if CCc_DF.shape[0] == 1:
            # Single row: day-over-day difference, with 0 for the first date.
            new_cases_ar = np.append(0, np.diff(num_slice[0])).reshape(1, -1)
        else:
            # Several rows: numerical gradient along the date axis only.
            # NOTE(review): this is a central difference (fractional values),
            # whereas the single-row case above uses a plain difference.
            # Kept as-is to preserve existing results; confirm which of the
            # two definitions of "new cases" is intended.
            new_cases_ar = np.gradient(num_slice, axis=1)
        new_cases_df = pd.DataFrame(
            new_cases_ar, columns=counts_df.columns, index=counts_df.index
        )
        # Re-attach the metadata columns, aligned on the original row index.
        new_cases_df = meta_slice_df.merge(
            new_cases_df, left_index=True, right_index=True
        )

        # Loading the remainder of the data
        CCd_DF = deaths_df[deaths_df['Country/Region'] == CC]
        CCr_DF = recovered_df[recovered_df['Country/Region'] == CC]
        myCCdata_dict[CC] = [CCc_DF, CCd_DF, CCr_DF, new_cases_df]
    return myCCdata_dict
        # Plots Below
#         for t in range(len(CCdata_li)):
#             if t == 0:
#                 fig, ax = subplots()
#                 plt.title(CC+' Confirmed Cases')
#                 plt.xlabel('Days')
#                 plt.ylabel('Cases')
#                 myCCdata_dict[CC][t].T[4:].plot(figsize=(15,10),logy=True,ax=ax)
#                 plt.legend(list(myCCdata_dict[CC][t].T.loc['Province/State']))
#             elif t == 1:
#                 fig, ax = subplots()
#                 plt.title(CC+' Deaths')
#                 plt.xlabel('Days')
#                 plt.ylabel('Cases')
#                 myCCdata_dict[CC][t].T[4:].plot(figsize=(15,10),logy=True,ax=ax)
#                 plt.legend(list(myCCdata_dict[CC][t].T.loc['Province/State']))
#             elif t == 2:
#                 fig, ax = subplots()
#                 plt.title(CC+' Recovered Cases')
#                 plt.xlabel('Days')
#                 plt.ylabel('Cases')
#                 myCCdata_dict[CC][t].T[4:].plot(figsize=(15,10),logy=True,ax=ax)
#                 plt.legend(list(myCCdata_dict[CC][t].T.loc['Province/State']))
#             else:
#                 fig, ax = subplots()
#                 plt.title(CC+' New Cases')
#                 plt.xlabel('Days')
#                 plt.ylabel('Cases')
#                 myCCdata_dict[CC][t].T[4:].plot(figsize=(15,10),logy=True,ax=ax)
#                 plt.legend(list(myCCdata_dict[CC][t].T.loc['Province/State']))

def GenerateStabilizationEstimate(myCCdata_dict,country,n_sigma=2.5):
    """
    Estimate, per province, when the outbreak in a country stabilized.

    For every row of the country's confirmed-cases frame, a Lorentzian
    model is fitted to new cases as a function of confirmed cases. The
    "stable" case count is taken as the fitted 'center' plus n_sigma times
    the fitted 'sigma'; the stable date is the date whose confirmed count
    is closest to that value (the earliest one on ties).

    Input:
        myCCdata_dict: dictionary of statistics per country as returned by
            CompileCountryData(); entry [0] is read as the confirmed cases
            and entry [3] as the new cases, each with four leading
            metadata columns followed by the date columns.
        country: key of the country to analyse.
        n_sigma: number of fitted 'sigma' widths past the fitted 'center'
            at which the outbreak is considered stable (default 2.5).
    Output: DataFrame with one row per province and the columns
        PROVINCE_STATE, STABLE_CASES_NUM, START_DATE, STABLE_DATE and DAYS,
        where DAYS is the 1-based position of the stable date among the
        date columns. Empty (with those columns) if the country has no rows.
    """
    confirmed_df = myCCdata_dict[country][0]
    new_cases_df = myCCdata_dict[country][3]

    # Everything below is identical for every province, so compute it once.
    stpr_li = confirmed_df['Province/State'].tolist()
    confirmed_ar = confirmed_df.iloc[:,4:].values
    new_cases_ar = new_cases_df.iloc[:,4:].values
    date_li = list(confirmed_df.iloc[:,4:].columns)

    lrtz_mod = LorentzianModel()
    estimate_li = []
    for i, countryConfirmed_ar in enumerate(confirmed_ar):
        countryNewCases_ar = new_cases_ar[i]

        # creating Lorentzian fit
        # (result.plot_fit() / result.fit_report() are handy for inspecting it)
        params = lrtz_mod.guess(countryNewCases_ar,x=countryConfirmed_ar)
        result = lrtz_mod.fit(countryNewCases_ar,params,x=countryConfirmed_ar)
        fit_params_dict = result.params.valuesdict()

        # obtaining an estimate of the date of stabilization: first date
        # whose confirmed count is nearest to the stable case count
        stableCases_pt = fit_params_dict['center'] + n_sigma*fit_params_dict['sigma']
        ind_scpt = int(np.argmin(np.abs(countryConfirmed_ar - stableCases_pt)))
        estimate_li.append({
            'PROVINCE_STATE':stpr_li[i],
            'STABLE_CASES_NUM':stableCases_pt,
            'START_DATE':date_li[0],
            'STABLE_DATE':date_li[ind_scpt],
            'DAYS':ind_scpt+1,
        })

    # Explicit column list keeps the column order and also yields a
    # well-formed empty frame when there are no provinces.
    compiled_df = pd.DataFrame(
        estimate_li,
        columns=['PROVINCE_STATE','STABLE_CASES_NUM','START_DATE','STABLE_DATE','DAYS'],
    )
    return compiled_df

# Gather the data for the chosen countries, estimate the stabilization
# time of every province of China, and show the mean number of days.
myCCdata_dict = CompileCountryData(myCC_li)
compiled_df = GenerateStabilizationEstimate(myCCdata_dict, country='China')
compiled_df['DAYS'].mean()

35.21212121212121