# Logistic modeling of the spread of COVID-19

In [None]:
# Import packages
import pandas as pd
import datetime as dt

import numpy as np
from scipy import optimize

from matplotlib import pyplot as plt

from tqdm import tqdm

## Preprocessing

In [None]:
# Load data
# Source repository: https://github.com/CSSEGISandData/COVID-19
# One CSV per series, read in the order confirmed cases, recoveries, deaths.
# NOTE(review): these are the original "19-covid" file names; confirm that the
# upstream repository still serves them under these paths.
cases, recovered, deaths = (
    pd.read_csv(url)
    for url in (
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv',
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv',
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv',
    )
)

In [None]:
# Drop US counties
# Rows to remove are those whose 'Province/State' contains ', '.
# Each frame is filtered with its own mask, so the result does not rely on
# the three tables listing their rows in the same order.
def _drop_us_counties(frame):
    """Return `frame` without the rows whose 'Province/State' contains ', '.

    Rows with a missing 'Province/State' are kept (`na=False`), and the
    pattern is matched literally rather than as a regular expression.
    """
    is_county = frame['Province/State'].str.contains(', ', regex=False, na=False)
    return frame[~is_county]


cases = _drop_us_counties(cases)
recovered = _drop_us_counties(recovered)
deaths = _drop_us_counties(deaths)

In [None]:
# Delete province and location
# The same three columns are removed from every table.
cases, recovered, deaths = (
    frame.drop(columns=['Province/State', 'Lat', 'Long'])
    for frame in (cases, recovered, deaths)
)

In [None]:
# Group by country
# Sum all rows that share a 'Country/Region' value; the country name becomes
# the index of each resulting table.
cases, recovered, deaths = [
    frame.groupby('Country/Region').sum()
    for frame in (cases, recovered, deaths)
]

## Regression

In [None]:
# Fit a logistic curve to the cumulative cases and to the cumulative deaths of
# every country, and collect the fitted parameters in `results`.
#
# Curve parameters: w[0] = plateau (final total), w[1] = growth rate,
# w[2] = inflection time in days since the first date column.
# Both helpers are defined once, outside the loop, because neither depends on
# the country being fitted.
def logistic(w, t):
    """Evaluate w[0] / (1 + exp(-w[1] * (t - w[2]))) at time(s) `t`."""
    return w[0] / (1 + np.exp(-w[1] * (t - w[2])))


def logistic_fit(w, t, y):
    """Return the residuals of the logistic curve against observations `y`.

    This is the objective handed to `optimize.least_squares`.
    """
    return logistic(w, t) - y


results = pd.DataFrame(columns=['Current cases', 'Total cases', 'Inflection point', 'Max new cases per day',
                                'Current deaths', 'Total deaths', 'Death ratio (%)'],
                       index=cases.index)

# The time axis is shared by all countries (it comes from the column labels),
# so it is computed once instead of once per country.
date = [dt.datetime.strptime(label, '%m/%d/%y').date() for label in cases.columns]
date_int = np.array([(d - date[0]).days for d in date])

for ind in tqdm(cases.index):
    cases_np = np.array(cases.loc[ind])
    deaths_np = np.array(deaths.loc[ind])

    # Initial guesses: [plateau, growth rate, inflection day].
    opt_cases = optimize.least_squares(logistic_fit, [100000, 0.1, 20], args=(date_int, cases_np))
    opt_deaths = optimize.least_squares(logistic_fit, [1000, 0.1, 20], args=(date_int, deaths_np))

    # Write with a single .loc[row, column] call: chained indexing
    # (results[col].loc[row] = ...) can assign into a temporary object and
    # leave `results` unchanged.
    results.loc[ind, 'Current cases'] = cases_np[-1]
    results.loc[ind, 'Current deaths'] = deaths_np[-1]
    # `int` replaces the removed `np.int` alias (it was the builtin int).
    results.loc[ind, 'Total cases'] = np.round(opt_cases.x[0]).astype(int)
    results.loc[ind, 'Inflection point'] = (date[0] + dt.timedelta(days=opt_cases.x[2])).strftime('%m/%d/%y')
    # The logistic curve is steepest at its inflection point, where the slope
    # equals plateau * rate / 4.
    results.loc[ind, 'Max new cases per day'] = np.round(opt_cases.x[0] * opt_cases.x[1] / 4).astype(int)
    results.loc[ind, 'Total deaths'] = np.abs(np.round(opt_deaths.x[0])).astype(int)
    # Ratio of the two fitted plateaus, as a percentage rounded to 2 decimals.
    r = np.abs(np.round(opt_deaths.x[0] / opt_cases.x[0] * 100, 2))
    results.loc[ind, 'Death ratio (%)'] = r if r > 1e-4 else 0

# Countries with the most confirmed cases first.
results_sorted = results.sort_values(by='Current cases', ascending=False)

## Save results

In [None]:
# Write the sorted per-country table to a CSV file in the working directory.
results_sorted.to_csv(path_or_buf=r'logistic-results.csv')