Covid Cases Stratified

Author: Zachary Stanke


In [3]:
# Initialization
import numpy as np
from numpy import linalg as la
import pandas as pd
from scipy import optimize
import CovidEM
import CovidCI
import warnings
import math
import sys
from tabulate import tabulate
import csv

# The stdlib `imp` module was removed in Python 3.12. importlib.reload is its
# replacement; binding importlib under the old name `imp` keeps every later
# `imp.reload(...)` call in this notebook working unchanged.
import importlib as imp
imp.reload(CovidEM) #applies changes made to file

# Load the case-count table (first CSV column is the row index) and order the
# rows by that index.
cases = pd.read_csv('CovidStratified/Data/cases.csv', index_col=0).sort_index(ascending = True)

# Map each row label to that row's values as a plain Python list, so later
# cells can look a country up by its code (e.g. cases_dict["can"]).
cases_dict = dict(zip(cases.index, cases.to_dict("split")["data"]))

# One matrix CSV per country code; each is read with its first column as the
# index and converted to a bare numpy array.
_prem_paths = {
    "can": 'CovidStratified/Data/premCan.csv',
    "chn": 'CovidStratified/Data/premChn.csv',
    "gbr": 'CovidStratified/Data/premGbr.csv',
    "isr": 'CovidStratified/Data/premIsr.csv',
    "ita": 'CovidStratified/Data/premIta.csv',
    "nld": 'CovidStratified/Data/premNLD.csv',
}
prem = {
    code: pd.read_csv(path, index_col=0).to_numpy()
    for code, path in _prem_paths.items()
}
# to retrieve entries: prem["can"]

# Country codes, in the order the matrices were loaded.
countries = list(prem)

# Quick visual check of what was loaded.
print(cases)
print(prem["can"])


     x00  x10   x20    x30    x40    x50    x60    x70  x80plus
can   43   53   250    301    315    380    300    163      139
chn  416  549  3619   7600   8571  10008   8583   3918     1408
gbr  701  822  7299  10048  12319  15441  12081  14655    24325
isr  319  732  1865   1206   1051   1134    969    495      247
ita   63  118   511    819   1523   2480   2421   2849     2533
nld   71  214  1973   2152   2839   4719   3805   4531     6219
[[ 2.87087681  0.94357477  0.37358052  0.25317159  0.33586201  0.65584323
   1.03423059  0.92004231  0.4489512   0.29422722  0.31204529  0.25033322
   0.16140108  0.13253666  0.07109389  0.03790854]
 [ 0.86595357  4.4816395   0.88978665  0.25207481  0.15642724  0.40257766
   0.76881514  0.93100172  0.74227614  0.31622672  0.21051978  0.17382395
   0.14895517  0.11229653  0.04629893  0.03547422]
 [ 0.21203609  1.41985066  6.67547793  0.80122709  0.29643418  0.25208034
   0.46080405  0.71878219  0.96370292  0.5479558   0.28606516  0.14325162
   0.0

In [None]:
from math import log2, isclose

# Testing: show the Canadian case counts, then the same counts after
# CovidEM.case_dim_split, then whatever CovidEM._test_kl_div() returns
# (presumably a self-check of the KL-divergence code; confirm in CovidEM).
canada_counts = cases_dict["can"]
print(canada_counts)
print(CovidEM.case_dim_split(canada_counts))

print(CovidEM._test_kl_div())

In [None]:
# SCV Eig testing
imp.reload(CovidEM) #applies changes made to file

# Length-7 all-ones integer vectors passed as the first and third arguments.
s = np.ones(7, dtype=int)
v = np.ones(7, dtype=int)

# (heading, country code, extra keyword arguments); only the Canadian call
# asks for debug output.
for heading, code, extra in (("Canada", "can", {"debug": True}),
                             ("Britain", "gbr", {})):
    print(heading)
    print(CovidEM.scv_eig(s, prem[code], v, **extra))

In [None]:
# Evaluate the per-country objective CovidEM.Covid_KL_k at an all-ones
# parameter vector, one line of output per country.
theta = np.array([1]*14)

# Country code -> display name (fixes the former "Isreal" misspelling).
country_labels = {
    "can": "Canada",
    "chn": "China",
    "gbr": "Britain",
    "isr": "Israel",
    "ita": "Italy",
    "nld": "Netherlands",
}

for code, label in country_labels.items():
    print(f"{label}: ", CovidEM.Covid_KL_k(theta, prem[code], cases_dict[code]))

In [None]:
# Fit each country separately with basin hopping (L-BFGS-B as the local
# minimizer), starting every fit from the same all-ones vector.
theta = np.array([1]*14)

# the bounds
# NOTE(review): my_bounds is built here but never passed to the minimizer
# (minimizer_kwargs carries no "bounds" entry), so every fit below is
# unconstrained. Confirm whether that is intended before wiring it in.
xmin = [0] * 14
xmax = [100] * 14
my_bounds = [(low, high) for low, high in zip(xmin, xmax)]

# (heading, country code, print full result?, print only res.x?)
# The flags reproduce what each country printed before this was a loop.
country_fits = [
    ("Canada:\n", "can", True, False),
    ("\n\nChina:\n", "chn", False, True),
    ("\n\nBritain:\n", "gbr", False, True),
    ("\n\nIsrael:\n", "isr", False, True),   # heading previously misspelled "Isreal"
    ("\n\nItaly:\n", "ita", False, True),
    ("\n\nNetherlands:\n", "nld", True, True),
]

for heading, country, show_result, show_x in country_fits:
    print(heading)
    kwargs = dict(args = (prem[country], cases_dict[country]), method="L-BFGS-B")
    res = optimize.basinhopping(CovidEM.Covid_KL_k, theta, minimizer_kwargs = kwargs)
    if show_result:
        print(res)
    if show_x:
        print(res.x)

In [None]:
# Joint fit over all countries at once, starting from an all-ones vector.
theta = np.ones(14, dtype=int)

# Box bounds are currently disabled for this run:
#xmin = [0] * 14
#xmax = [100] * 14
#my_bounds = [(low, high) for low, high in zip(xmin, xmax)]

# Extra positional arguments for CovidEM.Covid_KL plus the local minimizer.
kwargs = {
    "args": (prem, cases_dict, countries),
    "method": "L-BFGS-B",
}
res = optimize.basinhopping(CovidEM.Covid_KL, theta, minimizer_kwargs = kwargs)
print(res)

In [47]:
imp.reload(CovidEM) #applies changes made to file
np.random.seed(314)

starts = 30 #number of different starting points to test
trials = 1 # Number of trials per start point
iters = (starts * trials)

# One row per run: 16 starting values, 16 fitted values and the final
# objective (33 columns). Built with a comprehension so that every row is its
# own list; `[[0] * 33] * iters` would repeat one shared list.
theta_iter = [[0] * 33 for _ in range(iters)]

# Sentinel for "best result so far". An infinite objective guarantees the
# first real result replaces it, whatever the scale of the objective is
# (the previous sentinel of 100 would have hidden any run with fun >= 100).
best_res = optimize.OptimizeResult(x = None, fun = np.inf, success = False)

print ("Cumulative country calculation with randomized starting points:\n")
kwargs = dict(args = (prem, cases_dict, countries), method="L-BFGS-B")
for i in range(0, starts):
    # Gamma(shape=4, scale=1/4): mean = 4 * 1/4 = 1, sd = sqrt(4) * 1/4 = 0.5
    theta_guess = np.random.gamma(4, 1/4, 14)
    for j in range(0, trials):
        res = optimize.basinhopping(CovidEM.Covid_KL, theta_guess, minimizer_kwargs = kwargs)
        # Insert the constant 1 at positions 2 and 10 to get 16-entry vectors
        # (presumably the reference groups held fixed by the model; confirm).
        out_start = np.insert(np.insert(theta_guess, 2,1),10,1)
        out_end = np.insert(np.insert(res.x, 2,1),10,1)
        theta_iter[((i*trials)+j)] = np.append(np.append(out_start, out_end), res.fun)
        if (res.fun < best_res.fun):
            best_res = res

    # Progress indicator: percentage of runs completed so far.
    print(f"{((((i+1)*trials)/iters)*100):.2f}","%, ", end = "")


print(tabulate(theta_iter))


Cumulative country calculation with randomized starting points:

3.33 %, 6.67 %, 10.00 %, 13.33 %, 16.67 %, 20.00 %, 23.33 %, 26.67 %, 30.00 %, 33.33 %, 36.67 %, 40.00 %, 43.33 %, 46.67 %, 50.00 %, 53.33 %, 56.67 %, 60.00 %, 63.33 %, 66.67 %, 70.00 %, 73.33 %, 76.67 %, 80.00 %, 83.33 %, 86.67 %, 90.00 %, 93.33 %, 96.67 %, 100.00 %, --------  --------  -  --------  --------  --------  --------  --------  --------  --------  -  --------  --------  --------  --------  --------  --------  --------  -  -------  -------  -------  -------  -------  --------  --------  -  --------  -----------  --------  -------  -------  --------
0.998495  1.34427   1  0.539092  1.41001   0.594334  1.39496   0.698123  1.3334    1.96764   1  2.02593   0.28212   0.644512  0.999552  1.00626   0.301837  0.321736  1  1.22647  1.59667  1.69504  1.82554  1.92292  1.0771    1.99063   1  0.757177  0.0111676    1.75233   2.05714  2.71366  0.322877
1.21572   1.28884   1  1.67027   1.0636    0.859488  1.06455   1.07523  

In [None]:
imp.reload(CovidEM) #applies changes made to file
np.random.seed(451)

# Hard-coded point estimates (14 parameters) used as the centre of the
# confidence-interval search.
best_est = [0.219294308, 0.423309799, 1.124597388, 1.511734653, 1.560386081,
            1.727919503, 1.734579037, 3.77747199,  0.71170154,  0.769347828,
            0.104869899, 2.090248998, 1.98923028,  3.373799478]

if True: # Log transform parameter estimates?
    best_est = [math.log(estimate) for estimate in best_est]

# Hand the shared inputs to CovidCI before asking it for intervals.
CovidCI.set_prem(prem)
CovidCI.set_cases(cases_dict)
CovidCI.set_countries(countries)

# Intervals around best_est using CovidEM.neg_Covid_KL as the objective.
cis1 = CovidCI.CI_calc(best_est, CovidEM.neg_Covid_KL)
print("\n\n\nDone 1\n\n\n")
#cis2 = CovidCI.CI_calc(best_est, CovidEM.pos_Covid_KL)
#print("\n\n\nDone 2\n\n\n")

In [None]:
thetas = best_est
confint = cis1

# NOTE(review): CovidEM2 is not imported anywhere in the visible notebook
# (only CovidEM is), so the loop below raises NameError on a fresh kernel.
# Confirm the intended module before running.
#
# Column 0 of confint is treated as the lower bound and column 1 as the upper
# bound, the same convention the "Lower"/"Upper" table cell uses. The
# difference labels below were previously swapped relative to that convention.
print("Overhauled KL, Estimate")
print("non-trans: theta: ", thetas[0])
print("non-trans: CIs:", confint[0,:])
print("non-trans: theta - lower: ", thetas[0] - confint[0,0])
print("non-trans: theta - upper: ", thetas[0] - confint[0,1])
print("\n")

# Per-parameter report after applying param_trans2 to estimates and bounds.
for i in range(0,len(thetas)):
    print("Parameter #", i+1)
    print("trans: theta: ", CovidEM2.param_trans2(thetas)[i])
    print("trans: CIs:", CovidEM2.param_trans2(confint[i,:]))
    print("trans: theta - lower:", CovidEM2.param_trans2(thetas)[i] - CovidEM2.param_trans2(confint[i,0]))
    print("trans: theta - upper:", CovidEM2.param_trans2(thetas)[i] - CovidEM2.param_trans2(confint[i,1]))
    print("\n")

In [None]:
ci_header = ["Age", "Lower", "Estimate", "Upper"]

# No cell in this notebook defines `cis`; the interval cell stores its result
# as `cis1`. Alias it here so this cell runs on a fresh kernel.
# NOTE(review): confirm cis1 is the intended set of intervals.
cis = cis1

# Four parallel sequences (label, lower bound, estimate, upper bound), one
# entry per fitted parameter. They are transposed below into one row per
# parameter.
ci_res = [["s0-9","s10-19","s30-39","s40-49","s50-59","s60-69","s70-79",
           "v0-9","v10-19","v30-39","v40-49","v50-59","v60-69","v70-79"],
          cis[:,0],
          best_est,
          cis[:,1]]

# Distance from each estimate to its lower and upper bound, for a quick look.
print(best_est - cis[:,0])
print(best_est)
print(best_est - cis[:,1])

# Going through a numpy array turns every entry into a string; the result is
# a list of [label, lower, estimate, upper] rows matching ci_header.
ci_res = np.array(ci_res).T.tolist()

print(ci_res)


In [None]:
# Write the confidence-interval table to CSV: one header row, then one row
# per parameter. The `with` block closes the file on exit, so no explicit
# close() call is needed afterwards.
with open('CovidStratified/Output/CIEstimates.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)

    # write the header
    writer.writerow(ci_header)

    # write multiple rows
    writer.writerows(ci_res)

In [None]:
# Example of Confidence intervals

import numpy as np               # for numerical operations
from scipy import stats          # for stats functions
from scipy import optimize as op # to maximize the likelihood

import numdifftools as nd        # to compute gradient and Hessian numerically;
                                 # the package can be found on pypi.
                                 # Another good package for that purpose
                                 # (using automatic differentiation) is autograd

from ci_rvm import find_CI

# Number of observations in the synthetic data set
n = 100

# True negative-binomial parameters used to generate it
k = 5
p = 0.1

# Draw n negative-binomial samples with those parameters
data = np.random.negative_binomial(k, p, size=n)

# Because the parameters are constrained to the positive range and the
# interval (0, 1), respectively, we work on a transformed parameter space
# with unbounded domain.
def transform_parameters(params):
    """Map a pair of unconstrained values onto the constrained scale.

    params -- a two-element sequence (a, b).
    Returns the tuple (exp(a), 1 / (1 + exp(-b))): the first entry is
    positive and the second lies in the interval (0, 1).
    """
    raw_k, raw_p = params
    k_value = np.exp(raw_k)
    p_value = 1/(1+np.exp(-raw_p))
    return k_value, p_value

# Log-Likelihood function for a negative binomial model
def logL(params):
    """Return the negative-binomial log-likelihood of `data` at `params`.

    `params` is given on the unconstrained scale and is mapped through
    transform_parameters before the per-observation log-pmf values are summed.
    """
    size, prob = transform_parameters(params)
    pointwise = stats.nbinom.logpmf(data, size, prob)
    return pointwise.sum()

# negative log-Likelihood function for optimization (because we use
# minimization algorithms instead of maximization algorithms)
def negLogL(params):
    """Return -logL(params), the quantity the minimizer works on."""
    return -logL(params)

# Start the search at the origin of the transformed parameter space
x0 = [0, 0]

# Minimizing the negative log-likelihood maximizes the likelihood
result = op.minimize(negLogL, x0)
print(result.x)

# result.x lives on the transformed scale; map it back to the original
# parameter space so the printed estimate is interpretable.
k_hat, p_hat = transform_parameters(result.x)
print("The estimate is: k={:5.3f}, p={:5.3f}".format(k_hat, p_hat))

# Numerical gradient and Hessian of the log-likelihood
jac = nd.Gradient(logL)
hess = nd.Hessian(logL)

# Confidence intervals for every parameter, centred on the fitted optimum.
# Note: for complicated problems it is worthwhile to run this in parallel,
#       but that requires wrapping the procedure in a function and defining
#       the likelihood, gradient and Hessian at module top level.
CIs = find_CI(
    result.x,
    logL,
    jac,
    hess,
    disp=True,  # let the algorithm print status messages
)

print(result.x)
print(CIs)
    
# CIs is a 2D numpy array: CIs[i, 0] is the lower bound of the confidence
# interval for the i-th parameter and CIs[i, 1] is the matching upper bound.

# The bounds are on the transformed scale, so map them back to the original
# parameter space before printing.
original_lower = transform_parameters(CIs[:,0])
original_upper = transform_parameters(CIs[:,1])

# One message template per parameter, in the order (k, p).
ci_templates = (
    "Confidence interval for k: [{:5.3f}, {:5.3f}]",
    "Confidence interval for p: [{:5.3f}, {:5.3f}]",
)
for template, lower, upper in zip(ci_templates, original_lower, original_upper):
    print(template.format(lower, upper))

In [None]:
# Example of basin hopping

import numpy as np
from scipy import optimize

# an example function with multiple minima
def f(x):
    """Example objective with several local minima.

    x -- a numpy array. Returns x.x + sin(pi * ||x||).
    """
    radius = np.linalg.norm(x)
    return x.dot(x) + np.sin(radius * np.pi)

# Where the search begins
x0 = [10., 10.]

# Lower and upper limit for each coordinate
xmin = [1., 1.]
xmax = [11., 11.]

# L-BFGS-B wants the bounds as one (low, high) pair per coordinate
bounds = list(zip(xmin, xmax))

# use method L-BFGS-B because the problem is smooth and bounded
minimizer_kwargs = {"method": "L-BFGS-B", "bounds": bounds}
res = optimize.basinhopping(f, x0, minimizer_kwargs=minimizer_kwargs)
print(res)

