In [3]:
# Initialization
import numpy as np
from numpy import linalg as la
import pandas as pd
from scipy import optimize
import warnings
import math
import sys
from tabulate import tabulate
import csv

import CovidEM
import CovidEM2
import CovidCI

import imp
imp.reload(CovidCI)
imp.reload(CovidEM) #applies changes made to file
imp.reload(CovidEM2)

# Read in data
cases = pd.read_csv('CovidStratified/Data/cases.csv', index_col=0)
cases = cases.sort_index(ascending = True)
# to retrieve entries as plain data types: cases.iloc[i,j].values
cases_dict = {}
cases_mat = cases.to_dict("split")["data"] # temp variable to hold onto arrays
for i in range(len(cases)):
    cases_dict[cases.index[i]] = cases_mat[i]
del cases_mat

prem = {
"can" : pd.read_csv('CovidStratified/Data/premCan.csv', index_col=0).to_numpy(),
"chn" : pd.read_csv('CovidStratified/Data/premChn.csv', index_col=0).to_numpy(),
"gbr" : pd.read_csv('CovidStratified/Data/premGbr.csv', index_col=0).to_numpy(),
"isr" : pd.read_csv('CovidStratified/Data/premIsr.csv', index_col=0).to_numpy(),
"ita" : pd.read_csv('CovidStratified/Data/premIta.csv', index_col=0).to_numpy(),
"nld" : pd.read_csv('CovidStratified/Data/premNLD.csv', index_col=0).to_numpy(),
}
# to retrieve entries: prem["can"]

countries = list(prem)

# print(cases)
# print(prem["can"])

In [15]:
# Example of Confidence intervals

import numpy as np               # for numerical operations
from scipy import stats          # for stats functions
from scipy import optimize as op # to maximize the likelihood

import numdifftools as nd        # to compute gradient and Hessian numerically;
                                 # the package can be found on pypi.
                                 # Another good package for that purpose
                                 # (using automatic differentiation) is autograd

from ci_rvm import find_CI
np.random.seed(451)

# Define the size of the data set
n = 100

# Define the true parameters
k, p = 5, 0.1

# Generate the data set
data = np.random.negative_binomial(k, p, size=n)

# Because the parameters are constrained to the positive range and the
# interval (0, 1), respectively, we work on a transformed parameter space
# with unbounded domain.
def transform_parameters(params):
    k, p = params
    return np.exp(k), 1/(1+np.exp(-p))
    #return k, p

# Log-Likelihood function for a negative binomial model
def logL(params):
    k, p = transform_parameters(params)
    return stats.nbinom.logpmf(data, k, p).sum()

# negative log-Likelihood function for optimization (because we use
# minimization algorithms instead of maximization algorithms)
negLogL = lambda params: -logL(params)

# Initial guess
x0 = [4, 0.2]

# Maximize the likelihood
result = op.minimize(negLogL, x0)
#print(result.x)

# Print the result (we need to transform the parameters to the original
# parameter space to make them interpretable)
print("The estimate is: k={:5.3f}, p={:5.3f}".format(*transform_parameters(result.x)))



The estimate is: k=4.233, p=0.086


In [17]:
np.random.seed(451)

# Define gradient and Hessian
jac = nd.Gradient(logL)
hess = nd.Hessian(logL)

# Find confidence intervals for all parameters.
# Note: For complicated problems, it is worthwile doing this in parallel.
#       However, then we would need to encapsulate the procedure in a
#       method and define the likelihood function, gradient, and Hessian
#       on the top level of the module.
CIs = find_CI(result.x, logL, jac, hess, alpha = 0.95,
              disp=False) # the disp argument lets the algorithm print
                         # status messages.
    
#print(result.x)
#print(CIs)
    
# CIs is a 2D numpy array with CIs[i, 0] containing the lower bound of the
# confidence interval for the i-th parameter and CIs[i, 1] containing the
# respective upper bound.

# Print the confidence intervals. Note: we need to transform the parameters
# back to the original parameter space.
original_lower = transform_parameters(CIs[:,0])
original_upper = transform_parameters(CIs[:,1])
print("Confidence interval for k: [{:5.3f}, {:5.3f}]".format(
   original_lower[0], original_upper[0]))
#print("Confidence interval for p: [{:5.3f}, {:5.3f}]".format(
#   original_lower[1], original_upper[1]))
print("\n")
print("trans: k: ", transform_parameters(result.x)[0])
print("trans: k - upper:", transform_parameters(result.x)[0] - original_upper[0])
print("trans: k - lower:", transform_parameters(result.x)[0] - original_lower[0])
print("\n")
print("non-trans: k: ", result.x[0])
print("non-trans: k - upper: ", result.x[0] - CIs[0,0])
print("non-trans: k - lower: ", result.x[0] - CIs[0,1])

Confidence interval for k: [3.106, 5.687]


trans: k:  4.2334692165135435
trans: k - upper: -1.4535306447825596
trans: k - lower: 1.1274530109756


non-trans: k:  1.4430218026703197
non-trans: k - upper:  0.30968086046292687
non-trans: k - lower:  -0.2951610412647001


In [8]:
imp.reload(CovidEM2)

starts = 5 #number of different starting points to test
trials = 1 # Number of trials per start point
iters = (starts * trials)
theta_iter = [[0] * 33] * iters
best_res = optimize.OptimizeResult(x = 100, fun = 100, success = False) #placeholder var to track best result

print ("Cumulative country calculation with randomized starting points:\n")
kwargs = dict(args = (prem, cases_dict, countries), method="L-BFGS-B")
for i in range(0, starts):
    theta_guess = np.random.gamma(4, 1/4, 14) # mean = 1, sd = 2
    for k in range(len(theta_guess)): theta_guess[k] = math.log(theta_guess[k], math.e)
    for j in range(0, trials):
        res = optimize.basinhopping(CovidEM2.Covid_KL2, theta_guess, minimizer_kwargs = kwargs)
        out_start = np.insert(np.insert(theta_guess, 2,1),10,1)
        out_end = np.insert(np.insert(res.x, 2,1),10,1)
        theta_iter[((i*trials)+j)] = np.append(np.append(out_start, out_end), res.fun)
        if (res.fun < best_res.fun): 
            best_res = res
            
    print(f"{((((i+1)*trials)/iters)*100):.2f}","%, ", end = "")


print(tabulate(theta_iter))


Cumulative country calculation with randomized starting points:

20.00 %, 40.00 %, 60.00 %, 80.00 %, 100.00 %, -----------  ----------  -  ---------  ---------  ---------  ---------  ---------  ----------  ----------  -  ----------  -----------  ---------  ----------  ----------  --------  --------  -  --------  --------  --------  -------  --------  -------  -------  -  --------  ---------  ---------  ---------  -------  --------
-1.36287     -0.0898766  1  -1.1815     0.755689   0.272781  -0.292139  -0.232897  -0.544864    0.0340029  1   0.287413    0.290463    -1.10035    0.101635   -0.389342   -2.02074  -2.1184   1  0.199176  0.521528  0.942774  1.05447  0.849896  2.37378  1.90766  1  0.858506  -0.479178  -12.6561   -0.101932  1.66057  -9.10312
 0.521455    -0.148669   1  -0.492077   0.216775  -0.652791  -0.399962   0.714557  -0.0573359  -0.416248   1   0.0117233   0.0205226    0.169948   0.323661   -0.0824058  -2.02026  -2.11777  1  0.199348  0.521715  0.942857  1.05463  0.850089 

In [13]:
imp.reload(CovidEM2) #applies changes made to file
np.random.seed(451)

best_res_x = [0]*14
best_est = [0.219294308, 0.423309799, 1.124597388, 1.511734653, 1.560386081, 1.727919503, 1.734579037, 
            3.77747199,  0.71170154,  0.769347828, 0.104869899, 2.090248998, 1.98923028,  3.373799478]
for i in range(0, len(best_est)):
    best_est[i] = math.log(best_est[i])
    best_res_x[i] = (best_res.x[i])

# Initialize variables
CovidEM2.set_prem(prem)
CovidEM2.set_cases(cases_dict)
CovidEM2.set_countries(countries)


In [18]:
print(best_est)
print(best_res_x)

[-1.517340579207497, -0.8596509826563389, 0.11742509426967462, 0.41325776830884, 0.44493327846189984, 0.5469180853847627, 0.5507647539882276, 1.3290550001802846, -0.340096640846177, -0.26221209964514486, -2.255034754233455, 0.7372831966794187, 0.6877477699424692, 1.2160395508223831]
[-2.0207391948161275, -2.118397835349923, 0.1991761324762376, 0.5215279332629494, 0.9427736523337448, 1.0544720903386497, 0.8498964677927807, 2.3737802113113586, 1.907657491262772, 0.8585061734229341, -0.4791775851786038, -12.656148974124854, -0.10193222120246291, 1.6605681379148496]


In [21]:
cis_2a1 = CovidEM2.CI_calc2(best_res_x, CovidEM2.neg_Covid_KL2)
print("\n\n\nDone a1\n\n")
cis_2a2 = CovidEM2.CI_calc2(best_res_x, CovidEM2.neg_Covid_KL2)
print("\n\n\nDone a2\n\n")
cis_2a3 = CovidEM2.CI_calc2(best_res_x, CovidEM2.neg_Covid_KL2)
print("\n\n\nDone a3\n\n")

iter   1<: x_0_d= 0.000; f_d= 1.921; jac_d= 0.004; nsteps=33; x_d= 0.000; f_impr= 0.000; jac_impr=-3.490; f_e= 0.000; step= 1.000; radius= 1.000 - maximizing nuisance parameters
iter   2<: x_0_d= 0.000; f_d= 1.920; jac_d= 0.249; nsteps=21; x_d= 0.000; f_impr= 0.000; jac_impr=-5.815; f_e= 0.000; step= 0.000; radius= 0.000 - maximizing nuisance parameters


KeyboardInterrupt: 

In [None]:
cis_2b1 = CovidEM2.CI_calc2(best_est, CovidEM2.neg_Covid_KL)
print("\n\n\nDone b1\n\n")
cis_2b2 = CovidEM2.CI_calc2(best_est, CovidEM2.neg_Covid_KL)
print("\n\n\nDone b2\n\n")
cis_2b3 = CovidEM2.CI_calc2(best_est, CovidEM2.neg_Covid_KL)
print("\n\n\nDone b3\n\n")

In [None]:
imp.reload(CovidEM2) #applies changes made to file
np.random.seed(451)

bad_params = [-1.72245253, -0.91245638,  0.01888201,  0.34871368,  0.35877798,
              0.39279521,  0.41702544,  1.95818931, -0.10753684,  0.3775735 ,
              -9.22278978,  1.15643563,  1.2910407 ,  1.64917554]

CovidEM2.Covid_KL2(bad_params, prem, cases_dict, countries)

res = optimize.basinhopping(CovidEM2.Covid_KL2, bad_params, minimizer_kwargs = kwargs)

In [None]:
print(res)

In [None]:
thetas = best_est
confint = cis_2b

print("Overhauled KL, Estimate")
print("non-trans: theta: ", thetas[0])
print("non-trans: CIs:", confint[0,:])
print("non-trans: theta - upper: ", thetas[0] - confint[0,0])
print("non-trans: theta - lower: ", thetas[0] - confint[0,1])
print("\n")

for i in range(0,len(thetas)):
    print("Parameter #", i+1)
    print("trans: theta: ", CovidEM2.param_trans2(thetas)[i])
    print("trans: CIs:", CovidEM2.param_trans2(confint[i,:]))
    print("trans: theta - upper:", CovidEM2.param_trans2(thetas)[i] - CovidEM2.param_trans2(confint[i,0]))
    print("trans: theta - lower:", CovidEM2.param_trans2(thetas)[i] - CovidEM2.param_trans2(confint[i,1]))
    print("\n")

In [None]:
ci_header = ["Age", "Lower", "Estimate", "Upper"]

ci_res = [["s0-9","s10-19","s30-39","s40-49","s50-59","s60-69","s70-79",
           "v0-9","v10-19","v30-39","v40-49","v50-59","v60-69","v70-79"],
          CovidEM2.param_trans2(cis_2b[:,0]),
          CovidEM2.param_trans2(best_est),
          CovidEM2.param_trans2(cis_2b[:,1])]
print(best_est - cis_2b[:,0])
print(best_est)
print(best_est - cis_2b[:,1])
ci_res = np.array(ci_res).T.tolist()

print(ci_res)
