Covid Cases Stratified

Author: Zachary Stanke


In [4]:
# Initialization
import numpy as np
from numpy import linalg as la
import pandas as pd
from scipy import optimize
import warnings
import math
import sys
from tabulate import tabulate
import csv

import CovidReg
import CovidEig

import imp
imp.reload(CovidReg) #applies changes made to file
imp.reload(CovidEig)


# Read in data
cases = pd.read_csv('CovidStratified/Data/cases.csv', index_col=0)
cases = cases.sort_index(ascending = True)
# to retrieve entries as plain data types: cases.iloc[i,j].values
cases_dict = {}
cases_mat = cases.to_dict("split")["data"] # temp variable to hold onto arrays
for i in range(len(cases)):
    cases_dict[cases.index[i]] = cases_mat[i]
del cases_mat

prem = {
"can" : pd.read_csv('CovidStratified/Data/premCan.csv', index_col=0).to_numpy(),
"chn" : pd.read_csv('CovidStratified/Data/premChn.csv', index_col=0).to_numpy(),
"gbr" : pd.read_csv('CovidStratified/Data/premGbr.csv', index_col=0).to_numpy(),
"isr" : pd.read_csv('CovidStratified/Data/premIsr.csv', index_col=0).to_numpy(),
"ita" : pd.read_csv('CovidStratified/Data/premIta.csv', index_col=0).to_numpy(),
"nld" : pd.read_csv('CovidStratified/Data/premNLD.csv', index_col=0).to_numpy(),
}
# to retrieve entries: prem["can"]

countries = list(prem)

print(cases)
print(prem["can"])


     x00  x10   x20    x30    x40    x50    x60    x70  x80plus
can   43   53   250    301    315    380    300    163      139
chn  416  549  3619   7600   8571  10008   8583   3918     1408
gbr  701  822  7299  10048  12319  15441  12081  14655    24325
isr  319  732  1865   1206   1051   1134    969    495      247
ita   63  118   511    819   1523   2480   2421   2849     2533
nld   71  214  1973   2152   2839   4719   3805   4531     6219
[[ 2.87087681  0.94357477  0.37358052  0.25317159  0.33586201  0.65584323
   1.03423059  0.92004231  0.4489512   0.29422722  0.31204529  0.25033322
   0.16140108  0.13253666  0.07109389  0.03790854]
 [ 0.86595357  4.4816395   0.88978665  0.25207481  0.15642724  0.40257766
   0.76881514  0.93100172  0.74227614  0.31622672  0.21051978  0.17382395
   0.14895517  0.11229653  0.04629893  0.03547422]
 [ 0.21203609  1.41985066  6.67547793  0.80122709  0.29643418  0.25208034
   0.46080405  0.71878219  0.96370292  0.5479558   0.28606516  0.14325162
   0.0

In [5]:
# Single Country Estimation
# the bounds
theta = np.array([1]*14)
xmin = [0] * 14
xmax = [100] * 14
my_bounds = [(low, high) for low, high in zip(xmin, xmax)]

print ("Canada:\n")
kwargs = dict(args = (prem["can"], cases_dict["can"]), method="L-BFGS-B")
print(optimize.basinhopping(CovidReg.Covid_reg_k, theta, minimizer_kwargs = kwargs))

print ("\n\nChina:\n")
kwargs = dict(args = (prem["chn"], cases_dict["chn"]), method="L-BFGS-B")
print(optimize.basinhopping(CovidReg.Covid_reg_k, theta, minimizer_kwargs = kwargs).x)

print ("\n\nBritain:\n")
kwargs = dict(args = (prem["gbr"], cases_dict["gbr"]), method="L-BFGS-B")
print(optimize.basinhopping(CovidReg.Covid_reg_k, theta, minimizer_kwargs = kwargs).x)

print ("\n\nIsreal:\n")
kwargs = dict(args = (prem["isr"], cases_dict["isr"]), method="L-BFGS-B")
print(optimize.basinhopping(CovidReg.Covid_reg_k, theta, minimizer_kwargs = kwargs).x)

print ("\n\nItaly:\n")
kwargs = dict(args = (prem["ita"], cases_dict["ita"]), method="L-BFGS-B")
print(optimize.basinhopping(CovidReg.Covid_reg_k, theta, minimizer_kwargs = kwargs).x)

print ("\n\nNetherlands:\n")
kwargs = dict(args = (prem["nld"], cases_dict["nld"]), method="L-BFGS-B")
res = optimize.basinhopping(CovidReg.Covid_reg_k, theta, minimizer_kwargs = kwargs)
print(res)
print(res.x)

Canada:



  df = fun(x) - f0


                        fun: 10.832571619562888
 lowest_optimization_result:       fun: 10.832571619562888
 hess_inv: <14x14 LbfgsInvHessProduct with dtype=float64>
      jac: array([ 2.9146177 , -2.18700898,  1.41031312,  0.75007219,  0.18244038,
        0.26247218, -3.13319541, -0.5402729 ,  0.65686532,  1.50133771,
        0.77176043, -0.42368189, -0.52814908, -0.59135719])
  message: 'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 240
      nit: 14
     njev: 16
   status: 0
  success: True
        x: array([0.36685275, 0.47002142, 1.23198705, 1.29677578, 1.20219416,
       1.97083328, 1.69242374, 3.57788714, 0.01181928, 0.50273644,
       0.85450912, 2.73105686, 1.01276821, 1.12311159])
                    message: ['requested number of basinhopping iterations completed successfully']
      minimization_failures: 53
                       nfev: 26085
                        nit: 100
                       njev: 1739
                          x: array([0.36685275, 0.47

In [6]:
# Cumulative Estimation

theta = np.array([1]*14)
# the bounds
#xmin = [0] * 14
#xmax = [100] * 14
#my_bounds = [(low, high) for low, high in zip(xmin, xmax)]

kwargs = dict(args = (prem, cases_dict, countries), method="L-BFGS-B")
res = optimize.basinhopping(CovidReg.Covid_reg, theta, minimizer_kwargs = kwargs)
print(res)

                        fun: 10802.17184926741
 lowest_optimization_result:       fun: 10802.17184926741
 hess_inv: <14x14 LbfgsInvHessProduct with dtype=float64>
      jac: array([-0.7727067 , -1.22363417, -0.20772859, -0.21664164, -0.10222721,
        0.04529284,  0.1033186 , -0.1196895 , -0.06384653, -0.28885552,
       -0.25138434, -0.086402  ,  0.06311893,  0.28994691])
  message: 'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 2415
      nit: 147
     njev: 161
   status: 0
  success: True
        x: array([0.1829133 , 0.29802888, 1.27577105, 1.80779712, 2.23734802,
       2.46055178, 2.38884949, 5.05246654, 1.64218323, 1.27292151,
       0.10865641, 1.01833569, 1.37152814, 3.09679293])
                    message: ['requested number of basinhopping iterations completed successfully']
      minimization_failures: 96
                       nfev: 33015
                        nit: 100
                       njev: 2201
                          x: array([0.1829133 , 0.2

In [5]:
cases

Unnamed: 0,x00,x10,x20,x30,x40,x50,x60,x70,x80plus
can,43,53,250,301,315,380,300,163,139
chn,416,549,3619,7600,8571,10008,8583,3918,1408
gbr,701,822,7299,10048,12319,15441,12081,14655,24325
isr,319,732,1865,1206,1051,1134,969,495,247
ita,63,118,511,819,1523,2480,2421,2849,2533
nld,71,214,1973,2152,2839,4719,3805,4531,6219


In [8]:
# Iterative Cumulative Estimations

imp.reload(CovidReg) #applies changes made to file
np.random.seed(451)

starts = 20 #number of different starting points to test
trials = 10 # Number of trials per start point
iters = (starts * trials)
theta_iter = [[0] * 33] * iters
best_res = optimize.OptimizeResult(x = 100, fun = 100, success = False) #placeholder var to track best result

print ("Cumulative country calculation with randomized starting points:\n")
kwargs = dict(args = (prem, cases_dict, countries), method="Nelder-Mead")
for i in range(0, starts):
    theta_guess = np.random.gamma(4, 1/4, 14) # mean = 1, sd = 2
    for j in range(0, trials):
        res = optimize.basinhopping(CovidReg.Covid_reg, theta_guess, minimizer_kwargs = kwargs)
        out_start = np.insert(np.insert(theta_guess, 2,1),10,1)
        out_end = np.insert(np.insert(res.x, 2,1),10,1)
        theta_iter[((i*trials)+j)] = np.append(np.append(out_start, out_end), res.fun)
        if (res.fun < best_res.fun): 
            best_res = res
            
    print(f"{((((i+1)*trials)/iters)*100):.2f}","%, ", end = "")


print(tabulate(theta_iter))


Cumulative country calculation with randomized starting points:



  np.max(np.abs(fsim[0] - fsim[1:])) <= fatol):


5.00 %, 10.00 %, 15.00 %, 20.00 %, 25.00 %, 30.00 %, 35.00 %, 40.00 %, 45.00 %, 50.00 %, 55.00 %, 60.00 %, 65.00 %, 70.00 %, 75.00 %, 80.00 %, 85.00 %, 90.00 %, 95.00 %, 100.00 %, --------  --------  -  --------  --------  --------  --------  --------  --------  --------  -  --------  --------  --------  --------  --------  ---------  --------  -  --------  --------  -------  -------  -------  -----------  -----------  -  -----------  -----------  -----------  -----------  -----------  -------
1.28832   1.58811   1  0.304628  0.654188  0.815713  1.31789   1.11245   0.323341  0.685253  1  0.720261  1.2793    0.841494  1.13993   2.12827   0.181556   0.299264  1  1.27561   1.81031   2.23683  2.45769  2.38871  5.13585      1.61761      1  1.26972      0.10432      1.01672      1.37155      3.08712      10802.3
1.28832   1.58811   1  0.304628  0.654188  0.815713  1.31789   1.11245   0.323341  0.685253  1  0.720261  1.2793    0.841494  1.13993   2.12827   0.184247   0.303968  1  1.28434   1.

In [7]:
# Lower error

imp.reload(CovidReg) #applies changes made to file
np.random.seed(451)

starts = 5 #number of different starting points to test
trials = 5 # Number of trials per start point
iters = (starts * trials)
theta_iter_min = [[0] * 33] * iters

print ("Cumulative country calculation with randomized starting points:\n")
kwargs = dict(args = (prem, cases_dict, countries, 2, 2, (42-((42*2)**0.5))/12), method="Nelder-Mead")
for i in range(0, starts):
    theta_guess = np.random.gamma(4, 1/4, 14) # mean = 1, sd = 2
    for j in range(0, trials):
        res = optimize.basinhopping(CovidReg.Covid_reg, theta_guess, minimizer_kwargs = kwargs)
        out_start = np.insert(np.insert(theta_guess, 2,1),10,1)
        out_end = np.insert(np.insert(res.x, 2,1),10,1)
        theta_iter_min[((i*trials)+j)] = np.append(np.append(out_start, out_end), res.fun)
            
    print(f"{((((i+1)*trials)/iters)*100):.2f}","%, ", end = "")
print(tabulate(theta_iter_min))

Cumulative country calculation with randomized starting points:



  np.max(np.abs(fsim[0] - fsim[1:])) <= fatol):


20.00 %, 40.00 %, 60.00 %, 80.00 %, 100.00 %, --------  --------  -  --------  --------  --------  --------  --------  --------  --------  -  --------  --------  --------  --------  -------  --------  --------  -  -------  -------  -------  -------  -------  -------  --------  -  --------  ---------  --------  -------  -------  -------
1.28832   1.58811   1  0.304628  0.654188  0.815713  1.31789   1.11245   0.323341  0.685253  1  0.720261  1.2793    0.841494  1.13993   2.12827  0.180939  0.299985  1  1.26959  1.80734  2.2214   2.43993  2.38561  5.17917  1.60126   1  1.30126   0.0884567  1.05112   1.38385  3.11819  10788.4
1.28832   1.58811   1  0.304628  0.654188  0.815713  1.31789   1.11245   0.323341  0.685253  1  0.720261  1.2793    0.841494  1.13993   2.12827  0.180814  0.302654  1  1.27057  1.80711  2.22228  2.45076  2.38601  5.23074  1.5605    1  1.28933   0.0967248  1.05693   1.37125  3.11783  10787.7
1.28832   1.58811   1  0.304628  0.654188  0.815713  1.31789   1.11245   0.323

In [8]:
#Upper error

imp.reload(CovidReg) #applies changes made to file
np.random.seed(451)

starts = 5 #number of different starting points to test
trials = 5 # Number of trials per start point
iters = (starts * trials)
theta_iter_max = [[0] * 33] * iters

print ("Cumulative country calculation with randomized starting points:\n")
kwargs = dict(args = (prem, cases_dict, countries, 2, 2, (42+((42*2)**0.5)/12)), method="Nelder-Mead")
for i in range(0, starts):
    theta_guess = np.random.gamma(4, 1/4, 14) # mean = 1, sd = 2
    for j in range(0, trials):
        res = optimize.basinhopping(CovidReg.Covid_reg, theta_guess, minimizer_kwargs = kwargs)
        out_start = np.insert(np.insert(theta_guess, 2,1),10,1)
        out_end = np.insert(np.insert(res.x, 2,1),10,1)
        theta_iter_max[((i*trials)+j)] = np.append(np.append(out_start, out_end), res.fun)
            
    print(f"{((((i+1)*trials)/iters)*100):.2f}","%, ", end = "")
print(tabulate(theta_iter_max))

Cumulative country calculation with randomized starting points:

20.00 %, 40.00 %, 60.00 %, 80.00 %, 100.00 %, --------  --------  -  --------  --------  --------  --------  --------  --------  --------  -  --------  --------  --------  --------  -------  --------  --------  -  -------  -------  -------  -------  -------  -----------  -----------  -  --------  --------  --------  --------  --------  -------
1.28832   1.58811   1  0.304628  0.654188  0.815713  1.31789   1.11245   0.323341  0.685253  1  0.720261  1.2793    0.841494  1.13993   2.12827  0.206502  0.279561  1  1.33613  1.84192  2.41792  2.60229  2.49193  3.27355      1.86007      1  1.03747   0.214161  0.592623  1.28217   2.60576   11043.1
1.28832   1.58811   1  0.304628  0.654188  0.815713  1.31789   1.11245   0.323341  0.685253  1  0.720261  1.2793    0.841494  1.13993   2.12827  0.204061  0.276197  1  1.32591  1.82643  2.40119  2.57569  2.46389  3.40012      1.94309      1  1.07268   0.22475   0.612493  1.33139   2.69807

In [None]:
#Save results

theta_header = ["s0-9i","s10-19i","s20-29i","s30-39i","s40-49i","s50-59i","s60-69i","s70-79i",
                "v0-9i","v10-19i","v20-29i","v30-39i","v40-49i","v50-59i","v60-69i","v70-79i",
                "s0-9","s10-19","s20-29","s30-39","s40-49","s50-59","s60-69","s70-79",
                "v0-9","v10-19","v20-29","v30-39","v40-49","v50-59","v60-69","v70-79", "Value"]


with open('CovidStratified/Output/ThetaIterationsMin.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)

    # write the header
    writer.writerow(theta_header)

    # write multiple rows
    writer.writerows(theta_iter_min)
    
f.close()
