# Check that fast Cholesky updates sufficiently approximate the original (robust) updates

In [1]:
# load packages
using Revise
using Knockoffs
using LinearAlgebra
using Random
using StatsBase
using Statistics
using ToeplitzMatrices
using CSV, DataFrames
using BlockDiagonals
using Distributions

┌ Info: Precompiling Knockoffs [878bf26d-0c49-448a-9df5-b057c815d613]
└ @ Base loading.jl:1342


## Data from gnomAD

In [4]:
# Directory holding the gnomAD test data (machine-specific absolute path).
datadir = "/Users/biona001/Benjamin_Folder/research/4th_project_PRS/group_knockoff_test_data"

# Group table: read it and keep its `group` column as the group assignment vector.
mapfile = CSV.read(joinpath(datadir, "Groups_2_127374341_128034347.txt"), DataFrame)
groups = mapfile.group

# "CorG" table: read it and convert the whole table to a dense Float64 matrix.
covfile = CSV.read(joinpath(datadir, "CorG_2_127374341_128034347.txt"), DataFrame)
Σ = Matrix{Float64}(covfile)

3782×3782 Matrix{Float64}:
  1.0           0.00710886    0.364293    …   0.0264997     0.0264099
  0.00710886    1.0           0.0731382       0.0035012     0.002885
  0.364293      0.0731382     1.0             0.0107343     0.0107998
 -0.117632     -0.0432335    -0.0757611      -0.0041054    -0.00329875
  0.122778      0.502735      0.25271        -0.0103248    -0.0114025
  0.124399      0.500263      0.25183     …  -0.0118251    -0.012911
  0.124164      0.500373      0.252304       -0.0123146    -0.0133988
  0.0672066    -0.0298754     0.115745       -0.000961697   0.000611668
 -0.141431     -0.176522     -0.427692       -0.000259765  -0.000154645
 -0.144941     -0.177199     -0.430318       -0.00169021   -0.00145797
 -0.144968     -0.176179     -0.430123    …  -0.00146174   -0.00135189
 -0.155744     -0.0190514    -0.300435       -0.0132113    -0.0131039
 -0.119514     -0.0345581    -0.0729309       0.023471      0.0241919
  ⋮                                       ⋱   ⋮           

In [5]:
using Distributions

# Simulate n samples from a multivariate normal with mean μ (all zeros) and a
# slightly regularized version of Σ as covariance.
n = 500
μ = zeros(size(Σ, 1))

# Shrink Σ toward the identity. A unit diagonal stays at 1 (0.99 * 1 + 0.01).
# NOTE: this reassigns Σ, so re-running the cell applies the shrinkage again.
Σ = 0.99Σ + 0.01I

# Draw from a fixed-seed RNG so the simulated data are reproducible across runs.
# `rand` returns one sample per column; the adjoint is materialized as an n × p Matrix.
X = Matrix(rand(MersenneTwister(2022), MvNormal(μ, Σ), n)');

## Check that the solutions from robust and non-robust Cholesky updates match

In [22]:
# Restrict the problem to the first p variables.
# (p is chosen to include group 263, the largest group with 192 members.)
p = 1241
keep = 1:p
groups = groups[keep]
Σ = Σ[keep, keep]

1241×1241 Matrix{Float64}:
  1.0          0.00703777   0.36065     …   0.0313152    0.0851741
  0.00703777   1.0          0.0724068       0.00757982  -0.019931
  0.36065      0.0724068    1.0             0.0812242   -0.00220094
 -0.116456    -0.0428011   -0.0750035       0.0056124   -0.0375318
  0.12155      0.497708     0.250183        0.0269969   -0.0435219
  0.123155     0.49526      0.249311    …   0.0263315   -0.0432584
  0.122923     0.495369     0.249781        0.0271842   -0.0433027
  0.0665345   -0.0295767    0.114587       -0.0250835   -0.0557703
 -0.140017    -0.174757    -0.423415       -0.0336668   -0.00074335
 -0.143492    -0.175427    -0.426015       -0.0342407   -0.00227216
 -0.143518    -0.174417    -0.425822    …  -0.0335972   -0.00223795
 -0.154186    -0.0188609   -0.297431       -0.0438349    0.00865955
 -0.118319    -0.0342125   -0.0722016      -0.0183773   -0.0543074
  ⋮                                     ⋱                ⋮
  0.0326093    0.016891     0.0899976  

Max entropy group knockoffs

In [26]:
# Solve for the group-knockoff S matrix with the :maxent objective, using the
# non-robust Cholesky updates; @time reports the wall-clock cost.
# Keywords:
#   m       - number of knockoffs per variable to generate
#   tol     - convergence tolerance
#   niter   - max number of coordinate descent iterations
#   robust  - whether to use robust cholesky updates
#   verbose - whether to print informative intermediate results
# NOTE: the name `Snotrubost` (sic) is kept because later cells refer to it.
@time Snotrubost, _ = solve_s_group(
    Symmetric(Σ), groups, :maxent;
    m = 1, tol = 0.001, niter = 10, robust = false, verbose = true,
);

solve_group_max_entropy_ccd: Optimizing 58997 variables


└ @ Knockoffs /Users/biona001/.julia/dev/Knockoffs/src/group.jl:230


initial obj = -13811.93738418022
Iter 1: obj = -8863.340455999352, δ = 0.9033274319764998, t1 = 8.85, t2 = 18.23, t3 = 0.04
Iter 2: obj = -7509.4699925543855, δ = 0.7627302489344706, t1 = 19.04, t2 = 36.57, t3 = 0.08
Iter 3: obj = -7495.512979153219, δ = 0.153471691368552, t1 = 29.25, t2 = 54.77, t3 = 0.12
Iter 4: obj = -7491.027981075002, δ = 0.0467433749392695, t1 = 39.2, t2 = 72.95, t3 = 0.16
Iter 5: obj = -7489.0243504490645, δ = 0.007200479138871564, t1 = 49.08, t2 = 90.98, t3 = 0.2
Iter 6: obj = -7487.899557778442, δ = 0.004873425110742543, t1 = 58.98, t2 = 109.34, t3 = 0.24
Iter 7: obj = -7487.198762711129, δ = 0.0031886466238885947, t1 = 68.75, t2 = 127.98, t3 = 0.28
Iter 8: obj = -7486.7318922165505, δ = 0.002106998635746351, t1 = 78.17, t2 = 146.58, t3 = 0.32
Iter 9: obj = -7486.405698134362, δ = 0.0011212737859179697, t1 = 87.39, t2 = 165.21, t3 = 0.37
Iter 10: obj = -7486.1699950030525, δ = 0.0006708441935208511, t1 = 96.52, t2 = 183.44, t3 = 0.41
283.395020 seconds (3.23 M

In [35]:
# Same :maxent problem as the non-robust run, but with robust Cholesky updates
# switched on; @time reports the wall-clock cost for comparison.
# Keywords:
#   m       - number of knockoffs per variable to generate
#   tol     - convergence tolerance
#   niter   - max number of coordinate descent iterations
#   robust  - whether to use robust cholesky updates
#   verbose - whether to print informative intermediate results
@time Srobust, _ = solve_s_group(
    Symmetric(Σ), groups, :maxent;
    m = 1, tol = 0.001, niter = 10, robust = true, verbose = true,
);

solve_group_max_entropy_ccd: Optimizing 58997 variables


└ @ Knockoffs /Users/biona001/.julia/dev/Knockoffs/src/group.jl:230


initial obj = -13811.93738418022
Iter 1: obj = -8863.34045599856, δ = 0.9033274319764997, t1 = 59.13, t2 = 18.41, t3 = 0.04
Iter 2: obj = -7509.469992554344, δ = 0.7627302489344778, t1 = 224.55, t2 = 37.55, t3 = 0.13
Iter 3: obj = -7495.51297915318, δ = 0.15347169136855976, t1 = 388.09, t2 = 57.28, t3 = 0.24
Iter 4: obj = -7491.027981074967, δ = 0.046743374939196, t1 = 545.06, t2 = 76.03, t3 = 0.32
Iter 5: obj = -7489.02435044903, δ = 0.007200479138872248, t1 = 700.49, t2 = 94.59, t3 = 0.41
Iter 6: obj = -7487.899557758589, δ = 0.0048734251107428635, t1 = 855.49, t2 = 113.3, t3 = 0.49
Iter 7: obj = -7487.198762678394, δ = 0.0031886466414335875, t1 = 1009.74, t2 = 131.81, t3 = 0.56
Iter 8: obj = -7486.7318922293, δ = 0.0021069986089067823, t1 = 1162.05, t2 = 150.21, t3 = 0.63
Iter 9: obj = -7486.405698133207, δ = 0.0011212738236271928, t1 = 1312.22, t2 = 168.55, t3 = 0.7
Iter 10: obj = -7486.169995071759, δ = 0.0006708442051516744, t1 = 1462.29, t2 = 187.99, t3 = 0.79
1652.269286 second

In [36]:
# Positions of the nonzero entries of the non-robust solution.
idx = findall(!=(0), Snotrubost)
# Side-by-side comparison at those positions: column 1 = non-robust, column 2 = robust.
hcat(vec(Snotrubost[idx]), vec(Srobust[idx]))

58997×2 Matrix{Float64}:
 0.398095    0.398095
 0.38275     0.38275
 0.217803    0.217803
 0.505603    0.505603
 0.0240037   0.0240037
 0.00824416  0.00824416
 0.00821002  0.00821002
 0.00824416  0.00824416
 0.0198802   0.0198802
 0.0106959   0.0106959
 0.00821002  0.00821002
 0.0106959   0.0106959
 0.0216111   0.0216111
 ⋮           
 0.264056    0.264056
 0.0427542   0.0427542
 0.0233907   0.0233907
 0.0233907   0.0233907
 0.0427375   0.0427375
 0.309755    0.309755
 0.0911903   0.0911903
 0.282318    0.282318
 0.315909    0.315909
 0.303404    0.303404
 0.0932321   0.0932321
 0.228432    0.228432

MVR group knockoffs

In [37]:
# Solve for the group-knockoff S matrix with the :mvr objective, using the
# non-robust Cholesky updates; @time reports the wall-clock cost.
# Keywords:
#   m       - number of knockoffs per variable to generate
#   tol     - convergence tolerance
#   niter   - max number of coordinate descent iterations
#   robust  - whether to use robust cholesky updates
#   verbose - whether to print informative intermediate results
# NOTE: the name `Snotrubost` (sic) is kept because later cells refer to it.
@time Snotrubost, _ = solve_s_group(
    Symmetric(Σ), groups, :mvr;
    m = 1, tol = 0.001, niter = 5, robust = false, verbose = true,
);

solve_group_MVR_ccd: Optimizing 58997 variables


└ @ Knockoffs /Users/biona001/.julia/dev/Knockoffs/src/group.jl:230


initial obj = 3.122502572196882e7
Iter 1: obj = 108931.1028819222, δ = 0.7026937323619613, t1 = 10.43, t2 = 50.16, t3 = 0.06
Iter 2: obj = 107754.13306041813, δ = 0.23295200799593951, t1 = 20.65, t2 = 99.9, t3 = 0.12
Iter 3: obj = 107544.97877939657, δ = 0.04584481163606506, t1 = 30.9, t2 = 150.22, t3 = 0.18
Iter 4: obj = 107467.22770949526, δ = 0.0035605204531314605, t1 = 41.23, t2 = 198.53, t3 = 0.24
Iter 5: obj = 107426.5640447448, δ = 0.0021367206975021493, t1 = 51.09, t2 = 246.46, t3 = 0.3
300.044465 seconds (1.78 M allocations: 426.458 MiB, 0.05% gc time, 0.24% compilation time)


In [38]:
# Same :mvr problem as the non-robust run, but with robust Cholesky updates
# switched on; @time reports the wall-clock cost for comparison.
# Keywords:
#   m       - number of knockoffs per variable to generate
#   tol     - convergence tolerance
#   niter   - max number of coordinate descent iterations
#   robust  - whether to use robust cholesky updates
#   verbose - whether to print informative intermediate results
@time Srobust, _ = solve_s_group(
    Symmetric(Σ), groups, :mvr;
    m = 1, tol = 0.001, niter = 5, robust = true, verbose = true,
);

solve_group_MVR_ccd: Optimizing 58997 variables


└ @ Knockoffs /Users/biona001/.julia/dev/Knockoffs/src/group.jl:230


initial obj = 3.122502572196882e7
Iter 1: obj = 108931.10288191834, δ = 0.7026937323619987, t1 = 165.94, t2 = 50.97, t3 = 0.09
Iter 2: obj = 107754.13306041222, δ = 0.2329520079958844, t1 = 332.2, t2 = 101.85, t3 = 0.17
Iter 3: obj = 107544.97877938757, δ = 0.045844811636079695, t1 = 499.61, t2 = 153.1, t3 = 0.25
Iter 4: obj = 107467.22770948234, δ = 0.0035605204531310147, t1 = 666.61, t2 = 204.24, t3 = 0.34
Iter 5: obj = 107426.5640447279, δ = 0.0021367206975019364, t1 = 833.22, t2 = 255.18, t3 = 0.42
1090.118206 seconds (300.90 k allocations: 346.572 MiB)


In [39]:
# Locate the nonzero entries of the non-robust MVR solution.
idx = findall(!=(0), Snotrubost)
# Two-column table of the solutions at those entries: non-robust (left), robust (right).
hcat(vec(Snotrubost[idx]), vec(Srobust[idx]))

58997×2 Matrix{Float64}:
 0.246872    0.246872
 0.239266    0.239266
 0.154311    0.154311
 0.283051    0.283051
 0.0209303   0.0209303
 0.00369209  0.00369209
 0.00340602  0.00340602
 0.00369209  0.00369209
 0.0155045   0.0155045
 0.00528621  0.00528621
 0.00340602  0.00340602
 0.00528621  0.00528621
 0.016602    0.016602
 ⋮           
 0.148463    0.148463
 0.0317079   0.0317079
 0.0114166   0.0114166
 0.0114166   0.0114166
 0.0317805   0.0317805
 0.198156    0.198156
 0.0693754   0.0693754
 0.169965    0.169965
 0.178362    0.178362
 0.197475    0.197475
 0.0688963   0.0688963
 0.151359    0.151359