# MVR group knockoff with CCD, block descent, and PCA

In [1]:
# load packages needed for this tutorial
using Revise
using Knockoffs
using Random
using GLMNet
using Distributions
using LinearAlgebra
using ToeplitzMatrices
using StatsBase
using CSV, DataFrames
using Plots
# Select the GR plotting backend and ask for PNG figure output.
gr(fmt=:png);

"""
    get_sigma(option::Int, p::Int; datadir=...)

Return the leading `p × p` block of the covariance matrix selected by `option`.

# Arguments
- `option`: which covariance matrix to build:
    1. symmetric Toeplitz matrix whose first column is `0.7 .^ (0:p-1)`
    2. block covariance from `simulate_block_covariance`, using groups of 5
       consecutive variables with `ρ = 0.7` and `γ = 0.1`
    3. matrix read from `CorG_2_127374341_128034347.txt`, replaced by
       `0.99Σ + 0.01I`
    4. columns `7:end` of `21_37870779_38711704.csv`, wrapped in `Symmetric`
    5. columns `7:end` of `22_17674295_18295575.csv`, wrapped in `Symmetric`
- `p`: number of leading rows/columns to keep.

# Keywords
- `datadir`: directory holding the data files read by options 3-5. It is not
  touched by options 1 and 2. Defaults to the original author's local path.

# Throws
- `ErrorException` when `option` is not one of 1-5.

Note: groups are defined empirically within each simulation; the groups built
for option 2 are only used to generate the matrix and are not returned.
"""
function get_sigma(
    option::Int,
    p::Int;
    datadir::AbstractString = "/Users/biona001/Benjamin_Folder/research/4th_project_PRS/group_knockoff_test_data",
)
    if option == 1
        ρ = 0.7
        Σ = SymmetricToeplitz(ρ.^(0:(p-1))) |> Matrix
    elseif option == 2
        ρ = 0.7
        γ = 0.1
        # Round the number of groups up so that p does not have to be a multiple
        # of 5; the surplus variables are dropped by the final truncation.
        # (`Int(p/5)` would throw an InexactError for such p.)
        groups = repeat(1:cld(p, 5), inner=5)
        Σ = simulate_block_covariance(groups, ρ, γ)
    elseif option == 3
        covfile = CSV.read(joinpath(datadir, "CorG_2_127374341_128034347.txt"), DataFrame) # 3782 SNPs
        Σ = covfile |> Matrix{Float64}
        Σ = 0.99Σ + 0.01I # ensure PSD
    elseif option == 4 || option == 5
        # Both files share the same layout; the matrix starts at column 7.
        file = option == 4 ? "21_37870779_38711704.csv" : "22_17674295_18295575.csv"
        df = CSV.read(joinpath(datadir, file), DataFrame)
        Σ = df[:, 7:end] |> Matrix |> Symmetric |> Matrix
    else
        error("Option should be 1-5 but was $option")
    end
    return Σ[1:p, 1:p]
end

# Fix the RNG seed so that later cells are reproducible.
Random.seed!(2023)
# Option 2 is the simulated block covariance (groups of 5, ρ = 0.7, γ = 0.1).
# It is the option that yields the 0.7 / 0.07 block matrix displayed below, and
# unlike options 3-5 it needs no external data file.
sigma_option = 2
p = 10
Σ = get_sigma(sigma_option, p)

10×10 Matrix{Float64}:
 1.0   0.7   0.7   0.7   0.7   0.07  0.07  0.07  0.07  0.07
 0.7   1.0   0.7   0.7   0.7   0.07  0.07  0.07  0.07  0.07
 0.7   0.7   1.0   0.7   0.7   0.07  0.07  0.07  0.07  0.07
 0.7   0.7   0.7   1.0   0.7   0.07  0.07  0.07  0.07  0.07
 0.7   0.7   0.7   0.7   1.0   0.07  0.07  0.07  0.07  0.07
 0.07  0.07  0.07  0.07  0.07  1.0   0.7   0.7   0.7   0.7
 0.07  0.07  0.07  0.07  0.07  0.7   1.0   0.7   0.7   0.7
 0.07  0.07  0.07  0.07  0.07  0.7   0.7   1.0   0.7   0.7
 0.07  0.07  0.07  0.07  0.07  0.7   0.7   0.7   1.0   0.7
 0.07  0.07  0.07  0.07  0.07  0.7   0.7   0.7   0.7   1.0

$\Sigma$ is 10 by 10 with an obvious block structure. Let's define 2 groups that capture this structure.

In [2]:
# Group membership: variables 1-5 belong to group 1, variables 6-10 to group 2.
groups = vcat(fill(1, 5), fill(2, 5))

10-element Vector{Int64}:
 1
 1
 1
 1
 1
 2
 2
 2
 2
 2

Let's solve for the MVR group knockoffs using various methods.

In [19]:
# Solve the group knockoff problem with three methods, keeping only the first
# value returned by solve_s_group each time.
# NOTE(review): the first @time is probably dominated by compilation, since it
# is the first call to solve_s_group in the session; rerun the cell before
# comparing timings.
@time mvr_block = first(solve_s_group(Symmetric(Σ), groups, :mvr_block))
@time mvr_ccd = first(solve_s_group(Symmetric(Σ), groups, :mvr))
@time mvr_pca = first(solve_s_group(Symmetric(Σ), groups, :mvr_pca));

  0.837200 seconds (197.50 k allocations: 12.544 MiB)
  0.001397 seconds (992 allocations: 140.703 KiB)
  0.000594 seconds (738 allocations: 108.703 KiB)


In [21]:
# Display the first value returned by solve_s_group for method :mvr_block.
mvr_block

10×10 Matrix{Float64}:
 0.961037  0.661037  0.661037  0.661037  …  0.0       0.0       0.0
 0.661037  0.961037  0.661037  0.661037     0.0       0.0       0.0
 0.661037  0.661037  0.961037  0.661037     0.0       0.0       0.0
 0.661037  0.661037  0.661037  0.961037     0.0       0.0       0.0
 0.661037  0.661037  0.661037  0.661037     0.0       0.0       0.0
 0.0       0.0       0.0       0.0       …  0.661047  0.661047  0.661047
 0.0       0.0       0.0       0.0          0.661047  0.661047  0.661047
 0.0       0.0       0.0       0.0          0.961047  0.661047  0.661047
 0.0       0.0       0.0       0.0          0.661047  0.961047  0.661047
 0.0       0.0       0.0       0.0          0.661047  0.661047  0.961047

In [22]:
# Display the first value returned by solve_s_group for method :mvr.
mvr_ccd

10×10 Matrix{Float64}:
 0.966258  0.667312  0.66787   0.668349  …  0.0       0.0       0.0
 0.667312  0.968186  0.668824  0.669282     0.0       0.0       0.0
 0.66787   0.668824  0.969306  0.669794     0.0       0.0       0.0
 0.668349  0.669282  0.669794  0.970268     0.0       0.0       0.0
 0.668849  0.669742  0.670262  0.670712     0.0       0.0       0.0
 0.0       0.0       0.0       0.0       …  0.680315  0.705379  0.688627
 0.0       0.0       0.0       0.0          0.681397  0.705618  0.689151
 0.0       0.0       0.0       0.0          0.983726  0.708979  0.692824
 0.0       0.0       0.0       0.0          0.708979  1.03529   0.716237
 0.0       0.0       0.0       0.0          0.692824  0.716237  1.00003

In [23]:
# Display the first value returned by solve_s_group for method :mvr_pca.
mvr_pca

10×10 Matrix{Float64}:
 1.02147   0.671466  0.671466  0.671466  …  0.0       0.0       0.0
 0.671466  1.02147   0.671466  0.671466     0.0       0.0       0.0
 0.671466  0.671466  1.02147   0.671466     0.0       0.0       0.0
 0.671466  0.671466  0.671466  1.02147      0.0       0.0       0.0
 0.671466  0.671466  0.671466  0.671466     0.0       0.0       0.0
 0.0       0.0       0.0       0.0       …  0.632299  0.715632  0.694799
 0.0       0.0       0.0       0.0          0.632299  0.715632  0.694799
 0.0       0.0       0.0       0.0          1.0323    0.715632  0.694799
 0.0       0.0       0.0       0.0          0.715632  0.865633  0.694799
 0.0       0.0       0.0       0.0          0.694799  0.694799  0.928133

Check objective:

In [24]:
# Evaluate group_block_objective on Σ for each of the three solutions, passing
# the same method symbol that was given to solve_s_group. @show prints each call
# next to its value so the three numbers can be compared directly.
# m is forwarded as the third argument — presumably the number of knockoffs
# generated per variable; TODO confirm against the Knockoffs.jl documentation.
m = 1
@show group_block_objective(Σ, mvr_block, m, :mvr_block)
@show group_block_objective(Σ, mvr_ccd, m, :mvr)
@show group_block_objective(Σ, mvr_pca, m, :mvr_pca);

group_block_objective(Σ, mvr_block, m, :mvr_block) = 54.40459761441509
group_block_objective(Σ, mvr_ccd, m, :mvr) = 54.423700169872866
group_block_objective(Σ, mvr_pca, m, :mvr_pca) = 59.40088406887517
