In [1]:
using Revise
using Knockoffs
using LinearAlgebra
using Random
using Test
using StatsBase
using Statistics
using Convex, SCS

┌ Info: Precompiling Knockoffs [878bf26d-0c49-448a-9df5-b057c815d613]
└ @ Base loading.jl:1317


# Discrete Markov chain knockoffs

In [14]:
# Fix the RNG seed so the draws below are reproducible.
Random.seed!(123)

# Problem sizes: p - 1 matrices are generated below, each of size K × K.
# n is only referenced by the commented-out draw at the bottom of this cell.
n, p, K = 20, 10, 5

# Build the p - 1 matrices, each filled with independent uniform draws.
# NOTE(review): the entries come straight from `rand`, so neither the rows nor the
# columns are normalized to sum to 1 -- confirm before treating these as transition
# probabilities.
Q = Matrix{Float64}[rand(K, K) for _ in 1:(p - 1)]

# Display the first column of the first matrix.
first(Q)[:, 1]

# X = rand(1:K, n, p)

5-element Vector{Float64}:
 0.7684476751965699
 0.940515000715187
 0.6739586945680673
 0.3954531123351086
 0.3132439558075186

# Gaussian knockoffs for model-X

In [57]:
# Seed the RNG so the simulated design matrix is reproducible.
Random.seed!(222)

# Simulate an n × p standard-normal design matrix. Column standardization is
# currently disabled; the steps that were tried are kept here for reference:
#   zscore!(X, mean(X, dims=1), std(X, dims=1))
#   normalize_col!(X)
#   @show rank(X)
n, p = 300, 600
X = randn(n, p)
@show rank(X)

# Build the model-X Gaussian knockoffs (`:sdp` option, zero vector of length p as
# the third argument), then pull the fields used by the checks in later cells.
# Note that `X` is rebound to the copy stored inside the knockoff object.
knockoff = modelX_gaussian_knockoffs(X, :sdp, zeros(p));
X, X̃, s, Σ, Σinv = knockoff.X, knockoff.X̃, knockoff.s, knockoff.Σ, knockoff.Σinv;

rank(X) = 300


In [58]:
# Show the entries of the knockoff gram matrix next to those of Σ, one pair per row.
hcat(vec(X̃' * X̃), vec(Σ))

360000×2 Matrix{Float64}:
 277.812    277.808
   3.13176    3.04479
 -16.199    -16.1479
  13.3139    13.4561
   2.2489     2.15267
 -13.4907   -13.5272
 -17.5551   -17.5305
  -1.92496   -1.87318
  21.6934    21.5677
  16.0864    16.0451
  21.0387    20.9524
 -11.4561   -11.4702
  -6.33713   -6.52687
   ⋮        
  31.0182    30.9569
 -10.0784   -10.1302
 -13.9167   -13.9458
 -17.9627   -17.9188
 -33.436    -33.4528
 -22.1668   -22.1366
  16.4543    16.4834
  17.9495    17.8061
  -1.3833    -1.34597
 -30.7285   -30.6817
  -8.54919   -8.43816
 280.266    280.314

In [65]:
# The gram matrix of X must match Σ entry by entry (default `isapprox` tolerance).
@test all(isapprox.(X' * X, Σ))
# The knockoff gram matrix only matches Σ to within 0.5 in absolute terms
# (numerical accuracy not good?).
@test all(isapprox.(X̃'X̃, Σ; atol=0.5))
# Every entry of s must lie in [0, 1].
@test all(≥(0), s)
@test all(≤(1), s)

[32m[1mTest Passed[22m[39m

In [80]:
# For every pair of columns (i, j), compare the inner product of X[:, i] with the
# knockoff column X̃[:, j] against its expected value:
#   i == j : Σ[i, i] - s[i]
#   i != j : the inner product of the original columns X[:, i] and X[:, j]
# Both comparisons use an absolute tolerance of 1.0.
for i in 1:p, j in 1:p
    local expected = i == j ? Σ[i, i] - s[i] : dot(X[:, i], X[:, j])
    @test isapprox(dot(X[:, i], X̃[:, j]), expected, atol=1.0)
end

# Fixed SDP knockoffs

In [8]:
# Seed the RNG so the simulated design matrix is reproducible.
Random.seed!(2021)

# Simulate an n × p standard-normal design matrix.
n, p = 1000, 100
X = randn(n, p)

# Standardize each column in place using its own mean and standard deviation
# (both computed before X is modified), then apply `normalize_col!`.
col_means = mean(X, dims=1)
col_stds = std(X, dims=1)
zscore!(X, col_means, col_stds)
normalize_col!(X)

# Construct the SDP knockoffs; the trailing `;` suppresses the cell output.
A = knockoff_sdp(X);

In [15]:
# Multiply the knockoff object by a random vector of length 2p.
A * rand(2 * p)

1000-element Vector{Float64}:
 -0.2747348889922186
 -0.011604297343139128
 -0.17855822130485055
  0.6238723141373661
  0.06914494502748851
 -0.030693483292735548
  0.10525570631129927
  0.387685066727066
 -0.3684436547296998
  0.2883308973326147
 -0.3960114338778803
 -0.26313858252300076
  0.17003773708817202
  ⋮
  0.14732667489817364
  0.048202855906769135
  0.3067825148701804
 -0.3947697723673398
 -0.3302745133319064
  0.43085347351430314
 -0.19253463276021876
 -0.23262824799596488
  0.1575767171756602
  0.363888792693726
 -0.4264288205354244
 -0.1210684082128515

### Check knockoff properties

In [10]:
# Unpack the fields of the SDP knockoff object for the checks below.
# The setup cell of this section stores the result of `knockoff_sdp(X)` in `A`, and
# no `knockoff` variable is assigned in this section, so reading `knockoff.*` here
# would pick up whatever an earlier section left behind instead of the SDP result.
# NOTE(review): assumes the object returned by `knockoff_sdp` exposes the same
# fields as the one returned by `knockoff_equi` -- confirm.
X̃ = A.X̃
s = A.s
C = A.C
Ũ = A.Ũ
Σ = A.Σ
Σinv = A.Σinv;

In [18]:
# The knockoff gram matrix should reproduce Σ; only an absolute tolerance of 0.5 is
# enforced here (numerical accuracy not good?).
@test all(isapprox.(X̃'X̃, Σ; atol=0.5))
# hcat(vec(X̃' * X̃), vec(Σ))

[32m[1mTest Passed[22m[39m

In [19]:
# The gram matrix of the original columns matches Σ to 1e-10 (good accuracy).
@test all(isapprox.(X'X, Σ; atol=1e-10))

[32m[1mTest Passed[22m[39m

In [27]:
# Check that each column's inner product with its own knockoff equals Σ[i, i] - s[i].
# The comparisons are wrapped in `@test` so that a mismatch is reported: as bare
# `isapprox` calls their Boolean results were discarded and the loop verified nothing.
for i in 1:p
    # Default `isapprox` tolerance.
    # NOTE(review): loosen with `atol` if the solver accuracy does not support it.
    @test isapprox(dot(X[:, i], X̃[:, i]), Σ[i, i] - s[i])
    # Presumably relies on the columns of X having unit norm, i.e. Σ[i, i] ≈ 1.
    @test isapprox(dot(X[:, i], X̃[:, i]), 1 - s[i], atol=1e-3)
end

# Fixed equi-correlated knockoffs

In [37]:
# Seed the RNG so the simulated design matrix is reproducible.
Random.seed!(2021)

# Simulate an n × p standard-normal design matrix and apply `normalize_col!`.
# Alternatives that were tried and are currently disabled:
#   zscore!(X, mean(X, dims=1), std(X, dims=1))
#   for xi in eachcol(X)
#       normalize!(xi)
#   end
n, p = 300, 100
X = randn(n, p)
normalize_col!(X)

# Build the equi-correlated knockoff struct; the trailing `;` suppresses the output.
knockoff = knockoff_equi(X);

In [38]:
# Display the `s` field of the knockoff object built by `knockoff_equi` above.
knockoff.s

100-element Vector{Float64}:
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 ⋮
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523
 0.4178885915925523

In [4]:
# Inner product of the first column of X with itself (its squared Euclidean norm).
dot(X[:, 1], X[:, 1])

0.9999421897299496

### Check knockoff properties

In [5]:
# Pull the fields of the equi-correlated knockoff object into top-level variables
# for the checks below; the trailing `;` suppresses the cell output.
X̃, s, C = knockoff.X̃, knockoff.s, knockoff.C
Ũ, Σ, Σinv = knockoff.Ũ, knockoff.Σ, knockoff.Σinv;

In [8]:
# The knockoff gram matrix should reproduce Σ; only an absolute tolerance of 5e-2 is
# enforced here (numerical accuracy not good?).
@test all(isapprox.(X̃'X̃, Σ; atol=5e-2))
# hcat(vec(X̃' * X̃), vec(Σ))

[32m[1mTest Passed[22m[39m

In [7]:
# The gram matrix of the original columns matches Σ to 1e-10 (good accuracy).
@test all(isapprox.(X'X, Σ; atol=1e-10))

[32m[1mTest Passed[22m[39m

In [140]:
# For each adjacent pair of columns (i, i + 1): the inner product of X[:, i] with the
# knockoff of column i + 1 must match its inner product with the original column i + 1.
for (i, j) in zip(1:p-1, 2:p)
    @test isapprox(dot(X[:, i], X̃[:, j]), dot(X[:, i], X[:, j]))
end

In [143]:
# Display one diagonal entry of Σ.
# NOTE(review): `i` is not assigned at top level in any cell visible here (a `for`
# loop variable does not survive its loop), so this relies on state left over from a
# cell that is not shown -- confirm which column index is intended.
Σ[i, i]

0.9993066871404116

In [144]:
# Display the inner product of column i of X with column i of the knockoff matrix.
# NOTE(review): `i` is not assigned at top level in any cell visible here (a `for`
# loop variable does not survive its loop), so this relies on state left over from a
# cell that is not shown -- confirm which column index is intended.
dot(X[:, i], X̃[:, i])

0.6378027299143559

In [166]:
# Check that each column's inner product with its own knockoff equals Σ[i, i] - s[i].
# The comparisons are wrapped in `@test` so that a mismatch is reported: as bare
# `isapprox` calls their Boolean results were discarded and the loop verified nothing.
for i in 1:p
    # Default `isapprox` tolerance.
    # NOTE(review): loosen with `atol` if the knockoff construction is less accurate.
    @test isapprox(dot(X[:, i], X̃[:, i]), Σ[i, i] - s[i])
    # Presumably relies on the columns of X having unit norm, i.e. Σ[i, i] ≈ 1.
    @test isapprox(dot(X[:, i], X̃[:, i]), 1 - s[i], atol=1e-3)
end