# Make all knockoff functions efficient

In [1]:
using Revise
using LinearAlgebra
using DelimitedFiles
using Distributions
using ProgressMeter
using SnpArrays
using Random
using Knockoffs
using fastPHASE
using BenchmarkTools
# Machine-specific locations: `plinkname` is the PLINK file prefix later passed to
# `SnpData`, and `datadir` becomes the working directory for the session.
plinkname = "/Users/biona001/.julia/dev/Knockoffs/fastphase/ukb.10k.chr10"
datadir = "/Users/biona001/.julia/dev/Knockoffs/fastphase"
# Alternative (disabled) locations under /scratch; uncomment to use them instead.
# plinkname = "/scratch/users/bbchu/ukb_SHAPEIT/subset/ukb.10k.chr10"
# datadir = "/scratch/users/bbchu/fastphase"
# NOTE(review): `T` and `extension` are not referenced by any later visible cell —
# presumably they parameterize loading the fastPHASE output; confirm before removing.
T = 10
extension="ukb_chr10_n1000"
# Relative paths used later (e.g. "benchmark.bed") resolve against this directory.
cd(datadir)

┌ Info: Precompiling Knockoffs [878bf26d-0c49-448a-9df5-b057c815d613]
└ @ Base loading.jl:1317


## Load data

In [12]:
# Load the genotype data and preallocate every buffer used by the benchmarks below.
#
# NOTE(review): this cell reads `θ`, `α`, and `r`, none of which are assigned in the
# visible notebook. They must already exist in the session (presumably the fastPHASE
# parameter estimates — confirm and load them before running this cell).
snpdata = SnpData(plinkname)
Xfull = snpdata.snparray
n, p = size(Xfull)
K = size(θ, 2)
statespace = (K * (K + 1)) >> 1 # K(K+1)/2, computed with an integer shift
table = MarkovChainTable(K)

# get initial states (marginal distribution vector) and Markov transition matrices
q = get_initial_probabilities(α, table)
Q = get_genotype_transition_matrix(r, θ, α, q, table)

# preallocated arrays
# NOTE(review): presumably backs the knockoff genotypes with "benchmark.bed" in the
# current working directory; X̃full is not used by the benchmark cells shown here.
X̃full = SnpArray("benchmark.bed", n, p)
X = zeros(Float64, p)
Z = zeros(Int, p)
Z̃ = zeros(Int, p)
X̃ = zeros(Int, p)
N = zeros(p, statespace)
# Uniform placeholder distributions, passed to the in-place samplers below.
d_K = Categorical(fill(1 / statespace, statespace)) # for sampling markov chains (length statespace)
# Fix: genotypes take 3 values, so this distribution needs 3 categories rather than
# `statespace` (the original line was a copy of `d_K` and contradicted its comment).
d_3 = Categorical(fill(1 / 3, 3)) # for sampling genotypes (length 3)
α̂ = zeros(p, statespace) # scaled α, where α̂[j, k] = P(x_1,...,x_k, z_k) / P(x_1,...,x_k)
c = zeros(p); # normalizing constants, c[k] = p(x_k | x_1,...,x_{k-1})


## (Scaled) forward-backward algorithm to sample Z

In [18]:
# Fill the preallocated buffer `X` from the first row of the genotype matrix (via a
# non-copying view), then benchmark the in-place forward-backward sampler.
# Arguments are `$`-interpolated so the benchmark does not time global-variable access.
copyto!(X, view(Xfull, 1, :))
@btime forward_backward_sampling!(
    $Z, $X, $Q, $q, $θ, $table, $d_K, $α̂, $c
);

  9.675 ms (0 allocations: 0 bytes)


## Sample knockoffs of the Markov chain

In [16]:
# Benchmark the in-place Markov-chain knockoff sampler on the preallocated buffers;
# `$` interpolation keeps global-variable lookup out of the measurement.
@btime markov_knockoffs!(
    $Z̃, $Z, $N, $d_K, $Q, $q
);

  5.569 ms (0 allocations: 0 bytes)


## Sample genotype knockoffs

In [17]:
# Benchmark the in-place genotype sampler, which takes the knockoff chain `Z̃` and the
# genotype distribution `d_3`; `$` interpolation avoids timing global-variable access.
@btime sample_markov_chain!(
    $X̃, $Z̃, $table, $θ, $d_3
);

  425.607 μs (0 allocations: 0 bytes)
