In [1]:
import numpy as np 
import pandas as pd 

In [2]:
from knockpy import KnockoffFilter
import knockpy

# Seed NumPy's global generator so the simulated data are reproducible
np.random.seed(123)

# Problem size: fewer observations than features
n = 300  # number of data points
p = 500  # number of features

# Covariance of a stationary AR1 process with correlation 0.5
Sigma = knockpy.dgp.AR1(p=p, rho=0.5)

# Draw the n rows of the design matrix from N(0, Sigma)
X = np.random.multivariate_normal(np.zeros(p), Sigma, size=n)

# Random sparse coefficient vector, then a linear response with
# standard-normal noise added on top of X @ beta
beta = knockpy.dgp.create_sparse_coefficients(p=p, sparsity=0.1)
y = X.dot(beta) + np.random.randn(n)

In [3]:
# Knockoff filter configured with the 'gaussian' knockoff sampler and the
# 'lasso' feature statistic
kfilter = KnockoffFilter(ksampler='gaussian', fstat='lasso')

In [4]:
# Run the knockoff filter at a target FDR of 10%. No covariance matrix is
# supplied, so Ledoit-Wolf shrinkage is requested for the estimate.
# NOTE(review): confirm in the knockpy docs that forward() estimates the
# covariance from X when Sigma is omitted.
rejections = kfilter.forward(X=X, y=y, fdr=0.1, shrinkage="ledoitwolf")

# Power: fraction of the truly non-zero coefficients that were selected
power = np.dot(rejections, beta != 0) / (beta != 0).sum()

# FDP (in percent): share of the selections that are true nulls. The
# denominator is max(#rejections, 1) so an empty selection yields an FDP of 0
# instead of a 0/0 division.
fdp = np.around(100 * np.dot(rejections, beta == 0) / max(rejections.sum(), 1), 2)

# Round the power like the FDP so float noise does not leak into the message
print(f"The knockoff filter has discovered {np.around(100 * power, 2)}% of the non-nulls with a FDP of {fdp}%")

	---------------------------------------------------------
	---------------------------------------------------------
	The choldate package is not installed:
	solving for MVR or maxent knockoffs without it will be 
	~5x slower. For instructions on how to install choldate,
	see https://amspector100.github.io/knockpy/installation.html.
	argument or a knockoff_kwarg.
	----------------------------------------------------------
	----------------------------------------------------------



The knockoff filter has discovered 100.0% of the non-nulls with a FDP of 10.71%


In [5]:
# Display the array returned by kfilter.forward: one entry per feature,
# presumably 1 where the feature was selected and 0 otherwise (verify against knockpy docs)
rejections 

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 1., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.

In [None]:
# This uses gaussian knockoffs generated with the 'maxent' method
kfilter1 = KnockoffFilter(
    ksampler='gaussian',
    knockoff_kwargs=dict(method='maxent'),
)

# This uses fixed-X SDP knockoffs
kfilter2 = KnockoffFilter(
    ksampler='fx',
    knockoff_kwargs=dict(method='sdp'),
)