# Deep Knockoff tuning
Problem: the surrogate distribution is too narrow and does not overlap at all with the empirical values.

Goal: fine-tune the Deep Knockoff (DKO) machine so that the distribution of surrogates it generates lies closer to the empirical observations, allowing for a fine-grained selection of significant features.



In [None]:
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import scipy
import seaborn as sns
# Default figure size (width, height) used for the plots created in this notebook.
matplotlib.rcParams['figure.figsize'] = (12.0, 6.0)

from implementation import glm, knockoff_class, params
from implementation.load import load_pickle, load_fmri
from implementation.utils import KNOCK_DIR, compare_diagnostics

In [None]:
# Use the same task/subject pair as in the report.
task, subject = 'MOTOR', 1

# Load all the fMRI data for the chosen task.
fmri_data = load_fmri(task=task)

In [None]:
# Instantiate a fresh Deep Knockoff machine for the selected task and subject.
deepko = knockoff_class.DeepKnockOff(task, subject)

# Pre-process the data first: clustering is applied to avoid correlations.
deepko.pre_process(save=True, max_corr=0.3)

# Fit the machine so it can build higher-order knockoffs.
# Training parameters are configured in params.py; the return value is discarded.
_ = deepko.fit()

# Plot the diagnostics that show how well the knockoffs perform, keeping the result.
res_deepko = deepko.diagnostics()

In [None]:
# generating deep knockoffs
# NOTE(review): `groups` is not assigned in any cell visible in this notebook, so this
# call raises NameError as written. Presumably it should come from the pre-processing
# (clustering) step -- confirm against knockoff_class and define it before this cell.
data_deepko = deepko.transform(groups=groups)

# calculating the GLM betas for the knockoffs
deepko_betas = deepko.statistic(data_deepko, save=True)

# executing the non-parametric test
# threshold() returns a pair, unpacked here as the uncorrected and corrected betas
uncorrected_betas_deepko, corrected_betas_deepko = deepko.threshold(deepko_betas, save=True)

In [None]:
# Histogram of the corrected betas obtained from the deep knockoffs.

# The seaborn style sheets were renamed to "seaborn-v0_8-*" in matplotlib 3.6 and the
# old names were removed later, so pick whichever name this installation provides.
# Inspect plt.style.available to see every installed style.
for style_name in ("seaborn-v0_8-dark", "seaborn-dark"):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

plt.hist(corrected_betas_deepko)

# pyplot has no xlab/ylab functions; the axis-label setters are xlabel/ylabel.
plt.xlabel("Value")
plt.ylabel("Count")
plt.title("Surrogate distribution")

plt.show()