In [None]:
import sys
import os

from pathlib import Path

# Make the parent of the current working directory importable so the local
# `ideal_genom_qc` checkout is found without installing the package.
library_path = os.path.abspath(os.pardir)
if sys.path.count(library_path) == 0:
    sys.path.append(library_path)

from ideal_genom_qc.AncestryQC import AncestryQC

In [None]:
import ipywidgets as widgets
from IPython.display import display

# Free-text inputs describing where the data lives and how the outputs are named.
def _path_text_field(label):
    """Return an empty, half-width ``Text`` widget whose description is ``label``.

    A fresh style dict and ``Layout`` are created on every call so the widgets
    do not share any state with each other.
    """
    return widgets.Text(
        value='',
        description=label,
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='50%'),
    )


input_path = _path_text_field('Path to input plink1.9 files:')
input_name = _path_text_field('Name of the plink1.9 files:')
dependables_path = _path_text_field('Path to dependable files:')
output_path = _path_text_field('Path to output files:')
output_name = _path_text_field('Name of the resulting files:')

# Render all five fields below this cell.
display(input_path, input_name, dependables_path, output_path, output_name)

def get_params():
    """Collect the current contents of the path/name text widgets.

    The widget values are read at call time, so this must be called (again)
    after the fields have been edited.

    Returns:
        dict: ``input_path``, ``dependables_path`` and ``output_path`` wrapped
        in ``pathlib.Path``; ``input_name`` and ``output_name`` as the raw
        strings held by the widgets.
    """
    return dict(
        input_path=Path(input_path.value),
        input_name=input_name.value,
        dependables_path=Path(dependables_path.value),
        output_path=Path(output_path.value),
        output_name=output_name.value,
    )

In [None]:
# Snapshot the path/name widgets into a plain dict. get_params() reads the
# widget values when it is called, so re-run this cell after editing the fields.
param_path = get_params()
# Bare expression: shows the collected values as the cell output for a quick check.
param_path

In [None]:
# Interactive inputs for the ancestry-QC parameters. Every widget uses the same
# half-width layout and an 'initial' description width so long labels are not cut.

# Three comma-separated values; parsed downstream as [int, int, float].
ind_par = widgets.Textarea(
    value='50, 5, 0.2',
    description='indep pairwise (comma-separated):',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='50%')
)

maf = widgets.FloatText(
    value=0.01,  # Default value
    description='maf (float):',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='50%')
)

# The label used to read 'mind (float):', a copy-paste leftover that did not
# match this integer `pca` input.
# NOTE(review): assumed to be the number of principal components to compute;
# confirm against AncestryQC.run_pca.
pca = widgets.IntText(
    value=10,  # Default value
    description='Number of principal components to compute (int):',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='50%')
)

ref_threshold = widgets.FloatText(
    value=4,  # Default value
    description='threshold for the deviations from the mean on reference panel:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='50%')
)

stu_threshold = widgets.FloatText(
    value=4,  # Default value
    description='threshold for the deviations from the mean on study population:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='50%')
)

num_pcs = widgets.IntText(
    value=10,  # Default value
    description='Number of PCs to determine a sample outlier status:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='50%')
)

# The label used to read 'Name of the resulting files:', duplicating the
# output-name field; this widget holds the reference population instead.
# NOTE(review): the default 'SAS' looks like a population code; confirm the
# accepted values against AncestryQC.run_pca.
reference_pop = widgets.Text(
    value='SAS',
    description='Reference population:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='50%')
)

# display the widgets
display(ind_par, maf, pca, stu_threshold, ref_threshold, num_pcs, reference_pop)

def get_ancestry_qc_params():
    """Read the ancestry-QC widgets and return their current values as a dict.

    The ``indep pairwise`` text box must contain exactly three comma-separated
    values, which are parsed as ``[int, int, float]``. Surrounding whitespace
    and blank entries (for example a trailing comma) are ignored.

    Returns:
        dict: keys ``maf``, ``indep``, ``pca``, ``ref_threshold``,
        ``stu_threshold``, ``num_pcs`` and ``reference_pop``. ``indep`` is the
        parsed three-element list; every other entry is the raw widget value.

    Raises:
        ValueError: if the ``indep pairwise`` field does not hold exactly three
            values, or if any of them cannot be converted to the expected
            numeric type.
    """
    raw_indep = ind_par.value
    # Drop blanks so inputs such as "50, 5, 0.2," or stray newlines still parse.
    tokens = [tok.strip() for tok in raw_indep.split(',') if tok.strip()]
    if len(tokens) != 3:
        raise ValueError(
            "indep pairwise expects exactly three comma-separated values "
            f"(e.g. '50, 5, 0.2'), got {raw_indep!r}"
        )

    try:
        indep = [int(tokens[0]), int(tokens[1]), float(tokens[2])]
    except ValueError as err:
        raise ValueError(
            "indep pairwise values must be <int>, <int>, <float>, "
            f"got {raw_indep!r}"
        ) from err

    return {
        'maf': maf.value,
        'indep': indep,
        'pca': pca.value,
        'ref_threshold': ref_threshold.value,
        'stu_threshold': stu_threshold.value,
        'num_pcs': num_pcs.value,
        'reference_pop': reference_pop.value,
    }

In [None]:
# Snapshot the QC-parameter widgets into a plain dict. The widget values are read
# when get_ancestry_qc_params() is called, so re-run this cell after editing them.
ancestry_params = get_ancestry_qc_params()
# Bare expression: shows the collected values as the cell output for a quick check.
ancestry_params

In [None]:
# Assemble the constructor arguments from the paths collected in `param_path`.
# The high-LD regions file is expected inside the dependables directory.
ancestry_qc_kwargs = {
    'input_path': param_path['input_path'],
    'input_name': param_path['input_name'],
    'output_path': param_path['output_path'],
    'output_name': param_path['output_name'],
    'high_ld_regions': param_path['dependables_path'] / 'high-LD-regions.txt',
    'recompute_merge': False,
}
ancestry_qc = AncestryQC(**ancestry_qc_kwargs)

In [None]:
# Pipeline definition: step key -> (bound method of `ancestry_qc`, keyword arguments).
# Dicts preserve insertion order, so the steps run in the order written here.
# NOTE(review): `_clean_merging_dir` is underscore-prefixed (private by
# convention); confirm whether AncestryQC offers a public way to clean up.
ancestry_qc_steps = {
    'merge_study_reference'    : (ancestry_qc.merge_reference_study, {"ind_pair":ancestry_params['indep']}),
    'delete_intermediate_files': (ancestry_qc._clean_merging_dir, {}),
    'pca_analysis'             : (ancestry_qc.run_pca, 
        {
            "ref_population": ancestry_params['reference_pop'],
            "pca":ancestry_params['pca'],
            "maf":ancestry_params['maf'],
            "num_pca":ancestry_params['num_pcs'],
            "ref_threshold":ancestry_params['ref_threshold'],
            "stu_threshold":ancestry_params['stu_threshold'],
        }
    ),
}

# Human-readable banner for each step; keys must match `ancestry_qc_steps`.
step_description = {
    'merge_study_reference'    : "Merge reference genome with study genome",
    'delete_intermediate_files': "Delete intermediate files generated during merging",
    'pca_analysis'             : "Run a PCA analysis to perform ancestry QC"
}

# Run every step in order, printing its description in bold (ANSI escape codes)
# first. Any exception raised by a step propagates and stops the loop.
for name, (func, params) in ancestry_qc_steps.items():
    print(f"\033[1m{step_description[name]}.\033[0m")
    func(**params)