In [1]:
import sys
import os

# Put the directory above the notebook's working directory on the import
# path so the local package import below resolves.
library_path = os.path.abspath(os.pardir)
if sys.path.count(library_path) == 0:
    sys.path.append(library_path)

from cge_comrare_pipeline.SampleQC import SampleQC

In [2]:
import ipywidgets as widgets
from IPython.display import display

# Text inputs for the input/output locations and file base names.
# NOTE(review): the defaults are absolute paths on one machine; edit them in
# the widgets (or here) before running elsewhere.
def _text_input(label, default):
    """Build a Text widget pre-filled with `default` and labelled `label`."""
    return widgets.Text(
        value=default,
        description=label,
        # 'initial' lets the description take its natural width
        style={'description_width': 'initial'},
    )


input_path = _text_input(
    'Path to input plink1.9 files:',
    '/home/luis/data/rawdata-sexupdated/inputData',
)
input_name = _text_input(
    'Name of the plink1.9 files:',
    'luxgiant_data_combined_12098-updated-sex',
)
dependables_path = _text_input(
    'Path to dependable files:',
    '/home/luis/data/rawdata-sexupdated/dependables',
)
output_path = _text_input(
    'Path to output files:',
    '/home/luis/data/rawdata-sexupdated/outputData',
)
output_name = _text_input(
    'Name of the resulting files:',
    'luxgiant_res',
)

# Render all five inputs below the cell, in the order they are used
display(input_path, input_name, dependables_path, output_path, output_name)

# Accessor for the current contents of the path/name widgets
def get_params():
    """Return the current widget texts as a 5-tuple.

    Order: input path, input name, dependables path, output path, output name.
    The values are read at call time, so edits made in the widgets after
    they were displayed are picked up.
    """
    fields = (input_path, input_name, dependables_path, output_path, output_name)
    return tuple(field.value for field in fields)

Text(value='/home/luis/data/rawdata-sexupdated/inputData', description='Path to input plink1.9 files:', style=…

Text(value='luxgiant_data_combined_12098-updated-sex', description='Name of the plink1.9 files:', style=TextSt…

Text(value='/home/luis/data/rawdata-sexupdated/dependables', description='Path to dependable files:', style=Te…

Text(value='/home/luis/data/rawdata-sexupdated/outputData', description='Path to output files:', style=TextSty…

Text(value='luxgiant_res', description='Name of the resulting files:', style=TextStyle(description_width='init…

In [3]:
# Echo the values currently held by the widgets so the chosen paths are
# recorded in the cell output.
path_params = get_params()
param_labels = ("Input Path", "Input Name", "Dependables", "Output Path", "Output Name")
for label, value in zip(param_labels, path_params):
    print(f"{label}: {value}")

Input Path: /home/luis/data/rawdata-sexupdated/inputData
Input Name: luxgiant_data_combined_12098-updated-sex
Dependables: /home/luis/data/rawdata-sexupdated/dependables
Output Path: /home/luis/data/rawdata-sexupdated/outputData
Output Name: luxgiant_res


In [4]:
# Interactive inputs for the sample-QC thresholds.
# Free-text areas hold comma-separated lists; FloatText widgets hold a
# single number each.
ind_par = widgets.Textarea(
    value='50, 5, 0.2',
    description='indep pairwise (comma-separated):',
    style={'description_width': 'initial'}
)

mind = widgets.FloatText(
    value=0.2,  # Default value
    description='mind (float):',
    style={'description_width': 'initial'}
)

# Left blank by default; a blank field is turned into an empty list by
# get_sample_qc_params().
sex_check = widgets.Textarea(
    value = '',
    description='sex check (comma-separated):',
    style={'description_width': 'initial'}
)

maf = widgets.FloatText(
    value=0.01,  # Default value
    description='maf (float):',
    style={'description_width': 'initial'}
)

kingship = widgets.FloatText(
    value=0.354,  # Default value
    # Label fixed: it used to read 'mind (float):' (copy-paste from the
    # widget above), making the two inputs indistinguishable in the UI.
    description='kingship (float):',
    style={'description_width': 'initial'}
)

# display the widgets
display(ind_par, mind, sex_check, maf, kingship)

def get_sample_qc_params():
    """Collect the sample-QC settings currently entered in the widgets.

    Returns
    -------
    dict
        'maf', 'mind', 'kingship' : values of the corresponding FloatText
            widgets.
        'indep' : ``[int, int, float]`` parsed from the first three
            comma-separated entries of the indep-pairwise text area.
        'sex_check' : list of floats parsed from the sex-check text area;
            empty when the field is blank.

    Raises
    ------
    ValueError
        If fewer than three indep-pairwise values are given, or if any
        entry cannot be converted to a number.
    """
    indep = [token.strip() for token in ind_par.value.split(',')]
    if len(indep) < 3:
        raise ValueError(
            "indep pairwise needs three comma-separated values "
            f"(int, int, float); got {ind_par.value!r}"
        )

    # Ignore blank entries so a whitespace-only field or a trailing comma
    # does not end up in float('').
    sex = [token.strip() for token in sex_check.value.split(',') if token.strip()]

    sample_qc_params = {
        'maf': maf.value,
        'mind': mind.value,
        'kingship': kingship.value,
        'indep': [int(indep[0]), int(indep[1]), float(indep[2])],
        'sex_check': [float(x) for x in sex],
    }

    return sample_qc_params

Textarea(value='50, 5, 0.2', description='indep pairwise (comma-separated):', style=TextStyle(description_widt…

FloatText(value=0.2, description='mind (float):', style=DescriptionStyle(description_width='initial'))

Textarea(value='', description='sex check (comma-separated):', style=TextStyle(description_width='initial'))

FloatText(value=0.01, description='maf (float):', style=DescriptionStyle(description_width='initial'))

FloatText(value=0.354, description='mind (float):', style=DescriptionStyle(description_width='initial'))

In [5]:
# Snapshot the sample-QC settings currently entered in the widgets above.
sample_params = get_sample_qc_params()
# Bare expression so the notebook displays the resulting dict.
sample_params

{'maf': 0.01,
 'mind': 0.2,
 'kingship': 0.354,
 'indep': [50, 5, 0.2],
 'sex_check': []}

In [6]:
# Build the QC object from the values currently held by the path/name
# widgets (read at call time, so later edits in the widgets require
# re-running this cell).
sample = SampleQC(
    input_path=input_path.value,
    input_name=input_name.value,
    output_path=output_path.value,
    output_name=output_name.value,
    dependables_path=dependables_path.value,
)

In [None]:
# Ordered table of QC steps: step key -> (bound method, positional args).
# The args entry must be a tuple because the runner below calls func(*params).
sample_qc_steps = {
    'hh_to_missing'         : (sample.execute_haploid_to_missing, ()),
    'ld_pruning'            : (sample.execute_ld_pruning, (sample_params['indep'],)),
    'miss_genotype'         : (sample.execute_miss_genotype, (sample_params['mind'],)),
    # Trailing comma added: without it the parentheses are just grouping, so
    # the list itself was splatted by func(*params) and each threshold became
    # a separate positional argument. Now the list is passed as ONE argument,
    # the same way 'ld_pruning' receives its list.
    # NOTE(review): assumes execute_sex_check takes the thresholds as a single
    # list argument -- confirm against SampleQC.
    'sex_check'             : (sample.execute_sex_check, (sample_params['sex_check'],)),
    'heterozygosity'        : (sample.execute_heterozygosity_rate, (sample_params['maf'],)),
    'duplicates_relatedness': (sample.execute_dup_relatedness, (sample_params['kingship'],)),
    # NOTE(review): 0.2 and 3 are hard-coded here; 0.2 equals the default
    # `mind` value -- confirm whether it should follow sample_params['mind'].
    'report_failures'       : (sample.get_fail_samples, (0.2, 3, sample_params['maf'])),
    'drop_failing_samples'  : (sample.execute_drop_samples, ()),
}

# Human-readable banner printed before each step; keys mirror sample_qc_steps.
step_description = {
    'hh_to_missing'         : 'Solve hh warnings by setting to missing',
    'ld_pruning'            : 'Perform LD pruning',
    'miss_genotype'         : 'Get samples with high missing rate',
    'sex_check'             : 'Get samples with discordant sex information',
    'heterozygosity'        : 'Get samples with high heterozygosity rate',
    'duplicates_relatedness': 'Get samples with high relatedness rate or duplicates',
    'report_failures'       : 'Collect all failing samples',
    'drop_failing_samples'  : 'Drop samples failing QC from raw data',
}

# Run the steps in insertion order, announcing each one in bold
# (ANSI escape codes \033[1m ... \033[0m).
for name, (func, params) in sample_qc_steps.items():
    print(f"\033[1m{step_description[name]}.\033[0m")
    func(*params)

PLINK v1.90b7.4 64-bit (18 Aug 2024)           www.cog-genomics.org/plink/1.9/
(C) 2005-2024 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/luis/data/rawdata-sexupdated/inputData/luxgiant_data_combined_12098-updated-sex-hh-missing.log.
Options in effect:
  --bfile /home/luis/data/rawdata-sexupdated/inputData/luxgiant_data_combined_12098-updated-sex
  --make-bed
  --out /home/luis/data/rawdata-sexupdated/inputData/luxgiant_data_combined_12098-updated-sex-hh-missing
  --set-hh-missing

63927 MB RAM detected; reserving 31963 MB for main workspace.
1842636 variants loaded from .bim file.
12098 people (8066 males, 4032 females) loaded from .fam.
12098 phenotype values loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 12098 founders and 0 nonfounders present.
Calculating allele frequencies... 101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566