In [1]:
import sys
import os

import pandas as pd

# Make the parent directory importable so the local package import below resolves.
library_path = os.path.abspath(os.pardir)
if sys.path.count(library_path) == 0:
    sys.path.append(library_path)

from cge_comrare_pipeline.VariantQC import VariantQC

In [2]:
import ipywidgets as widgets
from IPython.display import display

# Text inputs for the location/prefix of the PLINK 1.9 files and of the results.
def _path_text_field(default, label):
    """Return a Text widget with the shared style (description_width='initial') and 50% width."""
    return widgets.Text(
        value=default,
        description=label,
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='50%'),
    )


input_path = _path_text_field(
    '/home/luis/data/rawdata-sexupdated/outputData/ancestry_results/clean_files',
    'Path to input plink1.9 files:',
)
input_name = _path_text_field(
    'luxgiant_res-ancestry-clean',
    'Name of the plink1.9 files:',
)
output_path = _path_text_field(
    '/home/luis/data/rawdata-sexupdated/outputData/',
    'Path to output files:',
)
output_name = _path_text_field(
    'luxgiant_res',
    'Name of the resulting files:',
)

# Render the four fields in the order they are read by get_params().
display(input_path, input_name, output_path, output_name)

def get_params():
    """Return the current text of the four path widgets.

    Returns:
        tuple: (input_path, input_name, output_path, output_name) as entered
        in the corresponding widgets at call time.
    """
    text_fields = (input_path, input_name, output_path, output_name)
    return tuple(field.value for field in text_fields)

Text(value='/home/luis/data/rawdata-sexupdated/outputData/ancestry_results/clean_files', description='Path to …

Text(value='luxgiant_res-ancestry-clean', description='Name of the plink1.9 files:', style=TextStyle(descripti…

Text(value='/home/luis/data/rawdata-sexupdated/outputData/', description='Path to output files:', style=TextSt…

Text(value='luxgiant_res', description='Name of the resulting files:', style=TextStyle(description_width='init…

In [3]:
# Snapshot the widget values and echo them, one per line, for the record.
path_params = get_params()
print(
    f"Input Path: {path_params[0]}",
    f"Input Name: {path_params[1]}",
    f"Output Path: {path_params[2]}",
    f"Output Name: {path_params[3]}",
    sep="\n",
)

Input Path: /home/luis/data/rawdata-sexupdated/outputData/ancestry_results/clean_files
Input Name: luxgiant_res-ancestry-clean
Output Path: /home/luis/data/rawdata-sexupdated/outputData/
Output Name: luxgiant_res


In [4]:
# Numeric inputs for the variant-QC settings: the chr_y code and two rate thresholds.
def _numeric_field(widget_cls, default, label):
    """Instantiate `widget_cls` with the shared style (description_width='initial') and 50% width."""
    return widget_cls(
        value=default,
        description=label,
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='50%'),
    )


chr_y = _numeric_field(widgets.IntText, 24, 'chr_y (int):')
miss_data_rate = _numeric_field(
    widgets.FloatText, 0.2, 'Missing data rate for variants (float):'
)
diff_genotype_rate = _numeric_field(
    widgets.FloatText, 1e-5, 'Different genotype rate (float):'
)

# Show the three inputs in the order they are collected by get_sample_qc_params().
display(chr_y, miss_data_rate, diff_genotype_rate)

def get_sample_qc_params():
    """Collect the current values of the numeric widgets into a dict.

    Note: despite the "sample" in the name, the values gathered here are the
    variant-QC settings defined in this notebook.

    Returns:
        dict: keys 'chr-y', 'miss_data_rate' and 'diff_genotype_rate', each
        holding the value of the widget of the matching name at call time.
    """
    return {
        'chr-y': chr_y.value,
        'miss_data_rate': miss_data_rate.value,
        'diff_genotype_rate': diff_genotype_rate.value,
    }

IntText(value=24, description='chr_y (int):', style=DescriptionStyle(description_width='initial'))

FloatText(value=0.2, description='Missing data rate for variants (float):', style=DescriptionStyle(description…

FloatText(value=1e-05, description='Different genotype rate (float):', style=DescriptionStyle(description_widt…

In [5]:
# Read the numeric widget values into a dict; the bare name on the last line
# echoes that dict as the cell output so the chosen settings are recorded.
variant_params = get_sample_qc_params()
variant_params

{'chr-y': 24, 'miss_data_rate': 0.2, 'diff_genotype_rate': 1e-05}

In [6]:
# Build the VariantQC object from the live widget values (not the earlier
# path_params snapshot), so edits made to the text fields are picked up here.
qc_locations = {
    'input_path': input_path.value,
    'input_name': input_name.value,
    'output_path': output_path.value,
    'output_name': output_name.value,
}
variant = VariantQC(**qc_locations)

In [7]:
# Step name -> (bound VariantQC method, positional arguments for it).
variant_qc_steps = {
    'Missing data rate': (
        variant.execute_missing_data_rate,
        (variant_params['chr-y'],),
    ),
    'Different genotype': (
        variant.execute_different_genotype_call_rate,
        (),
    ),
}

# Step name -> headline printed (in bold) before the step runs.
step_description = {
    'Missing data rate': 'Compute missing data rate for males and females',
    'Different genotype': 'Case/control nonrandom missingness test',
}

# Run the steps in insertion order, announcing each one first.
for step_name in variant_qc_steps:
    step_func, step_args = variant_qc_steps[step_name]
    print(f"\033[1m{step_description[step_name]}.\033[0m")
    step_func(*step_args)

[1mCompute missing data rate for males and females.[0m
PLINK v1.90b7.4 64-bit (18 Aug 2024)           www.cog-genomics.org/plink/1.9/
(C) 2005-2024 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/luis/data/rawdata-sexupdated/outputData/variant_qc_results/luxgiant_res-missing-males-only.log.
Options in effect:
  --bfile /home/luis/data/rawdata-sexupdated/outputData/ancestry_results/clean_files/luxgiant_res-ancestry-clean
  --chr 24
  --filter-males
  --memory 37855.0
  --missing
  --out /home/luis/data/rawdata-sexupdated/outputData/variant_qc_results/luxgiant_res-missing-males-only

63927 MB RAM detected; reserving 37855 MB for main workspace.
5729 out of 1842636 variants loaded from .bim file.
11182 people (7468 males, 3714 females) loaded from .fam.
11182 phenotype values loaded from .fam.
3714 people removed due to gender filter (--filter-males).
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 7468 founders and 

In [8]:
# Summarise the failing variants using the thresholds chosen in the widgets.
# The marker call-rate threshold used to be a literal 0.2, which silently
# ignored the "Missing data rate for variants" widget; it now comes from
# variant_params so both thresholds are driven by the same inputs.
# The call stays the last expression so its result is shown as the cell output.
variant.get_fail_variants(
    marker_call_rate_thres=variant_params['miss_data_rate'],
    case_controls_thres=variant_params['diff_genotype_rate'],
)

Unnamed: 0,Failure,count
0,Different genotype call rate,12617
1,Missing data rate on females,10261
2,Missing data rate on males,9
3,Duplicated SNPs,-717
4,Total,22170


In [9]:
# Drop the failing variants and write the cleaned fileset.
# Per the recorded PLINK log for this cell, the call runs PLINK with
# --exclude on fail_markers.txt together with --autosome, --geno 0.1,
# --hwe 5e-08, --maf 5e-08 and --make-bed, writing <output_name>-variantQCed.
# NOTE(review): --maf has the same value as --hwe (5e-08) in that log — confirm
# this is the intended minor-allele-frequency cutoff inside VariantQC.
variant.execute_drop_variants()

PLINK v1.90b7.4 64-bit (18 Aug 2024)           www.cog-genomics.org/plink/1.9/
(C) 2005-2024 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/luis/data/rawdata-sexupdated/outputData/variant_qc_results/clean_files/luxgiant_res-variantQCed.log.
Options in effect:
  --autosome
  --bfile /home/luis/data/rawdata-sexupdated/outputData/ancestry_results/clean_files/luxgiant_res-ancestry-clean
  --exclude /home/luis/data/rawdata-sexupdated/outputData/variant_qc_results/fail_samples/fail_markers.txt
  --geno 0.1
  --hwe 5e-08
  --maf 5e-08
  --make-bed
  --out /home/luis/data/rawdata-sexupdated/outputData/variant_qc_results/clean_files/luxgiant_res-variantQCed

63927 MB RAM detected; reserving 31963 MB for main workspace.
1772816 out of 1842636 variants loaded from .bim file.
11182 people (7468 males, 3714 females) loaded from .fam.
11182 phenotype values loaded from .fam.
--exclude: 1752147 variants remaining.
Using 1 thread (no multithreaded calculations invoke

{'pass': True,
 'step': 'remove_markers',
 'output': {'plink_out': '/home/luis/data/rawdata-sexupdated/outputData/variant_qc_results'}}