In [None]:
import sys
import os

import pandas as pd

# Make the parent directory importable so the local package resolves without
# being installed; the entry is appended only once even if the cell is re-run.
library_path = os.path.abspath('..')
already_on_path = library_path in sys.path
if not already_on_path:
    sys.path.append(library_path)

from cge_comrare_pipeline.VariantQC import VariantQC

In [None]:
import ipywidgets as widgets
from IPython.display import display

# Text boxes for the input PLINK 1.9 file set and for the output location.
def _path_box(default, label):
    """Return a Text widget pre-filled with `default` and described by `label`.

    Every box uses an 'initial' description width, as in the hand-written
    version of these widgets.
    """
    return widgets.Text(
        value=default,
        description=label,
        style={'description_width': 'initial'},
    )


input_path = _path_box(
    '/home/luis/data/rawdata-sexupdated/outputData/ancestry_results',
    'Path to input plink1.9 files:',
)
input_name = _path_box(
    'luxgiant_res-ancestry-clean',
    'Name of the plink1.9 files:',
)
output_path = _path_box(
    '/home/luis/data/rawdata-sexupdated/outputData/',
    'Path to output files:',
)
output_name = _path_box(
    'luxgiant_res',
    'Name of the resulting files:',
)

# Render all four boxes, in this order, below the cell.
display(input_path, input_name, output_path, output_name)

def get_params():
    """Read the current contents of the four path/name text widgets.

    Returns:
        tuple: ``(input_path, input_name, output_path, output_name)`` taken
        from each widget's ``.value`` at the moment of the call.
    """
    text_boxes = (input_path, input_name, output_path, output_name)
    return tuple(box.value for box in text_boxes)

In [None]:
# Read the widgets once and echo every value so it can be checked before
# the pipeline object is built.
path_params = get_params()
in_dir, in_prefix, out_dir, out_prefix = path_params
print(
    f"Input Path: {in_dir}",
    f"Input Name: {in_prefix}",
    f"Output Path: {out_dir}",
    f"Output Name: {out_prefix}",
    sep="\n",
)

In [None]:
# Numeric input fields (one integer, two floats) for the variant QC settings.
def _number_box(widget_cls, default, label):
    """Instantiate `widget_cls` with a default value and a description label.

    The description width is set to 'initial' for every field.
    """
    return widget_cls(
        value=default,
        description=label,
        style={'description_width': 'initial'},
    )


chr_y = _number_box(widgets.IntText, 24, 'chr_y (int):')
miss_data_rate = _number_box(
    widgets.FloatText, 0.2, 'Missing data rate for variants (float):'
)
diff_genotype_rate = _number_box(
    widgets.FloatText, 1e-4, 'Different genotype rate (float):'
)

# Render the three fields, in this order, below the cell.
display(chr_y, miss_data_rate, diff_genotype_rate)

def get_sample_qc_params():
    """Collect the current values of the three numeric widgets into a dict.

    Despite the name, the returned mapping holds the settings this notebook
    uses for variant QC.

    Returns:
        dict: keys ``'chr-y'``, ``'miss_data_rate'`` and
        ``'diff_genotype_rate'`` (in that order), each mapped to the matching
        widget's ``.value`` at the moment of the call.
    """
    return {
        'chr-y': chr_y.value,
        'miss_data_rate': miss_data_rate.value,
        'diff_genotype_rate': diff_genotype_rate.value,
    }

In [None]:
# Snapshot the numeric widget values into a dict; later cells read from this
# dict, so re-run this cell after changing a widget.
variant_params = get_sample_qc_params()
# Bare expression: the notebook shows the dict as this cell's output.
variant_params

In [None]:
# Build the VariantQC object from whatever the four text boxes hold right now.
variant_qc_kwargs = {
    'input_path': input_path.value,
    'input_name': input_name.value,
    'output_path': output_path.value,
    'output_name': output_name.value,
}
variant = VariantQC(**variant_qc_kwargs)

In [None]:
# Ordered pipeline: step label -> (bound VariantQC method, positional args).
variant_qc_steps = {
    'Missing data rate'         : (variant.execute_missing_data_rate, (variant_params['chr-y'],)),
    'Different genotype'        : (variant.execute_different_genotype_call_rate, ())
}

# Headline printed before each step runs. The keys must mirror
# variant_qc_steps. The previous texts ("Solve hh warnings ...",
# "Perform LD pruning") did not describe the methods called here, so the
# headlines now name the step that is actually executed.
step_description = {
    'Missing data rate'         : 'Compute missing data rate for variants',
    'Different genotype'        : 'Compute different genotype call rate'
}

# Run the steps in insertion order, announcing each one in bold
# (ANSI escape \033[1m ... \033[0m) before calling it.
for name, (func, params) in variant_qc_steps.items():
    print(f"\033[1m{step_description[name]}.\033[0m")
    func(*params)

In [None]:
# Bare expression: the notebook shows whatever get_fail_variants() returns.
# NOTE(review): variant_params['miss_data_rate'] and
# variant_params['diff_genotype_rate'] are collected from the widgets but are
# not passed to any VariantQC call in this notebook — confirm whether this
# call is meant to receive them as thresholds.
variant.get_fail_variants()

In [None]:
# Final step of the notebook. Presumably removes the variants flagged by the
# previous cell from the data set — TODO confirm against the VariantQC source.
variant.execute_drop_variants()