# Association Analysis using a generalized linear model (GLM)

This notebook is a guide on how to use the `IDEAL-GENOM` library to perform a genome-wide association study (GWAS). The cornerstone of the proposed analysis is a generalized linear model (GLM).

In [1]:
import sys
import os

import pandas as pd

# Make the directory one level above the working directory importable.
# The path is appended (not prepended), so already-installed packages keep
# priority; the membership check keeps re-runs of this cell from adding
# duplicate entries.
library_path = os.path.abspath(os.pardir)
already_on_path = library_path in sys.path
if not already_on_path:
    sys.path.append(library_path)

from ideal_genom.gwas.gwas_fixed import GWASfixed

In [4]:
import ipywidgets as widgets
from IPython.display import display

def _text_field(value, description):
    """Return a `widgets.Text` pre-filled with `value` and labelled `description`.

    Every call builds its own `Layout` instance, so the resulting widgets do
    not share layout state with one another.
    """
    return widgets.Text(
        value=value,
        description=description,
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='50%'),
    )


# Interactive text inputs: where the data lives and how results are named.
input_path = _text_field(
    '/media/luis/LaCie/valente_gwas/outputData/preparatory/',
    'Path to input zip files:',
)
input_name = _text_field('test_valente', 'Prefix of PLINK binary files:')
dependables_path = _text_field(
    '/media/luis/LaCie/valente_gwas/dependables/',
    'Path to dependable files:',
)
output_path = _text_field(
    '/media/luis/LaCie/valente_gwas/outputData/',
    'Path to output files:',
)
output_name = _text_field('test_valente_gwas_fix', 'Name of the resulting files:')

# Render the five inputs below the cell, in the same order get_params() uses.
display(input_path, input_name, dependables_path, output_path, output_name)

# Accessor for the current contents of the five text inputs
def get_params():
    """Read the five text widgets and return their current values as a tuple.

    Returns
    -------
    tuple
        ``(input_path, input_name, dependables_path, output_path, output_name)``
        values, in that order, as they are at call time.
    """
    fields = (input_path, input_name, dependables_path, output_path, output_name)
    return tuple(field.value for field in fields)

Text(value='/media/luis/LaCie/valente_gwas/outputData/preparatory/', description='Path to input zip files:', l…

Text(value='test_valente', description='Prefix of PLINK binary files:', layout=Layout(width='50%'), style=Text…

Text(value='/media/luis/LaCie/valente_gwas/dependables/', description='Path to dependable files:', layout=Layo…

Text(value='/media/luis/LaCie/valente_gwas/outputData/', description='Path to output files:', layout=Layout(wi…

Text(value='test_valente_gwas_fix', description='Name of the resulting files:', layout=Layout(width='50%'), st…

In [5]:
# Freeze the widget contents into a tuple and echo each entry for a visual check.
path_params = get_params()

# Labels are listed in the same positional order as the tuple from get_params().
path_labels = (
    'input_path: ',
    'input_name: ',
    'dependables: ',
    'output_path: ',
    'output_name: ',
)
for path_label, path_value in zip(path_labels, path_params):
    print(path_label, path_value)

input_path:  /media/luis/LaCie/valente_gwas/outputData/preparatory/
input_name:  test_valente
dependables:  /media/luis/LaCie/valente_gwas/dependables/
output_path:  /media/luis/LaCie/valente_gwas/outputData/
output_name:  test_valente_gwas_fix


In [None]:
# Give the positional entries of `path_params` readable names before use
# (order matches the tuple returned by get_params()).
data_dir, data_prefix, deps_dir, results_dir, results_prefix = path_params

# Analysis object configured with the input/output locations chosen above.
gwas_glm = GWASfixed(
    input_path=data_dir,
    input_name=data_prefix,
    dependables=deps_dir,
    output_path=results_dir,
    output_name=results_prefix,
)

In [None]:
def _numeric_field(widget_cls, value, description):
    """Instantiate `widget_cls` with a default `value` and a `description` label.

    All fields get the same styling; a new `Layout` is created per call so the
    widgets do not share layout state.
    """
    return widget_cls(
        value=value,
        description=description,
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='50%'),
    )


# User-tunable numeric parameters for the analysis.
maf = _numeric_field(widgets.FloatText, 0.05, 'Minor Allele Frequency:')
mind = _numeric_field(widgets.FloatText, 0.1, 'Individual missing rate:')
hwe = _numeric_field(widgets.FloatText, 5e-8, 'Hardy-Weinberg Equilibrium:')
# NOTE(review): a default of 0.1 is unusual if this is a confidence level
# (0.95 is typical) — confirm what the analysis method expects for `ci`.
ci = _numeric_field(widgets.FloatText, 0.1, 'Confidence interval:')

# NOTE(review): `pca` is created but is not passed to display() below and is
# not read by get_gwas_params(), so it has no effect in the cells shown here.
# TODO confirm whether it should be wired in or removed.
pca = _numeric_field(widgets.IntText, 10, 'Number of Principal Components:')

display(maf, mind, hwe, ci)

def get_gwas_params():
    """Collect the current values of the numeric widgets into a dictionary.

    Returns
    -------
    dict
        Keys ``'maf'``, ``'hwe'``, ``'mind'`` and ``'ci'`` (in that insertion
        order), each mapped to the corresponding widget's value at call time.
    """
    return {
        'maf': maf.value,
        'hwe': hwe.value,
        'mind': mind.value,
        'ci': ci.value,
    }

In [None]:
# Snapshot the widget values now; later edits to the widgets are not reflected
# in this dict unless the cell is re-run.
gwas_params = get_gwas_params()
# Bare trailing expression: the notebook shows the dict as this cell's output.
gwas_params

In [None]:
# Keyword arguments for the association step, copied from the parameter snapshot.
association_kwargs = {key: gwas_params[key] for key in ('maf', 'mind', 'hwe', 'ci')}

# Pipeline table: step name -> (bound method to call, keyword arguments for it).
gwas_steps = {
    'train_model': (gwas_glm.fixed_model_association_analysis, association_kwargs),
    'top_hits': (gwas_glm.get_top_hits, {'maf': gwas_params['maf']}),
}

# Human-readable label announced before each step runs.
step_description = {
    'train_model': 'Train the model',
    'top_hits': 'Get top hits',
}

# Execute the steps in insertion order; the ANSI codes \033[1m ... \033[0m
# switch bold text on and off around the label.
for step_name, (step_func, step_kwargs) in gwas_steps.items():
    print(f"\033[1m{step_description[step_name]}.\033[0m")
    step_func(**step_kwargs)