# Association Analysis using a generalized linear model (GLM)

This notebook is a guide on how to use the `IDEAL-GENOM` library to perform a genome-wide association study (GWAS). The cornerstone of the proposed analysis is a generalized linear model (GLM).

In [1]:
import sys
import os

import pandas as pd

# Make the directory one level above the notebook importable; presumably this
# lets the `ideal_genom` import that follows resolve against the local checkout.
library_path = os.path.abspath(os.pardir)
if library_path not in sys.path:
    # Append only once so re-running the cell does not grow sys.path.
    sys.path.append(library_path)

from ideal_genom.gwas.gwas_fixed import GWASfixed

INFO:matplotlib.font_manager:Failed to extract font properties from /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf: In FT2Font: Can not load face (unknown file format; error code 0x2)
INFO:matplotlib.font_manager:generated new fontManager


In [9]:
import ipywidgets as widgets
from IPython.display import display

# Interactive text boxes for the file-system locations and file-name prefixes.
# NOTE(review): the default values are absolute paths on one specific machine;
# edit them in the rendered widgets before running the following cells.
def _text_box(default, label):
    """Return a Text widget with the given default value and description.

    Every box uses `description_width='initial'` and a 50%-wide layout; a
    fresh style dict and Layout object are created for each widget.
    """
    return widgets.Text(
        value=default,
        description=label,
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='50%'),
    )


input_path = _text_box(
    '/media/luisggon/LaCie/valente_gwas/outputData/post_imputation/analysis_ready',
    'Path to input zip files:',
)
input_name = _text_box('test_valente', 'Prefix of PLINK binary files:')
dependables_path = _text_box(
    '/media/luisggon/LaCie/valente_gwas/dependables',
    'Path to dependable files:',
)
output_path = _text_box(
    '/media/luisggon/LaCie/valente_gwas/outputData/',
    'Path to output files:',
)
output_name = _text_box('test_valente_gwas_fix', 'Name of the resulting files:')

# Render the five boxes in the order they are later read back.
display(input_path, input_name, dependables_path, output_path, output_name)

def get_params():
    """Read the current contents of the five text widgets.

    Returns
    -------
    tuple
        The `.value` of `input_path`, `input_name`, `dependables_path`,
        `output_path` and `output_name`, in that order.
    """
    text_boxes = (input_path, input_name, dependables_path, output_path, output_name)
    return tuple(box.value for box in text_boxes)

Text(value='/media/luisggon/LaCie/valente_gwas/outputData/post_imputation/analysis_ready', description='Path t…

Text(value='test_valente', description='Prefix of PLINK binary files:', layout=Layout(width='50%'), style=Text…

Text(value='/media/luisggon/LaCie/valente_gwas/dependables', description='Path to dependable files:', layout=L…

Text(value='/media/luisggon/LaCie/valente_gwas/outputData/', description='Path to output files:', layout=Layou…

Text(value='test_valente_gwas_fix', description='Name of the resulting files:', layout=Layout(width='50%'), st…

In [10]:
# Capture the widget values once and echo them so the run is self-documenting.
path_params = get_params()

# Labels are listed in the same order as the tuple returned by get_params().
param_labels = (
    'input_path: ',
    'input_name: ',
    'dependables: ',
    'output_path: ',
    'output_name: ',
)
for param_label, param_value in zip(param_labels, path_params):
    print(param_label, param_value)

input_path:  /media/luisggon/LaCie/valente_gwas/outputData/post_imputation/analysis_ready
input_name:  test_valente
dependables:  /media/luisggon/LaCie/valente_gwas/dependables
output_path:  /media/luisggon/LaCie/valente_gwas/outputData/
output_name:  test_valente_gwas_fix


In [11]:
# Give the positional entries of `path_params` readable names before passing
# them on; the order matches the tuple returned by get_params().
in_dir, in_prefix, deps_dir, out_dir, out_prefix = path_params

gwas_glm = GWASfixed(
    input_path=in_dir,
    input_name=in_prefix,
    dependables_path=deps_dir,
    output_path=out_dir,
    output_name=out_prefix,
)

[1;32mAnalysis of GWAS data using a fixed model initialized.[0m


In [12]:
# Numeric input boxes for the association-analysis settings.
def _numeric_box(widget_cls, default, label):
    """Return a `widget_cls` instance with the given default and description.

    Every box uses `description_width='initial'` and a 50%-wide layout; a
    fresh style dict and Layout object are created for each widget.
    """
    return widget_cls(
        value=default,
        description=label,
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='50%'),
    )


maf = _numeric_box(widgets.FloatText, 0.05, 'Minor Allele Frequency:')
mind = _numeric_box(widgets.FloatText, 0.1, 'Individual missing rate:')
hwe = _numeric_box(widgets.FloatText, 5e-8, 'Hardy-Weinberg Equilibrium:')
# NOTE(review): 0.1 asks for a 10% interval; 0.95 is the conventional size —
# confirm this default is intended.
ci = _numeric_box(widgets.FloatText, 0.1, 'Confidence interval:')

# NOTE(review): `pca` is created but, in the cells visible here, it is neither
# displayed nor read by get_gwas_params() — confirm whether it is still needed.
pca = _numeric_box(widgets.IntText, 10, 'Number of Principal Components:')

display(maf, mind, hwe, ci)

def get_gwas_params():
    """Collect the current values of the numeric GWAS widgets.

    Returns
    -------
    dict
        Keys 'maf', 'hwe', 'mind' and 'ci' (in that insertion order), each
        mapped to the `.value` of the widget of the same name.
    """
    return {
        'maf': maf.value,
        'hwe': hwe.value,
        'mind': mind.value,
        'ci': ci.value,
    }

FloatText(value=0.05, description='Minor Allele Frequency:', layout=Layout(width='50%'), style=DescriptionStyl…

FloatText(value=0.1, description='Individual missing rate:', layout=Layout(width='50%'), style=DescriptionStyl…

FloatText(value=5e-08, description='Hardy-Weinberg Equilibrium:', layout=Layout(width='50%'), style=Descriptio…

FloatText(value=0.1, description='Confidence interval:', layout=Layout(width='50%'), style=DescriptionStyle(de…

In [13]:
# Snapshot the current widget values into a plain dict; the bare name on the
# last line makes the notebook display it.
gwas_params = get_gwas_params()
gwas_params

{'maf': 0.05, 'hwe': 5e-08, 'mind': 0.1, 'ci': 0.1}

In [14]:
# Keyword arguments for the association run, copied from the widget snapshot.
association_kwargs = {
    key: gwas_params[key] for key in ('maf', 'mind', 'hwe', 'ci')
}

# Each entry maps a step name to (bound method, keyword arguments).
# Dict insertion order is the execution order.
gwas_steps = {
    'train_model': (gwas_glm.fixed_model_association_analysis, association_kwargs),
    'top_hits'   : (gwas_glm.get_top_hits, {'maf': gwas_params['maf']}),
}

# Human-readable banner printed before each step runs.
step_description = dict(
    train_model='Train the model',
    top_hits='Get top hits',
)

for step_name in gwas_steps:
    step_func, step_kwargs = gwas_steps[step_name]
    # ANSI bold on / off around the banner text.
    print(f"\033[1m{step_description[step_name]}.\033[0m")
    step_func(**step_kwargs)

[1mTrain the model.[0m
PLINK v2.0.0-a.6.6LM AVX2 AMD (16 Jan 2025)        cog-genomics.org/plink/2.0/
(C) 2005-2025 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /media/luisggon/LaCie/valente_gwas/outputData/gwas_fixed/test_valente_gwas_fix_glm.log.
Options in effect:
  --adjust
  --bfile /media/luisggon/LaCie/valente_gwas/outputData/post_imputation/analysis_ready/test_valente
  --ci 0.1
  --covar /media/luisggon/LaCie/valente_gwas/outputData/post_imputation/analysis_ready/test_valente.eigenvec
  --glm hide-covar omit-ref sex cols=+a1freq,+beta
  --hwe 5e-08
  --maf 0.05
  --mind 0.1
  --out /media/luisggon/LaCie/valente_gwas/outputData/gwas_fixed/test_valente_gwas_fix_glm
  --threads 10

Start time: Wed Jan 22 11:15:07 2025
13795 MiB RAM detected, ~9148 available; reserving 6897 MiB for main workspace.
Using up to 10 threads (change this with --threads).
380 samples (0 females, 0 males, 380 ambiguous; 380 founders) loaded from
/media/luisggon/LaCie/vale

FileNotFoundError: [Errno 2] No such file or directory: '/media/luisggon/LaCie/valente_gwas/outputData/gwas_fixed/test_valente_gwas_fix_glm.PHENO1.glm.logistic.hybrid'