# GWAS workflow
This notebook contains a typical workflow for running a genome-wide association study (GWAS).
In this case, we study a set of cardiac morphological and functional parameters of the heart ventricles extracted from shape models derived from cardiovascular magnetic resonance (CMR).

In [None]:
import os, shlex
from subprocess import call, check_output
# Ask git for the top-level directory of the repository and make it the
# working directory, so that the relative paths and the `src` / `main`
# imports used in the following cells resolve from the repository root.
# os.fsdecode decodes with the filesystem encoding, so a checkout located
# under a path containing non-ASCII characters no longer raises
# UnicodeDecodeError (which the previous .decode('ascii') did).
repo_rootdir = os.fsdecode(
    check_output(shlex.split("git rev-parse --show-toplevel")).strip()
)
os.chdir(repo_rootdir)

In [None]:
import src.auxiliary

In [None]:
# Import modules
import ipywidgets as widgets
import pandas as pd
import rpy2
import yaml
from copy import deepcopy
import re

In [None]:
import sys
import src.run_gwas as gwas 
from pprint import pprint

In [None]:
from src.run_gwas import GWAS_Run

### Define GWAS configuration

In [None]:
# Directory that holds the YAML configuration files.
config_dir = "config_files"

# Offer every entry of config_dir whose name ends in "yaml" in a dropdown,
# with "ref_config.yaml" pre-selected.
yaml_files = []
for entry in os.listdir(config_dir):
    if entry.endswith("yaml"):
        yaml_files.append(entry)

w = widgets.Dropdown(options=yaml_files, value="ref_config.yaml")
display(w)

In [None]:
# Load the configuration file currently selected in the dropdown widget `w`.
config_file = os.path.join(config_dir, w.value)

# Open the file in a context manager so the handle is closed after parsing,
# and pass an explicit Loader: calling yaml.load() without one is deprecated
# since PyYAML 5.1 and raises TypeError in PyYAML 6. FullLoader is what
# PyYAML 5.x used by default when no Loader was given.
# NOTE(review): if the config only uses plain YAML types, prefer
# yaml.safe_load here.
with open(config_file) as config_fh:
    config = yaml.load(config_fh, Loader=yaml.FullLoader)

pprint(config)

##### Define file name rules

In [None]:
from main import adjust_for_covariates, generate_summary_and_figures, extract_formatter_tokens

In [None]:
# Load the rules that map configuration option values to file name fragments.
# The file is opened in a context manager so the handle is closed after
# parsing, and an explicit Loader is passed: yaml.load() without one is
# deprecated since PyYAML 5.1 and raises TypeError in PyYAML 6.
# NOTE(review): the rules are later indexed with tuple keys, so this file may
# rely on Python-specific tags (e.g. !!python/tuple) that yaml.safe_load
# rejects -- confirm before switching to a stricter loader.
name_rules_file = os.path.join(config_dir, "filename_rules/filename_rules.yaml")
with open(name_rules_file) as name_rules_fh:
    name_rules = yaml.load(name_rules_fh, Loader=yaml.FullLoader)

pprint(name_rules)

In [None]:
# Format-string template for the run suffix: three named placeholders joined
# by double underscores. Each placeholder is filled in a later cell.
suffix_pattern = "{covariates}__{sample_white_lists}__{quality_control}"

In [None]:
# Extract the formatter tokens of suffix_pattern. The result is used further
# down as a mapping (items are assigned by token name and it is expanded with
# ** into str.format).
tokens = extract_formatter_tokens(suffix_pattern)

In [None]:
# Translate the configured value of every suffix token into its file name
# fragment, then build the suffix and store it in the configuration.
for token in tokens:
    # Tokens without a matching configuration entry keep their current value.
    if token not in config:
        continue
    option_value = config[token]
    if isinstance(option_value, list):
        # lists are unhashable, so the rules are keyed by the equivalent tuple
        option_value = tuple(option_value)
    tokens[token] = name_rules[token][option_value]

suffix = suffix_pattern.format(**tokens)
config["suffix"] = suffix

In [None]:
# Show the configuration again now that the "suffix" entry has been added.
pprint(config)

In [None]:
# Fill in the placeholders of each filename pattern with values taken from
# the configuration (None when the configuration has no such entry).
# All resolved names except "gwas" are moved from config["filename_patterns"]
# to config["filenames"]; the "gwas" entry is resolved in place and stays
# under config["filename_patterns"].
config["filenames"] = {}
filename_patterns = config["filename_patterns"]
for _fp in ("phenotype", "phenotype_intermediate", "tmpdir", "gwas"):
    fp = filename_patterns[_fp]
    tokens = extract_formatter_tokens(fp)
    values = {token: config.get(token) for token in tokens}
    filename = fp.format(**values)

    if _fp == "gwas":
        filename_patterns[_fp] = filename
    else:
        filename_patterns.pop(_fp, None)
        config["filenames"][_fp] = filename

In [None]:
# Set the "chromosomes" option to 22 for this run.
# NOTE(review): whether this means "chromosome 22 only" or "22 chromosomes"
# depends on how GWAS_Run interprets the value -- confirm.
config["chromosomes"] = 22

In [None]:
# Display the configuration after the filename and chromosome updates.
config

### Adjust for covariates and inverse-normalise
The studied phenotypes were found to be strongly associated with variables such as gender, height, BMI, age and blood pressure.
To standardize the phenotypes, we adjust them for all of these covariates.

In [None]:
# Run the covariate-adjustment step (imported from main) with the
# configuration assembled in the previous cells.
adjust_for_covariates(config)

### Run GWAS

In [None]:
# Create a GWAS run from the configuration and execute it.
# NOTE(review): this rebinds the name `gwas`, which was previously the alias
# of the src.run_gwas module (`import src.run_gwas as gwas`); after this cell
# `gwas` refers to the GWAS_Run instance, not the module.
gwas = GWAS_Run(config)
gwas.run()

In [None]:
# Display the configuration after the GWAS run.
config

### Generate figures