# GWAS workflow
This notebook contains a typical workflow for running a genome-wide association study (GWAS).
In this case, we study a set of cardiac morphological and functional parameters of the heart ventricles extracted from shape models derived from cardiovascular magnetic resonance (CMR).

In [1]:
# Import modules
import json
import os
import shutil

import ipywidgets as widgets
import pandas as pd
import rpy2

## Import my modules
import GWAS_pipeline.code.run_gwas as gwas 

## Preprocess data (not yet implemented here; this is done separately)


### Filter by ethnicity (or control for population stratification)
Accounting for population stratification is essential to avoid spurious associations in the GWAS.
In this case, we restrict the analysis to the largest ethnic group in UK Biobank (British).

### Adjust for covariates
The studied phenotypes were found to be strongly associated with variables such as gender, height, BMI, age and blood pressure.
To standardize the phenotypes, we adjust them for all of these covariates.

### Inverse-normalization
This is a common procedure in which the phenotype values are mapped to new values that follow a standard normal distribution. This step is necessary when the usual hypothesis test for GWAS is applied.


## Run GWAS with Plink

### Select configuration file

In [2]:
# Directory holding the pipeline's YAML configuration files.
yaml_dir = "GWAS_pipeline/code/yaml_files/"

# Offer only files named config*.yaml. The list is sorted because os.listdir
# returns entries in arbitrary order, which would make the menu order unstable.
yaml_options = sorted(
    fname
    for fname in os.listdir(yaml_dir)
    if fname.startswith("config") and fname.endswith(".yaml")
)

# Preselect the usual configuration when it is present. Passing a value that
# is not among the options makes the widget constructor fail, so fall back to
# the first available file (or to no selection when the directory is empty).
default_yaml = 'config_multix_LV.yaml'
if default_yaml in yaml_options:
    initial_yaml = default_yaml
elif yaml_options:
    initial_yaml = yaml_options[0]
else:
    initial_yaml = None

w_yaml = widgets.Dropdown(
    options=yaml_options,
    description='Yaml File',
    disabled=False,
    value=initial_yaml,
)

display(w_yaml)

Dropdown(description='Yaml File', index=1, options=('config_local.yaml', 'config_multix_LV.yaml', 'config_mult…

In [3]:
# Full path of the configuration file picked in the dropdown.
yaml_config_file = os.path.join("GWAS_pipeline/code/yaml_files/", w_yaml.value)

# Build the run object from that configuration file.
gwas_run = gwas.GWAS_Run(yaml_config_file)

### Print configuration file

In [4]:
# Show the configuration held by the run object, pretty-printed with sorted keys.
print(json.dumps(gwas_run.config, indent=4, sort_keys=True))

# Clean output directory before GWAS run.
# shutil.rmtree replaces the former `rm -Rf` shell call: it needs no shell and
# raises if the directory cannot be removed, instead of failing silently and
# leaving results of an earlier run in place.
# NOTE(review): the path is hard-coded; presumably it should be derived from
# the run configuration -- confirm.
gwas_output_dir = "/MULTIX/DATA/OUTPUT/gwas"
if os.path.isdir(gwas_output_dir):
    shutil.rmtree(gwas_output_dir)

{
    "chromosomes": 22,
    "data_dir": "/MULTIX/DATA/INPUT",
    "exec": {
        "plink": "plink"
    },
    "filename_patterns": {
        "genotype": {
            "bed": "{data_dir}/calls_filtered_31802/ukb_cal_chr{{chromosome}}_v2_31802_indiv.bed",
            "bim": "{data_dir}/calls_filtered_31802/ukb_cal_chr{{chromosome}}_v2_31802_indiv.bim",
            "fam": "{data_dir}/calls_filtered_31802/ukb_cal_chr{{chromosome}}_v2_31802_indiv.fam"
        },
        "gwas": "gwas/{phenotype}/gwas__{phenotype}{suffix}",
        "phenotype": {
            "covariates": "",
            "phenotype_file": "Cardiac_InvNorm_Indexes_Adj_11350.tsv",
            "phenotype_file_tmp": "tmp/Cardiac_InvNorm_Indexes_Adj_11350_tmp.tsv"
        }
    },
    "individuals": "{data_dir}/ids_list/cmr_british_ids.txt",
    "options": {
        "covariate_adjustment": true,
        "ethnicity": "white",
        "inverse_normalization": true,
        "segmentation_method": "automatic"
    },
    "output_di

### Select phenotypes and chromosomes

In [5]:
# Phenotype selector: everything in gwas_run.phenotypes_all is offered and
# preselected.
all_phenotypes = gwas_run.phenotypes_all
w_phenos = widgets.SelectMultiple(
    description='Phenotypes:',
    options=all_phenotypes,
    value=all_phenotypes,
    disabled=False,
)
display(w_phenos)

# Chromosome selector: "all" (preselected) or any of the numbers 1 to 22.
chromosome_choices = ["all", *range(1, 23)]
w_chr = widgets.SelectMultiple(
    description='Chromosomes',
    options=chromosome_choices,
    value=['all'],
    disabled=False,
)

display(w_chr)


SelectMultiple(description='Phenotypes:', index=(0, 1, 2, 3, 4), options=('LVEDV_automatic', 'LVESV_automatic'…

SelectMultiple(description='Chromosomes', index=(0,), options=('all', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1…

In [6]:
# Expand the "all" choice into the explicit list of chromosomes 1 to 22;
# otherwise keep exactly what was selected in the widget.
if 'all' in w_chr.value:
    gwas_run.chromosomes = list(range(1, 23))
else:
    gwas_run.chromosomes = w_chr.value

In [7]:
# Transfer the widget selections to the run object. Selecting "all" stands for
# the explicit list of chromosomes 1 to 22.
selected_chromosomes = w_chr.value
if 'all' in selected_chromosomes:
    selected_chromosomes = list(range(1, 23))
gwas_run.chromosomes = selected_chromosomes
gwas_run.phenotypes = w_phenos.value

# Echo what the run object now holds, chromosomes first.
for selection in (gwas_run.chromosomes, gwas_run.phenotypes):
    print(selection)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
('LVEDV_automatic', 'LVESV_automatic', 'LVSV_automatic', 'LVEF_automatic', 'LVM_automatic')


In [8]:
# Launch the GWAS for the phenotypes and chromosomes set on the run object.
# The call reports progress per phenotype and per chromosome as it goes.
gwas_run.run()

Processing LVEDV_automatic...
  Processing chromosome 1...
  Processing chromosome 2...
  Processing chromosome 3...
  Processing chromosome 4...
  Processing chromosome 5...
  Processing chromosome 6...
  Processing chromosome 7...
  Processing chromosome 8...
  Processing chromosome 9...
  Processing chromosome 10...
  Processing chromosome 11...
  Processing chromosome 12...
  Processing chromosome 13...
  Processing chromosome 14...
  Processing chromosome 15...
  Processing chromosome 16...
  Processing chromosome 17...
  Processing chromosome 18...
  Processing chromosome 19...
  Processing chromosome 20...
  Processing chromosome 21...
  Processing chromosome 22...
Processing LVESV_automatic...
  Processing chromosome 1...
  Processing chromosome 2...
  Processing chromosome 3...
  Processing chromosome 4...
  Processing chromosome 5...
  Processing chromosome 6...
  Processing chromosome 7...
  Processing chromosome 8...
  Processing chromosome 9...
  Processing chromosome 10..

## Generate Manhattan plots

For this, we use the R `qqman` package. To run R code within this Python notebook, we load the `rpy2` IPython extension.
R commands can then be run by prefixing them with `%R`.

In [9]:
# To plot in R: load the rpy2 IPython extension, which provides the %R magic
# used in the rest of this notebook.
%load_ext rpy2.ipython
# Attach the R packages used by the plotting cell, hiding their start-up
# messages: qqman for the Manhattan plots and tidyverse (presumably for
# read_delim, filter and %>%).
%R suppressMessages(require(qqman))
%R suppressMessages(require(tidyverse))

array([1], dtype=int32)

### Control panel

In [10]:
# Phenotype to plot, chosen among the phenotypes set on the run object.
w_pheno = widgets.Dropdown(
    options=gwas_run.phenotypes,
    description='Phenotype',
    disabled=False,
)

display(w_pheno)

# Chromosome to plot: "all" (the default) or one of the chromosomes set on
# the run object.
# `rows` is intentionally not passed: it is an option of the list-box widgets
# (Select / SelectMultiple), not of Dropdown, where it is an unrecognized
# constructor argument.
w_chr = widgets.Dropdown(
    options=['all'] + list(gwas_run.chromosomes),
    value='all',
    description='chromosomes:',
)

display(w_chr)

Dropdown(description='Phenotype', options=('LVEDV_automatic', 'LVESV_automatic', 'LVSV_automatic', 'LVEF_autom…

Dropdown(description='chromosomes:', options=('all', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…

### Plot

In [11]:
gwas_filename = gwas_run.gwas_fp.format(phenotype=w_pheno.value, suffix=gwas_run.output_suffix)
img_filename = gwas_run.gwas_fp.format(phenotype=w_pheno.value, suffix=gwas_run.output_suffix) + ".png"

chromosome = int(w_chr.value) if w_chr.value != "all" else "all"
chromosome = w_chr.value

%R -i img_filename
%R -i gwas_filename

if chromosome == "all":
  if not os.path.exists(img_filename):
    %R gwas_df <- suppressMessages(read_delim(gwas_filename, delim="\t")  )
    %R png(img_filename, width=800, height=400); manhattan(gwas_df %>% filter(!is.na(P))); dev.off()
  image = open(img_filename, "rb").read()
  w_img = widgets.Image(
    value=image,
    format='png',
    width=800,
    height=400,
  )
  display(w_img)

else:
    %R gwas_df <-  suppressMessages(read_delim(gwas_filename, delim="\t"))
    %R -i chromosome    
    %R manhattan(gwas_df %>% filter(!is.na(P) & CHR == chromosome), main= glue::glue("{chromosome}")
    

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x03 \x00\x00\x01\x90\x08\x02\x00\x00\x00\xd9G\x93c\x…