In [None]:
import sys
import os

import pandas as pd

import ipywidgets as widgets
from IPython.display import display

# Make the parent of the current working directory importable (once only),
# so the project import below can be resolved.
library_path = os.path.abspath(os.pardir)
if sys.path.count(library_path) == 0:
    sys.path.append(library_path)

from ideal_genom.manhattan_type import manhattan_draw

In [None]:
# Text inputs for the project folder and the three file names read later on.
def _text_input(default, label):
    """Return a Text widget pre-filled with `default` and labelled `label`."""
    return widgets.Text(
        value=default,
        description=label,
        style={'description_width': 'initial'},
    )

# NOTE(review): the default folder is an absolute path on one machine; edit it
# in the widget before running the cells below on another system.
input_path = _text_input('/home/luis/data/gwasResult/', 'Path to project folder:')
input_name = _text_input('meta_analysis_results.out', 'Name of GWAS summary file:')
highlight = _text_input('meta_top_hits.csv', 'Name of file with SNPs to highlight:')
annotate = _text_input('meta_top_hits.csv', 'Name of file with SNPs to annotate:')

# Render all four inputs under this cell
display(input_path, input_name, highlight, annotate)

def get_params():
    """Return the current widget texts as a 4-tuple.

    Order: project folder, GWAS summary file name, highlight file name,
    annotation file name.
    """
    fields = (input_path, input_name, highlight, annotate)
    return tuple(field.value for field in fields)

In [None]:
# Snapshot the widget values and echo them, numbered from 1, for a quick check.
path_params = get_params()
for position, entry in enumerate(path_params, start=1):
    print(f"Parameter {position}: {entry}")

In [None]:
# Free-text list of the columns to load from the GWAS summary file.
# The text is split on commas by the cell that reads the file.
cols_touse = widgets.Textarea(
    value="chromosome, position, rs_number, p-value, n_studies",
    description='Columns to use on the Manhattan plot (comma-separated):',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='50%')
)
display(cols_touse)

def get_cols():
    """Return the raw comma-separated column list currently in the text area."""
    return cols_touse.value

In [None]:
cols = get_cols()

# Column names in the order the user typed them; blank entries (e.g. from a
# trailing comma) are dropped so they are not passed on as column names.
gwas_columns = [name.strip() for name in cols.split(',') if name.strip()]

# `usecols` only selects columns: the resulting frame keeps the file's own
# column order. Re-selecting with the requested list pins the order, which
# matters because the next cell renames the columns by position.
df_gwas = pd.read_csv(
    os.path.join(path_params[0], path_params[1]), sep='\t', usecols=gwas_columns
)[gwas_columns]
df_gwas.head(5)

In [None]:
# Keep only rows whose `n_studies` equals 2, then drop that helper column.
# The guard makes the cell safe to re-run: after the first run `n_studies` is
# already gone from `df_gwas`, and filtering on it again would raise KeyError.
if 'n_studies' in df_gwas.columns:
    df_gwas = (
        df_gwas[df_gwas['n_studies'] == 2]
        .reset_index(drop=True)
        .drop(columns='n_studies')
    )

# Positional rename: assumes the four remaining columns are, in order,
# chromosome, position, SNP identifier and p-value.
df_gwas.columns = ['CHR', 'POS', 'SNP', 'P']
df_gwas.head(5)

In [None]:
# Inputs naming the columns to read from the highlight / annotation files.
rsID = widgets.Text(
    value='rs_number',
    description='Column name with the rsID:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='20%')
)

# Bound to `gene_col_widget` rather than `gene_col`: a later cell assigns
# `gene_col = get_gene_col()`, which would replace a widget of that name with
# a plain string and make get_gene_col() fail (str has no `.value`) as soon as
# that cell is run a second time.
gene_col_widget = widgets.Text(
    value='Gene',
    description='Column name with the nearest gene name:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='20%')
)

highlight_hue_col = widgets.Text(
    value='Hue',
    description='Column name with hue for highlights:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='20%')
)

display(rsID, gene_col_widget, highlight_hue_col)

def get_rsID():
    """Return the text currently entered in the rsID column-name widget."""
    return rsID.value
def get_gene_col():
    """Return the text currently entered in the gene column-name widget."""
    return gene_col_widget.value
def get_highlight_hue_col():
    """Return the text currently entered in the highlight-hue column-name widget."""
    return highlight_hue_col.value

In [None]:
gwas_rsID = get_rsID()
hue_col = get_highlight_hue_col()

highlight_file = path_params[2]

# Both conditions must hold, hence `and`. With `or` the test is true for every
# value, so the "no file" branch could never run and an empty file name made
# the code try to read the project folder itself as a CSV.
if highlight_file is not None and highlight_file != '':
    highlit_path = os.path.join(path_params[0], highlight_file)
    if os.path.exists(highlit_path):
        df_high = pd.read_csv(
            highlit_path, sep=','
        )
        # The hue column is optional; the rsID column is always selected.
        if hue_col not in df_high.columns:
            to_highlight = df_high[[gwas_rsID]].copy()
        else:
            to_highlight = df_high[[gwas_rsID, hue_col]].copy()
        # Only the selected columns are needed from here on.
        del df_high
    else:
        print('Path to file with SNPs to highlight does not exist')
        to_highlight = pd.DataFrame()
else:
    print('No file with SNPs to highlight')
    to_highlight = pd.DataFrame()

to_highlight.head()

In [None]:
gene_col = get_gene_col()
annotate_file = path_params[3]

# Handle the "nothing to read" cases first; each falls back to an empty frame.
if annotate_file is None or annotate_file == '':
    print('No file with SNPs to annotate')
    to_annotate = pd.DataFrame()
else:
    anno_path = os.path.join(path_params[0], annotate_file)
    if not os.path.exists(anno_path):
        print('Path to file with SNPs to annotate does not exist')
        to_annotate = pd.DataFrame()
    else:
        df_anno = pd.read_csv(anno_path, sep=',')
        # The gene column is optional; the rsID column is always selected.
        if gene_col in df_anno.columns:
            to_annotate = df_anno[[gwas_rsID, gene_col]].copy()
        else:
            to_annotate = df_anno[[gwas_rsID]]
        del df_anno

to_annotate.head()

In [None]:
# Inputs naming the columns of the GWAS table that are handed to the plot call.
_label_style = {'description_width': 'initial'}

SNP_col = widgets.Text(
    value='SNP', description='Column with rsID:', style=dict(_label_style)
)
CHR_col = widgets.Text(
    value='CHR', description='Column with chromosome:', style=dict(_label_style)
)
POS_col = widgets.Text(
    value='POS', description='Column with base-pair position:', style=dict(_label_style)
)
P_col = widgets.Text(
    value='P', description='Column with p-values:', style=dict(_label_style)
)

display(SNP_col, CHR_col, POS_col, P_col)

def get_col_names():
    """Return the current widget texts as (SNP, CHR, POS, P) column names."""
    fields = (SNP_col, CHR_col, POS_col, P_col)
    return tuple(field.value for field in fields)

In [None]:
snp_col, chr_col, pos_col, p_col = get_col_names()

# Give the rsID column of both helper tables the same name as the SNP column
# chosen for the GWAS table, and give the gene / hue columns fixed names.
annotate_renames = {gwas_rsID: snp_col, gene_col: 'GENENAME'}
highlight_renames = {gwas_rsID: snp_col, hue_col: 'hue'}

to_annotate = to_annotate.rename(columns=annotate_renames)
to_highlight = to_highlight.rename(columns=highlight_renames)

In [None]:
# Gather the plot arguments in one mapping, then draw.
# NOTE(review): `gen_col` receives the gene column name as typed by the user,
# although that column of `to_annotate` was renamed to 'GENENAME' in the
# previous cell - confirm which name manhattan_draw expects.
plot_args = {
    'data_df': df_gwas,
    'chr_col': chr_col,
    'pos_col': pos_col,
    'p_col': p_col,
    'snp_col': snp_col,
    'plot_dir': path_params[0],
    'to_highlight': to_highlight,
    'to_annotate': to_annotate,
    'save_name': 'manhattan_plot_meta_indian.jpeg',
    'gen_col': gene_col,
}
manhattan_draw(**plot_args)