In [2]:
## Manhattan plot using Bokeh
import numpy as np
import pandas as pd
from math import pi
from bokeh.io import *
from bokeh.plotting import *
from bokeh.transform import *
from bokeh.embed import *
from bokeh.layouts import *
from bokeh.models import * 



# Load the GWAS summary table (tab-separated).
# Forward slashes keep this relative path valid on Windows, macOS and Linux,
# and avoid the invalid "\T" escape sequence the backslash spelling contained.
df = pd.read_csv('./TSVs/T1D_GWAS_add.tsv', sep='\t')

# Colour points by chromosome: a 22-entry alternating grey/black palette is
# mapped linearly over chr_id values 1..22 (assumes chr_id holds integers in
# that range), so adjacent chromosomes get different colours.
index_cmap = linear_cmap('chr_id', palette=['grey', 'black'] * 11, low=1, high=22)

# Build the figure.
# NOTE(review): newer Bokeh releases spell `plot_height` as `height`; confirm
# against the installed Bokeh version before renaming.
p = figure(
    frame_width=800,            # width of the inner plotting frame
    plot_height=500,            # height of the plot
    title=None,                 # title is added in the HTML instead
    toolbar_location="right",
    tools="pan,hover,xwheel_zoom,zoom_out,box_zoom,reset,box_select,tap,undo,save",
    tooltips="@rsid: (@chr_id,@chr_pos)",  # hover text built from the rsid, chr_id and chr_pos columns
)

# Manhattan scatter: one circle per row of the table.
p.circle(
    x='cumulative_pos',
    y='-logp',
    source=df,                  # columns are looked up by name in the DataFrame
    fill_alpha=0.8,             # opacity of the marker fill (not a line thickness)
    fill_color=index_cmap,      # per-chromosome colour from the mapper above
    size=6,                     # marker size
    selection_color="red",      # colour of selected points
    hover_color="green",        # colour of the point under the cursor
)

p.xaxis.axis_label = 'Chromosome'
p.yaxis.axis_label = '-logp'

# The x coordinate is a cumulative genome position, so place one tick per
# chromosome (entries are in chromosome order, 1..22) and relabel each tick
# with its chromosome number. The labels are derived from this single list so
# that ticks and labels cannot drift apart.
chromosome_ticks = [
    119895261, 373943002, 537393504, 716119012, 834845071, 964538826.5,
    1147957441, 1306654016, 1396415411, 1540730893, 1674222993, 1823778230,
    1930583064, 2055942929, 2141945578, 2202426536, 2302961360, 2388903150,
    2436333506.5, 2482024730, 2529377491, 2584586808,
]
p.xaxis.ticker = chromosome_ticks
p.xaxis.major_label_overrides = {
    tick: str(chromosome)
    for chromosome, tick in enumerate(chromosome_ticks, start=1)
}

# Text box whose value changes are logged to the browser console by the
# attached JavaScript callback.
text_input = TextInput(value="default", title="Label:")
text_input.js_on_change("value", CustomJS(code="""
    console.log('text_input: value=' + this.value, this.toString())
"""))

show(text_input)
show(p)

In [None]:

### THE OLD SEABORN/MATPLOTLIB MANHATTAN PLOT (commented out; superseded by the Bokeh version above)

#import pandas as pd
#import numpy as np
#import seaborn as sns
#import matplotlib.pyplot as plt


#df = pd.read_csv('.\TSVs\T1D_GWAS_add.tsv', sep='\t')

#my_data = df.sample(642)

#g=sns.relplot(
    #data = my_data, # for whole genome this number will be 10000, or however many you want to see
    #x= 'cumulative_pos',
    #y= '-logp',
    #aspect = 5, # size of graph
    #hue = 'CHR_ID', # for whole genome this is the chromosome
    #palette= ['grey','black'] * 11, #random colour scheme, can be changed to any colour(s)
    #linewidth=0,
    #size=6,
    #legend=None
    #)

# Editing x-axis and making it neater
#g.ax.set_xlabel('Chromosome') # x-axis label

# Setting where the labels in the x-axis should be
#g.ax.set_xticks(df.groupby('CHR_ID')['cumulative_pos'].median())
#g.ax.set_xticklabels(df['CHR_ID'].unique())


# Add title to graph
#g.fig.suptitle('Manhattan Plot showing Association between SNPs and T1DM in GWAS')

# Create annotations for each SNP point
#annotations = my_data[my_data['-logp'] > 20].apply(lambda p : g.ax.annotate(p['SNPS'], (p['cumulative_pos'], p['-logp'])), axis=1).to_list()

#adjust_text(annotations, arrowprops = {'arrowstyle': '->', 'color':'blue'})
