tutorial: https://www.datacamp.com/community/tutorials/tutorial-jupyter-notebook#gs.ztPiMM0

# Record of the VENUSAR pipeline commands as run, with observed run times.
# Each step reads a file written by an earlier step (see the matching paths).
# All relative paths below are resolved from this directory.
cd /data640g1/data/documents/docs/projects/payton_venusar/VENUSAR_DEV/venusar

# step 1: thresholds.py; its -o file is passed to tf_expression.py via -m below
# 19:11:33 - 17:36:16 = 95 minutes 17 seconds run time
python3 thresholds.py -fpr 0.001 -m ../../data/HOCOMOCOv10.JASPAR_FORMAT.TF_IDS.txt -o ../../data/HOCOMOCOv10.JASPAR_FORMAT.TF_IDS.fpr_0p001.txt

# step 2: tf_expression.py; its -mo file (...bed_reduced.txt) is passed to motifs.py via -m below
# time to run is inconsequential; under a second
python3 tf_expression.py -i ../../data/FLDL_CCCB_RARE_VARIANTS.MERGED.RNA_DP10.RNA_NODUPS.CHIP_MULTIMARK.SORTED.vcf -o 1 -m ../../data/HOCOMOCOv10.JASPAR_FORMAT.TF_IDS.fpr_0p001.txt -e ../../data/ALL_ARRAYS_NORMALIZED_MAXPROBE_LOG2_COORDS.sorted.txt -mo ../../data/HOCOMOCOv10.JASPAR_FORMAT.TF_IDS.fpr_0p001.txt.bed_reduced.txt

# step 3: motifs.py; stdout and stderr are both redirected (&>) to the run log
# runtime ~11 hours with homotypic
#   (without homotypic: 20170115.02:19:36 - 20170114.19:10:53 = 7 hours 8 minutes 43 seconds)
python3 motifs.py -i ../../data/FLDL_CCCB_RARE_VARIANTS.MERGED.RNA_DP10.RNA_NODUPS.CHIP_MULTIMARK.SORTED.vcf -r ../../data/genome_reference/reference_genome_hg19.fa -m ../../data/HOCOMOCOv10.JASPAR_FORMAT.TF_IDS.fpr_0p001.txt.bed_reduced.txt -o ../../data/output.motif.20170114.vcf -fm -fp -ci ../../data/GM12878.ENCODE.ALL_TFS.bed -co ../../data/output.chip_peaks_output.20170114.bed &> ../../data/0_run_logs/20170114_motifs_run_stdout.txt

# step 4: activity.py
# 15:27:32 - 14:51:01 = 36 min 31 seconds run time
# -i is the output of whichever of tf_expression.py / motifs.py was run last
#   (in this run: the motifs.py output vcf)
python3 activity.py -i ../../data/output.motif.20170114.vcf -a ../../data/QN_FLDL_CCCB_K27AC_PEAKS_SIGNAL.bed -ov ../../data/output.activity.20170114.vcf -ob ../../data/output.activity.20170114.bed -th 2 &> ../../data/0_run_logs/20170114_activity_run_stdout.txt

# step 5: gene_expression.py; -i is the -ov file written by activity.py above
# 15:27:45 - 15:27:33 = 12 seconds run time
python3 gene_expression.py -i ../../data/output.activity.20170114.vcf -e ../../data/ALL_ARRAYS_NORMALIZED_MAXPROBE_LOG2_COORDS.sorted.txt -ov ../../data/output.gene_expression.20170114.vcf -eth 5 -th 2 &> ../../data/0_run_logs/20170114_gene_expression_run_stdout.txt


In [2]:
import venusar
import motif
import thresholds
import motifs
import activity
import tf_expression
import gene_expression


In [22]:
# Plotly setup for local (offline) use in this notebook.
#
# By default plotly wants to put everything in its cloud service:
#    ref: https://plot.ly/python/getting-started/
# Run this cell before any plotting call so that no account setup is
# requested and figures are rendered locally.
#    note: plotly.plotly methods are cloud only, so plotly.offline is used
#          instead.
# Plotly Offline creates graphs offline and saves them locally.  It offers
# two plotting methods:
#    plotly.offline.plot()
#    plotly.offline.iplot()   <- jupyter notebook specific
#    ref: https://plot.ly/python/offline/
#
# general plotly plotting reference: https://plot.ly/python/user-guide/

# cloud-only / unused alternatives, kept for reference:
#import plotly.plotly as py # cloud only
#import plotly.graph_objs as go
import plotly
#help(plotly.offline.iplot)

# record which plotly version produced the figures in this notebook
print(plotly.__version__)

# enable inline rendering for plotly.offline.iplot;
# connected=True: per the plotly offline docs, plotly.js is loaded from the
# online CDN instead of being embedded in the notebook
plotly.offline.init_notebook_mode(connected=True)

2.0.0


In [39]:
# Load the full motif file (the same file passed to thresholds.py via -m).
# pc, th and bp are passed positionally to motif.get_motifs; presumably they
# are pseudocount, threshold and background base probabilities -- confirm
# against motif.get_motifs.
motif_f_base = '../../data/HOCOMOCOv10.JASPAR_FORMAT.TF_IDS.txt'
pc, th = 0.1, 0
bp = [0.25] * 4  # four equal entries of 0.25
motif_set_base = motif.get_motifs(motif_f_base, pc, th, bp)

In [40]:
# Load the reduced motif file (the -mo output of the tf_expression.py run),
# reusing the pc, th and bp values already defined in this notebook.
motif_f_expressed = '../../data/HOCOMOCOv10.JASPAR_FORMAT.TF_IDS.fpr_0p001.txt.bed_reduced.txt'
motif_set_expressed = motif.get_motifs(
    motif_f_expressed, pc, th, bp
)


In [41]:
# Report the size of the expressed motif set two ways: by taking len() of the
# .motifs attribute and by calling the set's own length() method.
print("length ", format(len(motif_set_expressed.motifs)), sep="")

print("length ", format(motif_set_expressed.length()), sep="")

length 603
length 603


In [58]:
# Build one list per motif set via element_positions_list(False); presumably
# each entry is the length of one motif -- confirm against the motif module.
motif_lengths_base = motif_set_base.element_positions_list(False)
motif_lengths_expressed = motif_set_expressed.element_positions_list(False)

# Report the container type and element count of each list.
print(
    "motif_lengths_base type: ",
    format(type(motif_lengths_base)),
    " has ",
    format(len(motif_lengths_base)),
    " elements.",
    sep="",
)
print(
    "motif_lengths_expressed type: ",
    format(type(motif_lengths_expressed)),
    " has ",
    format(len(motif_lengths_expressed)),
    " elements.",
    sep="",
)

# Difference in element count between the base and the expressed list.
print(
    "motif_lengths_expressed has ",
    format(len(motif_lengths_base) - len(motif_lengths_expressed)),
    " fewer elements.",
    sep="",
)

motif_lengths_base type: <class 'list'> has 641 elements.
motif_lengths_expressed type: <class 'list'> has 603 elements.
motif_lengths_expressed has 38 fewer elements.


In [46]:

# Histogram of the values in motif_lengths_base (the list built from the
# base, unfiltered motif set), rendered inline with plotly offline.
# ref: https://plot.ly/python/histograms/

# single trace: one histogram over the base list
data = [
    plotly.graph_objs.Histogram(
        x=motif_lengths_base
    )
]

layout = plotly.graph_objs.Layout(
    # The trace plots motif_lengths_base, i.e. all TF, so the title must not
    # claim the data was filtered by tf_expression.py.
    title='TF Length Histogram (all TF)',
    xaxis=dict(
        title='TF Length',
        ticklen=5,
        zeroline=False,
        gridwidth=2,
    ),
    yaxis={'title':'Number of TF with Length'}
    )
#plotly.offline.iplot(data)  # basic plot
# combine trace and layout into one figure and display it in the notebook
go_figure = plotly.graph_objs.Figure(data=data, layout=layout)
plotly.offline.iplot(go_figure)

In [62]:
# Rebuild the length histograms from reusable pieces: shared axis settings,
# one trace per motif list, and one layout per figure.
# ref: https://plot.ly/python/histograms/

# -- axis settings shared by every figure in this cell
xaxis_template = dict(title='TF Length', ticklen=5, zeroline=False, gridwidth=2)
yaxis_template = dict(title='Number of TF with Length')

# -- figure 1: base motif list (all TF)
hist_all_TF = plotly.graph_objs.Histogram(
    name='All TF', x=motif_lengths_base, opacity=0.75)
layout = plotly.graph_objs.Layout(
    title='TF Length Histogram (all TF)',
    xaxis=xaxis_template, yaxis=yaxis_template)
go_figure = plotly.graph_objs.Figure(data=[hist_all_TF], layout=layout)
plotly.offline.iplot(go_figure)

# -- figure 2: expressed motif list (after tf_expression.py)
hist_expressed_TF = plotly.graph_objs.Histogram(
    name='Expressed TF', x=motif_lengths_expressed, opacity=0.75)
layout = plotly.graph_objs.Layout(
    title='TF Length Histogram after running tf_expression.py',
    xaxis=xaxis_template, yaxis=yaxis_template)
go_figure = plotly.graph_objs.Figure(data=[hist_expressed_TF], layout=layout)
plotly.offline.iplot(go_figure)

# -- figure 3: both traces in one figure; barmode='overlay' draws them on top
#    of each other
layout = plotly.graph_objs.Layout(
    title='TF Length Histogram comparison tf_expression.py dropped',
    xaxis=xaxis_template, yaxis=yaxis_template,
    barmode='overlay')
# go_figure is left bound to this overlay figure when the cell finishes
go_figure = plotly.graph_objs.Figure(
    data=[hist_all_TF, hist_expressed_TF], layout=layout)
plotly.offline.iplot(go_figure)



In [61]:
# Inspect the layout of the current go_figure: print the help text for the
# layout object, then leave the layout as the cell's last expression so the
# notebook displays its contents.
help(go_figure.layout)
go_figure.layout

Help on PlotlyDict in module plotly.graph_objs.graph_objs object:

class PlotlyDict(builtins.dict, PlotlyBase)
 |  Base class for dict-like Plotly objects.
 |  
 |  Method resolution order:
 |      PlotlyDict
 |      builtins.dict
 |      PlotlyBase
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __copy__(self)
 |  
 |  __deepcopy__(self, memodict={})
 |  
 |  __dir__(self)
 |      Dynamically return the existing and possible attributes.
 |  
 |  __getattr__(self, key)
 |      Python only calls this when key is missing!
 |  
 |  __getitem__(self, key)
 |      Calls __missing__ when key is not found. May mutate object.
 |  
 |  __init__(self, *args, **kwargs)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __missing__(self, key)
 |      Mimics defaultdict. This is called from __getitem__ when key DNE.
 |  
 |  __setattr__(self, key, value)
 |      Maps __setattr__ onto __setitem__
 |  
 |  __setitem__(self, key, value, _raise=True)
 |     

{'barmode': 'overlay',
 'title': 'TF Length Histogram comparison tf_expression.py dropped',
 'xaxis': {'gridwidth': 2,
  'ticklen': 5,
  'title': 'TF Length',
  'zeroline': False},
 'yaxis': {'title': 'Number of TF with Length'}}

In [None]:
# TODO: next, create a reader that exports motif (= TF), sample, and variant
#   information from the vcf file as a python structure
#
#   get the vcf reader for variant elements from motifs.py
#   get the vcf reader for samples from gene_expression.py
#
# then do a similar, expanded analysis of which motifs were selected/exported per variant
# could also group by variant type
#   a->t, etc.
#
