In [56]:
import os
from glob import glob
import re
import igv


def meRIPseq_study(files, enzymes, file_type='bigwig', sp='hg38'):
    ## 1.
    if file_type == 'bam':
        files = [f for f in files if '.bam' in f and '.bai' not in f]
    elif file_type == 'bai':
        files = [f for f in files if '.bai' in f]
    elif file_type == 'bigwig':
        files = [f for f in files if '.bw' in f]        
    ## 2.
    if sp=='hg38':
        files = [b for b in files if 'HIV' not in b]
    elif sp=='HIV':
        files = [b for b in files if 'HIV' in b]        
    ## 3.
    TREATED = [b for b in files if 'NT' not in b]
    nonTREATED = [b for b in files if 'NT' in b]
    
    design={}
    for enz in enzymes:
        Treatment = [b for b in TREATED if enz in b]
        design[enz]={            
            'meRIP': [b for b in Treatment if 'meRIP' in b],
            'INPUT': [b for b in Treatment if 'meRIP' in b]
        }
    design['Ctrl'] = {
        'meRIP': [b for b in nonTREATED if 'meRIP' in b],
        'INPUT': [b for b in nonTREATED if 'IN' in b],        
    }
    return design

#   type: "wig",
#   name: "CTCF",
#   url: "https://www.encodeproject.org/files/ENCFF356YES/@@download/ENCFF356YES.bigWig",
#   guideLines: [
#     {color: 'green', dotted: true, y: 25}, 
#     {color: 'red', dotted: false, y: 5}
#   ]
# }
    
#     b.load_track({
#         "name": "meRIPseq",
#         "type": "annotation",
#         "format": "bed",
#         sourceType: "file",
#         url: "//igv.broadinstitute.org/annotations/hg19/genes/gencode.v18.collapsed.bed",
#         indexURL: "//igv.broadinstitute.org/annotations/hg19/genes/gencode.v18.collapsed.bed.idx",
#         displayMode: "EXPANDED"
#     }

In [46]:
reps = ['23','24']
enzymes = ['METTL3', 'METTL14', 'Virma', 'WTAP']


files = os.listdir('bam')
# HIV_files = [b for b in files if 'HIV' in b]
bigwigs = meRIPseq_study(files, enzymes)
bais = meRIPseq_study(files,enzymes, file_type='bai')
bams = meRIPseq_study(files,enzymes, file_type='bam')

In [47]:
bigwigs

{'METTL3': {'meRIP': ['12_meRIP_23_METTL3.bw', '17_meRIP_24_METTL3.bw'],
  'INPUT': ['12_meRIP_23_METTL3.bw', '17_meRIP_24_METTL3.bw']},
 'METTL14': {'meRIP': ['18_meRIP_24_METTL14.bw', '13_meRIP_23_METTL14.bw'],
  'INPUT': ['18_meRIP_24_METTL14.bw', '13_meRIP_23_METTL14.bw']},
 'Virma': {'meRIP': ['14_meRIP_23_Virma.bw', '19_meRIP_24_Virma.bw'],
  'INPUT': ['14_meRIP_23_Virma.bw', '19_meRIP_24_Virma.bw']},
 'WTAP': {'meRIP': ['20_meRIP_24_WTAP.bw', '15_meRIP_23_WTAP.bw'],
  'INPUT': ['20_meRIP_24_WTAP.bw', '15_meRIP_23_WTAP.bw']},
 'Ctrl': {'meRIP': ['16_meRIP_24_NT.bw', '11_meRIP_23_NT.bw'],
  'INPUT': ['01_IN_23_NT.bw', '06_IN_24_NT.bw']}}

Exp. This is meRIPseq data for Judd H. 

The libraries were prepared with **SMARTer kit**, first 3 nt have to be trimmed from the read1, and read1 is the reverse-complement of the RNA. 

There are two donors, one control and 4 KO samples each.

In [66]:
b = igv.Browser({
    "genome": "hg38"
#      "locus": "chr22:24,376,166-24,376,456"
#     1065859	1066009
    })

# b = igv.Browser(
#     {"reference": {
#         "id": "hg38",
# #         "fastaURL": "files/data/hg19.snippet.fasta",
# #         "indexed": False,
# #         "cytobandURL": "files/data/cytoband.hg19.snippet.txt",
#         "tracks": [
#             {
#                 "name": "exomepeak",
#                 "url": "./exomepeak/hg38/control/peak.c.bed"
#             }
#         ]
#     }}
# )


b.load_track({
    "type": "wig",
#     "name": enz+'\t'+mode+'\t'+reps[i],
#         "min": "0",
#     "max": "30",
#     "color": "rgb(0, 0, 150)",
    "url": bigwigs['Ctrl']['meRIP'][0],
})


# add_bigwig_track('Ctrl', 'meRIP')
# add_bigwig_track('Ctrl', 'INPUT')
b.show()

'16_meRIP_24_NT.bw'

In [None]:
b.zoom_in()

In [None]:
b.get_svg()

In [None]:
b.display_svg()

In [42]:
for enz in ['METTL3','METTL14','Virma','WTAP']:
    print (glob('./exomepeak/hg38/'+enz+'/diff_peak.bed'),'\n')


['./exomepeak/hg38/METTL3/diff_peak.bed'] 

['./exomepeak/hg38/METTL14/diff_peak.bed'] 

['./exomepeak/hg38/Virma/diff_peak.bed'] 

['./exomepeak/hg38/WTAP/diff_peak.bed'] 



1. peak calling HIV
2. peak calling other genes of interest 

(Active R Kernel)