
# Generate figures from Micro-C data.
- This script uses the coolbox API along with data generated with Micro-C, cooler, mustache, and juicer to plot regions of the Drosophila genome with differences in chromatin.
### To create the conda environment for this script:

1. mamba create -n coolbox -c bioconda coolbox
2. mamba activate coolbox
3. mamba install -c anaconda ipykernel
4. python -m ipykernel install --user --name=coolbox

Then select the coolbox kernel under python environments within the notebook.
- https://medium.com/@nrk25693/how-to-add-your-conda-environment-to-your-jupyter-notebook-in-just-4-steps-abeab8b8d084

### Import packages

9/27/24 Note to self: 

- Use the mustache v4 datasets for plotting the diffloops and gtf's. The files have already been generated and only loops called by mustache across all pairwise comparisons are in those combined datasets. This will allow me to plot the merged maps for JW18 DOX and wMel with the filtered gtf's and the diffloops. 

- I want to change how the plots look so I can:
1. Search by gene 
2. Plot the two contact maps one on top of the other, with the lower one inverted, and show the p-values too.
3. Draw circles at tips instead of triangles across the call.

10/1/24 Note to self:
Try plotting diffloop points with HiCPeaksCoverage(loops1, color="#2255ff", line_width=5); this needs a BEDPE file, which still has to be generated.


In [15]:
import os
import coolbox
from coolbox.api import *
import pandas as pd
from math import floor

# Run the rest of the notebook from the Micro-C map directory and echo
# the resolved location so the cell output records where we are.
_workdir = "/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map"
os.chdir(_workdir)
_resolved_cwd = os.path.abspath(os.curdir)
print(f"Current working directory: {_resolved_cwd}")


Current working directory: /scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map


In [16]:
# Comparison label -> [first sample, second sample]. The label is reused in
# file names; the two sample names select the matching .mcool contact maps.
pairwise = {
    'JW18-DOX_JW18-wMel': ['JW18-DOX', 'JW18-wMel'],
}



Files:
1. /scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/JW18-DOX.matrix_1kb.mcool 
2. /scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/JW18-wMel.matrix_1kb.mcool

In [17]:
def plot(TEST_RANGE,pairwise,comparison,resolution,directory):
    """Render a differential contact-map view for one sample pair and save it as PDF.

    The coolbox frame is composed, in order, of: chromosome name and x-axis,
    a Selfish track titled "FDR Corrected p-value", a HiCDiff track followed
    by two TADCoverage layers (diff-loop BED files, red and blue), one GTF
    track per sample, and a closing x-axis. The frame is plotted for
    ``TEST_RANGE`` and then saved through a ``Browser`` to
    ``{directory}/{comparison}_{TEST_RANGE}.pdf``.

    Parameters
    ----------
    TEST_RANGE : str
        Genome range handed to ``frame.plot`` and ``Browser.goto``
        (other cells in this notebook use the form '3L:10000-1000000').
    pairwise : dict
        Maps a comparison label to a two-item sequence of sample names.
    comparison : str
        Key into ``pairwise``; also the prefix of the mustache output files.
    resolution : str
        Name of the mustache resolution sub-directory (e.g. '1kb', '10kb').
    directory : str
        Directory the PDF is written to.
    """
    # Contact maps of the first and second sample in the pair.
    map_first = f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/{pairwise[comparison][0]}.matrix_1kb.mcool'
    map_second = f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/{pairwise[comparison][1]}.matrix_1kb.mcool'

    # Gene annotations for the diffloop1 / diffloop2 sets (shown below as
    # "enriched in" the first / second sample respectively).
    genes_first = f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v1/{resolution}/p_value_0.0001/gtf/symbol/{comparison}.diffloop1.symbol.gtf'
    genes_second = f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v1/{resolution}/p_value_0.0001/gtf/symbol/{comparison}.diffloop2.symbol.gtf'

    # Differential loops called by mustache, one BED file per direction.
    loops_first = f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v1/{resolution}/p_value_0.0001/bed/plot_bed/{comparison}.diffloop1.bed'
    loops_second = f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v1/{resolution}/p_value_0.0001/bed/plot_bed/{comparison}.diffloop2.bed'

    name_first, name_second = pairwise[comparison]

    # Both Cool tracks are created under the same value-range context.
    with MinValue(-9), MaxValue(-1):
        cool_first = Cool(map_first)
        cool_second = Cool(map_second)

    # Keep this a single left-to-right "+" chain: coolbox attaches each
    # feature/coverage to what precedes it, so the order is significant.
    tracks = (
        ChromName() + XAxis()
        + Selfish(cool_second, cool_first, norm="log")
        + Title("FDR Corrected\np-value")
        + MinValue(0.000000001) + MaxValue(0.1)
        + HiCDiff(cool_second, cool_first, normalize='log', diff_method='diff', cmap="bwr_r")
        + Title("Differential\nChromatin\nInteractions")
        + MinValue(-6) + MaxValue(6)
        + TADCoverage(loops_first, border_only=False, alpha=0.25, color='red', border_color='red')
        + TADCoverage(loops_second, border_only=False, alpha=0.25, color='blue', border_color='blue')
        + GTF(genes_first) + Title(f"Genetic elements\nenriched in\n{name_first}")
        + GTF(genes_second) + Title(f"Genetic elements\nenriched in\n{name_second}")
        + XAxis()
    )
    tracks *= Feature(depth_ratio=0.35)
    tracks.plot(TEST_RANGE)

    browser = Browser(
        tracks,
        reference_genome='/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/reference/dmel-all-chromosome_only-r6.46.genome',
    )
    browser.goto(TEST_RANGE)
    browser.save(f'{directory}/{comparison}_{TEST_RANGE}.pdf')



In [18]:
# # Browser for given ranges:
# comparison = 'JW18-DOX-1_JW18-wMel-1' 
# resolution = '10kb'
# TEST_RANGE = '3L:10000-1000000'


# file1=f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/{pairwise[comparison][0]}.matrix_1kb.mcool'
#     #Contact map2
# file2=f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/{pairwise[comparison][1]}.matrix_1kb.mcool'
# #genes with open regions that are enriched in file1
# gtf1=f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v1/{resolution}/p_value_0.0001/gtf/symbol/{comparison}.diffloop1.symbol.gtf'
# #genes with open regions that are enriched in file1
# gtf2=f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v1/{resolution}/p_value_0.0001/gtf/symbol/{comparison}.diffloop2.symbol.gtf'
# #mustache identified de loops1
# loops1=f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v1/{resolution}/p_value_0.0001/bed/plot_bed/{comparison}.diffloop1.bed'
# #mustache identified de loops2
# loops2=f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v1/{resolution}/p_value_0.0001/bed/plot_bed/{comparison}.diffloop2.bed'

# c1,c2=pairwise[comparison]

# with MinValue(-9), MaxValue(-1):
#     cool1 = Cool(file1)
#     cool2 = Cool(file2)

# frame = ChromName()+ XAxis() + \
#     Selfish(cool2, cool1, norm="log") +Title("FDR Corrected\np-value")+ \
#     MinValue(0.000000001) + MaxValue(0.1)  + \
#     HiCDiff(cool2, cool1, normalize='log', diff_method='diff', cmap="bwr_r") + Title("Differential\nChromatin\nInteractions") + \
#     MinValue(-6) + MaxValue(6)  + \
#     TADCoverage(loops1, border_only=False, alpha=0.25, color='red',border_color='red') +\
#     TADCoverage(loops2, border_only=False, alpha=0.25, color='blue',border_color='blue') +\
#     GTF(gtf1)+Title(f"Genetic elements\nenriched in\n{c1}")+ \
#     GTF(gtf2)+Title(f"Genetic elements\nenriched in\n{c2}")+ \
#     XAxis()
# frame *= Feature(depth_ratio=0.35)
# frame.plot(TEST_RANGE)

# bsr = Browser(frame, reference_genome='/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/reference/dmel-all-chromosome_only-r6.46.genome')
# bsr.goto(TEST_RANGE)
# # bsr.save(f'{directory}/{comparison}_{TEST_RANGE}.pdf')

In [19]:
from matplotlib.colors import LinearSegmentedColormap

# Alternative palette (disabled; kept for reference):
# uberplant = '#171123'
# eggplants = '#372248'
# periptant = '#414770'
# blueplant = '#5B85AA'
# kindablue = '#A9BCD0'
# sadorange = '#F9C784'
# tangerine = '#FCAF58'
# prforange = '#FF8C42'
# xxxorange = '#F46036'
# rustyredy = '#C2320A'
# diff_gradient = [uberplant, eggplants, periptant, blueplant, kindablue, fullwhite, sadorange, tangerine, prforange, xxxorange, rustyredy]
# selfish_gradient = [uberplant, eggplants, periptant, blueplant, kindablue, fullwhite]

# Active palette, ordered from the low end to the high end of the gradient.
fullwhite = '#FFFFFF'
purpl = '#241e4e'
blue1 = '#587792'
blue2 = '#8db1ab'
green = '#C7E576'
yell1 = '#f2ed6f'
yell2 = '#f4e04d'
yell3 = '#F0D719'
orang = '#ff4000'

# Diverging gradient with white at the midpoint, and a two-colour gradient
# running from purple to white.
diff_gradient = [purpl, blue1, blue2, green, fullwhite, yell1, yell2, yell3, orang]
selfish_gradient = [purpl, fullwhite]


def _make_interaction_cmap(colours):
    """Build an 'interaction' colormap whose bad values are orange and under-range values purple."""
    cmap = LinearSegmentedColormap.from_list('interaction', colours)
    cmap.set_bad(orang)
    cmap.set_under(purpl)
    return cmap


diff_cmap = _make_interaction_cmap(diff_gradient)
selfish_cmap = _make_interaction_cmap(selfish_gradient)


In [20]:
# # Browser for given ranges:
# directory = '/scratch1/jodie/wolbachia/Wolbachia_induced_differentiation'
# comparison = 'JW18-DOX_JW18-wMel' 
# resolution = '1kb'
# TEST_RANGE = '3L:2980000-3970000'
# # TEST_RANGE = '3R:31491078-31492678' 
# JW18_DOX  = '#BAD86E'
# JW18_wMel = purpl
# #contact map 1
# file1=f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/{pairwise[comparison][0]}.matrix_1kb.mcool'
# #Contact map2
# file2=f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/{pairwise[comparison][1]}.matrix_1kb.mcool'
# #genes with open regions that are enriched in file1
# gtf1=f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v1/{resolution}/p_value_0.0001/gtf/symbol/{comparison}.diffloop1.symbol.gtf'
# #genes with open regions that are enriched in file1
# gtf1=f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v1/{resolution}/p_value_0.0001/gtf/symbol/{comparison}.diffloop1.symbol.gtf'
# #genes with open regions that are enriched in file1
# gtf2=f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v1/{resolution}/p_value_0.0001/gtf/symbol/{comparison}.diffloop2.symbol.gtf'
# #mustache identified de loops1
# loops1=f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v1/{resolution}/p_value_0.0001/bed/plot_bed/{comparison}.diffloop1.bed'
# #mustache identified de loops2
# loops2=f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v1/{resolution}/p_value_0.0001/bed/plot_bed/{comparison}.diffloop2.bed'


# ###Tracks
# #RNA-coverage 
# rnaseq_cov_1 =f'/scratch1/jodie/wolbachia/Wolbachia_induced_differentiation/RNA-seq_coverage/JW18DOX221117.avg.coverage.bw'
# rnaseq_cov_2 =f'/scratch1/jodie/wolbachia/Wolbachia_induced_differentiation/RNA-seq_coverage/JW18wMel221117.avg.coverage.bw'
# #Micro-C_coverage 
# microc_cov_1 =f'/scratch1/jodie/wolbachia/Wolbachia_induced_differentiation/Micro-C_coverage/JW18-DOX-1.coverage.bw'
# microc_cov_2 =f'/scratch1/jodie/wolbachia/Wolbachia_induced_differentiation/Micro-C_coverage/JW18-wMel-1.coverage.bw'
# #Mappability 
# mappability =f'/scratch1/jodie/wolbachia/Wolbachia_induced_differentiation/dmel_6_genmap_e0_k150_final.bw'

# c1,c2=pairwise[comparison]


# with MinValue(-9), MaxValue(-1):
#     cool1 = Cool(file1)
#     cool2 = Cool(file2)

# frame = ChromName()+ XAxis() + \
#     Selfish(cool2, cool1, norm="log", cmap=selfish_cmap) +Title("FDR Corrected\np-value")+ \
#     MinValue(0.000000001) + MaxValue(0.1)  + \
#     HiCDiff(cool2, cool1, normalize='log', diff_method='diff', cmap=diff_cmap) + Title("Differential\nChromatin\nInteractions") + \
#     MinValue(-6) + MaxValue(6)  + \
#     TADCoverage(loops1, border_only=False, alpha=0.45, color=JW18_DOX,border_color=JW18_DOX) +\
#     TADCoverage(loops2, border_only=False, alpha=0.25, color=JW18_wMel,border_color=JW18_wMel) +\
#     GTF(gtf1)+Title(f"Genetic elements\nenriched in\n{c1}")+ \
#     GTF(gtf2)+Title(f"Genetic elements\nenriched in\n{c2}")+ \
#     BigWig(rnaseq_cov_1) + Color(JW18_DOX) + MinValue(0) + MaxValue(500)+ Title("RNA-seq coverage\n JW18-DOX-1")+\
#     BigWig(rnaseq_cov_2) + Color(JW18_wMel) + MinValue(0) + MaxValue(500)+ Title("RNA-seq coverage\n JW18-wMel-1")+\
#     BigWig(microc_cov_1) + Color(JW18_DOX) + MinValue(0) + MaxValue(5000)+ Title("Micro-C coverage\n JW18-DOX-1")+\
#     BigWig(microc_cov_2) + Color(JW18_wMel) + MinValue(0) + MaxValue(5000)+ Title("Micro-C coverage\n JW18-wMel-1")+\
#     BigWig(mappability) + Color('grey') + MinValue(0) + MaxValue(1)+ Title("Mappability")+TrackHeight(1) +\
#     XAxis()
# frame *= Feature(depth_ratio=0.35)
# frame.plot(TEST_RANGE)

# bsr = Browser(frame, reference_genome='/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/reference/dmel-all-chromosome_only-r6.46.genome')
# bsr.goto(TEST_RANGE)
# bsr.show()
# bsr.save(f'{directory}/{comparison}_{TEST_RANGE}.pdf')

In [43]:
# ---- Browser for a given range --------------------------------------------
directory = '/scratch1/jodie/wolbachia/Wolbachia_induced_differentiation'
comparison = 'JW18-DOX_JW18-wMel'
resolution = '1kb'
TEST_RANGE = '2L:7600000-10000000'
# TEST_RANGE = '3R:31491078-31492678'

# Per-sample display colours.
JW18_DOX = '#BAD86E'
JW18_wMel = purpl

# ---- Contact maps (first and second sample of the comparison) --------------
file1 = f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/{pairwise[comparison][0]}.matrix_1kb.mcool'
file2 = f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/{pairwise[comparison][1]}.matrix_1kb.mcool'

# ---- Gene annotations ------------------------------------------------------
# Full annotation (all genes); this is the GTF actually plotted below.
gtf_all = '/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/reference/dmel-all-r6.46.sorted.uniq.gtf'
# Per-direction diff-loop annotations (disabled):
# gtf1=f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v1/{resolution}/p_value_0.0001/gtf/symbol/{comparison}.diffloop1.symbol.gtf'
# gtf2=f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v1/{resolution}/p_value_0.0001/gtf/symbol/{comparison}.diffloop2.symbol.gtf'
# Genes from both diff-loop sets combined; not plotted in this cell.
# Helper that prints the shell command used to build it:
# print(f'cat {gtf1} {gtf2} |uniq > {gtf3}')
gtf3 = f'/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v1/{resolution}/p_value_0.0001/gtf/symbol/{comparison}.diffloop.symbol.gtf'

# ---- Mustache differential loops (v4 datasets) -----------------------------
# Only the 1kb BEDPE files are drawn below; the BED variants are defined for
# convenience when switching overlays.
loops1 = '/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v4/combined/diffloop1.bed'
loops1_1kb = '/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v4/1kb/diffloop1.plot.bed'
loops1_16kb = '/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v4/16kb/diffloop1.bed'
loops1_bedpe = '/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v4/1kb/diffloop1.bedpe'
loops2_bedpe = '/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v4/1kb/diffloop2.bedpe'
loops2 = '/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v4/combined/diffloop2.bed'
loops2_1kb = '/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v4/1kb/diffloop2.plot.bed'
loops2_16kb = '/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/map/mustache_v4/16kb/diffloop2.bed'

# ---- Signal tracks ---------------------------------------------------------
# RNA-seq coverage difference (wMel vs DOX).
rnaseq_cov = '/scratch1/jodie/wolbachia/Wolbachia_induced_differentiation/RNA-seq_coverage/JW18wMel221117_vs_DOX.diff.coverage.bw'
# rnaseq_cov_1 =f'/scratch1/jodie/wolbachia/Wolbachia_induced_differentiation/RNA-seq_coverage/JW18DOX221117.avg.coverage.bw'
# rnaseq_cov_2 =f'/scratch1/jodie/wolbachia/Wolbachia_induced_differentiation/RNA-seq_coverage/JW18wMel221117.avg.coverage.bw'
# Micro-C coverage difference (wMel vs DOX).
microc_cov = '/scratch1/jodie/wolbachia/Wolbachia_induced_differentiation/Micro-C_coverage/JW18-wMel_vs_DOX.diff.coverage.bw'
# microc_cov_1 =f'/scratch1/jodie/wolbachia/Wolbachia_induced_differentiation/Micro-C_coverage/JW18-DOX-1.coverage.bw'
# microc_cov_2 =f'/scratch1/jodie/wolbachia/Wolbachia_induced_differentiation/Micro-C_coverage/JW18-wMel-1.coverage.bw'
# Genome mappability.
mappability = '/scratch1/jodie/wolbachia/Wolbachia_induced_differentiation/dmel_6_genmap_e0_k150_final.bw'

c1, c2 = pairwise[comparison]

# ---- Frame -----------------------------------------------------------------
# One left-to-right "+" chain: each Title/TrackHeight/Color/Min/Max/coverage
# is attached to what precedes it, so the order below is significant.
# Each difference bigWig is drawn twice: once clipped to the positive range
# (red) and once to the negative range (black).
frame = (
    ChromName() + XAxis()
    + Cool(file1, cmap='JuiceBoxLike') + Title(f"{c1} Contact Map") + TrackHeight(12)
    + HiCPeaksCoverage(loops1_bedpe, color='blue', line_width=5)
    + Cool(file2, cmap='JuiceBoxLike', orientation='inverted') + Title(f"{c2} Contact Map") + TrackHeight(12)
    + HiCPeaksCoverage(loops2_bedpe, color='blue', line_width=5)
    + Spacer(height=0.5)
    + GTF(gtf_all) + Title("Genetic elements") + TrackHeight(6)
    + Spacer(height=0.5)
    + BigWig(rnaseq_cov) + Color('red') + MinValue(0) + MaxValue(500)
    + Title("RNA-seq coverage\nJW18-wMel enrichment\nover JW18-DOX") + TrackHeight(0.5)
    + BigWig(rnaseq_cov) + Color('black') + MinValue(-500) + MaxValue(0) + TrackHeight(0.5)
    + Spacer(height=0.5)
    + BigWig(microc_cov) + Color('red') + MinValue(0) + MaxValue(1000)
    + Title("Micro-C coverage\nJW18-wMel enrichment\nover JW18-DOX") + TrackHeight(0.5)
    + BigWig(microc_cov) + Color('black') + MinValue(-1000) + MaxValue(0) + TrackHeight(0.5)
    + Spacer(height=0.5)
    + BigWig(mappability) + Color('grey') + MinValue(0) + MaxValue(1) + Title("Mappability") + TrackHeight(0.5)
    + XAxis()
)
frame *= Feature(depth_ratio=0.35)
frame.plot(TEST_RANGE)

# ---- Interactive browser and SVG export ------------------------------------
bsr = Browser(
    frame,
    reference_genome='/scratch1/jodie/wolbachia/Micro-C/09Nov2023_Micro_C/reference/dmel-all-chromosome_only-r6.46.genome',
)
bsr.goto(TEST_RANGE)
bsr.show()
_svg_path = f'{directory}/{comparison}_{TEST_RANGE}.svg'
bsr.save(_svg_path)
print(f"File saved to: {_svg_path}")


VBox(children=(VBox(children=(HBox(children=(Dropdown(options=('2L', '2R', '3L', '3R', '4', 'X', 'Y'), value='…

File saved to: /scratch1/jodie/wolbachia/Wolbachia_induced_differentiation/JW18-DOX_JW18-wMel_2L:7600000-10000000.svg
Gene 'FBgn0005677' not found in the GTF file.


ChatGPT Modifications on 9/30/24

In [58]:
# Search browser for specific Flybase ID

import pandas as pd

# Function to search for gene coordinates in the GTF file using gene_id
def get_gene_range_by_id(gtf_file, gene_id, buffer=100000):
    """
    Searches for a given gene in a GTF file by gene_id and returns its chromosomal range.

    The range spans every record carrying the requested gene_id (smallest
    start to largest end), widened by ``buffer`` on both sides. The start is
    clamped at 0 so the buffer never produces a negative coordinate.

    Parameters:
    gtf_file : str
        Path to the tab-separated GTF file. Text after a '#' is treated as a
        comment by the parser.
    gene_id : str
        Gene ID to search for (e.g., "FBgn0031208").
    buffer : int
        Additional range to include on each side of the gene coordinates
        (default=100000).

    Returns:
    str
        Chromosomal coordinates in 'chr:start-end' format, or None if the
        gene_id is not found (a message is printed in that case).
    """
    # Only four of the nine GTF columns are needed. Reading seqname and
    # attribute as strings keeps chromosome names such as '4' textual and
    # lets rows with an empty attribute field pass through as missing values
    # instead of breaking the extraction below.
    df = pd.read_csv(
        gtf_file,
        sep='\t',
        comment='#',
        header=None,
        names=['seqname', 'source', 'feature', 'start', 'end',
               'score', 'strand', 'frame', 'attribute'],
        usecols=['seqname', 'start', 'end', 'attribute'],
        dtype={'seqname': str, 'attribute': str},
    )

    # Vectorised extraction of the first gene_id "<value>" in each attribute
    # string; rows without one (or with no attribute at all) become missing.
    gene_ids = df['attribute'].str.extract(r'gene_id "([^"]*)', expand=False)

    gene_info = df[gene_ids == gene_id]
    if gene_info.empty:
        print(f"Gene ID '{gene_id}' not found in the GTF file.")
        return None

    # Get chromosomal range with buffer applied.
    start = max(int(gene_info['start'].min()) - buffer, 0)  # Avoid negative coordinates
    end = int(gene_info['end'].max()) + buffer
    chrom = gene_info['seqname'].iloc[0]  # Assume all records for a gene have the same seqname
    return f'{chrom}:{start}-{end}'

# Look up a single FlyBase gene in the full annotation and, when it is found,
# move the existing browser to the buffered gene range and export that view.
gtf_file = gtf_all
gene_id = "FBgn0000097"  # FlyBase gene ID to display
gene_range = get_gene_range_by_id(gtf_file, gene_id)

if not gene_range:
    print("No coordinates found for the specified gene ID.")
else:
    print(f"Gene '{gene_id}' coordinates: {gene_range}")
    bsr.goto(gene_range)
    bsr.show()
    bsr.save(f'{directory}/{comparison}_{gene_range}.svg')
    print(f"File saved to: {directory}/{comparison}_{gene_range}.svg")


Gene 'FBgn0000097' coordinates: 2L:2056484-2278754


VBox(children=(VBox(children=(HBox(children=(Dropdown(options=('2L', '2R', '3L', '3R', '4', 'X', 'Y'), value='…

File saved to: /scratch1/jodie/wolbachia/Wolbachia_induced_differentiation/JW18-DOX_JW18-wMel_2L:2056484-2278754.svg
