In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as pl
import seaborn as sns
import warnings
import pycistarget
warnings.filterwarnings("ignore")
from pathlib import Path
from tqdm.auto import tqdm

# Set plotting style
from matplotlib import rcParams

# Apply all style overrides in one call.
rcParams.update({
    # fonttype 42 makes the PDF/PS backends embed TrueType fonts.
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    # Use Arial through the generic sans-serif family.
    'font.sans-serif': "Arial",
    'font.family': "sans-serif",
})

# Paths and Configs
import yaml

# Parse the shared project configuration file.
# NOTE(review): yaml.FullLoader can construct more than plain data types;
# yaml.safe_load would be enough if the config only holds scalars, lists and
# mappings -- confirm before switching.
with open('../configuration/config.yaml') as config_handle:
    config = yaml.load(config_handle, Loader=yaml.FullLoader)

# Directory taken from the 'TEMPDIR' entry of the config; later paths are built from it.
TEMPDIR = Path(config['TEMPDIR'])

# SRR3845160: SRX1929540  # sc-chip
# SRR3845161: SRX1929542  # sc-input

# Define input regions

In [5]:
import pyranges as pr
import os

In [94]:
def _summits_within(summits, chrom, start, end):
    """Return the rows of ``summits`` lying strictly inside ``chrom:start-end``.

    A row is kept when its Chromosome equals ``chrom``, its Start is greater
    than ``start`` and its End is smaller than ``end``.
    """
    same_chrom = summits.Chromosome == chrom
    after_start = summits.Start > start
    before_end = summits.End < end
    return summits[same_chrom & after_start & before_end]


# Read the summits BED file and prefix every chromosome name with "chr".
file = TEMPDIR / 'macs2/contrast_summits.bed'
file_str = str(file)
bed = pr.read_bed(file_str)
bed.Chromosome = "chr" + bed.Chromosome.astype("str")

# Loci of interest as gene -> [chromosome, start, end].
genes = {
    'CG9650': ['chrX', 7196029, 7241538],
    'pros': ['chr3R', 11328480, 11407627],
}

# One subset of summits per gene locus, keyed by gene name.
region_sets = {
    gene: _summits_within(bed, *locus)
    for gene, locus in genes.items()
}

# cisTarget for de novo motif discovery

In [96]:
# Expected location of the rankings database below TEMPDIR. It can be fetched with:
# wget https://resources.aertslab.org/cistarget/databases/drosophila_melanogaster/dm6/flybase_r6.02/mc_v10_clust/region_based/dm6_v10_clust.regions_vs_motifs.rankings.feather
db_path = TEMPDIR / 'pycistarget/dm6_v10_clust.regions_vs_motifs.rankings.feather'
# Plain string form of the path, used when opening the database.
db_path_str = str(db_path)

In [97]:
import pycistarget.motif_enrichment_cistarget as mec

# Open the rankings database once, handing it every per-gene region set so the
# same `ctx_db` object can be reused by each cisTarget run below.
ctx_db = mec.cisTargetDatabase(
    db_path_str,
    region_sets=region_sets,
)

In [115]:
gene = 'CG9650'

# Run cisTarget on the summits of this gene only. `region_set` takes a single
# region set, so the dict is indexed by gene instead of passing the whole
# `region_sets` mapping (which only worked because `ctx_db` was built from a
# dict and the run looks regions up by `name`).
ct = mec.cisTarget(
    region_set=region_sets[gene],
    name=gene,  # must match the key used in `region_sets` when building ctx_db
    species='drosophila_melanogaster',
    auc_threshold=0.005,
    nes_threshold=3,
    rank_threshold=0.05,
    annotation_version='v9',
    motif_similarity_fdr=0.001,
    orthologous_identity_threshold=0,
)
ct.run_ctx(ctx_db)

# Write the rendered result to an HTML file named after the gene.
html = ct.show_result()
with open(f'../results/pycistarget_{gene}.html', 'w') as f:
    f.write(html.data)

2024-06-25 11:22:33,341 cisTarget    INFO     Running cisTarget for CG9650 which has 6 regions
2024-06-25 11:22:33,883 cisTarget    INFO     Annotating motifs for CG9650
2024-06-25 11:22:37,728 cisTarget    INFO     Getting cistromes for CG9650


In [116]:
gene = 'pros'

# Run cisTarget on the summits of this gene only. `region_set` takes a single
# region set, so the dict is indexed by gene instead of passing the whole
# `region_sets` mapping (which only worked because `ctx_db` was built from a
# dict and the run looks regions up by `name`).
ct = mec.cisTarget(
    region_set=region_sets[gene],
    name=gene,  # must match the key used in `region_sets` when building ctx_db
    species='drosophila_melanogaster',
    auc_threshold=0.005,
    nes_threshold=3,
    rank_threshold=0.05,
    annotation_version='v9',
    motif_similarity_fdr=0.001,
    orthologous_identity_threshold=0,
)
ct.run_ctx(ctx_db)

# Write the rendered result to an HTML file named after the gene.
html = ct.show_result()
with open(f'../results/pycistarget_{gene}.html', 'w') as f:
    f.write(html.data)

2024-06-25 11:23:00,779 cisTarget    INFO     Running cisTarget for pros which has 29 regions
2024-06-25 11:23:01,132 cisTarget    INFO     Annotating motifs for pros
2024-06-25 11:23:04,916 cisTarget    INFO     Getting cistromes for pros


# cisTarget for known motif enrichment

In [2]:
# Sequence patterns treated as scute binding motifs; the first one uses N
# as a wildcard position.
motifs = [
    'CANNTG',
    'GCAGCTGG',
    'GCAGGTGT',
]
