In [1]:
# Convert coverage tracks in bedGraph (bdg) format, e.g. from RNA-seq or ATAC-seq, into my Circos format.

In [2]:
import pandas as pd
import pybedtools
import os

In [3]:
def generate_circos_coverage(bdg_file,karyotype_file):
    """Re-label bedGraph coverage intervals with the ids of a Circos karyotype.

    Each usable karyotype line is split on whitespace; field 2 is taken as the
    Circos chromosome id and fields 3-5 as the genomic interval
    (chromosome, start, end) that the id covers. The bedGraph is intersected
    with that interval and every overlapping record is tagged with the id.

    The combined table is written, tab-separated and without header or index,
    to ``<directory of karyotype_file>/<basename of bdg_file>.circos.bdg``.

    Parameters
    ----------
    bdg_file : str
        Path to the coverage track; must yield exactly four columns
        (chromosome, start, end, score).
    karyotype_file : str
        Path to the whitespace-delimited karyotype file.

    Returns
    -------
    pandas.DataFrame
        Columns ``circos_chr, start, end, score``. Empty (but with those
        columns) when no karyotype interval overlaps the coverage track.

    Raises
    ------
    ValueError
        If the intersected records do not have exactly four columns.
    """
    out_columns = ['circos_chr','start','end','score']
    cov = pybedtools.BedTool(bdg_file)
    frames = []
    with open(karyotype_file,'r') as f:
        for raw_line in f:
            # split() copes with tabs, repeated spaces and the trailing newline.
            fields = raw_line.split()
            # Blank lines, comments and truncated lines carry no interval.
            if len(fields) < 6 or fields[0].startswith('#'):
                continue
            circos_chr, chrom, start, end = fields[2:6]
            kary = pybedtools.BedTool(' '.join((chrom,start,end)), from_string=True)
            res = cov.intersect(kary)
            try:
                res = res.to_dataframe(disable_auto_names=True,header=None)
            except pd.errors.EmptyDataError:
                continue
            # Some pybedtools versions appear to return an empty frame instead
            # of raising; skipping it keeps the positional column order intact.
            if res.empty:
                continue
            res["circos_chr"] = circos_chr
            frames.append(res)
    if frames:
        # Concatenate once instead of growing a frame inside the loop.
        df = pd.concat(frames)
        df.columns = ['original_chr','start','end','score','circos_chr']
        df = df[out_columns]
    else:
        # Nothing overlapped: still return/write a well-formed (empty) table.
        df = pd.DataFrame(columns=out_columns)
    outfile = os.path.join(os.path.dirname(karyotype_file),os.path.basename(bdg_file)+'.circos.bdg')
    df.to_csv(outfile,sep='\t',header=False,index=False)
    return df

In [5]:
# D458 ATAC: project the hg38 ATAC bedGraph onto the D458 ecDNA karyotype.
# The converted track is written next to the karyotype file.
karyotype = '/mnt/c/Users/ochapman/Documents/Mesirov/medullo_ecDNA/src/circos/D458/D458_ecDNA.karyotype'
bdg = '/mnt/c/Users/ochapman/Documents/Mesirov/medullo_ecDNA/src/circos/D458/atac/D458.hg38.bdg'
df = generate_circos_coverage(bdg_file=bdg, karyotype_file=karyotype)
# Bare last expression so the notebook renders the resulting table.
df


Unnamed: 0,circos_chr,start,end,score
0,31-,56797826,56798000,51
1,31-,56798000,56799000,725
2,31-,56799000,56800000,338
3,31-,56800000,56801000,137
4,31-,56801000,56802000,417
...,...,...,...,...
197,28-29+,56790000,56791000,347
198,28-29+,56791000,56792000,437
199,28-29+,56792000,56793000,239
200,28-29+,56793000,56794000,173


In [6]:
# D458 CRISPRi proliferation screen: project the deconvolved scores onto the
# D458 ecDNA karyotype. The converted track is written next to the karyotype file.
karyotype = '/mnt/c/Users/ochapman/Documents/Mesirov/medullo_ecDNA/src/circos/D458/D458_ecDNA.karyotype'
bdg = '/mnt/c/Users/ochapman/Documents/Mesirov/medullo_ecDNA/crispri/2022-03-31_crispr-surf/D458/deconvolved_scores.bedgraph'
df = generate_circos_coverage(bdg_file=bdg, karyotype_file=karyotype)

# Alternative input (D283 screen), currently disabled:
#bdg = '/mnt/c/Users/ochapman/Documents/Mesirov/medullo_ecDNA/crispri/2022-03-31_crispr-surf/D283/deconvolved_scores.bedgraph'
#df = generate_circos_coverage(bdg,karyotype)

# Bare last expression so the notebook renders the resulting table.
df

Unnamed: 0,circos_chr,start,end,score
0,31-,56797991,56797991,0.005159
1,31-,56798011,56798011,0.005159
2,31-,56798031,56798031,0.005159
3,31-,56798051,56798051,0.005159
4,31-,56798071,56798071,0.005159
...,...,...,...,...
1405,28-29+,56791231,56791231,0.003505
1406,28-29+,56791251,56791251,0.003505
1407,28-29+,56791271,56791271,0.003505
1408,28-29+,56791291,56791291,0.003505
