In [1]:
# Convert coverage tracks in bedGraph (bdg) format, e.g. from RNA-seq or ATAC-seq, into my Circos track format.

In [1]:
import pandas as pd
import pybedtools
import os

In [4]:
def generate_circos_coverage(bdg_file,karyotype_file):
    """Re-express a bedGraph coverage track in the coordinates of a Circos karyotype.

    Every non-blank karyotype line is split on whitespace; field 2 is used as
    the Circos chromosome ID and fields 3-5 as the reference chromosome, start
    and end of the segment (0-based field numbering). The bedGraph intervals
    are intersected with each segment and the hits are tagged with that
    segment's ID. The combined table is written tab-separated, without header
    or index, to ``<basename of bdg_file>.circos.bdg`` in the directory of
    ``karyotype_file``.

    Parameters
    ----------
    bdg_file : str
        Path to the bedGraph file; four columns are expected, since the
        intersected rows are relabelled as chrom/start/end/score.
    karyotype_file : str
        Path to the Circos karyotype file.

    Returns
    -------
    pandas.DataFrame
        Columns ``circos_chr``, ``start``, ``end``, ``score``. The row index
        is not reset, so it restarts for every karyotype segment. The frame
        is empty (but still has these columns) when nothing overlaps.
    """
    cov = pybedtools.BedTool(bdg_file)
    # Collect per-segment frames and concatenate once, instead of growing a
    # DataFrame inside the loop.
    frames = []
    with open(karyotype_file,'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no segment.
                continue
            loc = fields[3]+' '+fields[4]+' '+fields[5]
            kary = pybedtools.BedTool(loc, from_string=True)
            res = cov.intersect(kary)
            try:
                res = res.to_dataframe(disable_auto_names=True,header=None)
            except pd.errors.EmptyDataError:
                # No coverage interval overlaps this segment.
                continue
            res["circos_chr"] = fields[2]
            frames.append(res)
    out_columns = ["circos_chr",'start','end','score']
    if frames:
        df = pd.concat(frames)
        df.columns = ['original_chr','start','end','score','circos_chr']
        df = df[out_columns]
    else:
        # Nothing overlapped: return/write an empty table rather than failing
        # on the column relabelling.
        df = pd.DataFrame(columns=out_columns)
    outfile = os.path.join(os.path.dirname(karyotype_file),os.path.basename(bdg_file)+'.circos.bdg')
    df.to_csv(outfile,sep='\t',header=False,index=False)
    return df
    
def import_bdg_file(bdg_file):
    """Read a headerless, tab-separated bedGraph file into a DataFrame.

    The four columns are labelled ``chr``, ``start``, ``end`` and ``score``.
    """
    bdg_columns = ['chr','start','end','score']
    return pd.read_csv(bdg_file, sep='\t', header=None, names=bdg_columns)
def import_karyotype_file(karyotype_file):
    """Read a headerless, single-space-separated karyotype file into a DataFrame.

    The seven fields of each line are named ``chr``, ``-``, ``n``, ``chrom``,
    ``start``, ``end`` and ``color``; only ``n``, ``chrom``, ``start`` and
    ``end`` are kept.
    """
    all_fields = ['chr','-','n','chrom','start','end','color']
    kept_fields = ['n','chrom','start','end']
    return pd.read_csv(karyotype_file, sep=' ', header=None,
                       names=all_fields, usecols=kept_fields)

In [5]:
# Example run: project a D458 bedGraph track (the path suggests ATAC-seq) onto the D458 ecDNA karyotype.
# NOTE(review): both paths are absolute and machine-specific; this cell only runs where these files exist.
karyotype = '/mnt/c/Users/ochapman/Documents/Mesirov/medullo_ecDNA/src/circos/D458/D458_ecDNA.karyotype'
# The returned table is discarded (this is not the cell's last expression), so this call only checks that the file parses.
import_karyotype_file(karyotype)
bdg = '/mnt/c/Users/ochapman/Documents/Mesirov/medullo_ecDNA/src/circos/D458/atac/D458.hg38.bdg'
# Likewise parsed and discarded.
import_bdg_file(bdg)
# Writes '<bdg basename>.circos.bdg' into the karyotype file's directory and returns the converted table.
df = generate_circos_coverage(bdg,karyotype)
# Last expression of the cell: display the converted table.
df


Unnamed: 0,circos_chr,start,end,score
0,31-,56797826,56798000,51
1,31-,56798000,56799000,725
2,31-,56799000,56800000,338
3,31-,56800000,56801000,137
4,31-,56801000,56802000,417
...,...,...,...,...
197,28-29+,56790000,56791000,347
198,28-29+,56791000,56792000,437
199,28-29+,56792000,56793000,239
200,28-29+,56793000,56794000,173


# D458 CRISPRi proliferation screen


In [17]:
import pandas as pd
import pyranges as pr
import os
import io

In [31]:
def generate_circos_coverage(bdg_file,karyotype_file,outfile):
    """Re-express a bedGraph score track in the coordinates of a Circos karyotype.

    The bedGraph is loaded with ``import_bdg_file``. Every non-blank karyotype
    line is parsed on its own with ``import_karyotype_file`` and intersected
    with the bedGraph intervals; the hits are tagged with the third
    whitespace-separated field of that line (the Circos chromosome ID).
    Segments without any overlap contribute nothing. The combined table is
    written to ``outfile`` tab-separated, without header or index.

    Parameters
    ----------
    bdg_file : str
        Path to the bedGraph file.
    karyotype_file : str
        Path to the Circos karyotype file.
    outfile : str
        Destination path of the converted track.

    Returns
    -------
    pandas.DataFrame
        Columns ``circos_chr``, ``start``, ``end``, ``score``. The row index
        is not reset between segments. The frame is empty (but still has
        these columns) when nothing overlaps.
    """
    cov = import_bdg_file(bdg_file)
    # Collect per-segment frames and concatenate once at the end.
    frames = []
    with open(karyotype_file,'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no segment.
                continue
            kary = import_karyotype_file(io.StringIO(line))
            res = cov.intersect(kary).as_df()
            if res.empty:
                # An empty hit table has no coordinate columns; concatenating
                # it first would put 'circos_chr' at the front and scramble
                # the column labels of everything that follows.
                continue
            res["circos_chr"] = fields[2]
            frames.append(res)
    out_columns = ["circos_chr",'start','end','score']
    if frames:
        df = pd.concat(frames)
        # Rename by label rather than by position so the result does not
        # depend on column order.
        df = df.rename(columns={'Chromosome':'original_chr','Start':'start',
                                'End':'end','Score':'score'})
        df = df[out_columns]
    else:
        df = pd.DataFrame(columns=out_columns)
    df.to_csv(outfile,sep='\t',header=False,index=False)
    return df
    
def import_bdg_file(bdg_file):
    """Read a headerless, tab-separated bedGraph file into a PyRanges object.

    The four columns are labelled ``Chromosome``, ``Start``, ``End`` and
    ``Score`` before the table is handed to ``pr.PyRanges``.
    """
    df = pd.read_table(bdg_file,header=None,sep='\t',
                       names=['Chromosome','Start','End','Score'])
    return pr.PyRanges(df)
def import_karyotype_file(karyotype_file):
    """Read a headerless, single-space-separated karyotype into a PyRanges object.

    The seven fields of each line are named ``chr``, ``-``, ``n``,
    ``Chromosome``, ``Start``, ``End`` and ``color``; only ``n``,
    ``Chromosome``, ``Start`` and ``End`` are kept before wrapping the table
    in ``pr.PyRanges``.
    """
    all_fields = ['chr','-','n','Chromosome','Start','End','color']
    kept_fields = ['n','Chromosome','Start','End']
    segments = pd.read_csv(karyotype_file, sep=' ', header=None,
                           names=all_fields, usecols=kept_fields)
    return pr.PyRanges(segments)

In [34]:
# D458 CRISPRi proliferation screen
# The karyotype and output paths are relative, so they resolve against the kernel's working directory.
karyotype =  'D458/D458_ecDNA.karyotype'
bdg = '/mnt/c/Users/ochapman/Documents/Mesirov/medullo_ecDNA/crispri/2022-03-31_crispr-surf/D458/deconvolved_scores.bedgraph'
# Leftover debugging calls, kept commented out. Note that `kary` is not a notebook-level name in the visible cells.
#import_bdg_file(bdg)
#import_karyotype_file(kary)
# The returned table is discarded here (not the cell's last expression); only the written file is kept.
generate_circos_coverage(bdg,karyotype,'D458/crispri/D458_crispri.hg38.circos.bdg')

# NOTE(review): `karyotype` is not reassigned, so the D283 scores are intersected with the D458 karyotype
# and written under D458/crispri/ — confirm this is intended.
bdg = '/mnt/c/Users/ochapman/Documents/Mesirov/medullo_ecDNA/crispri/2022-03-31_crispr-surf/D283/deconvolved_scores.bedgraph'
# Last expression of the cell, so this returned table is what the cell displays.
generate_circos_coverage(bdg,karyotype,'D458/crispri/D283_crispri.hg38.circos.bdg')


Unnamed: 0,circos_chr,start,end,score
0,2,56797991,56797991,0.005159
1,2,56798011,56798011,0.005159
2,2,56798031,56798031,0.005159
3,2,56798051,56798051,0.005159
4,2,56798071,56798071,0.005159
...,...,...,...,...
1405,1,56791231,56791231,0.003505
1406,1,56791251,56791251,0.003505
1407,1,56791271,56791271,0.003505
1408,1,56791291,56791291,0.003505


In [23]:
# Display the parsed contents of whichever bedGraph file `bdg` is currently bound to.
import_bdg_file(bdg)

Unnamed: 0,Chromosome,Start,End,Score
0,chr8,126957191,126957191,-0.005884
1,chr8,126957211,126957211,-0.005884
2,chr8,126957231,126957231,-0.005884
3,chr8,126957251,126957251,-0.005884
4,chr8,126957271,126957271,-0.005884
...,...,...,...,...
14930,chr17,7484991,7484991,-0.003621
14931,chr17,7485011,7485011,-0.003621
14932,chr17,7485031,7485031,-0.003621
14933,chr17,7485051,7485051,-0.003621
