In [1]:
import pandas
import os
from c1extra import read_remote_sheet

In [2]:
# Table of the cells to keep: one "<cell id> <cluster color>" record per line,
# with the two fields separated by whitespace.
cell_color_table = """18270_A3	cyan
18275_F12	cyan
18275_F9	cyan
18274_E5	cyan
18253_C4	cyan
18257_G3	cyan
20035_C8	cyan
18251_A6	cyan
18256_F3	cyan
18254_D9	cyan
18313_C2	cyan
18251_A7	cyan
18271_B4	cyan
18254_D3	cyan
18251_A10	cyan
18251_A1	cyan
18251_A12	cyan
20040_B4	cyan
20043_E3	cyan
18251_A2	cyan
18272_C5	cyan
18272_C10	cyan
18272_C7	cyan
18271_B9	cyan
20038_F12	cyan
20029_D4	cyan
18272_C11	cyan
18256_F11	cyan
20039_A9	cyan
18271_B6	cyan
18253_C8	cyan
18270_A10	cyan
18270_A12	cyan
20046_C5	cyan
20035_C4	cyan
18271_B5	cyan
20033_A3	cyan
20036_D3	cyan
18275_F7	cyan
18264_B4	cyan
18264_B6	cyan
20029_D6	cyan
20030_E1	cyan
18316_F1	cyan
18256_F8	cyan
20042_D5	cyan
20036_D4	cyan
20043_E5	cyan
20038_F2	cyan
20035_C9	cyan
18274_E9	cyan
18271_B10	cyan
18273_D11	cyan
18256_F1	cyan
18252_B10	cyan
20028_C12	cyan
20028_C2	cyan
20034_B3	orange
19916_C5	orange
18254_D10	orange
18088_G8	orange
20032_G2	orange
19912_G10	yellow
18313_C11	yellow
18312_B12	yellow
20046_C4	yellow
18252_B3	yellow
18316_F2	yellow
18255_E12	yellow
20043_E4	yellow
18255_E3	yellow
18312_B2	yellow"""

# Break every record into its whitespace-separated fields and collect the
# first field (cell id) and second field (color) in two parallel lists.
cell_names = []
cell_colors = []
for record in cell_color_table.split('\n'):
    fields = record.split()
    cell_names.append(fields[0])
    cell_colors.append(fields[1])

# Series of cluster colors, indexed by cell id.
cells_of_interest = pandas.Series(cell_colors, index=cell_names)

In [3]:
def is_cell_present(cells_of_interest, path):
    """Report whether any index label of ``cells_of_interest`` occurs in ``path``.

    The check is a plain substring test (``label in path``), so a label that
    is a prefix of a longer one (e.g. '18251_A1' and '18251_A10') matches
    paths of the longer one as well.

    Parameters:
        cells_of_interest: object with an ``.index`` of cell id strings.
        path: string to search.

    Returns:
        True if at least one label is contained in ``path``, otherwise False.
    """
    return any(label in path for label in cells_of_interest.index)

In [4]:
# Scan merge_bw_all.sh and keep, per cluster, the bigWig paths whose file name
# starts with a cell id listed in cells_of_interest.
cell_ids = set()   # cell ids that were found in the script
uniq_bw = {}       # cluster -> paths ending in '_uniq.bw'
all_bw = {}        # cluster -> paths ending in '_all.bw'
with open('merge_bw_all.sh') as instream:
    for line in instream:
        # The first three tokens are ignored (presumably
        # `python3 ../merge_bw.py -o`, the command layout this notebook
        # writes in a later cell -- confirm against the input script).
        # What remains is the cluster followed by the input paths.
        # split() without an argument tolerates repeated spaces and tabs.
        args = line.split()[3:]
        if not args:
            # Blank or too-short line: no cluster to read.
            continue
        cluster = args[0]
        for pathname in args[1:]:
            name = os.path.basename(pathname)
            # The cell id is everything before the separator character that
            # precedes 'mm10' in the file name.
            marker = name.find('mm10')
            if marker < 1:
                # No 'mm10' (find() == -1) or nothing in front of it: there
                # is no cell id to extract, so do not slice with a negative
                # bound.
                continue
            cell_id = name[:marker - 1]
            # `in` on a pandas Series tests the index, i.e. the cell ids.
            if cell_id in cells_of_interest:
                cell_ids.add(cell_id)
                if name.endswith('_all.bw'):
                    all_bw.setdefault(cluster, []).append(pathname)
                elif name.endswith('_uniq.bw'):
                    uniq_bw.setdefault(cluster, []).append(pathname)
                else:
                    # Matching cell but unexpected file suffix: show it.
                    print(pathname)
                


In [5]:
# Total number of '_uniq.bw' files selected across all clusters.
sum(len(paths) for paths in uniq_bw.values())

72

In [6]:
# Requested cells that were never seen in merge_bw_all.sh (expected: none).
set(cells_of_interest.index) - cell_ids

set()

In [7]:
# Collected cells that are not in the requested list (expected: none).
cell_ids - set(cells_of_interest.index)

set()

In [8]:
# Number of distinct cells collected (cell_ids is already a set).
len(cell_ids)

72

In [9]:
# There must be as many '_uniq.bw' files as distinct cells collected.
assert len(cell_ids) == sum(map(len, uniq_bw.values()))

In [10]:
# Name of this track hub. It is used as the output directory, as the stem of
# the hub / genomes / trackDb / merge-script file names, and inside the
# published URLs built further down.
target = 'brian_subset_chondro-EMP-macro'

In [11]:
# Create the hub directory and its mm10 genome sub-directory.
# os.makedirs creates any missing parent (including `target` itself) and,
# with exist_ok=True, does nothing when the directory is already there, so no
# separate exists()-then-mkdir() check is needed.
genome_dir = os.path.join(target, 'mm10')
os.makedirs(genome_dir, exist_ok=True)

In [12]:
# Write one merge_bw.py command line per cluster: first for the unique-read
# bigWigs, then for the all-read bigWigs.
with open('merge_bw_{target}.sh'.format(target=target), 'wt') as outstream:
    for group in (uniq_bw, all_bw):
        for cluster_name, bigwigs in group.items():
            args = ['python3', '../merge_bw.py', '-o', cluster_name]
            args.extend(bigwigs)
            outstream.write(' '.join(args))
            # The file is opened in text mode, which already translates '\n'
            # to the platform line ending; writing os.linesep here would
            # produce '\r\r\n' on Windows.
            outstream.write('\n')

In [13]:
# Show the working directory; the relative paths used in this notebook
# (merge_bw_all.sh, the hub directory) are resolved against it.
os.getcwd()

'/woldlab/loxcyc/home/diane/proj/C1_mouse_limb_combined'

In [14]:
# Preview the relative path of the hub.txt file.
os.path.join(target, target+'.hub.txt')

'brian_subset_chondro-EMP-macro/brian_subset_chondro-EMP-macro.hub.txt'

In [15]:
# Contents of <target>.hub.txt: hub name, labels, the genomes file to read
# next, and a contact address.
hub_template = """hub hub
shortLabel {target}
longLabel {target}
genomesFile {target}.genomes.txt
email diane@caltech.edu
"""
hub = hub_template.format(target=target)

# The hub file lives inside the hub directory and is named after it.
hub_path = os.path.join(target, target+'.hub.txt')
with open(hub_path, 'wt') as hub_file:
    hub_file.write(hub)

In [16]:
# Contents of <target>.genomes.txt: a single mm10 entry pointing at the
# trackDb file inside the mm10 sub-directory.
genome_template = """genome mm10
trackDb mm10/trackDb-{target}.txt
"""
genome = genome_template.format(target=target)

genomes_path = os.path.join(target, target+'.genomes.txt')
with open(genomes_path, 'wt') as genomes_file:
    genomes_file.write(genome)

In [17]:
# Load the cluster annotation table through the c1extra helper.
# NOTE(review): the second argument is presumably the worksheet name --
# confirm against read_remote_sheet.
sheet_url = "https://woldlab.caltech.edu/nextcloud/index.php/s/XNJQRJbWeXDyf78/download"
sheet = read_remote_sheet(sheet_url, 'genes_by_cluster')


In [18]:
# Rows of the annotation table whose c1_class is 'cyan'.
sheet.loc[sheet['c1_class'] == 'cyan']

Unnamed: 0,unified_label,abbreviations,c1_abbreviations,abbr. length,order,color,c1_class,c1_label,10x_class,10x_label,Red,Green,Blue
7,Chondrocyte,chon,chon,4.0,8,#00FFFF,cyan,chondrocyte,3,chondrocyte,0,255,255


In [19]:
# trackDb is a list of stanza strings that is joined with newlines when the
# trackDb file is written. It starts with the fixed part: a composite bigWig
# track named 'composite' with a 'multiread' sub-group (all / uniq), and a
# 'signal' view whose parent is that composite. The per-cluster tracks are
# appended to this list further down in the cell.
trackDb = ["""track composite
shortLabel bigwigs
longLabel bigwigs
type bigWig
visibility full
dimensions dimX=multiread
sortOrder multiread
subGroup1 view Views signal=signal
subGroup2 multiread multiread all=all_reads uniq=unique_only
compositeTrack on

    track signal
    shortLabel signal
    longLabel signal
    type bigWig
    visibility full
    parent composite on
    view signal
"""]

# Stanza template for one bigWig track under the 'signal' view.
# Placeholders filled with str.format:
#   {cluster} - cluster key; part of the track name and of the bigWig file name
#   {type}    - read-type suffix; also the value of the 'multiread' sub-group
#   {target}  - hub directory name inside the bigDataUrl
#   {name}    - label text for shortLabel / longLabel
#   {color}   - value of the 'color' setting
track_template = """
            track {cluster}{type}
            bigDataUrl http://woldlab.caltech.edu/~diane/C1_mouse_limb_combined/{target}/{cluster}-mm10-M4-male_{type}.bw
            shortLabel {name}{type}
            longLabel {name} {type}
            type bigWig
            visibility full
            color {color}
            subGroups multiread={type} view=signal
            parent signal on"""

# Track color for each cluster key, as an 'R,G,B' string (0-255 components)
# substituted into the 'color' line of track_template.
cluster_color = {
    'cyan': '82,207,206',
    'orange': '251,200,60',
    'yellow': '254,216,93',  # alternative value kept for reference: '251,232,112'
}
def _track_stanza(track_type, cluster):
    """Fill track_template for one (read type, cluster) combination.

    The label comes from the ``unified_label`` column of the first valid row
    of ``sheet`` whose ``c1_class`` equals ``cluster``; the color comes from
    ``cluster_color``.
    """
    matches = sheet[sheet['c1_class'] == cluster]
    first_label = matches.first_valid_index()
    annotation = matches.loc[first_label]
    return track_template.format(
        target=target,
        cluster=cluster,
        type=track_type,
        name=annotation.unified_label,
        color=cluster_color[cluster]
    )


# One stanza per read type and per distinct cluster color, unique-read tracks
# first, in order of first appearance of each color in cells_of_interest.
for track_type in ('uniq', 'all'):
    for cluster in cells_of_interest.unique():
        trackDb.append(_track_stanza(track_type, cluster))
        
# Write all stanzas, separated by newlines, to mm10/trackDb-<target>.txt
# inside the hub directory.
trackdb_path = os.path.join(target, 'mm10', 'trackDb-' + target +'.txt')
with open(trackdb_path, 'wt') as trackdb_file:
    trackdb_file.write('\n'.join(trackDb))

In [20]:
# Public URL of the hub.txt file, and the hubCheck command line for it.
hub_url_template = "http://woldlab.caltech.edu/~diane/C1_mouse_limb_combined/{target}/{target}.hub.txt"
url = hub_url_template.format(target=target)
cmd = "hubCheck " + url
print(cmd)

hubCheck http://woldlab.caltech.edu/~diane/C1_mouse_limb_combined/brian_subset_chondro-EMP-macro/brian_subset_chondro-EMP-macro.hub.txt


In [21]:
# hgTracks link for mm10 with the hub URL passed as hubUrl.
browser_link = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=mm10&hubUrl=" + url
print(browser_link)


http://genome.ucsc.edu/cgi-bin/hgTracks?db=mm10&hubUrl=http://woldlab.caltech.edu/~diane/C1_mouse_limb_combined/brian_subset_chondro-EMP-macro/brian_subset_chondro-EMP-macro.hub.txt


In [22]:
import matplotlib.colors
import numpy

In [23]:
# Convert the hex color to its red, green, blue components on a 0-255 scale.
255 * numpy.array(matplotlib.colors.hex2color('#FED85D'))

array([254., 216.,  93.])