<a href="https://colab.research.google.com/github/irinaachikhmina/Triplexes/blob/main/1_10_Data_processing_mm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [None]:
# Mount Google Drive (Colab only) so the /content/drive/MyDrive/... paths used in this notebook resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install ray

In [None]:
!pip install pyranges

In [None]:
!pip install pybedtools

In [None]:
!pip install biopython

In [None]:
import os
import numpy as np
import pandas as pd
import sys
from collections import defaultdict

import Bio
from Bio import SeqIO

from pybedtools import BedTool
import pyranges as pr

import scipy

from joblib import Parallel, delayed, dump, load
from tqdm import trange
from tqdm.notebook import tqdm

import pickle

In [None]:
# Sparse vector: clone the Sparse_vector repository into the working directory
# and import its SparseVector class, which sparser() below uses to wrap the
# dense per-chromosome signal vectors before they are written to disk.
!git clone --recurse-submodules https://github.com/Nazar1997/Sparse_vector
from Sparse_vector.sparse_vector import SparseVector

Cloning into 'Sparse_vector'...
remote: Enumerating objects: 81, done.[K
remote: Counting objects: 100% (81/81), done.[K
remote: Compressing objects: 100% (60/60), done.[K
remote: Total 81 (delta 37), reused 62 (delta 18), pack-reused 0[K
Unpacking objects: 100% (81/81), 17.52 KiB | 815.00 KiB/s, done.


# Data preparation

## Chromosomes

In [None]:
# Chromosome labels used throughout: chr1..chr19 followed by chrX and chrY.
chrom_names = ['chr' + str(label) for label in [*range(1, 20), 'X', 'Y']]

In [None]:
#!gunzip '/content/drive/MyDrive/Triplexes/data/mm10'/*

In [None]:
# Load each chromosome's FASTA record into `chroms`, keyed by chromosome name,
# and accumulate the total genome length.
# The file name must be built from the full chromosome label: the previous
# `name[:4]` truncated 'chr10'..'chr19' to 'chr1', so chr1.fa was silently
# loaded in place of ten other chromosomes.
chroms = {}
length_genome = 0
for name in tqdm(chrom_names):
    chroms[name] = SeqIO.read(f'/content/drive/MyDrive/Triplexes/data/mm10/{name}.fa',
                              'fasta')
    length_genome += len(chroms[name])
# Sequence length of every loaded chromosome.
lens_of_chroms = {chrom: len(chroms[chrom]) for chrom in chroms}

  0%|          | 0/21 [00:00<?, ?it/s]

## Triplex regions

### Intervals positive and negative

In [None]:
# Window size in bases: default `subseq_len` of get_splices()/get_subseq() and
# the width of every sampled negative window.
length = 34
# Number of bases shared by consecutive windows when get_splices() tiles a long interval.
overlap = 2

In [None]:
# Source intervals (BED file on Drive); they are split into windows below to build the positive set.
GF = BedTool('/content/drive/MyDrive/Triplexes/mm/GF.bed')

In [None]:
def get_splices(record, subseq_len=length):
  """Split an interval into windows of at most `subseq_len` bases.

  An interval no longer than `subseq_len` is returned unchanged as a
  one-element list. A longer interval is tiled from left to right with
  `subseq_len`-long windows; consecutive windows share `overlap` bases
  (module-level setting) and the last window is clipped at `record.end`.

  Args:
    record: interval exposing `chrom`, `start` and `end`.
    subseq_len: maximum window length.

  Returns:
    `[record]` for a short interval, otherwise a list of newly built
    three-field (chrom, start, end) pybedtools intervals.
  """
  if record.end - record.start <= subseq_len:
    return [record]

  pieces = []
  left = record.start
  while left < record.end:
    right = min(left + subseq_len, record.end)
    bed_line = f'{record.chrom} {left} {right}'
    pieces.append(BedTool(bed_line, from_string=True)[0])
    # The clipped (final) window advances a full step, which ends the loop;
    # every other window steps back by `overlap` to create the shared bases.
    left += subseq_len if right == record.end else subseq_len - overlap
  return pieces

In [None]:
def get_subseq(record, subseq_len=length):
  """Return the `subseq_len`-wide sequence that starts at `record.start`.

  The slice taken from `chroms[record.chrom]` is
  [record.start, record.start + subseq_len), so an interval shorter than
  `subseq_len` is extended to the right with the following genomic bases.

  Args:
    record: interval exposing `chrom`, `start` and `end`.
    subseq_len: width of the extracted window.

  Returns:
    One-element list holding the `.seq` of the sliced chromosome record.
  """
  window_stop = record.start + subseq_len
  chrom_record = chroms[record.chrom]
  return [chrom_record[record.start:window_stop].seq]

In [None]:
def seq_to_features(seq):
    """One-hot encode a nucleotide sequence.

    Rows follow the positions of `seq`; columns are A, T, G, C in that order.
    Matching is case-insensitive, and any other symbol (for example N) leaves
    its row all zeros.

    Args:
        seq: sized iterable of single-character strings.

    Returns:
        NumPy float array of shape (len(seq), 4).
    """
    column_of = {'a': 0, 't': 1, 'g': 2, 'c': 3}
    feat = np.zeros((len(seq), 4))
    for pos, nuc in enumerate(seq):
        col = column_of.get(nuc.lower())
        if col is not None:
            feat[pos, col] = 1
    return feat

Cut regions longer than 34 nt into overlapping windows of at most 34 nt, extend shorter regions to 34 nt with the downstream genomic sequence, and one-hot encode every window.

In [None]:
# Split every source interval into windows of at most `length` bases and
# gather all windows into a single BedTool.
intervals = [window for record in GF for window in get_splices(record)]
intervals = BedTool(intervals)

# Fixed-width coordinates (start, start + length) of every window: this is the
# span whose sequence get_subseq() extracts.
ints_in = BedTool([(record.chrom, record.start, record.start + length)
                   for record in intervals])

# One-hot encoded positive windows, plus the set of genomic positions they
# occupy on each chromosome (consulted when negative windows are sampled).
HDNA_pos = []
chroms_used = {name: set() for name in chrom_names}

for record in intervals:
  HDNA_pos.extend(seq_to_features(x) for x in get_subseq(record))
  # Reserve the whole extracted span, not just [start, end): windows shorter
  # than `length` are extended to the right by get_subseq(), and that tail
  # must not be reused by a negative window either.
  used_stop = max(record.end, record.start + length)
  chroms_used[record.chrom].update(range(record.start, used_stop))

print(len(HDNA_pos))
print(HDNA_pos[0].shape)

23093
(34, 4)


In [None]:
# Write the fixed-width (start, start + length) positive windows to a BED file on Drive.
ints_in.saveas('/content/drive/MyDrive/Triplexes/mm/data/ints_in.bed')

<BedTool(/content/drive/MyDrive/Triplexes/mm/data/ints_in.bed)>

Negative class

In [None]:
from random import choice, randrange

In [None]:
import random

# Dedicated, seeded generator so the sampled negative windows are reproducible
# from one run to the next (the sampling was previously unseeded).
NEG_SAMPLING_SEED = 42
neg_rng = random.Random(NEG_SAMPLING_SEED)

# For each ratio, sample `portion` negative windows per positive window.
# `chroms_used` keeps growing across ratios, so windows drawn for one ratio are
# never reused by a later one.
portions = [1, 20]
for portion in portions:
  num_GF_neg = len(ints_in) * portion

  GF_neg = []

  for i in tqdm(range(num_GF_neg)):
    # Rejection sampling: pick a chromosome uniformly, then a window start
    # uniformly; redraw while the window has too many 'N' bases or touches a
    # position that is already taken.
    while True:
      chrom = neg_rng.choice(chrom_names)
      start = neg_rng.randrange(len(chroms[chrom]) - length)
      end = start + length
      # Slice the sequence directly instead of building a SeqRecord per draw.
      window_seq = chroms[chrom].seq[start:end]
      if window_seq.count('N') / length >= 0.01:
        continue
      if not chroms_used[chrom].isdisjoint(range(start, end)):
        continue
      break
    chroms_used[chrom].update(range(start, end))
    GF_neg.append((chrom, start, end))

  ints_out = BedTool(GF_neg)

  HDNA_neg = []
  for record in ints_out:
    HDNA_neg.extend([seq_to_features(x) for x in get_subseq(record)])

  print(len(HDNA_neg))
  print(HDNA_neg[0].shape)

  ints_out.saveas(f'/content/drive/MyDrive/Triplexes/mm/data/ints_out{portion}.bed')
#  with open(f'/content/drive/MyDrive/Triplexes/data/HDNA_neg{portion}.pkl', 'wb') as fp:
#    pickle.dump(HDNA_neg, fp)

  0%|          | 0/23093 [00:00<?, ?it/s]

23093
(34, 4)


  0%|          | 0/461860 [00:00<?, ?it/s]

461860
(34, 4)


## Omics data

### Histone marks, transcription factors, RNA polymerase, DNase

In [None]:
# Names of the entries in the raw omics folder, in reversed os.listdir() order.
# NOTE(review): os.listdir() returns entries in arbitrary order, so this
# ordering (and the factor order derived from it) may differ between
# runs or filesystems - confirm nothing downstream depends on it.
files = os.listdir('/content/drive/MyDrive/Triplexes/mm/data/omics_raw/')[::-1]
files

['tf.bed', 'his.bed', 'pol.bed', 'dns.bed']

In [None]:
# Collect one (group, factor) row per distinct Factor value of every raw .bed
# table; the group is the file name without its '.bed' suffix. The resulting
# table is written to Drive as CSV.
factors = []
for file in files:
  if not file.endswith('.bed'):
    continue
  group = file[:-4]
  df = pd.read_csv(f'/content/drive/MyDrive/Triplexes/mm/data/omics_raw/{file}', sep='\t')
  unique_factors = df.Factor.unique()
  print(group, len(unique_factors))
  factors.extend((group, factor) for factor in unique_factors)
factors_df = pd.DataFrame(factors, columns=['Group', 'Factor'])
factors_df.to_csv('/content/drive/MyDrive/Triplexes/mm/data/factors_df.bed')

tf 57
his 40
pol 1
dns 1


In [None]:
# Show the header and first data row of tf.bed to check the column layout.
! head -2 /content/drive/MyDrive/Triplexes/mm/data/omics_raw/tf.bed

Chromosome	Start	End	Factor	Cell_type	Score
chr1	3011930	3011964	Rag1	B_cells	223


In [None]:
def sparser(file, lens_of_chroms, chrom_names, done_files):
  """Convert one raw omics table into per-factor sparse signal tracks.

  Reads the tab-separated `file` from the omics_raw folder. For every value of
  its `Factor` column it builds, per chromosome, a dense vector in which each
  position covered by an interval holds the largest score among the intervals
  covering it. Each vector is wrapped in a SparseVector, and the resulting
  {chromosome: SparseVector} dict is written with joblib to
  omics_sparse/<factor>.pkl. Chromosomes from `chrom_names` that have no
  interval for a factor get an all-zero vector.

  Args:
    file: name of a file inside the omics_raw folder.
    lens_of_chroms: mapping chromosome name -> sequence length.
    chrom_names: chromosome names to keep; rows on other contigs are ignored.
    done_files: collection of factor names to skip.

  Returns:
    None. Output is written to disk; contigs that were ignored are reported
    in a single summary line per file instead of one line per contig.
  """
  df = pd.read_csv(f'/content/drive/MyDrive/Triplexes/mm/data/omics_raw/{file}', sep='\t')
  skipped_contigs = set()
  for factor, factor_df in tqdm(df.groupby(df.Factor), desc=file):
    if factor in done_files:
      continue
    loc_dd = {}
    for chrom, sub_df in tqdm(factor_df.groupby(factor_df.Chromosome), desc=factor):
        if chrom not in chrom_names:
          # Remember the contig; printing here flooded the notebook output.
          skipped_contigs.add(chrom)
          continue
        vec = np.zeros(lens_of_chroms[chrom])
        for inter in sub_df.values:
          # Rows are read by position: column 1 = start, 2 = end, 5 = score
          # (Chromosome, Start, End, Factor, Cell_type, Score layout).
          start, end, score = inter[1], inter[2], inter[5]
          # NOTE(review): `end + 1` treats End as inclusive; BED-style ends are
          # usually exclusive - confirm against the data source.
          covered = slice(start, end + 1)
          vec[covered] = np.maximum(vec[covered], score)
        loc_dd[chrom] = SparseVector(vec)
    for chrom in chrom_names:
      if chrom not in loc_dd:
        loc_dd[chrom] = SparseVector(np.zeros(lens_of_chroms[chrom]))
    dump(loc_dd, f'/content/drive/MyDrive/Triplexes/mm/data/omics_sparse/{factor}.pkl', compress=3)
  if skipped_contigs:
    print(f'{file}: ignored {len(skipped_contigs)} contigs not in chrom_names: '
          + ', '.join(sorted(map(str, skipped_contigs))))

In [None]:
# Factor names that sparser() should skip; left empty so that every factor is processed.
done_files = set()

In [None]:
# Build the sparse per-factor tracks for every raw .bed table listed in `files`.
for file in files:
  if not file.endswith('.bed'):
    continue
  sparser(file, lens_of_chroms, chrom_names, done_files)

tf.bed:   0%|          | 0/57 [00:00<?, ?it/s]

Aicda:   0%|          | 0/42 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584292_random
chrM
chrUn_GL456239
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456381
chrUn_GL456383
chrUn_GL456387
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304


Ash2l:   0%|          | 0/30 [00:00<?, ?it/s]

chr1_GL456210_random
chr4_GL456216_random
chr4_JH584295_random
chrM
chrUn_GL456359
chrUn_GL456370
chrUn_GL456389
chrUn_GL456392
chrUn_GL456396
chrUn_JH584304


Batf:   0%|          | 0/28 [00:00<?, ?it/s]

chr4_GL456216_random
chrM
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


Bhlhe41:   0%|          | 0/30 [00:00<?, ?it/s]

chr4_GL456216_random
chrM
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


Biotin:   0%|          | 0/60 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584292_random
chr4_JH584293_random
chr4_JH584294_random
chr4_JH584295_random
chr5_GL456354_random
chr5_JH584296_random
chr5_JH584297_random
chr5_JH584298_random
chr5_JH584299_random
chr7_GL456219_random
chrM
chrUn_GL456239
chrUn_GL456359
chrUn_GL456366
chrUn_GL456367
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456382
chrUn_GL456383
chrUn_GL456385
chrUn_GL456387
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584300_random
chrY_JH584303_random


Bmi1:   0%|          | 0/30 [00:00<?, ?it/s]

chr4_GL456216_random
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


Brca1:   0%|          | 0/32 [00:00<?, ?it/s]

chr4_GL456216_random
chrM
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


Brd4:   0%|          | 0/40 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr5_GL456354_random
chrM
chrUn_GL456359
chrUn_GL456368
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456383
chrUn_GL456387
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


BrdU:   0%|          | 0/27 [00:00<?, ?it/s]

chr4_GL456216_random
chrM
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


Cebpa:   0%|          | 0/31 [00:00<?, ?it/s]

chr4_GL456216_random
chr5_GL456354_random
chrM
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


Chd4:   0%|          | 0/42 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584292_random
chr4_JH584295_random
chr5_JH584297_random
chrM
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


Crebbp:   0%|          | 0/29 [00:00<?, ?it/s]

chr4_GL456216_random
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


Ctcf:   0%|          | 0/58 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584292_random
chr4_JH584293_random
chr4_JH584294_random
chr5_GL456354_random
chr5_JH584296_random
chr5_JH584297_random
chr5_JH584298_random
chr5_JH584299_random
chrM
chrUn_GL456239
chrUn_GL456359
chrUn_GL456366
chrUn_GL456367
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456381
chrUn_GL456383
chrUn_GL456385
chrUn_GL456387
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584300_random
chrY_JH584303_random


Ebf1:   0%|          | 0/33 [00:00<?, ?it/s]

chr4_GL456216_random
chrM
chrUn_GL456370
chrUn_GL456381
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


Ep300:   0%|          | 0/35 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456213_random
chr4_GL456216_random
chrM
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


Epitope:   0%|          | 0/35 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584295_random
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456396
chrUn_JH584304


Esr1:   0%|          | 0/32 [00:00<?, ?it/s]

chr4_GL456216_random
chr4_JH584292_random
chrM
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304


Ets1:   0%|          | 0/39 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584292_random
chrM
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584300_random


Ezh2:   0%|          | 0/31 [00:00<?, ?it/s]

chr4_GL456216_random
chr4_JH584294_random
chrM
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


Hdac1:   0%|          | 0/43 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584292_random
chr4_JH584295_random
chrM
chrUn_GL456359
chrUn_GL456367
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


Hdac2:   0%|          | 0/41 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584292_random
chr5_JH584299_random
chrM
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


Ikzf1:   0%|          | 0/41 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584294_random
chrM
chrUn_GL456359
chrUn_GL456367
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


Irf4:   0%|          | 0/36 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456213_random
chr4_GL456216_random
chrM
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


Irf8:   0%|          | 0/33 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456212_random
chr4_GL456216_random
chr5_GL456354_random
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


Kat2a:   0%|          | 0/40 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr4_GL456216_random
chr4_JH584292_random
chr4_JH584295_random
chrM
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


Kdm4a:   0%|          | 0/26 [00:00<?, ?it/s]

chr4_GL456216_random
chr4_JH584292_random
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


Kdm4c:   0%|          | 0/24 [00:00<?, ?it/s]

chr4_GL456216_random
chr4_JH584292_random
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304


Klf4:   0%|          | 0/34 [00:00<?, ?it/s]

chr4_GL456216_random
chr5_GL456354_random
chr5_JH584299_random
chrUn_GL456367
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


Kmt2a:   0%|          | 0/41 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584292_random
chr5_JH584299_random
chrM
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


Mef2c:   0%|          | 0/33 [00:00<?, ?it/s]

chr4_GL456216_random
chr4_JH584292_random
chrM
chrUn_GL456367
chrUn_GL456370
chrUn_GL456383
chrUn_GL456387
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


Men1:   0%|          | 0/29 [00:00<?, ?it/s]

chr4_GL456216_random
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


Myc:   0%|          | 0/48 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584292_random
chr4_JH584293_random
chr4_JH584294_random
chrM
chrUn_GL456239
chrUn_GL456359
chrUn_GL456367
chrUn_GL456370
chrUn_GL456372
chrUn_GL456381
chrUn_GL456383
chrUn_GL456387
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584300_random


Nbn:   0%|          | 0/39 [00:00<?, ?it/s]

chr4_GL456216_random
chr4_GL456350_random
chr4_JH584292_random
chr4_JH584293_random
chr4_JH584294_random
chr5_JH584296_random
chrM
chrUn_GL456370
chrUn_GL456381
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


Nipbl:   0%|          | 0/50 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584292_random
chr4_JH584295_random
chr5_GL456354_random
chr5_JH584296_random
chr5_JH584297_random
chr5_JH584298_random
chr5_JH584299_random
chrM
chrUn_GL456239
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584300_random


Pax5:   0%|          | 0/37 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456213_random
chr4_GL456216_random
chr5_JH584297_random
chr5_JH584298_random
chr5_JH584299_random
chrM
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


Paxip1:   0%|          | 0/38 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chrM
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrY_JH584302_random


Rad21:   0%|          | 0/54 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584292_random
chr4_JH584293_random
chr4_JH584294_random
chr5_GL456354_random
chr5_JH584296_random
chr5_JH584297_random
chr5_JH584298_random
chr5_JH584299_random
chrM
chrUn_GL456359
chrUn_GL456366
chrUn_GL456367
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456385
chrUn_GL456387
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584300_random
chrY_JH584303_random


Rad51:   0%|          | 0/41 [00:00<?, ?it/s]

chr1_GL456211_random
chr4_GL456216_random
chr4_JH584294_random
chr5_JH584298_random
chr5_JH584299_random
chrM
chrUn_GL456367
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584301_random


Rag1:   0%|          | 0/49 [00:00<?, ?it/s]

chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584293_random
chr4_JH584294_random
chr5_JH584296_random
chr5_JH584297_random
chr5_JH584298_random
chr5_JH584299_random
chr7_GL456219_random
chrM
chrUn_GL456239
chrUn_GL456370
chrUn_GL456378
chrUn_GL456379
chrUn_GL456381
chrUn_GL456382
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584300_random
chrY_JH584301_random


Rag2:   0%|          | 0/43 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584295_random
chr5_JH584296_random
chr5_JH584297_random
chrM
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


Rpa1:   0%|          | 0/53 [00:00<?, ?it/s]

chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584293_random
chr4_JH584294_random
chr5_JH584298_random
chr5_JH584299_random
chr7_GL456219_random
chrM
chrUn_GL456239
chrUn_GL456367
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456379
chrUn_GL456382
chrUn_GL456383
chrUn_GL456387
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584300_random
chrY_JH584301_random
chrY_JH584303_random


Rpa2:   0%|          | 0/38 [00:00<?, ?it/s]

chr4_GL456216_random
chr5_JH584299_random
chrM
chrUn_GL456239
chrUn_GL456367
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


Runx1:   0%|          | 0/34 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456213_random
chr4_GL456216_random
chrM
chrUn_GL456367
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


Smarca4:   0%|          | 0/44 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584293_random
chr4_JH584295_random
chr5_JH584296_random
chr5_JH584297_random
chr5_JH584299_random
chrM
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


Smc1a:   0%|          | 0/37 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584292_random
chrUn_GL456359
chrUn_GL456370
chrUn_GL456381
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304


Smc3:   0%|          | 0/41 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584292_random
chr4_JH584295_random
chrM
chrUn_GL456359
chrUn_GL456366
chrUn_GL456367
chrUn_GL456370
chrUn_GL456381
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304


Smc5:   0%|          | 0/33 [00:00<?, ?it/s]

chr4_GL456216_random
chrM
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584302_random


Sox4:   0%|          | 0/32 [00:00<?, ?it/s]

chr4_GL456216_random
chr4_JH584292_random
chrM
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304


Spi1:   0%|          | 0/51 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584293_random
chr4_JH584294_random
chr4_JH584295_random
chr5_GL456354_random
chr5_JH584299_random
chr7_GL456219_random
chrM
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456383
chrUn_GL456385
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584300_random
chrY_JH584301_random
chrY_JH584302_random


Stat1:   0%|          | 0/27 [00:00<?, ?it/s]

chrM
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456396
chrUn_JH584304


Stat2:   0%|          | 0/32 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chrM
chrUn_GL456370
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456396
chrUn_JH584304


Supt5:   0%|          | 0/30 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456221_random
chr4_GL456216_random
chrM
chrUn_GL456370
chrUn_GL456372
chrUn_GL456389
chrUn_GL456392
chrUn_GL456396
chrUn_JH584304


Tcf3:   0%|          | 0/38 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584292_random
chrM
chrUn_GL456367
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304


Trim28:   0%|          | 0/32 [00:00<?, ?it/s]

chr4_GL456216_random
chrM
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304


Wdr5:   0%|          | 0/39 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584292_random
chrM
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


Zbtb17:   0%|          | 0/38 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584292_random
chr5_JH584299_random
chr7_GL456219_random
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


Zmynd8:   0%|          | 0/34 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr4_GL456216_random
chrM
chrUn_GL456367
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


his.bed:   0%|          | 0/40 [00:00<?, ?it/s]

H2A.XS139ph:   0%|          | 0/40 [00:00<?, ?it/s]

chr1_GL456212_random
chr4_GL456216_random
chr4_JH584294_random
chr5_JH584299_random
chrM
chrUn_GL456239
chrUn_GL456370
chrUn_GL456372
chrUn_GL456382
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584300_random


H2A.Z:   0%|          | 0/45 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584293_random
chr4_JH584294_random
chr5_JH584296_random
chr5_JH584297_random
chr5_JH584299_random
chrM
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


H2AK119Ub:   0%|          | 0/38 [00:00<?, ?it/s]

chr1_GL456210_random
chr4_GL456216_random
chr4_JH584292_random
chrM
chrUn_GL456239
chrUn_GL456367
chrUn_GL456370
chrUn_GL456372
chrUn_GL456381
chrUn_GL456383
chrUn_GL456387
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


H2AK9ac:   0%|          | 0/44 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584293_random
chr4_JH584294_random
chr4_JH584295_random
chr5_JH584296_random
chr5_JH584297_random
chrM
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


H2BK120ac:   0%|          | 0/39 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584295_random
chr5_JH584297_random
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


H2BK12ac:   0%|          | 0/33 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chrUn_GL456370
chrUn_GL456372
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


H2BK20ac:   0%|          | 0/25 [00:00<?, ?it/s]

chrUn_GL456370
chrUn_GL456389
chrUn_GL456392
chrUn_GL456396
chrUn_JH584304


H2BK5ac:   0%|          | 0/39 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584295_random
chr5_JH584297_random
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584300_random


H3:   0%|          | 0/38 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chr5_JH584297_random
chrM
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


H3K14ac:   0%|          | 0/41 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584295_random
chr5_JH584296_random
chr5_JH584297_random
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


H3K18ac:   0%|          | 0/47 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584292_random
chr4_JH584293_random
chr4_JH584294_random
chr4_JH584295_random
chrM
chrUn_GL456239
chrUn_GL456359
chrUn_GL456360
chrUn_GL456366
chrUn_GL456370
chrUn_GL456372
chrUn_GL456381
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304


H3K23ac:   0%|          | 0/32 [00:00<?, ?it/s]

chr1_GL456211_random
chr4_GL456216_random
chr4_JH584292_random
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


H3K27ac:   0%|          | 0/55 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584292_random
chr4_JH584293_random
chr4_JH584294_random
chr4_JH584295_random
chr5_GL456354_random
chr5_JH584296_random
chr5_JH584297_random
chr5_JH584299_random
chrM
chrUn_GL456239
chrUn_GL456359
chrUn_GL456367
chrUn_GL456368
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456381
chrUn_GL456383
chrUn_GL456387
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


H3K27me1:   0%|          | 0/40 [00:00<?, ?it/s]

chr1_GL456211_random
chr4_GL456216_random
chr4_JH584292_random
chr5_JH584299_random
chrUn_GL456239
chrUn_GL456367
chrUn_GL456368
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456381
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304


H3K27me2:   0%|          | 0/36 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr4_GL456216_random
chr4_JH584292_random
chr4_JH584295_random
chrUn_GL456359
chrUn_GL456367
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


H3K27me3:   0%|          | 0/46 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584292_random
chr4_JH584293_random
chr4_JH584294_random
chrM
chrUn_GL456239
chrUn_GL456359
chrUn_GL456360
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456381
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304


H3K36ac:   0%|          | 0/35 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584295_random
chr5_JH584297_random
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456389
chrUn_GL456392
chrUn_GL456396
chrUn_JH584304


H3K36me1:   0%|          | 0/31 [00:00<?, ?it/s]

chr4_GL456216_random
chr4_JH584292_random
chr4_JH584294_random
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


H3K36me2:   0%|          | 0/27 [00:00<?, ?it/s]

chrUn_GL456370
chrUn_GL456372
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456396
chrUn_JH584304


H3K36me3:   0%|          | 0/38 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584292_random
chr4_JH584295_random
chrUn_GL456367
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


H3K4ac:   0%|          | 0/33 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456389
chrUn_GL456392
chrUn_GL456396
chrUn_JH584304


H3K4me1:   0%|          | 0/51 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584292_random
chr4_JH584293_random
chr4_JH584294_random
chr5_GL456354_random
chr5_JH584296_random
chr5_JH584297_random
chrM
chrUn_GL456239
chrUn_GL456359
chrUn_GL456367
chrUn_GL456370
chrUn_GL456372
chrUn_GL456381
chrUn_GL456383
chrUn_GL456387
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


H3K4me2:   0%|          | 0/49 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584292_random
chr4_JH584293_random
chr4_JH584294_random
chr4_JH584295_random
chr5_JH584296_random
chr5_JH584297_random
chrM
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456382
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584300_random


H3K4me3:   0%|          | 0/57 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584292_random
chr4_JH584293_random
chr4_JH584294_random
chr4_JH584295_random
chr5_JH584296_random
chr5_JH584297_random
chr5_JH584299_random
chr7_GL456219_random
chrM
chrUn_GL456239
chrUn_GL456359
chrUn_GL456367
chrUn_GL456368
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456381
chrUn_GL456382
chrUn_GL456383
chrUn_GL456387
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584300_random
chrY_JH584301_random


H3K56ac:   0%|          | 0/37 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584292_random
chr4_JH584295_random
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


H3K79me1:   0%|          | 0/35 [00:00<?, ?it/s]

chr4_GL456216_random
chr4_JH584295_random
chr5_GL456354_random
chr5_JH584296_random
chr5_JH584297_random
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


H3K79me2:   0%|          | 0/34 [00:00<?, ?it/s]

chr1_GL456210_random
chr4_GL456216_random
chr4_JH584292_random
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304


H3K79me3:   0%|          | 0/35 [00:00<?, ?it/s]

chr1_GL456213_random
chr4_GL456216_random
chr4_JH584292_random
chr4_JH584295_random
chrUn_GL456367
chrUn_GL456370
chrUn_GL456378
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304


H3K9ac:   0%|          | 0/44 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584293_random
chr4_JH584294_random
chr4_JH584295_random
chr5_JH584296_random
chr5_JH584297_random
chrM
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584301_random


H3K9me1:   0%|          | 0/34 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456221_random
chr4_GL456216_random
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


H3K9me2:   0%|          | 0/41 [00:00<?, ?it/s]

chr1_GL456211_random
chr4_GL456216_random
chr4_JH584292_random
chr4_JH584295_random
chrM
chrUn_GL456239
chrUn_GL456367
chrUn_GL456368
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456381
chrUn_GL456383
chrUn_GL456387
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304


H3K9me3:   0%|          | 0/59 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584292_random
chr4_JH584293_random
chr4_JH584294_random
chr4_JH584295_random
chr5_GL456354_random
chr5_JH584296_random
chr5_JH584297_random
chr5_JH584298_random
chr5_JH584299_random
chr7_GL456219_random
chrUn_GL456239
chrUn_GL456359
chrUn_GL456360
chrUn_GL456366
chrUn_GL456367
chrUn_GL456368
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456381
chrUn_GL456383
chrUn_GL456387
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584303_random


H3S28ph:   0%|          | 0/31 [00:00<?, ?it/s]

chr4_GL456216_random
chrM
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304


H4K12ac:   0%|          | 0/44 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584293_random
chr4_JH584294_random
chr4_JH584295_random
chr5_JH584296_random
chr5_JH584297_random
chr5_JH584299_random
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


H4K16ac:   0%|          | 0/38 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chr4_JH584292_random
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304


H4K20me1:   0%|          | 0/32 [00:00<?, ?it/s]

chr4_GL456216_random
chr4_JH584292_random
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304


H4K20me3:   0%|          | 0/30 [00:00<?, ?it/s]

chr4_GL456216_random
chr4_JH584292_random
chrUn_GL456367
chrUn_GL456370
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304


H4K5ac:   0%|          | 0/24 [00:00<?, ?it/s]

chrUn_GL456370
chrUn_GL456389
chrUn_GL456392
chrUn_GL456396
chrUn_JH584304


H4K8ac:   0%|          | 0/42 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584293_random
chr5_JH584297_random
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456378
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


H4K91ac:   0%|          | 0/38 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456221_random
chr4_GL456216_random
chr5_JH584296_random
chr5_JH584297_random
chrUn_GL456370
chrUn_GL456372
chrUn_GL456383
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random


pol.bed:   0%|          | 0/1 [00:00<?, ?it/s]

RNA:   0%|          | 0/51 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584292_random
chr4_JH584293_random
chr4_JH584294_random
chr4_JH584295_random
chr5_JH584297_random
chrM
chrUn_GL456239
chrUn_GL456359
chrUn_GL456367
chrUn_GL456370
chrUn_GL456372
chrUn_GL456381
chrUn_GL456383
chrUn_GL456387
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584300_random


dns.bed:   0%|          | 0/1 [00:00<?, ?it/s]

DNase-Seq:   0%|          | 0/50 [00:00<?, ?it/s]

chr1_GL456210_random
chr1_GL456211_random
chr1_GL456212_random
chr1_GL456213_random
chr1_GL456221_random
chr4_GL456216_random
chr4_GL456350_random
chr4_JH584292_random
chr4_JH584293_random
chr4_JH584294_random
chr4_JH584295_random
chr5_JH584297_random
chrM
chrUn_GL456239
chrUn_GL456359
chrUn_GL456370
chrUn_GL456372
chrUn_GL456381
chrUn_GL456383
chrUn_GL456387
chrUn_GL456389
chrUn_GL456390
chrUn_GL456392
chrUn_GL456393
chrUn_GL456394
chrUn_GL456396
chrUn_JH584304
chrX_GL456233_random
chrY_JH584300_random


### Adding the group prefix to file names

In [None]:
# Tabulate the collected `factors` records as a two-column frame (Group, Factor),
# write it to Drive as CSV (the target file keeps its '.bed' name), and preview
# the first rows as the cell output.
factors_df = pd.DataFrame(data=factors, columns=['Group', 'Factor'])
factors_df.to_csv(
    path_or_buf='/content/drive/MyDrive/Triplexes/mm/data/factors_df.bed'
)
factors_df.head(n=5)

Unnamed: 0,Group,Factor
0,tf,Rag1
1,tf,Rpa1
2,tf,Nipbl
3,tf,Spi1
4,tf,Rad51


In [None]:
# Prefix each pickled omics track with its feature group:
# '<Factor>.pkl' -> '<Group>_<Factor>.pkl'.
omics_sparse_dir = '/content/drive/MyDrive/Triplexes/mm/data/omics_sparse/'
# Kept from the original cell: os.rename below works on relative names, and the
# working directory stays changed for whatever runs afterwards.
os.chdir(omics_sparse_dir)
# Factor -> Group lookup built once instead of two boolean-mask scans per file.
# On duplicate factors the first row wins, matching the original `.values[0]`.
group_by_factor = (
    factors_df.drop_duplicates(subset='Factor')
    .set_index('Factor')['Group']
    .to_dict()
)
for file in os.listdir(omics_sparse_dir):
  if file[-3:] != 'pkl':
    continue
  name = file[:-4]
  group = group_by_factor.get(name)
  if group is None:
    # No factor with this name in factors_df -- typically the file already
    # carries its group prefix from an earlier run of this cell. The original
    # code raised IndexError here and stopped halfway through the directory.
    continue
  new_name = group + '_' + name + '.pkl'
  os.rename(file, new_name)

### Comparison of human and mouse omics features

In [None]:
# Human omics feature names: each '*pkl' entry of the human omics_sparse
# directory with its last four characters ('.pkl') stripped.
Omics_features_h = [
    fname[:-4]
    for fname in os.listdir('/content/drive/MyDrive/Triplexes/data/omics_sparse/')
    if fname[-3:] == 'pkl'
]

In [None]:
# Mouse omics feature names: each '*pkl' entry of the mouse omics_sparse
# directory with its last four characters ('.pkl') stripped.
Omics_features_m = [
    fname[:-4]
    for fname in os.listdir('/content/drive/MyDrive/Triplexes/mm/data/omics_sparse/')
    if fname[-3:] == 'pkl'
]

In [None]:
# Turn both feature-name lists into sets so they can be intersected below.
Omics_features_h, Omics_features_m = set(Omics_features_h), set(Omics_features_m)

In [None]:
# Report how many omics features each species has, then the size and the
# members of the set of feature names present in both.
# NOTE(review): the original trailing note read "from 20 features excluding tf";
# what the 20 refers to is not visible in this cell -- confirm.
omics_features_shared = Omics_features_h & Omics_features_m
print('human:', len(Omics_features_h))
print('mouse:', len(Omics_features_m))
print(len(omics_features_shared))
print(omics_features_shared)

human: 149
mouse: 99
17
{'his_H3K79me2', 'his_H3K36me3', 'pol_RNA', 'his_H3K27ac', 'his_H3K9ac', 'his_H3K4me1', 'his_H3', 'dns_DNase-Seq', 'tf_Epitope', 'his_H3K4me2', 'his_H2BK20ac', 'his_H2A.XS139ph', 'his_H2A.Z', 'his_H4K20me1', 'his_H3K4me3', 'his_H3K9me3', 'his_H3K27me3'}
