In [1]:
import pandas as pd
import csv
import numpy as np
import seaborn as sns
from tqdm import tqdm
import math
from scipy.stats import mannwhitneyu
import time
import os

In [2]:
######## DHS----------------
def isInRange(n, tbl):
    #https://stackoverflow.com/questions/9019581/what-does-numpy-apply-along-axis-perform-exactly
    #https://stackoverflow.com/questions/71377816/check-if-value-is-between-two-values-in-numpy-array
    # n: is sgRNA location in the tiling/phenotypic dataset
    # tbl: numpy file of dataframe of bigbed with chro, start, and end of peak filtered_peak_file.to_numpy()
    #apply_along_axis applies the supplied function along 1D slices of the input array, 
    #with the slices taken along the axis you specify. 
    return sum(np.apply_along_axis(lambda row: row[0] <= n <= row[1], 1, tbl)\
        .tolist())

def dhs_peak_overlap(actual_tiling_gene , dhs_chromo_txt, sgrna_location_col):
    # This function find whether guide overlaps with a peak in one chromosome 
    
    # actual_tiling: CRISPRi data (or any CRISPR data) with at least three columns 
    #                       [a location to indicate sgRNA, Gene name, Chromosome]
    #                       Note this DOES NOT require unique sgRNA location 
    # dhs_chromo_txt: ATAC seq from ENCODE in bigBed format and read through pyBigWig
    # chrom: string of chromosome such as 'chr1'
    # sgrna_location_col: string of column for sgRNA location
    
    # returns a dataframe, same as the actual_tilling but with an additional column overlap with peak to indicate 
    # whether theres an overlap between start and end of a peak and sgRNA location for one chromosome
    
    #----------------------------------------------------------------------------------------------------
        
    # select unique pam coord and chr and gene - remove duplicate 
    # tiling_lib: CRISPRi data (or any CRISPR data) with at least three columns 
    #                       [a location to indicate sgRNA, Gene name, Chromosome]
    #                       Note this should has unique sgRNA location 
    tiling_lib = actual_tiling_gene.copy()
    
    # change from string to int
    tiling_lib[sgrna_location_col] = list(map(int,tiling_lib[sgrna_location_col]))
    
    #obtain the smallest pam coord in a specific chromosome
    smallest_pam_coord = tiling_lib[sgrna_location_col].min()
    #obtain the largest pam coord in a specific chromosome
    largestest_pam_coord = tiling_lib[sgrna_location_col].max()
    
    #subset chrom number and having the end coord to be larger than the smallest pam coord
    
    # Retrieving bigBed entries in https://github.com/deeptools/pyBigWig explains
    # filtered_peak_file returns a list of tuple of (Start position in chromosome, End position in chromosome)
    # filtered_peak_file = dhs_chromo_txt.entries(chrom, smallest_pam_coord, largestest_pam_coord, withString=False) 
    filtered_peak_file = dhs_chromo_txt[(dhs_chromo_txt["start"] >= int(smallest_pam_coord)) & (dhs_chromo_txt["end"] <= int(largestest_pam_coord))].copy()
    
    # only kept unique peaks and make it into dataframe because of https://www.biostars.org/p/464618/
    # filtered_peak_file = pd.DataFrame(set(filtered_peak_file))
    filtered_peak_file = filtered_peak_file.drop_duplicates(subset = ["start", "end"])
    
    if len(filtered_peak_file) == 0:
        print('This gene does not has any peaks in DHS')
        tiling_lib.loc[:, 'overlap with peak'] = 0
    else:
    # iterating over every single sgRNA location in the tiling library/a dataset with phenotypic data
        peak_list = [isInRange(x, filtered_peak_file.to_numpy()) 
                     for x in np.nditer(tiling_lib[sgrna_location_col].to_numpy())]  

        #https://stackoverflow.com/questions/32573452/settingwithcopywarning-even-when-using-locrow-indexer-col-indexer-value
        tiling_lib.loc[:, 'overlap with peak'] = peak_list

    return tiling_lib
def get_gene_chr_combo(library_path):
    dir_list = os.listdir(library_path)
    file_split_remove_csv = [file.split('.')[0] for file in dir_list if file.endswith('.csv')]# remove files that doesnt end with csv
    files_combo = [file.split('_') for file in file_split_remove_csv]
    return(files_combo)

def dhs_all_genes(file_combo,library_gene_path, dhs_chrom_path, sgrna_location_col = 'sgRNA \'Cut\' Position'):
    library_dhs = pd.DataFrame()
    for comb in tqdm(file_combo ,desc='number of gene symbol'):
        print(comb)
        chrom_num = comb[0]
        gene = comb[1]
        chr_gene_library = pd.read_csv(f'{library_gene_path}{chrom_num}_{gene}.csv')
        chr_gene_library['chromosome'] = chrom_num
        chr_gene_library['Target Gene Symbol'] = gene
        chr_dhs = pd.read_csv(f'{dhs_chrom_path}{chrom_num}.csv')
        chr_dhs['chromosome'] = chrom_num
        gene_check = dhs_peak_overlap(chr_gene_library , chr_dhs, sgrna_location_col = sgrna_location_col)
        library_dhs = pd.concat([library_dhs,gene_check])
    return library_dhs

def standardize_chromosome(chromosome):
    chromosome = str(chromosome).replace('.0', '')
    if not chromosome.startswith('chr'):
        chromosome = 'chr' + chromosome
    return chromosome


In [3]:
inpath = '../../Data/CleanedExternalData/'
NewDatasetCombine = pd.read_csv(inpath + 'DatasetCombineCleaned_v3.csv', low_memory=False)
NewDatasetCombine['chromosome'] = NewDatasetCombine['chromosome'].apply(standardize_chromosome)

NewDatasetCombine['chromosome'] = NewDatasetCombine['chromosome'].replace('chr24','chrY')
NewDatasetCombine['chromosome'] = NewDatasetCombine['chromosome'].replace('chr23','chrX')

NewDatasetCombine['orginal phenotype'] = NewDatasetCombine['Phenotype scores-ave_Rep1_Rep2'].combine_first(NewDatasetCombine['z_scored_avg_lfc']).combine_first(NewDatasetCombine['rhoSignChangedAndAvged'])
NewDatasetCombine = NewDatasetCombine[['Target Gene Symbol', 'chromosome', 'sgRNA Context Sequence','sgRNA \'Cut\' Position',
       'condition', 'Domain', 'DataSet','orginal phenotype']]

In [4]:
processedCRISPRiOutpath = '../../Data/DHS/SectionedCRISPRi/'

CRISPRi_fileCombo = get_gene_chr_combo(processedCRISPRiOutpath)
len(CRISPRi_fileCombo)


658

In [5]:
dhs_out = '../../Data/DHS/SectionedCRISPRi/'
peakOverlapOut = '../../Data/DHS/PeakOverlap/'

In [6]:
CRISPRiFiles_DHS_HCT116 = dhs_all_genes(CRISPRi_fileCombo,processedCRISPRiOutpath, dhs_out+'HCT116/')

number of gene symbol:   1%|▏                   | 6/658 [00:00<00:11, 56.16it/s]

['chr14', 'HAUS4']
['chr19', 'OR10H3']
This gene does not has any peaks in DHS
['chr11', 'SF3B2']
['chr17', 'OR4D1']
This gene does not has any peaks in DHS
['chr7', 'MRPS24']
['chr20', 'DDX27']
['chr9', 'DCTN3']
['chr12', 'POP5']
['chr8', 'POLR2K']
['chrX', 'SAGE1']
This gene does not has any peaks in DHS
['chr19', 'MRPL34']


number of gene symbol:   3%|▌                  | 18/658 [00:00<00:13, 48.85it/s]

['chr11', 'RAD9A']
['chr2', 'MRPL19']
['chr1', 'FDPS']
['chr1', 'HEATR1']
['chr17', 'COX10']
['chr7', 'TRRAP']
['chr19', 'CYP2A13']
This gene does not has any peaks in DHS
['chr13', 'ARGLU1']
['chr13', 'TM9SF2']


number of gene symbol:   5%|▊                  | 30/658 [00:00<00:12, 51.74it/s]

['chr17', 'C17orf49']
['chr15', 'MFAP1']
['chrX', 'TEX13A']
This gene does not has any peaks in DHS
['chr16', 'RSL1D1']
['chr12', 'APOBEC1']
This gene does not has any peaks in DHS
['chr6', 'RPL7L1']
['chr4', 'DSPP']
This gene does not has any peaks in DHS
['chr6', 'DHX16']
['chr4', 'SDAD1']
['chr2', 'CHMP3']
['chr19', 'ZNF574']
['chr1', 'MTOR']


number of gene symbol:   5%|█                  | 36/658 [00:00<00:12, 51.78it/s]

['chr2', 'DYNC1I2']
['chr5', 'RPS14']
['chr10', 'EIF3A']
['chr21', 'KRTAP13-3']
This gene does not has any peaks in DHS
['chr14', 'YY1']
['chr7', 'MRPS33']
['chr11', 'TAF10']
['chr20', 'CSE1L']
['chr1', 'SF3B4']
['chr1', 'CCT3']


number of gene symbol:   7%|█▎                 | 47/658 [00:00<00:13, 46.45it/s]

['chr5', 'ZMAT2']
['chr8', 'ATP6V1H']
['chr19', 'ILF3']
['chr7', 'GET4']
['chr12', 'FGF6']
This gene does not has any peaks in DHS
['chr15', 'KNL1']
['chr12', 'GCN1']
['chr2', 'NCAPH']
['chr6', 'RPP40']


number of gene symbol:   9%|█▋                 | 57/658 [00:01<00:12, 46.56it/s]

['chr1', 'VPS72']
['chr11', 'OR51D1']
['chr11', 'OR8G5']
This gene does not has any peaks in DHS
['chr17', 'CDK12']
['chr1', 'PMVK']
['chr6', 'POLR1H']
['chr12', 'TAS2R50']
This gene does not has any peaks in DHS
['chr17', 'PIGS']
['chr17', 'KRT28']
This gene does not has any peaks in DHS
['chr9', 'SPATA31D1']
This gene does not has any peaks in DHS
['chr7', 'SSBP1']
['chr2', 'MRPL33']


number of gene symbol:  11%|██                 | 71/658 [00:01<00:10, 56.52it/s]

['chr11', 'RRM1']
['chr11', 'OR2D2']
This gene does not has any peaks in DHS
['chr3', 'RPL14']
['chr8', 'CYP11B2']
This gene does not has any peaks in DHS
['chr1', 'CLSPN']
['chrX', 'BEND2']
This gene does not has any peaks in DHS
['chr4', 'COPS4']
['chr1', 'MCL1']
['chr17', 'CDC27']
['chr19', 'ERCC2']
['chr7', 'TAS2R40']
This gene does not has any peaks in DHS
['chr20', 'PRPF6']
['chr21', 'U2AF1']
This gene does not has any peaks in DHS
['chr14', 'C14orf178']


number of gene symbol:  13%|██▍                | 84/658 [00:01<00:10, 54.07it/s]

['chr19', 'CACTIN']
['chr1', 'TBCE']
['chr17', 'INTS2']
['chr18', 'TRAPPC8']
['chr4', 'NDST4']
This gene does not has any peaks in DHS
['chr17', 'LUC7L3']
['chr2', 'IMMT']
['chr13', 'TUBGCP3']
['chr11', 'OR52B6']
This gene does not has any peaks in DHS
['chr2', 'SPC25']
['chr19', 'SPC24']


number of gene symbol:  15%|██▊                | 96/658 [00:01<00:11, 50.71it/s]

['chr2', 'XRCC5']
['chr20', 'DHX35']
['chr10', 'TLX1']
This gene does not has any peaks in DHS
['chr17', 'SIRT7']
['chr20', 'EIF6']
['chr2', 'WDR33']
['chr10', 'SMNDC1']
['chr6', 'RPF2']
['chr10', 'SEC24C']


number of gene symbol:  16%|██▊               | 102/658 [00:02<00:11, 48.81it/s]

['chr16', 'COG8']
['chr3', 'SEC13']
['chr3', 'EIF4G1']
['chr14', 'RNASE8']
This gene does not has any peaks in DHS
['chr1', 'MRPS15']
['chr2', 'ERCC3']
['chr17', 'KRTAP4-7']
['chr1', 'MRPS14']
['chr1', 'RPA2']
['chr2', 'PSMD1']


number of gene symbol:  17%|███               | 113/658 [00:02<00:11, 49.07it/s]

['chr11', 'INTS4']
['chr21', 'CCT8']
['chr1', 'CDC20']
['chr20', 'MYBL2']
['chr5', 'HMGCS1']
['chr1', 'RPL5']
['chr13', 'COG3']
['chr1', 'CDCP2']
This gene does not has any peaks in DHS
['chr17', 'POLG2']
['chr10', 'RPS24']
['chr16', 'CACNG3']
This gene does not has any peaks in DHS
['chr7', 'RFC2']


number of gene symbol:  19%|███▍              | 125/658 [00:02<00:10, 50.75it/s]

['chr1', 'C8B']
This gene does not has any peaks in DHS
['chr5', 'BNIP1']
['chr20', 'RPN2']
['chr18', 'SKA1']
['chr2', 'CWC22']
['chr9', 'GNE']
['chr10', 'BMS1']
['chr10', 'POLR3A']
['chr3', 'TSEN2']
['chr11', 'OR5W2']
This gene does not has any peaks in DHS
['chr7', 'DLD']


number of gene symbol:  21%|███▋              | 137/658 [00:02<00:10, 50.08it/s]

['chr9', 'DNLZ']
['chr17', 'MED9']
['chr14', 'EIF2B2']
['chr17', 'UTP6']
['chr9', 'IFNA16']
This gene does not has any peaks in DHS
['chr1', 'INTS3']
['chr2', 'UGP2']
['chr1', 'PPP1R8']
['chr1', 'BCAS2']
['chr16', 'SRCAP']


number of gene symbol:  22%|███▉              | 143/658 [00:02<00:12, 42.08it/s]

['chr2', 'VPS54']
['chr17', 'BIRC5']
['chr11', 'ST3GAL4']
['chr14', 'PRMT5']
['chr14', 'TEDC1']
['chr11', 'MRGPRD']
This gene does not has any peaks in DHS
['chr17', 'NMT1']


number of gene symbol:  24%|████▏             | 155/658 [00:03<00:10, 46.97it/s]

['chr12', 'TIMELESS']
['chr1', 'LIN28A']
This gene does not has any peaks in DHS
['chr20', 'SEC23B']
['chr5', 'WDR55']
['chr9', 'IFNA17']
This gene does not has any peaks in DHS
['chr2', 'MRPL35']
['chr14', 'CEBPE']
This gene does not has any peaks in DHS
['chr12', 'TUBA1B']
['chr8', 'PUF60']
['chr20', 'DEFB126']
This gene does not has any peaks in DHS
['chr6', 'TAAR6']
This gene does not has any peaks in DHS
['chr2', 'NCL']


number of gene symbol:  25%|████▌             | 165/658 [00:03<00:10, 47.64it/s]

['chr1', 'SNIP1']
['chr2', 'NRBP1']
['chr3', 'PSMD6']
['chr20', 'WFDC9']
This gene does not has any peaks in DHS
['chr2', 'DDX18']
['chr5', 'MED7']
['chr5', 'TTC1']
['chr11', 'SART1']
['chr22', 'EIF3D']
['chr11', 'BUD13']
['chr6', 'LSM2']


number of gene symbol:  27%|████▊             | 176/658 [00:03<00:10, 47.44it/s]

['chr10', 'SUPV3L1']
['chr3', 'SRPRB']
['chr5', 'MRPS30']
['chr2', 'MRPS5']
['chr6', 'RPP21']
['chr5', 'DDX41']
['chr1', 'NUF2']
['chr10', 'GDF2']
This gene does not has any peaks in DHS
['chr12', 'IAPP']
This gene does not has any peaks in DHS
['chr20', 'DSN1']
['chr19', 'MBD3L2']
This gene does not has any peaks in DHS
['chr7', 'ORC5']


number of gene symbol:  29%|█████▏            | 189/658 [00:03<00:08, 52.44it/s]

['chr1', 'RPS27']
['chr17', 'PRPF8']
['chr6', 'TTK']
['chr7', 'EXOC4']
['chr14', 'MIS18BP1']
['chr9', 'NUP214']
['chr10', 'WDR11']
['chrX', 'CXorf66']
This gene does not has any peaks in DHS
['chr9', 'IARS1']
['chrX', 'RHOXF2']
This gene does not has any peaks in DHS
['chr1', 'DAP3']
['chr8', 'DEFB106A']
This gene does not has any peaks in DHS


number of gene symbol:  30%|█████▎            | 196/658 [00:03<00:08, 56.43it/s]

['chr8', 'CYP7A1']
This gene does not has any peaks in DHS
['chr5', 'MRPL22']
['chr10', 'TUBGCP2']
['chr6', 'GJA10']
This gene does not has any peaks in DHS
['chr10', 'SAR1A']
['chr17', 'RPL23']
['chr19', 'CARM1']
['chr7', 'POLR1F']


number of gene symbol:  32%|█████▋            | 209/658 [00:04<00:08, 52.42it/s]

['chr2', 'SRBD1']
['chr2', 'RAB1A']
['chr22', 'POTEH']
This gene does not has any peaks in DHS
['chr4', 'CCKAR']
This gene does not has any peaks in DHS
['chrX', 'LUZP4']
This gene does not has any peaks in DHS
['chr20', 'CST4']
This gene does not has any peaks in DHS
['chr1', 'RPF1']
['chr5', 'IL13']
This gene does not has any peaks in DHS
['chr12', 'ARL1']
['chr12', 'DDX23']
['chr1', 'GPN2']
['chr5', 'MRPS27']
['chr2', 'POLR1B']


number of gene symbol:  34%|██████            | 221/658 [00:04<00:08, 48.97it/s]

['chr8', 'INTS8']
['chr2', 'STPG4']
This gene does not has any peaks in DHS
['chr5', 'PHAX']
['chr2', 'ITGAV']
['chr14', 'DYNC1H1']
['chr1', 'RABGGTB']
['chr6', 'MCM3']
['chr9', 'MRPS2']


number of gene symbol:  35%|██████▎           | 233/658 [00:04<00:08, 47.44it/s]

['chr1', 'SNRPE']
['chr14', 'PSMC1']
['chr1', 'PARS2']
['chr14', 'PSMB5']
['chr3', 'TRMT10C']
['chr6', 'TFAP2D']
This gene does not has any peaks in DHS
['chr3', 'MRPL3']
['chr1', 'PIGV']
['chr10', 'RPP30']
['chr17', 'NOL11']
['chr17', 'AATF']


number of gene symbol:  36%|██████▌           | 239/658 [00:04<00:08, 47.09it/s]

['chr11', 'UBQLN3']
This gene does not has any peaks in DHS
['chr14', 'MED6']
['chr15', 'SNUPN']
['chr9', 'PSMB7']
['chr18', 'SEH1L']
['chr5', 'GABRA6']
This gene does not has any peaks in DHS
['chr17', 'MYBBP1A']
['chr5', 'SLCO6A1']
This gene does not has any peaks in DHS
['chr1', 'NVL']
['chr17', 'CCL1']
This gene does not has any peaks in DHS
['chr12', 'GARIN6']
This gene does not has any peaks in DHS
['chr17', 'TBCD']


number of gene symbol:  38%|██████▉           | 252/658 [00:05<00:08, 48.40it/s]

['chr6', 'RPL10A']
['chr6', 'SYNCRIP']
['chr1', 'RPE65']
This gene does not has any peaks in DHS
['chr13', 'GSX1']
This gene does not has any peaks in DHS
['chr20', 'WFDC11']
This gene does not has any peaks in DHS
['chr1', 'FH']
['chr8', 'CHRNB3']
This gene does not has any peaks in DHS
['chr11', 'DDB1']
['chr11', 'COPB1']
['chr12', 'CMAS']


number of gene symbol:  40%|███████▏          | 264/658 [00:05<00:07, 53.01it/s]

['chr12', 'POLR3B']
['chr6', 'MRPS10']
['chr6', 'GSTA5']
This gene does not has any peaks in DHS
['chr2', 'NBAS']
['chr10', 'NPFFR1']
This gene does not has any peaks in DHS
['chr17', 'RPL27']
['chr2', 'IL1F10']
This gene does not has any peaks in DHS
['chr6', 'GPR31']
This gene does not has any peaks in DHS
['chr14', 'KCNK10']
This gene does not has any peaks in DHS
['chr5', 'ZCCHC9']
['chr16', 'PRSS33']
This gene does not has any peaks in DHS
['chr17', 'SPEM1']
This gene does not has any peaks in DHS
['chr8', 'RPL8']


number of gene symbol:  42%|███████▌          | 278/658 [00:05<00:06, 57.51it/s]

['chrY', 'HSFY1']
This gene does not has any peaks in DHS
['chr14', 'PSMC6']
['chr19', 'RDH8']
This gene does not has any peaks in DHS
['chr15', 'RPAP1']
['chr17', 'RPL19']
['chr1', 'MED18']
['chr19', 'OR7G2']
This gene does not has any peaks in DHS
['chr7', 'RINT1']
['chr12', 'KRR1']
['chr16', 'PLK1']
['chr6', 'GTF2H4']
['chr4', 'NUP54']


number of gene symbol:  44%|███████▉          | 290/658 [00:05<00:06, 55.07it/s]

['chr12', 'CIT']
['chr18', 'CEP192']
['chr6', 'SLC35A1']
['chr3', 'DBR1']
['chr1', 'INSRR']
This gene does not has any peaks in DHS
['chr12', 'NOP2']
['chr3', 'PSMD2']
['chr9', 'IFNW1']
This gene does not has any peaks in DHS
['chr12', 'EP400']
['chr3', 'UROC1']
This gene does not has any peaks in DHS
['chr10', 'GBF1']


number of gene symbol:  45%|████████          | 296/658 [00:05<00:07, 49.60it/s]

['chr3', 'GMPPB']
['chr17', 'TOP2A']
['chr11', 'SLC22A6']
This gene does not has any peaks in DHS
['chr14', 'FSCB']
This gene does not has any peaks in DHS
['chr15', 'CEP152']
['chr12', 'ACTR6']
['chr19', 'PSMC4']
['chr15', 'KIF23']
['chr10', 'SMC3']


number of gene symbol:  47%|████████▍         | 307/658 [00:06<00:08, 41.39it/s]

['chr6', 'OPN5']
This gene does not has any peaks in DHS
['chr4', 'ANAPC4']
['chr11', 'CKAP5']
['chr17', 'AURKB']
['chr11', 'EIF4G2']
['chr1', 'MRPL55']
['chr4', 'TMPRSS11F']
['chr19', 'RPL36']


number of gene symbol:  47%|████████▌         | 312/658 [00:06<00:09, 37.01it/s]

['chr1', 'ARF1']
['chr17', 'SRSF2']
['chr19', 'EIF3G']
['chr1', 'ALX3']
This gene does not has any peaks in DHS
['chr11', 'C11orf40']
This gene does not has any peaks in DHS
['chr19', 'IFNL3']
This gene does not has any peaks in DHS
['chr9', 'LHX3']
This gene does not has any peaks in DHS
['chr9', 'FOXB2']
This gene does not has any peaks in DHS
['chr6', 'MUCL3']
This gene does not has any peaks in DHS
['chr17', 'PSMB6']
['chr11', 'ACCSL']
This gene does not has any peaks in DHS


number of gene symbol:  50%|████████▉         | 328/658 [00:06<00:06, 51.67it/s]

['chr1', 'ATP6V1G3']
This gene does not has any peaks in DHS
['chr17', 'NSRP1']
['chr20', 'DYNLRB1']
['chr7', 'NUP205']
['chr7', 'NPSR1']
This gene does not has any peaks in DHS
['chr12', 'CCT2']
['chr14', 'GEMIN2']
['chr10', 'C10orf53']
This gene does not has any peaks in DHS
['chr2', 'RPL37A']
['chrX', 'IGBP1']
['chr15', 'TICRR']
['chr11', 'MMP27']


number of gene symbol:  52%|█████████▎        | 340/658 [00:06<00:06, 49.03it/s]

This gene does not has any peaks in DHS
['chr11', 'NUP160']
['chr2', 'POLR2D']
['chr11', 'SSRP1']
['chr20', 'TPX2']
['chr11', 'POLR2G']
['chr6', 'PRIM2']
['chr1', 'PRPF3']
['chr16', 'RBBP6']
['chr18', 'SERPINB12']
This gene does not has any peaks in DHS
['chr17', 'STAT5B']
This gene does not has any peaks in DHS
['chr19', 'LSM4']


number of gene symbol:  53%|█████████▌        | 351/658 [00:07<00:06, 48.44it/s]

['chr14', 'SCFD1']
['chr19', 'EEF2']
['chr14', 'METTL17']
['chr19', 'IFNL2']
This gene does not has any peaks in DHS
['chr16', 'NSMCE1']
['chrY', 'DAZ1']
This gene does not has any peaks in DHS
['chr14', 'PSMA3']
['chr8', 'RPL7']
['chr15', 'RAD51']


number of gene symbol:  54%|█████████▋        | 356/658 [00:07<00:07, 42.57it/s]

['chr9', 'QSOX2']
['chr1', 'ZNHIT6']
['chr4', 'COQ2']
['chr7', 'SEPTIN7P2']
['chr12', 'MARS1']
['chr19', 'COX6B1']
['chr1', 'ILF2']


number of gene symbol:  55%|█████████▉        | 361/658 [00:07<00:07, 42.22it/s]

['chr11', 'FGF3']
This gene does not has any peaks in DHS
['chr3', 'TAMM41']
['chr1', 'DDOST']
['chr2', 'RRM2']
['chr1', 'TARS2']
['chr19', 'RPS19']
['chr6', 'PNISR']


number of gene symbol:  56%|██████████        | 370/658 [00:07<00:08, 34.20it/s]

['chr4', 'DHX15']
['chr22', 'PNPLA5']
['chr16', 'PSMD7']
['chr16', 'CREBBP']
['chr17', 'MRPL45']
['chr2', 'NOP58']
['chr2', 'WDR75']


number of gene symbol:  58%|██████████▍       | 382/658 [00:07<00:06, 43.72it/s]

['chr5', 'CDC23']
['chr11', 'WEE1']
['chr8', 'ATP6V1B2']
['chr9', 'SPATA31A7']
This gene does not has any peaks in DHS
['chr10', 'CHAT']
This gene does not has any peaks in DHS
['chr14', 'FNTB']
['chr8', 'REXO1L1P']
This gene does not has any peaks in DHS
['chr17', 'OR1G1']
This gene does not has any peaks in DHS
['chr12', 'LHX5']
This gene does not has any peaks in DHS
['chr4', 'UTP3']
['chr12', 'KRT71']
This gene does not has any peaks in DHS
['chrX', 'MAGEC3']


number of gene symbol:  60%|██████████▊       | 393/658 [00:08<00:05, 44.97it/s]

This gene does not has any peaks in DHS
['chr18', 'NAPG']
['chr11', 'OR9Q2']
This gene does not has any peaks in DHS
['chr3', 'EIF2B5']
['chr6', 'VARS2']
['chr1', 'CELA2A']
This gene does not has any peaks in DHS
['chr1', 'DNTTIP2']
['chr1', 'ATP1A1']
['chr15', 'RSL24D1']
['chr3', 'MRPS25']


number of gene symbol:  61%|███████████       | 404/658 [00:08<00:05, 47.98it/s]

['chr3', 'GFM1']
['chr20', 'HM13']
['chr1', 'OR2T4']
This gene does not has any peaks in DHS
['chr21', 'MRPL39']
['chr2', 'LRPPRC']
['chr5', 'SPDL1']
['chr11', 'OR52E8']
This gene does not has any peaks in DHS
['chrX', 'CT45A3']
This gene does not has any peaks in DHS
['chr1', 'RPL11']
['chr11', 'EIF3M']
['chr9', 'GOLGA1']


number of gene symbol:  63%|███████████▍      | 416/658 [00:08<00:04, 51.08it/s]

['chr19', 'OR7D4']
This gene does not has any peaks in DHS
['chr17', 'NUP85']
['chrX', 'OGT']
['chr22', 'PIWIL3']
This gene does not has any peaks in DHS
['chr10', 'NOC3L']
['chr15', 'LEO1']
['chr22', 'RTCB']
['chr5', 'BDP1']
['chr19', 'PGLYRP2']
This gene does not has any peaks in DHS
['chrX', 'CT45A2']
This gene does not has any peaks in DHS
['chr12', 'RACGAP1']
['chr19', 'SNRNP70']


number of gene symbol:  64%|███████████▌      | 422/658 [00:08<00:04, 48.59it/s]

['chr10', 'UPF2']
['chr11', 'TAF6L']
['chr10', 'DRGX']
This gene does not has any peaks in DHS
['chr4', 'TMPRSS11A']
This gene does not has any peaks in DHS
['chr11', 'RPS13']
['chr10', 'PNLIPRP1']
This gene does not has any peaks in DHS
['chr18', 'MEP1B']
This gene does not has any peaks in DHS
['chr9', 'IFNA4']
This gene does not has any peaks in DHS
['chr7', 'PSMC2']
['chr1', 'CELA3B']
This gene does not has any peaks in DHS
['chr6', 'ADGRF2']
['chr2', 'ZSWIM2']


number of gene symbol:  66%|███████████▉      | 435/658 [00:08<00:04, 50.95it/s]

This gene does not has any peaks in DHS
['chr17', 'KRTAP9-3']
This gene does not has any peaks in DHS
['chr17', 'RPL38']
['chr13', 'RNF17']
This gene does not has any peaks in DHS
['chr12', 'SNRPF']
['chr9', 'EXOSC2']
['chr11', 'HINFP']
['chr9', 'FOXD4L4']
This gene does not has any peaks in DHS
['chr11', 'CWF19L2']
['chr12', 'KRT76']
This gene does not has any peaks in DHS
['chr8', 'ATP6V1C1']
['chr1', 'OR2T12']


number of gene symbol:  68%|████████████▏     | 447/658 [00:09<00:03, 53.90it/s]

This gene does not has any peaks in DHS
['chr20', 'SYS1']
['chr20', 'CRNKL1']
['chr12', 'ESPL1']
['chr8', 'UQCRB']
['chr1', 'IARS2']
['chr22', 'RFPL3']
This gene does not has any peaks in DHS
['chr9', 'CDKN2A-DT']
This gene does not has any peaks in DHS
['chr14', 'DICER1']
['chr13', 'SLITRK1']
This gene does not has any peaks in DHS
['chr3', 'AP2M1']


number of gene symbol:  69%|████████████▍     | 453/658 [00:09<00:04, 50.47it/s]

['chr5', 'SRFBP1']
['chr14', 'MNAT1']
['chr3', 'ATR']
['chr12', 'RPLP0']
['chr11', 'CABP2']
This gene does not has any peaks in DHS
['chr2', 'MRPS9']
['chr9', 'POLR1E']
['chr12', 'MED21']
['chr7', 'TNPO3']


number of gene symbol:  71%|████████████▋     | 464/658 [00:09<00:04, 42.34it/s]

['chr11', 'RAB1B']
['chr16', 'CTRB1']
This gene does not has any peaks in DHS
['chr4', 'RPL34']
['chr20', 'PDYN']
This gene does not has any peaks in DHS
['chr9', 'EXOSC3']
['chr12', 'LRRC10']
This gene does not has any peaks in DHS
['chr11', 'OOSP2']
This gene does not has any peaks in DHS
['chr19', 'KASH5']
This gene does not has any peaks in DHS
['chr19', 'NAPA']


number of gene symbol:  72%|█████████████     | 476/658 [00:09<00:03, 46.06it/s]

['chr12', 'RPS26']
['chr16', 'AQP8']
This gene does not has any peaks in DHS
['chr16', 'NIP7']
This gene does not has any peaks in DHS
['chr2', 'MCM6']
['chr12', 'ATP5F1B']
['chr9', 'IFNA5']
This gene does not has any peaks in DHS
['chr10', 'NOLC1']
['chr16', 'E4F1']
['chr7', 'SRRT']
['chr17', 'FTSJ3']


number of gene symbol:  74%|█████████████▎    | 486/658 [00:10<00:03, 44.39it/s]

['chr11', 'OR5T2']
This gene does not has any peaks in DHS
['chr15', 'TBC1D21']
['chr6', 'PGK2']
['chr3', 'COPB2']
['chr6', 'HBS1L']
['chr6', 'MDN1']
['chr5', 'BRIX1']
['chr12', 'NEDD1']
['chr3', 'UBA3']
['chr15', 'CDAN1']


number of gene symbol:  75%|█████████████▍    | 491/658 [00:10<00:04, 38.24it/s]

['chr4', 'SPCS3']
['chr9', 'SEC16A']
['chrX', 'ERCC6L']
['chr3', 'LARS2']
['chr9', 'SURF6']
['chr9', 'NOL6']
['chr2', 'ATIC']


number of gene symbol:  76%|█████████████▋    | 502/658 [00:10<00:03, 44.07it/s]

['chrY', 'RBMY1E']
This gene does not has any peaks in DHS
['chr16', 'POLR2C']
['chr22', 'CENPM']
['chr6', 'MMS22L']
['chr20', 'NKX2-2']
This gene does not has any peaks in DHS
['chr3', 'RNF168']
['chr16', 'MT1B']
This gene does not has any peaks in DHS
['chr4', 'RBM46']
This gene does not has any peaks in DHS
['chr8', 'EFR3A']
['chr2', 'RPS7']
['chr14', 'SNW1']
['chr10', 'SLC25A28']


number of gene symbol:  78%|██████████████    | 513/658 [00:10<00:03, 47.71it/s]

['chr17', 'KRT36']
This gene does not has any peaks in DHS
['chr1', 'OR6K6']
This gene does not has any peaks in DHS
['chr15', 'POLR2M']
['chr9', 'PUM3']
['chr9', 'CYLC2']
This gene does not has any peaks in DHS
['chr19', 'ZIM3']
This gene does not has any peaks in DHS
['chr8', 'MROH6']
['chr2', 'IL36A']
This gene does not has any peaks in DHS
['chr17', 'TBC1D29P']
This gene does not has any peaks in DHS
['chr7', 'COPS6']
['chr10', 'DDX21']


number of gene symbol:  79%|██████████████▎   | 523/658 [00:10<00:03, 44.72it/s]

['chr17', 'COPS3']
['chr17', 'CLTC']
['chr11', 'OR5T3']
This gene does not has any peaks in DHS
['chr17', 'VPS53']
['chrX', 'GPR101']
This gene does not has any peaks in DHS
['chr1', 'MRPL37']
['chr2', 'HJURP']
['chr5', 'RPS23']
['chr17', 'MRPS23']


number of gene symbol:  81%|██████████████▌   | 534/658 [00:11<00:02, 46.54it/s]

['chr8', 'RPS20']
['chr3', 'SLC2A2']
This gene does not has any peaks in DHS
['chr1', 'TCHHL1']
This gene does not has any peaks in DHS
['chr7', 'SLC13A1']
This gene does not has any peaks in DHS
['chr16', 'DHODH']
['chr9', 'RPS6']
['chr7', 'DDX56']
['chr1', 'ALG14']
['chr4', 'CENPC']
['chr6', 'IGF2R']


number of gene symbol:  83%|██████████████▉   | 544/658 [00:11<00:02, 44.38it/s]

['chr19', 'VN1R2']
This gene does not has any peaks in DHS
['chr8', 'VPS28']
['chr19', 'GPR32']
This gene does not has any peaks in DHS
['chr1', 'PSMD4']
['chr13', 'NUP58']
['chr5', 'HSPA9']
['chr10', 'CYP2C19']
This gene does not has any peaks in DHS
['chr1', 'GTF2B']
['chr8', 'RPL30']
['chr5', 'ZNF131']


number of gene symbol:  84%|███████████████▏  | 554/658 [00:11<00:02, 43.04it/s]

['chr12', 'SNRNP35']
['chr5', 'GARIN3']
This gene does not has any peaks in DHS
['chr9', 'OR5C1']
This gene does not has any peaks in DHS
['chr9', 'TEX10']
['chr3', 'H1-8']
This gene does not has any peaks in DHS
['chr13', 'RFC3']
['chr9', 'VCP']
['chr1', 'PRAMEF1']
This gene does not has any peaks in DHS
['chr8', 'RAB2A']


number of gene symbol:  86%|███████████████▍  | 564/658 [00:11<00:02, 42.70it/s]

['chr7', 'YAE1']
['chr1', 'F13B']
['chr9', 'PPP3R2']
This gene does not has any peaks in DHS
['chr17', 'DHX8']
['chr8', 'RAD21']
['chr15', 'RPL4']
['chr16', 'C16orf78']
This gene does not has any peaks in DHS
['chr1', 'RRP15']
['chr19', 'CNOT3']


number of gene symbol:  86%|███████████████▌  | 569/658 [00:12<00:02, 42.59it/s]

['chr21', 'SOD1']
['chr17', 'DDX52']
['chr19', 'CHMP2A']
['chr4', 'TRIML1']
This gene does not has any peaks in DHS
['chr3', 'RFC4']
['chr3', 'ELP6']
['chr17', 'KRT33A']
This gene does not has any peaks in DHS
['chr8', 'DEFB103B']
This gene does not has any peaks in DHS
['chr8', 'RRS1']


number of gene symbol:  88%|███████████████▊  | 579/658 [00:12<00:01, 42.02it/s]

['chr2', 'FARSB']
['chr22', 'SAMM50']
['chr19', 'FFAR1']
This gene does not has any peaks in DHS
['chr12', 'YEATS4']
['chr3', 'IMPDH2']
['chr13', 'CDX2']
['chr19', 'DNM2']
['chr15', 'PSMA4']


number of gene symbol:  89%|███████████████▉  | 584/658 [00:12<00:02, 36.43it/s]

['chr17', 'COG1']
['chr16', 'RPL13']
['chr8', 'C8orf17']
This gene does not has any peaks in DHS
['chr21', 'PAXBP1']
['chr5', 'NEUROG1']
This gene does not has any peaks in DHS
['chr4', 'HMX1']
This gene does not has any peaks in DHS
['chr6', 'GPX5']
This gene does not has any peaks in DHS
['chr1', 'MRPS21']
['chr7', 'POLD2']


number of gene symbol:  91%|████████████████▎ | 596/658 [00:12<00:01, 41.77it/s]

['chr10', 'PRPF18']
['chr22', 'VPREB1']
This gene does not has any peaks in DHS
['chr18', 'KDSR']
['chr20', 'SNRPB']
['chr1', 'PRAMEF4']
This gene does not has any peaks in DHS
['chr2', 'SNRNP200']
['chr2', 'EIF5B']
['chr19', 'RPS5']


number of gene symbol:  92%|████████████████▌ | 607/658 [00:12<00:01, 42.33it/s]

['chr12', 'CLEC6A']
This gene does not has any peaks in DHS
['chr17', 'CHMP6']
['chr1', 'SPRR4']
This gene does not has any peaks in DHS
['chr1', 'CATSPER4']
['chr2', 'GKN2']
This gene does not has any peaks in DHS
['chr22', 'ATXN10']
['chrY', 'BPY2']
This gene does not has any peaks in DHS
['chr12', 'IL22']
This gene does not has any peaks in DHS
['chr3', 'RPL35A']
['chr3', 'RPL24']


number of gene symbol:  93%|████████████████▊ | 613/658 [00:13<00:01, 44.47it/s]

['chr2', 'GTF3C2']
['chrX', 'FAM47A']
This gene does not has any peaks in DHS
['chr4', 'TRIM60']
This gene does not has any peaks in DHS
['chr13', 'RPL21']
['chr1', 'ATP6V0B']
['chr3', 'UMPS']
['chr3', 'ANAPC13']
['chr4', 'YTHDC1']
['chr8', 'CPSF1']


number of gene symbol:  95%|█████████████████ | 623/658 [00:13<00:00, 41.27it/s]

['chr11', 'MRPL16']
['chr5', 'GEMIN5']
['chr8', 'MAK16']
['chr4', 'CENPE']
['chrX', 'F9']
This gene does not has any peaks in DHS
['chr19', 'VN1R4']
This gene does not has any peaks in DHS
['chr5', 'RIOK2']
['chr6', 'LTV1']
['chr10', 'MTPAP']


number of gene symbol:  96%|█████████████████▎| 633/658 [00:13<00:00, 43.06it/s]

['chr15', 'FOXB1']
This gene does not has any peaks in DHS
['chr5', 'TCERG1']
['chr10', 'BUB3']
['chr20', 'C20orf173']
This gene does not has any peaks in DHS
['chr1', 'EIF2B3']
['chr6', 'GRM4']
This gene does not has any peaks in DHS
['chr19', 'NANOS2']
['chr11', 'CCDC86']
['chr3', 'ECT2']


number of gene symbol:  98%|█████████████████▌| 643/658 [00:13<00:00, 42.47it/s]

['chr16', 'RPS15A']
['chr5', 'THG1L']
['chr6', 'CDC5L']
['chr1', 'GNL2']
['chr6', 'SNRNP48']
['chr19', 'UQCRFS1']
['chr12', 'MRPS35']
['chr19', 'FBL']
['chr18', 'AFG3L2']


number of gene symbol:  99%|█████████████████▉| 654/658 [00:14<00:00, 43.32it/s]

['chr11', 'CHEK1']
['chr5', 'LARS1']
['chr11', 'OR52A1']
This gene does not has any peaks in DHS
['chr17', 'MED1']
['chr7', 'NEUROD6']
This gene does not has any peaks in DHS
['chr13', 'RNF113B']
This gene does not has any peaks in DHS
['chr2', 'CARF']
['chr9', 'SMC2']
['chr11', 'MRPL17']
['chr17', 'ZNF830']


number of gene symbol: 100%|██████████████████| 658/658 [00:14<00:00, 46.44it/s]

['chr22', 'BCR']
['chr17', 'NLE1']
['chr11', 'OR4C11']
This gene does not has any peaks in DHS





In [7]:
CRISPRiFiles_DHS_HCT116['overlap with peak'].value_counts()

overlap with peak
0    140846
1     25778
Name: count, dtype: int64

In [8]:
CRISPRiFiles_DHS_HCT116.to_csv(f'{peakOverlapOut}HCT116.csv')

In [9]:
CRISPRiFiles_DHS_A549 = dhs_all_genes(CRISPRi_fileCombo,processedCRISPRiOutpath, dhs_out+'A549/')

number of gene symbol:   1%|▏                   | 6/658 [00:00<00:13, 46.67it/s]

['chr14', 'HAUS4']
['chr19', 'OR10H3']
This gene does not has any peaks in DHS
['chr11', 'SF3B2']
['chr17', 'OR4D1']
This gene does not has any peaks in DHS
['chr7', 'MRPS24']
['chr20', 'DDX27']
['chr9', 'DCTN3']
['chr12', 'POP5']
['chr8', 'POLR2K']


number of gene symbol:   2%|▎                  | 11/658 [00:00<00:17, 36.98it/s]

['chrX', 'SAGE1']
['chr19', 'MRPL34']
['chr11', 'RAD9A']
['chr2', 'MRPL19']
['chr1', 'FDPS']
['chr1', 'HEATR1']


number of gene symbol:   3%|▌                  | 19/658 [00:00<00:17, 35.62it/s]

['chr17', 'COX10']
['chr7', 'TRRAP']
['chr19', 'CYP2A13']
This gene does not has any peaks in DHS
['chr13', 'ARGLU1']
['chr13', 'TM9SF2']
['chr17', 'C17orf49']
['chr15', 'MFAP1']


number of gene symbol:   4%|▊                  | 28/658 [00:00<00:17, 36.53it/s]

['chrX', 'TEX13A']
This gene does not has any peaks in DHS
['chr16', 'RSL1D1']
['chr12', 'APOBEC1']
['chr6', 'RPL7L1']
['chr4', 'DSPP']
This gene does not has any peaks in DHS
['chr6', 'DHX16']
['chr4', 'SDAD1']
['chr2', 'CHMP3']
['chr19', 'ZNF574']


number of gene symbol:   6%|█                  | 37/658 [00:01<00:16, 36.79it/s]

['chr1', 'MTOR']
['chr2', 'DYNC1I2']
['chr5', 'RPS14']
['chr10', 'EIF3A']
['chr21', 'KRTAP13-3']
This gene does not has any peaks in DHS
['chr14', 'YY1']
['chr7', 'MRPS33']
['chr11', 'TAF10']


number of gene symbol:   6%|█▏                 | 41/658 [00:01<00:17, 35.66it/s]

['chr20', 'CSE1L']
['chr1', 'SF3B4']
['chr1', 'CCT3']
['chr5', 'ZMAT2']
['chr8', 'ATP6V1H']
['chr19', 'ILF3']


number of gene symbol:   7%|█▍                 | 49/658 [00:01<00:20, 29.73it/s]

['chr7', 'GET4']
['chr12', 'FGF6']
This gene does not has any peaks in DHS
['chr15', 'KNL1']
['chr12', 'GCN1']
['chr2', 'NCAPH']
['chr6', 'RPP40']
['chr1', 'VPS72']


number of gene symbol:   9%|█▋                 | 57/658 [00:01<00:18, 33.35it/s]

['chr11', 'OR51D1']
['chr11', 'OR8G5']
This gene does not has any peaks in DHS
['chr17', 'CDK12']
['chr1', 'PMVK']
['chr6', 'POLR1H']
['chr12', 'TAS2R50']
This gene does not has any peaks in DHS
['chr17', 'PIGS']
['chr17', 'KRT28']
This gene does not has any peaks in DHS
['chr9', 'SPATA31D1']
This gene does not has any peaks in DHS
['chr7', 'SSBP1']


number of gene symbol:  10%|█▉                 | 68/658 [00:01<00:15, 38.95it/s]

['chr2', 'MRPL33']
['chr11', 'RRM1']
['chr11', 'OR2D2']
This gene does not has any peaks in DHS
['chr3', 'RPL14']
['chr8', 'CYP11B2']
This gene does not has any peaks in DHS
['chr1', 'CLSPN']
['chrX', 'BEND2']
This gene does not has any peaks in DHS
['chr4', 'COPS4']
['chr1', 'MCL1']


number of gene symbol:  12%|██▏                | 77/658 [00:02<00:14, 39.99it/s]

['chr17', 'CDC27']
['chr19', 'ERCC2']
['chr7', 'TAS2R40']
This gene does not has any peaks in DHS
['chr20', 'PRPF6']
['chr21', 'U2AF1']
This gene does not has any peaks in DHS
['chr14', 'C14orf178']
['chr19', 'CACTIN']
['chr1', 'TBCE']


number of gene symbol:  12%|██▎                | 82/658 [00:02<00:14, 38.46it/s]

['chr17', 'INTS2']
['chr18', 'TRAPPC8']
['chr4', 'NDST4']
['chr17', 'LUC7L3']
['chr2', 'IMMT']
['chr13', 'TUBGCP3']
['chr11', 'OR52B6']
This gene does not has any peaks in DHS
['chr2', 'SPC25']


number of gene symbol:  14%|██▋                | 93/658 [00:02<00:15, 37.46it/s]

['chr19', 'SPC24']
['chr2', 'XRCC5']
['chr20', 'DHX35']
['chr10', 'TLX1']
This gene does not has any peaks in DHS
['chr17', 'SIRT7']
['chr20', 'EIF6']
['chr2', 'WDR33']
['chr10', 'SMNDC1']


number of gene symbol:  15%|██▊               | 101/658 [00:02<00:15, 36.45it/s]

['chr6', 'RPF2']
['chr10', 'SEC24C']
['chr16', 'COG8']
['chr3', 'SEC13']
['chr3', 'EIF4G1']
['chr14', 'RNASE8']
This gene does not has any peaks in DHS
['chr1', 'MRPS15']


number of gene symbol:  16%|██▉               | 106/658 [00:02<00:14, 37.52it/s]

['chr2', 'ERCC3']
['chr17', 'KRTAP4-7']
This gene does not has any peaks in DHS
['chr1', 'MRPS14']
['chr1', 'RPA2']
['chr2', 'PSMD1']
['chr11', 'INTS4']
['chr21', 'CCT8']
['chr1', 'CDC20']


number of gene symbol:  17%|███               | 114/658 [00:03<00:15, 35.28it/s]

['chr20', 'MYBL2']
['chr5', 'HMGCS1']
['chr1', 'RPL5']
['chr13', 'COG3']
['chr1', 'CDCP2']
This gene does not has any peaks in DHS
['chr17', 'POLG2']
['chr10', 'RPS24']
['chr16', 'CACNG3']


number of gene symbol:  19%|███▎              | 123/658 [00:03<00:13, 38.28it/s]

['chr7', 'RFC2']
['chr1', 'C8B']
This gene does not has any peaks in DHS
['chr5', 'BNIP1']
['chr20', 'RPN2']
['chr18', 'SKA1']
['chr2', 'CWC22']
['chr9', 'GNE']
['chr10', 'BMS1']


number of gene symbol:  20%|███▌              | 131/658 [00:03<00:14, 36.61it/s]

['chr10', 'POLR3A']
['chr3', 'TSEN2']
['chr11', 'OR5W2']
This gene does not has any peaks in DHS
['chr7', 'DLD']
['chr9', 'DNLZ']
['chr17', 'MED9']
['chr14', 'EIF2B2']
['chr17', 'UTP6']


number of gene symbol:  21%|███▊              | 140/658 [00:03<00:14, 35.11it/s]

['chr9', 'IFNA16']
This gene does not has any peaks in DHS
['chr1', 'INTS3']
['chr2', 'UGP2']
['chr1', 'PPP1R8']
['chr1', 'BCAS2']
['chr16', 'SRCAP']
['chr2', 'VPS54']


number of gene symbol:  22%|███▉              | 144/658 [00:04<00:17, 29.29it/s]

['chr17', 'BIRC5']
['chr11', 'ST3GAL4']
['chr14', 'PRMT5']
['chr14', 'TEDC1']
['chr11', 'MRGPRD']
This gene does not has any peaks in DHS
['chr17', 'NMT1']


number of gene symbol:  23%|████▏             | 153/658 [00:04<00:14, 35.32it/s]

['chr12', 'TIMELESS']
['chr1', 'LIN28A']
This gene does not has any peaks in DHS
['chr20', 'SEC23B']
['chr5', 'WDR55']
['chr9', 'IFNA17']
This gene does not has any peaks in DHS
['chr2', 'MRPL35']
['chr14', 'CEBPE']
This gene does not has any peaks in DHS
['chr12', 'TUBA1B']
['chr8', 'PUF60']


number of gene symbol:  24%|████▍             | 161/658 [00:04<00:14, 34.24it/s]

['chr20', 'DEFB126']
['chr6', 'TAAR6']
This gene does not has any peaks in DHS
['chr2', 'NCL']
['chr1', 'SNIP1']
['chr2', 'NRBP1']
['chr3', 'PSMD6']
['chr20', 'WFDC9']
This gene does not has any peaks in DHS
['chr2', 'DDX18']
['chr5', 'MED7']


number of gene symbol:  26%|████▋             | 170/658 [00:04<00:13, 37.36it/s]

['chr5', 'TTC1']
['chr11', 'SART1']
['chr22', 'EIF3D']
['chr11', 'BUD13']
['chr6', 'LSM2']
['chr10', 'SUPV3L1']
['chr3', 'SRPRB']
['chr5', 'MRPS30']


number of gene symbol:  27%|████▉             | 180/658 [00:04<00:12, 39.54it/s]

['chr2', 'MRPS5']
['chr6', 'RPP21']
['chr5', 'DDX41']
['chr1', 'NUF2']
['chr10', 'GDF2']
This gene does not has any peaks in DHS
['chr12', 'IAPP']
This gene does not has any peaks in DHS
['chr20', 'DSN1']
['chr19', 'MBD3L2']
This gene does not has any peaks in DHS
['chr7', 'ORC5']
['chr1', 'RPS27']


number of gene symbol:  29%|█████▏            | 190/658 [00:05<00:10, 43.20it/s]

['chr17', 'PRPF8']
['chr6', 'TTK']
['chr7', 'EXOC4']
['chr14', 'MIS18BP1']
['chr9', 'NUP214']
['chr10', 'WDR11']
['chrX', 'CXorf66']
This gene does not has any peaks in DHS
['chr9', 'IARS1']
['chrX', 'RHOXF2']
This gene does not has any peaks in DHS
['chr1', 'DAP3']


number of gene symbol:  30%|█████▎            | 196/658 [00:05<00:10, 44.86it/s]

['chr8', 'DEFB106A']
This gene does not has any peaks in DHS
['chr8', 'CYP7A1']
This gene does not has any peaks in DHS
['chr5', 'MRPL22']
['chr10', 'TUBGCP2']
['chr6', 'GJA10']
This gene does not has any peaks in DHS
['chr10', 'SAR1A']
['chr17', 'RPL23']
['chr19', 'CARM1']


number of gene symbol:  31%|█████▌            | 205/658 [00:05<00:12, 36.34it/s]

['chr7', 'POLR1F']
['chr2', 'SRBD1']
['chr2', 'RAB1A']
['chr22', 'POTEH']
This gene does not has any peaks in DHS
['chr4', 'CCKAR']
This gene does not has any peaks in DHS
['chrX', 'LUZP4']
This gene does not has any peaks in DHS
['chr20', 'CST4']
This gene does not has any peaks in DHS
['chr1', 'RPF1']
['chr5', 'IL13']
This gene does not has any peaks in DHS
['chr12', 'ARL1']


number of gene symbol:  33%|█████▉            | 216/658 [00:05<00:10, 40.80it/s]

['chr12', 'DDX23']
['chr1', 'GPN2']
['chr5', 'MRPS27']
['chr2', 'POLR1B']
['chr8', 'INTS8']
['chr2', 'STPG4']
This gene does not has any peaks in DHS
['chr5', 'PHAX']
['chr2', 'ITGAV']


number of gene symbol:  34%|██████            | 221/658 [00:06<00:11, 39.12it/s]

['chr14', 'DYNC1H1']
['chr1', 'RABGGTB']
['chr6', 'MCM3']
['chr9', 'MRPS2']
['chr1', 'SNRPE']
['chr14', 'PSMC1']
['chr1', 'PARS2']


number of gene symbol:  35%|██████▎           | 231/658 [00:06<00:10, 40.10it/s]

['chr14', 'PSMB5']
['chr3', 'TRMT10C']
['chr6', 'TFAP2D']
This gene does not has any peaks in DHS
['chr3', 'MRPL3']
['chr1', 'PIGV']
['chr10', 'RPP30']
['chr17', 'NOL11']
['chr17', 'AATF']
['chr11', 'UBQLN3']
This gene does not has any peaks in DHS
['chr14', 'MED6']
['chr15', 'SNUPN']


number of gene symbol:  37%|██████▌           | 242/658 [00:06<00:09, 42.58it/s]

['chr9', 'PSMB7']
['chr18', 'SEH1L']
['chr5', 'GABRA6']
This gene does not has any peaks in DHS
['chr17', 'MYBBP1A']
['chr5', 'SLCO6A1']
This gene does not has any peaks in DHS
['chr1', 'NVL']
['chr17', 'CCL1']
This gene does not has any peaks in DHS
['chr12', 'GARIN6']
This gene does not has any peaks in DHS
['chr17', 'TBCD']


number of gene symbol:  38%|██████▉           | 252/658 [00:06<00:10, 37.47it/s]

['chr6', 'RPL10A']
['chr6', 'SYNCRIP']
['chr1', 'RPE65']
This gene does not has any peaks in DHS
['chr13', 'GSX1']
['chr20', 'WFDC11']
This gene does not has any peaks in DHS
['chr1', 'FH']
['chr8', 'CHRNB3']


number of gene symbol:  39%|███████           | 256/658 [00:06<00:10, 36.79it/s]

This gene does not has any peaks in DHS
['chr11', 'DDB1']
['chr11', 'COPB1']
['chr12', 'CMAS']
['chr12', 'POLR3B']
['chr6', 'MRPS10']
['chr6', 'GSTA5']
This gene does not has any peaks in DHS
['chr2', 'NBAS']


number of gene symbol:  40%|███████▎          | 266/658 [00:07<00:09, 42.14it/s]

['chr10', 'NPFFR1']
This gene does not has any peaks in DHS
['chr17', 'RPL27']
['chr2', 'IL1F10']
This gene does not has any peaks in DHS
['chr6', 'GPR31']
This gene does not has any peaks in DHS
['chr14', 'KCNK10']
This gene does not has any peaks in DHS
['chr5', 'ZCCHC9']
['chr16', 'PRSS33']
This gene does not has any peaks in DHS
['chr17', 'SPEM1']
This gene does not has any peaks in DHS
['chr8', 'RPL8']
['chrY', 'HSFY1']
This gene does not has any peaks in DHS
['chr14', 'PSMC6']


number of gene symbol:  42%|███████▌          | 277/658 [00:07<00:08, 44.66it/s]

['chr19', 'RDH8']
This gene does not has any peaks in DHS
['chr15', 'RPAP1']
['chr17', 'RPL19']
['chr1', 'MED18']
['chr19', 'OR7G2']
This gene does not has any peaks in DHS
['chr7', 'RINT1']
['chr12', 'KRR1']
['chr16', 'PLK1']
['chr6', 'GTF2H4']
['chr4', 'NUP54']


number of gene symbol:  44%|███████▊          | 287/658 [00:07<00:09, 40.16it/s]

['chr12', 'CIT']
['chr18', 'CEP192']
['chr6', 'SLC35A1']
['chr3', 'DBR1']
['chr1', 'INSRR']
['chr12', 'NOP2']
['chr3', 'PSMD2']
['chr9', 'IFNW1']


number of gene symbol:  44%|███████▉          | 292/658 [00:07<00:10, 36.10it/s]

['chr12', 'EP400']
['chr3', 'UROC1']
This gene does not has any peaks in DHS
['chr10', 'GBF1']
['chr3', 'GMPPB']
['chr17', 'TOP2A']


number of gene symbol:  46%|████████▏         | 300/658 [00:08<00:11, 32.03it/s]

['chr11', 'SLC22A6']
This gene does not has any peaks in DHS
['chr14', 'FSCB']
This gene does not has any peaks in DHS
['chr15', 'CEP152']
['chr12', 'ACTR6']
['chr19', 'PSMC4']
['chr15', 'KIF23']
['chr10', 'SMC3']


number of gene symbol:  46%|████████▎         | 304/658 [00:08<00:11, 29.69it/s]

['chr6', 'OPN5']
This gene does not has any peaks in DHS
['chr4', 'ANAPC4']
['chr11', 'CKAP5']
['chr17', 'AURKB']
['chr11', 'EIF4G2']
['chr1', 'MRPL55']


number of gene symbol:  47%|████████▍         | 308/658 [00:08<00:11, 29.27it/s]

['chr4', 'TMPRSS11F']
This gene does not has any peaks in DHS
['chr19', 'RPL36']
['chr1', 'ARF1']
['chr17', 'SRSF2']


number of gene symbol:  48%|████████▋         | 316/658 [00:08<00:12, 27.12it/s]

['chr19', 'EIF3G']
['chr1', 'ALX3']
This gene does not has any peaks in DHS
['chr11', 'C11orf40']
This gene does not has any peaks in DHS
['chr19', 'IFNL3']
This gene does not has any peaks in DHS
['chr9', 'LHX3']
['chr9', 'FOXB2']
['chr6', 'MUCL3']


number of gene symbol:  49%|████████▊         | 323/658 [00:08<00:11, 29.92it/s]

This gene does not has any peaks in DHS
['chr17', 'PSMB6']
['chr11', 'ACCSL']
This gene does not has any peaks in DHS
['chr1', 'ATP6V1G3']
This gene does not has any peaks in DHS
['chr17', 'NSRP1']
['chr20', 'DYNLRB1']
['chr7', 'NUP205']
['chr7', 'NPSR1']
This gene does not has any peaks in DHS
['chr12', 'CCT2']


number of gene symbol:  50%|█████████         | 331/658 [00:09<00:09, 32.90it/s]

['chr14', 'GEMIN2']
['chr10', 'C10orf53']
This gene does not has any peaks in DHS
['chr2', 'RPL37A']
['chrX', 'IGBP1']
['chr15', 'TICRR']
['chr11', 'MMP27']
This gene does not has any peaks in DHS
['chr11', 'NUP160']
['chr2', 'POLR2D']


number of gene symbol:  52%|█████████▎        | 340/658 [00:09<00:09, 34.96it/s]

['chr11', 'SSRP1']
['chr20', 'TPX2']
['chr11', 'POLR2G']
['chr6', 'PRIM2']
['chr1', 'PRPF3']
['chr16', 'RBBP6']
['chr18', 'SERPINB12']
This gene does not has any peaks in DHS
['chr17', 'STAT5B']


number of gene symbol:  53%|█████████▌        | 348/658 [00:09<00:08, 35.62it/s]

['chr19', 'LSM4']
['chr14', 'SCFD1']
['chr19', 'EEF2']
['chr14', 'METTL17']
['chr19', 'IFNL2']
This gene does not has any peaks in DHS
['chr16', 'NSMCE1']
['chrY', 'DAZ1']
This gene does not has any peaks in DHS
['chr14', 'PSMA3']


number of gene symbol:  53%|█████████▋        | 352/658 [00:09<00:09, 33.61it/s]

['chr8', 'RPL7']
['chr15', 'RAD51']
['chr9', 'QSOX2']
['chr1', 'ZNHIT6']


number of gene symbol:  54%|█████████▋        | 356/658 [00:09<00:10, 29.41it/s]

['chr4', 'COQ2']
['chr7', 'SEPTIN7P2']
['chr12', 'MARS1']
['chr19', 'COX6B1']
['chr1', 'ILF2']
['chr11', 'FGF3']


number of gene symbol:  55%|█████████▉        | 364/658 [00:10<00:10, 27.93it/s]

['chr3', 'TAMM41']
['chr1', 'DDOST']
['chr2', 'RRM2']
['chr1', 'TARS2']
['chr19', 'RPS19']
['chr6', 'PNISR']


number of gene symbol:  57%|██████████▏       | 372/658 [00:10<00:09, 30.06it/s]

['chr4', 'DHX15']
['chr22', 'PNPLA5']
This gene does not has any peaks in DHS
['chr16', 'PSMD7']
['chr16', 'CREBBP']
['chr17', 'MRPL45']
['chr2', 'NOP58']
['chr2', 'WDR75']


number of gene symbol:  57%|██████████▎       | 376/658 [00:10<00:09, 29.46it/s]

['chr5', 'CDC23']
['chr11', 'WEE1']
['chr8', 'ATP6V1B2']
['chr9', 'SPATA31A7']
This gene does not has any peaks in DHS
['chr10', 'CHAT']
This gene does not has any peaks in DHS
['chr14', 'FNTB']
['chr8', 'REXO1L1P']
This gene does not has any peaks in DHS
['chr17', 'OR1G1']
This gene does not has any peaks in DHS
['chr12', 'LHX5']


number of gene symbol:  59%|██████████▌       | 388/658 [00:10<00:06, 39.01it/s]

['chr4', 'UTP3']
['chr12', 'KRT71']
This gene does not has any peaks in DHS
['chrX', 'MAGEC3']
This gene does not has any peaks in DHS
['chr18', 'NAPG']
['chr11', 'OR9Q2']
This gene does not has any peaks in DHS
['chr3', 'EIF2B5']
['chr6', 'VARS2']
['chr1', 'CELA2A']
This gene does not has any peaks in DHS
['chr1', 'DNTTIP2']
['chr1', 'ATP1A1']


number of gene symbol:  60%|██████████▊       | 396/658 [00:11<00:07, 37.30it/s]

['chr15', 'RSL24D1']
['chr3', 'MRPS25']
['chr3', 'GFM1']
['chr20', 'HM13']
['chr1', 'OR2T4']
This gene does not has any peaks in DHS
['chr21', 'MRPL39']
['chr2', 'LRPPRC']
['chr5', 'SPDL1']


number of gene symbol:  62%|███████████       | 406/658 [00:11<00:06, 41.08it/s]

['chr11', 'OR52E8']
This gene does not has any peaks in DHS
['chrX', 'CT45A3']
This gene does not has any peaks in DHS
['chr1', 'RPL11']
['chr11', 'EIF3M']
['chr9', 'GOLGA1']
['chr19', 'OR7D4']
This gene does not has any peaks in DHS
['chr17', 'NUP85']
['chrX', 'OGT']
['chr22', 'PIWIL3']
This gene does not has any peaks in DHS
['chr10', 'NOC3L']


number of gene symbol:  63%|███████████▍      | 416/658 [00:11<00:05, 42.49it/s]

['chr15', 'LEO1']
['chr22', 'RTCB']
['chr5', 'BDP1']
['chr19', 'PGLYRP2']
['chrX', 'CT45A2']
This gene does not has any peaks in DHS
['chr12', 'RACGAP1']
['chr19', 'SNRNP70']
['chr10', 'UPF2']


number of gene symbol:  65%|███████████▋      | 428/658 [00:11<00:04, 46.79it/s]

['chr11', 'TAF6L']
['chr10', 'DRGX']
This gene does not has any peaks in DHS
['chr4', 'TMPRSS11A']
This gene does not has any peaks in DHS
['chr11', 'RPS13']
['chr10', 'PNLIPRP1']
This gene does not has any peaks in DHS
['chr18', 'MEP1B']
This gene does not has any peaks in DHS
['chr9', 'IFNA4']
This gene does not has any peaks in DHS
['chr7', 'PSMC2']
['chr1', 'CELA3B']
This gene does not has any peaks in DHS
['chr6', 'ADGRF2']
['chr2', 'ZSWIM2']
['chr17', 'KRTAP9-3']


number of gene symbol:  67%|███████████▉      | 438/658 [00:12<00:04, 46.36it/s]

This gene does not has any peaks in DHS
['chr17', 'RPL38']
['chr13', 'RNF17']
This gene does not has any peaks in DHS
['chr12', 'SNRPF']
['chr9', 'EXOSC2']
['chr11', 'HINFP']
['chr9', 'FOXD4L4']
This gene does not has any peaks in DHS
['chr11', 'CWF19L2']
['chr12', 'KRT76']
This gene does not has any peaks in DHS
['chr8', 'ATP6V1C1']


number of gene symbol:  67%|████████████      | 443/658 [00:12<00:04, 45.49it/s]

['chr1', 'OR2T12']
This gene does not has any peaks in DHS
['chr20', 'SYS1']
['chr20', 'CRNKL1']
['chr12', 'ESPL1']
['chr8', 'UQCRB']
['chr1', 'IARS2']
['chr22', 'RFPL3']
This gene does not has any peaks in DHS
['chr9', 'CDKN2A-DT']
This gene does not has any peaks in DHS
['chr14', 'DICER1']


number of gene symbol:  69%|████████████▍     | 453/658 [00:12<00:04, 43.12it/s]

['chr13', 'SLITRK1']
['chr3', 'AP2M1']
['chr5', 'SRFBP1']
['chr14', 'MNAT1']
['chr3', 'ATR']
['chr12', 'RPLP0']
['chr11', 'CABP2']
This gene does not has any peaks in DHS
['chr2', 'MRPS9']
['chr9', 'POLR1E']


number of gene symbol:  70%|████████████▋     | 463/658 [00:12<00:05, 37.71it/s]

['chr12', 'MED21']
['chr7', 'TNPO3']
['chr11', 'RAB1B']
['chr16', 'CTRB1']
This gene does not has any peaks in DHS
['chr4', 'RPL34']
['chr20', 'PDYN']
This gene does not has any peaks in DHS
['chr9', 'EXOSC3']


number of gene symbol:  72%|████████████▉     | 473/658 [00:12<00:04, 40.94it/s]

['chr12', 'LRRC10']
This gene does not has any peaks in DHS
['chr11', 'OOSP2']
This gene does not has any peaks in DHS
['chr19', 'KASH5']
['chr19', 'NAPA']
['chr12', 'RPS26']
['chr16', 'AQP8']
This gene does not has any peaks in DHS
['chr16', 'NIP7']
This gene does not has any peaks in DHS
['chr2', 'MCM6']
['chr12', 'ATP5F1B']
['chr9', 'IFNA5']
This gene does not has any peaks in DHS
['chr10', 'NOLC1']


number of gene symbol:  73%|█████████████     | 478/658 [00:13<00:04, 39.92it/s]

['chr16', 'E4F1']
['chr7', 'SRRT']
['chr17', 'FTSJ3']
['chr11', 'OR5T2']
This gene does not has any peaks in DHS
['chr15', 'TBC1D21']
['chr6', 'PGK2']
['chr3', 'COPB2']
['chr6', 'HBS1L']


number of gene symbol:  74%|█████████████▎    | 488/658 [00:13<00:04, 38.04it/s]

['chr6', 'MDN1']
['chr5', 'BRIX1']
['chr12', 'NEDD1']
['chr3', 'UBA3']
['chr15', 'CDAN1']
['chr4', 'SPCS3']
['chr9', 'SEC16A']


number of gene symbol:  76%|█████████████▌    | 497/658 [00:13<00:04, 36.81it/s]

['chrX', 'ERCC6L']
['chr3', 'LARS2']
['chr9', 'SURF6']
['chr9', 'NOL6']
['chr2', 'ATIC']
['chrY', 'RBMY1E']
This gene does not has any peaks in DHS
['chr16', 'POLR2C']
['chr22', 'CENPM']
['chr6', 'MMS22L']


number of gene symbol:  76%|█████████████▋    | 501/658 [00:13<00:04, 35.89it/s]

['chr20', 'NKX2-2']
['chr3', 'RNF168']
['chr16', 'MT1B']
This gene does not has any peaks in DHS
['chr4', 'RBM46']
This gene does not has any peaks in DHS
['chr8', 'EFR3A']
['chr2', 'RPS7']
['chr14', 'SNW1']
['chr10', 'SLC25A28']


number of gene symbol:  78%|██████████████    | 514/658 [00:13<00:03, 42.25it/s]

['chr17', 'KRT36']
This gene does not has any peaks in DHS
['chr1', 'OR6K6']
This gene does not has any peaks in DHS
['chr15', 'POLR2M']
['chr9', 'PUM3']
['chr9', 'CYLC2']
This gene does not has any peaks in DHS
['chr19', 'ZIM3']
This gene does not has any peaks in DHS
['chr8', 'MROH6']
['chr2', 'IL36A']
This gene does not has any peaks in DHS
['chr17', 'TBC1D29P']
This gene does not has any peaks in DHS
['chr7', 'COPS6']
['chr10', 'DDX21']


number of gene symbol:  80%|██████████████▎   | 524/658 [00:14<00:03, 42.17it/s]

['chr17', 'COPS3']
['chr17', 'CLTC']
['chr11', 'OR5T3']
This gene does not has any peaks in DHS
['chr17', 'VPS53']
['chrX', 'GPR101']
['chr1', 'MRPL37']
['chr2', 'HJURP']
['chr5', 'RPS23']


number of gene symbol:  81%|██████████████▌   | 534/658 [00:14<00:02, 43.04it/s]

['chr17', 'MRPS23']
['chr8', 'RPS20']
['chr3', 'SLC2A2']
This gene does not has any peaks in DHS
['chr1', 'TCHHL1']
This gene does not has any peaks in DHS
['chr7', 'SLC13A1']
This gene does not has any peaks in DHS
['chr16', 'DHODH']
['chr9', 'RPS6']
['chr7', 'DDX56']
['chr1', 'ALG14']


number of gene symbol:  82%|██████████████▋   | 539/658 [00:14<00:03, 38.64it/s]

['chr4', 'CENPC']
['chr6', 'IGF2R']
['chr19', 'VN1R2']
['chr8', 'VPS28']
['chr19', 'GPR32']
This gene does not has any peaks in DHS
['chr1', 'PSMD4']
['chr13', 'NUP58']


number of gene symbol:  83%|██████████████▉   | 548/658 [00:14<00:02, 39.85it/s]

['chr5', 'HSPA9']
['chr10', 'CYP2C19']
This gene does not has any peaks in DHS
['chr1', 'GTF2B']
['chr8', 'RPL30']
['chr5', 'ZNF131']
['chr12', 'SNRNP35']
['chr5', 'GARIN3']
This gene does not has any peaks in DHS
['chr9', 'OR5C1']
This gene does not has any peaks in DHS
['chr9', 'TEX10']
['chr3', 'H1-8']


number of gene symbol:  85%|███████████████▎  | 558/658 [00:15<00:02, 38.79it/s]

This gene does not has any peaks in DHS
['chr13', 'RFC3']
['chr9', 'VCP']
['chr1', 'PRAMEF1']
This gene does not has any peaks in DHS
['chr8', 'RAB2A']
['chr7', 'YAE1']
['chr1', 'F13B']
['chr9', 'PPP3R2']


number of gene symbol:  86%|███████████████▍  | 566/658 [00:15<00:02, 38.65it/s]

['chr17', 'DHX8']
['chr8', 'RAD21']
['chr15', 'RPL4']
['chr16', 'C16orf78']
This gene does not has any peaks in DHS
['chr1', 'RRP15']
['chr19', 'CNOT3']
['chr21', 'SOD1']
['chr17', 'DDX52']
['chr19', 'CHMP2A']


number of gene symbol:  88%|███████████████▊  | 576/658 [00:15<00:01, 41.24it/s]

['chr4', 'TRIML1']
This gene does not has any peaks in DHS
['chr3', 'RFC4']
['chr3', 'ELP6']
['chr17', 'KRT33A']
This gene does not has any peaks in DHS
['chr8', 'DEFB103B']
This gene does not has any peaks in DHS
['chr8', 'RRS1']
['chr2', 'FARSB']
['chr22', 'SAMM50']
['chr19', 'FFAR1']
This gene does not has any peaks in DHS
['chr12', 'YEATS4']


number of gene symbol:  88%|███████████████▉  | 581/658 [00:15<00:02, 38.27it/s]

['chr3', 'IMPDH2']
['chr13', 'CDX2']
['chr19', 'DNM2']
['chr15', 'PSMA4']
['chr17', 'COG1']
['chr16', 'RPL13']


number of gene symbol:  90%|████████████████  | 589/658 [00:15<00:01, 34.97it/s]

['chr8', 'C8orf17']
This gene does not has any peaks in DHS
['chr21', 'PAXBP1']
['chr5', 'NEUROG1']
['chr4', 'HMX1']
['chr6', 'GPX5']
This gene does not has any peaks in DHS
['chr1', 'MRPS21']
['chr7', 'POLD2']
['chr10', 'PRPF18']


number of gene symbol:  91%|████████████████▎ | 598/658 [00:16<00:01, 34.99it/s]

['chr22', 'VPREB1']
This gene does not has any peaks in DHS
['chr18', 'KDSR']
['chr20', 'SNRPB']
['chr1', 'PRAMEF4']
This gene does not has any peaks in DHS
['chr2', 'SNRNP200']
['chr2', 'EIF5B']
['chr19', 'RPS5']


number of gene symbol:  92%|████████████████▌ | 607/658 [00:16<00:01, 37.67it/s]

['chr12', 'CLEC6A']
This gene does not has any peaks in DHS
['chr17', 'CHMP6']
['chr1', 'SPRR4']
This gene does not has any peaks in DHS
['chr1', 'CATSPER4']
['chr2', 'GKN2']
This gene does not has any peaks in DHS
['chr22', 'ATXN10']
['chrY', 'BPY2']
This gene does not has any peaks in DHS
['chr12', 'IL22']
This gene does not has any peaks in DHS
['chr3', 'RPL35A']


number of gene symbol:  93%|████████████████▋ | 612/658 [00:16<00:01, 40.44it/s]

['chr3', 'RPL24']
['chr2', 'GTF3C2']
['chrX', 'FAM47A']
This gene does not has any peaks in DHS
['chr4', 'TRIM60']
This gene does not has any peaks in DHS
['chr13', 'RPL21']
['chr1', 'ATP6V0B']
['chr3', 'UMPS']
['chr3', 'ANAPC13']


number of gene symbol:  94%|████████████████▉ | 621/658 [00:16<00:00, 37.60it/s]

['chr4', 'YTHDC1']
['chr8', 'CPSF1']
['chr11', 'MRPL16']
['chr5', 'GEMIN5']
['chr8', 'MAK16']
['chr4', 'CENPE']
['chrX', 'F9']
This gene does not has any peaks in DHS
['chr19', 'VN1R4']
This gene does not has any peaks in DHS
['chr5', 'RIOK2']


number of gene symbol:  96%|█████████████████▎| 631/658 [00:16<00:00, 39.36it/s]

['chr6', 'LTV1']
['chr10', 'MTPAP']
['chr15', 'FOXB1']
['chr5', 'TCERG1']
['chr10', 'BUB3']
['chr20', 'C20orf173']
This gene does not has any peaks in DHS
['chr1', 'EIF2B3']
['chr6', 'GRM4']


number of gene symbol:  97%|█████████████████▍| 639/658 [00:17<00:00, 37.60it/s]

['chr19', 'NANOS2']
['chr11', 'CCDC86']
['chr3', 'ECT2']
['chr16', 'RPS15A']
['chr5', 'THG1L']
['chr6', 'CDC5L']
['chr1', 'GNL2']
['chr6', 'SNRNP48']


number of gene symbol:  99%|█████████████████▊| 649/658 [00:17<00:00, 39.44it/s]

['chr19', 'UQCRFS1']
['chr12', 'MRPS35']
['chr19', 'FBL']
['chr18', 'AFG3L2']
['chr11', 'CHEK1']
['chr5', 'LARS1']
['chr11', 'OR52A1']
This gene does not has any peaks in DHS
['chr17', 'MED1']
['chr7', 'NEUROD6']


number of gene symbol:  99%|█████████████████▉| 654/658 [00:17<00:00, 39.67it/s]

This gene does not has any peaks in DHS
['chr13', 'RNF113B']
This gene does not has any peaks in DHS
['chr2', 'CARF']
['chr9', 'SMC2']
['chr11', 'MRPL17']
['chr17', 'ZNF830']
['chr22', 'BCR']


number of gene symbol: 100%|██████████████████| 658/658 [00:17<00:00, 37.07it/s]

['chr17', 'NLE1']
['chr11', 'OR4C11']
This gene does not has any peaks in DHS





In [10]:
CRISPRiFiles_DHS_A549['overlap with peak'].value_counts()

overlap with peak
0    133532
1     33092
Name: count, dtype: int64

In [11]:
CRISPRiFiles_DHS_A549.to_csv(f'{peakOverlapOut}A549.csv')

In [12]:
CRISPRiFiles_DHS_K562 = dhs_all_genes(CRISPRi_fileCombo,processedCRISPRiOutpath, dhs_out+'K562/')

number of gene symbol:   1%|▏                   | 5/658 [00:00<00:13, 49.40it/s]

['chr14', 'HAUS4']
['chr19', 'OR10H3']
This gene does not has any peaks in DHS
['chr11', 'SF3B2']
['chr17', 'OR4D1']
This gene does not has any peaks in DHS
['chr7', 'MRPS24']
['chr20', 'DDX27']
['chr9', 'DCTN3']
['chr12', 'POP5']
['chr8', 'POLR2K']
['chrX', 'SAGE1']


number of gene symbol:   2%|▍                  | 15/658 [00:00<00:18, 34.78it/s]

['chr19', 'MRPL34']
['chr11', 'RAD9A']
['chr2', 'MRPL19']
['chr1', 'FDPS']
['chr1', 'HEATR1']
['chr17', 'COX10']
['chr7', 'TRRAP']


number of gene symbol:   3%|▋                  | 23/658 [00:00<00:19, 32.32it/s]

['chr19', 'CYP2A13']
This gene does not has any peaks in DHS
['chr13', 'ARGLU1']
['chr13', 'TM9SF2']
['chr17', 'C17orf49']
['chr15', 'MFAP1']
['chrX', 'TEX13A']
['chr16', 'RSL1D1']


number of gene symbol:   5%|▉                  | 33/658 [00:00<00:16, 37.70it/s]

['chr12', 'APOBEC1']
This gene does not has any peaks in DHS
['chr6', 'RPL7L1']
['chr4', 'DSPP']
This gene does not has any peaks in DHS
['chr6', 'DHX16']
['chr4', 'SDAD1']
['chr2', 'CHMP3']
['chr19', 'ZNF574']
['chr1', 'MTOR']
['chr2', 'DYNC1I2']
['chr5', 'RPS14']


number of gene symbol:   6%|█▏                 | 41/658 [00:01<00:16, 37.12it/s]

['chr10', 'EIF3A']
['chr21', 'KRTAP13-3']
This gene does not has any peaks in DHS
['chr14', 'YY1']
['chr7', 'MRPS33']
['chr11', 'TAF10']
['chr20', 'CSE1L']
['chr1', 'SF3B4']
['chr1', 'CCT3']


number of gene symbol:   7%|█▎                 | 45/658 [00:01<00:18, 33.96it/s]

['chr5', 'ZMAT2']
['chr8', 'ATP6V1H']
['chr19', 'ILF3']
['chr7', 'GET4']
['chr12', 'FGF6']
This gene does not has any peaks in DHS
['chr15', 'KNL1']


number of gene symbol:   8%|█▌                 | 53/658 [00:01<00:17, 34.00it/s]

['chr12', 'GCN1']
['chr2', 'NCAPH']
['chr6', 'RPP40']
['chr1', 'VPS72']
['chr11', 'OR51D1']
['chr11', 'OR8G5']
This gene does not has any peaks in DHS
['chr17', 'CDK12']
['chr1', 'PMVK']


number of gene symbol:  10%|█▊                 | 63/658 [00:01<00:14, 40.15it/s]

['chr6', 'POLR1H']
['chr12', 'TAS2R50']
This gene does not has any peaks in DHS
['chr17', 'PIGS']
['chr17', 'KRT28']
This gene does not has any peaks in DHS
['chr9', 'SPATA31D1']
This gene does not has any peaks in DHS
['chr7', 'SSBP1']
['chr2', 'MRPL33']
['chr11', 'RRM1']
['chr11', 'OR2D2']
This gene does not has any peaks in DHS
['chr3', 'RPL14']


number of gene symbol:  11%|██                 | 73/658 [00:01<00:13, 42.36it/s]

['chr8', 'CYP11B2']
This gene does not has any peaks in DHS
['chr1', 'CLSPN']
['chrX', 'BEND2']
This gene does not has any peaks in DHS
['chr4', 'COPS4']
['chr1', 'MCL1']
['chr17', 'CDC27']
['chr19', 'ERCC2']
['chr7', 'TAS2R40']
This gene does not has any peaks in DHS
['chr20', 'PRPF6']
['chr21', 'U2AF1']


number of gene symbol:  13%|██▍                | 83/658 [00:02<00:13, 41.81it/s]

['chr14', 'C14orf178']
['chr19', 'CACTIN']
['chr1', 'TBCE']
['chr17', 'INTS2']
['chr18', 'TRAPPC8']
['chr4', 'NDST4']
This gene does not has any peaks in DHS
['chr17', 'LUC7L3']
['chr2', 'IMMT']
['chr13', 'TUBGCP3']


number of gene symbol:  13%|██▌                | 88/658 [00:02<00:14, 40.52it/s]

['chr11', 'OR52B6']
['chr2', 'SPC25']
['chr19', 'SPC24']
['chr2', 'XRCC5']
['chr20', 'DHX35']
['chr10', 'TLX1']
['chr17', 'SIRT7']
['chr20', 'EIF6']


number of gene symbol:  15%|██▊                | 97/658 [00:02<00:15, 36.53it/s]

['chr2', 'WDR33']
['chr10', 'SMNDC1']
['chr6', 'RPF2']
['chr10', 'SEC24C']
['chr16', 'COG8']
['chr3', 'SEC13']
['chr3', 'EIF4G1']


number of gene symbol:  16%|██▉               | 106/658 [00:02<00:14, 38.37it/s]

['chr14', 'RNASE8']
This gene does not has any peaks in DHS
['chr1', 'MRPS15']
['chr2', 'ERCC3']
['chr17', 'KRTAP4-7']
This gene does not has any peaks in DHS
['chr1', 'MRPS14']
['chr1', 'RPA2']
['chr2', 'PSMD1']
['chr11', 'INTS4']
['chr21', 'CCT8']
['chr1', 'CDC20']


number of gene symbol:  17%|███▏              | 115/658 [00:03<00:14, 38.05it/s]

['chr20', 'MYBL2']
['chr5', 'HMGCS1']
['chr1', 'RPL5']
['chr13', 'COG3']
['chr1', 'CDCP2']
This gene does not has any peaks in DHS
['chr17', 'POLG2']
['chr10', 'RPS24']
['chr16', 'CACNG3']


number of gene symbol:  19%|███▍              | 125/658 [00:03<00:12, 41.87it/s]

['chr7', 'RFC2']
['chr1', 'C8B']
This gene does not has any peaks in DHS
['chr5', 'BNIP1']
['chr20', 'RPN2']
['chr18', 'SKA1']
['chr2', 'CWC22']
['chr9', 'GNE']
['chr10', 'BMS1']
['chr10', 'POLR3A']
['chr3', 'TSEN2']


number of gene symbol:  21%|███▋              | 135/658 [00:03<00:11, 44.48it/s]

['chr11', 'OR5W2']
This gene does not has any peaks in DHS
['chr7', 'DLD']
['chr9', 'DNLZ']
['chr17', 'MED9']
['chr14', 'EIF2B2']
['chr17', 'UTP6']
['chr9', 'IFNA16']
This gene does not has any peaks in DHS
['chr1', 'INTS3']
['chr2', 'UGP2']


number of gene symbol:  21%|███▊              | 140/658 [00:03<00:13, 39.20it/s]

['chr1', 'PPP1R8']
['chr1', 'BCAS2']
['chr16', 'SRCAP']
['chr2', 'VPS54']
['chr17', 'BIRC5']
['chr11', 'ST3GAL4']


number of gene symbol:  23%|████              | 150/658 [00:03<00:15, 33.83it/s]

['chr14', 'PRMT5']
['chr14', 'TEDC1']
['chr11', 'MRGPRD']
This gene does not has any peaks in DHS
['chr17', 'NMT1']
['chr12', 'TIMELESS']
['chr1', 'LIN28A']
['chr20', 'SEC23B']
['chr5', 'WDR55']


number of gene symbol:  24%|████▎             | 159/658 [00:04<00:13, 37.37it/s]

['chr9', 'IFNA17']
This gene does not has any peaks in DHS
['chr2', 'MRPL35']
['chr14', 'CEBPE']
This gene does not has any peaks in DHS
['chr12', 'TUBA1B']
['chr8', 'PUF60']
['chr20', 'DEFB126']
This gene does not has any peaks in DHS
['chr6', 'TAAR6']
This gene does not has any peaks in DHS
['chr2', 'NCL']
['chr1', 'SNIP1']


number of gene symbol:  26%|████▌             | 169/658 [00:04<00:11, 41.04it/s]

['chr2', 'NRBP1']
['chr3', 'PSMD6']
['chr20', 'WFDC9']
This gene does not has any peaks in DHS
['chr2', 'DDX18']
['chr5', 'MED7']
['chr5', 'TTC1']
['chr11', 'SART1']
['chr22', 'EIF3D']
['chr11', 'BUD13']
['chr6', 'LSM2']


number of gene symbol:  26%|████▊             | 174/658 [00:04<00:11, 42.22it/s]

['chr10', 'SUPV3L1']
['chr3', 'SRPRB']
['chr5', 'MRPS30']
['chr2', 'MRPS5']
['chr6', 'RPP21']
['chr5', 'DDX41']
['chr1', 'NUF2']
['chr10', 'GDF2']


number of gene symbol:  28%|█████             | 185/658 [00:04<00:10, 43.47it/s]

['chr12', 'IAPP']
This gene does not has any peaks in DHS
['chr20', 'DSN1']
['chr19', 'MBD3L2']
This gene does not has any peaks in DHS
['chr7', 'ORC5']
['chr1', 'RPS27']
['chr17', 'PRPF8']
['chr6', 'TTK']
['chr7', 'EXOC4']
['chr14', 'MIS18BP1']
['chr9', 'NUP214']
['chr10', 'WDR11']


number of gene symbol:  30%|█████▍            | 197/658 [00:05<00:09, 46.61it/s]

['chrX', 'CXorf66']
['chr9', 'IARS1']
['chrX', 'RHOXF2']
This gene does not has any peaks in DHS
['chr1', 'DAP3']
['chr8', 'DEFB106A']
This gene does not has any peaks in DHS
['chr8', 'CYP7A1']
This gene does not has any peaks in DHS
['chr5', 'MRPL22']
['chr10', 'TUBGCP2']
['chr6', 'GJA10']
This gene does not has any peaks in DHS
['chr10', 'SAR1A']


number of gene symbol:  31%|█████▌            | 202/658 [00:05<00:11, 41.15it/s]

['chr17', 'RPL23']
['chr19', 'CARM1']
['chr7', 'POLR1F']
['chr2', 'SRBD1']
['chr2', 'RAB1A']
['chr22', 'POTEH']
This gene does not has any peaks in DHS
['chr4', 'CCKAR']
This gene does not has any peaks in DHS
['chrX', 'LUZP4']


number of gene symbol:  32%|█████▊            | 213/658 [00:05<00:09, 45.23it/s]

['chr20', 'CST4']
This gene does not has any peaks in DHS
['chr1', 'RPF1']
['chr5', 'IL13']
This gene does not has any peaks in DHS
['chr12', 'ARL1']
['chr12', 'DDX23']
['chr1', 'GPN2']
['chr5', 'MRPS27']
['chr2', 'POLR1B']
['chr8', 'INTS8']
['chr2', 'STPG4']
This gene does not has any peaks in DHS
['chr5', 'PHAX']


number of gene symbol:  34%|██████            | 223/658 [00:05<00:11, 37.74it/s]

['chr2', 'ITGAV']
['chr14', 'DYNC1H1']
['chr1', 'RABGGTB']
['chr6', 'MCM3']
['chr9', 'MRPS2']
['chr1', 'SNRPE']


number of gene symbol:  35%|██████▎           | 233/658 [00:05<00:10, 41.44it/s]

['chr14', 'PSMC1']
['chr1', 'PARS2']
['chr14', 'PSMB5']
['chr3', 'TRMT10C']
['chr6', 'TFAP2D']
This gene does not has any peaks in DHS
['chr3', 'MRPL3']
['chr1', 'PIGV']
['chr10', 'RPP30']
['chr17', 'NOL11']
['chr17', 'AATF']


number of gene symbol:  36%|██████▌           | 238/658 [00:06<00:10, 41.34it/s]

['chr11', 'UBQLN3']
This gene does not has any peaks in DHS
['chr14', 'MED6']
['chr15', 'SNUPN']
['chr9', 'PSMB7']
['chr18', 'SEH1L']
['chr5', 'GABRA6']
This gene does not has any peaks in DHS
['chr17', 'MYBBP1A']
['chr5', 'SLCO6A1']


number of gene symbol:  38%|██████▊           | 247/658 [00:06<00:11, 36.89it/s]

['chr1', 'NVL']
['chr17', 'CCL1']
This gene does not has any peaks in DHS
['chr12', 'GARIN6']
This gene does not has any peaks in DHS
['chr17', 'TBCD']
['chr6', 'RPL10A']
['chr6', 'SYNCRIP']


number of gene symbol:  38%|██████▊           | 251/658 [00:06<00:11, 35.80it/s]

['chr1', 'RPE65']
This gene does not has any peaks in DHS
['chr13', 'GSX1']
['chr20', 'WFDC11']
This gene does not has any peaks in DHS
['chr1', 'FH']
['chr8', 'CHRNB3']
This gene does not has any peaks in DHS
['chr11', 'DDB1']


number of gene symbol:  39%|██████▉           | 255/658 [00:06<00:15, 26.24it/s]

['chr11', 'COPB1']
['chr12', 'CMAS']
['chr12', 'POLR3B']
['chr6', 'MRPS10']


number of gene symbol:  40%|███████▏          | 264/658 [00:06<00:12, 30.95it/s]

['chr6', 'GSTA5']
['chr2', 'NBAS']
['chr10', 'NPFFR1']
This gene does not has any peaks in DHS
['chr17', 'RPL27']
['chr2', 'IL1F10']
This gene does not has any peaks in DHS
['chr6', 'GPR31']
This gene does not has any peaks in DHS
['chr14', 'KCNK10']
This gene does not has any peaks in DHS
['chr5', 'ZCCHC9']
['chr16', 'PRSS33']
This gene does not has any peaks in DHS
['chr17', 'SPEM1']
This gene does not has any peaks in DHS
['chr8', 'RPL8']


number of gene symbol:  42%|███████▍          | 274/658 [00:07<00:10, 35.02it/s]

['chrY', 'HSFY1']
This gene does not has any peaks in DHS
['chr14', 'PSMC6']
['chr19', 'RDH8']
This gene does not has any peaks in DHS
['chr15', 'RPAP1']
['chr17', 'RPL19']
['chr1', 'MED18']
['chr19', 'OR7G2']
This gene does not has any peaks in DHS
['chr7', 'RINT1']
['chr12', 'KRR1']


number of gene symbol:  43%|███████▋          | 283/658 [00:07<00:09, 37.52it/s]

['chr16', 'PLK1']
['chr6', 'GTF2H4']
['chr4', 'NUP54']
['chr12', 'CIT']
['chr18', 'CEP192']
['chr6', 'SLC35A1']
['chr3', 'DBR1']
['chr1', 'INSRR']


number of gene symbol:  44%|███████▉          | 292/658 [00:07<00:10, 36.01it/s]

['chr12', 'NOP2']
['chr3', 'PSMD2']
['chr9', 'IFNW1']
This gene does not has any peaks in DHS
['chr12', 'EP400']
['chr3', 'UROC1']
This gene does not has any peaks in DHS
['chr10', 'GBF1']
['chr3', 'GMPPB']


number of gene symbol:  45%|████████          | 297/658 [00:07<00:09, 37.34it/s]

['chr17', 'TOP2A']
['chr11', 'SLC22A6']
This gene does not has any peaks in DHS
['chr14', 'FSCB']
This gene does not has any peaks in DHS
['chr15', 'CEP152']
['chr12', 'ACTR6']
['chr19', 'PSMC4']
['chr15', 'KIF23']
['chr10', 'SMC3']


number of gene symbol:  46%|████████▎         | 305/658 [00:08<00:10, 34.46it/s]

['chr6', 'OPN5']
This gene does not has any peaks in DHS
['chr4', 'ANAPC4']
['chr11', 'CKAP5']
['chr17', 'AURKB']
['chr11', 'EIF4G2']
['chr1', 'MRPL55']
['chr4', 'TMPRSS11F']


number of gene symbol:  47%|████████▍         | 309/658 [00:08<00:10, 32.41it/s]

['chr19', 'RPL36']
['chr1', 'ARF1']
['chr17', 'SRSF2']
['chr19', 'EIF3G']
['chr1', 'ALX3']


number of gene symbol:  48%|████████▋         | 319/658 [00:08<00:09, 36.05it/s]

This gene does not has any peaks in DHS
['chr11', 'C11orf40']
['chr19', 'IFNL3']
This gene does not has any peaks in DHS
['chr9', 'LHX3']
This gene does not has any peaks in DHS
['chr9', 'FOXB2']
This gene does not has any peaks in DHS
['chr6', 'MUCL3']
['chr17', 'PSMB6']
['chr11', 'ACCSL']
['chr1', 'ATP6V1G3']
This gene does not has any peaks in DHS
['chr17', 'NSRP1']
['chr20', 'DYNLRB1']
['chr7', 'NUP205']


number of gene symbol:  50%|█████████         | 329/658 [00:08<00:07, 41.23it/s]

['chr7', 'NPSR1']
This gene does not has any peaks in DHS
['chr12', 'CCT2']
['chr14', 'GEMIN2']
['chr10', 'C10orf53']
['chr2', 'RPL37A']
['chrX', 'IGBP1']
['chr15', 'TICRR']
['chr11', 'MMP27']
This gene does not has any peaks in DHS
['chr11', 'NUP160']
['chr2', 'POLR2D']


number of gene symbol:  52%|█████████▎        | 339/658 [00:08<00:07, 41.69it/s]

['chr11', 'SSRP1']
['chr20', 'TPX2']
['chr11', 'POLR2G']
['chr6', 'PRIM2']
['chr1', 'PRPF3']
['chr16', 'RBBP6']
['chr18', 'SERPINB12']
This gene does not has any peaks in DHS
['chr17', 'STAT5B']


number of gene symbol:  53%|█████████▌        | 349/658 [00:09<00:07, 40.41it/s]

['chr19', 'LSM4']
['chr14', 'SCFD1']
['chr19', 'EEF2']
['chr14', 'METTL17']
['chr19', 'IFNL2']
This gene does not has any peaks in DHS
['chr16', 'NSMCE1']
['chrY', 'DAZ1']
This gene does not has any peaks in DHS
['chr14', 'PSMA3']
['chr8', 'RPL7']


number of gene symbol:  54%|█████████▋        | 354/658 [00:09<00:09, 33.02it/s]

['chr15', 'RAD51']
['chr9', 'QSOX2']
['chr1', 'ZNHIT6']
['chr4', 'COQ2']
['chr7', 'SEPTIN7P2']


number of gene symbol:  55%|█████████▉        | 362/658 [00:09<00:09, 32.15it/s]

['chr12', 'MARS1']
['chr19', 'COX6B1']
['chr1', 'ILF2']
['chr11', 'FGF3']
This gene does not has any peaks in DHS
['chr3', 'TAMM41']
['chr1', 'DDOST']
['chr2', 'RRM2']


number of gene symbol:  56%|██████████        | 366/658 [00:09<00:09, 29.61it/s]

['chr1', 'TARS2']
['chr19', 'RPS19']
['chr6', 'PNISR']
['chr4', 'DHX15']
['chr22', 'PNPLA5']
['chr16', 'PSMD7']
['chr16', 'CREBBP']


number of gene symbol:  57%|██████████▎       | 375/658 [00:10<00:09, 29.55it/s]

['chr17', 'MRPL45']
['chr2', 'NOP58']
['chr2', 'WDR75']
['chr5', 'CDC23']
['chr11', 'WEE1']
['chr8', 'ATP6V1B2']
['chr9', 'SPATA31A7']
This gene does not has any peaks in DHS
['chr10', 'CHAT']
This gene does not has any peaks in DHS


number of gene symbol:  59%|██████████▌       | 386/658 [00:10<00:07, 37.52it/s]

['chr14', 'FNTB']
['chr8', 'REXO1L1P']
This gene does not has any peaks in DHS
['chr17', 'OR1G1']
This gene does not has any peaks in DHS
['chr12', 'LHX5']
This gene does not has any peaks in DHS
['chr4', 'UTP3']
['chr12', 'KRT71']
This gene does not has any peaks in DHS
['chrX', 'MAGEC3']
This gene does not has any peaks in DHS
['chr18', 'NAPG']
['chr11', 'OR9Q2']
This gene does not has any peaks in DHS
['chr3', 'EIF2B5']


number of gene symbol:  59%|██████████▋       | 390/658 [00:10<00:07, 37.47it/s]

['chr6', 'VARS2']
['chr1', 'CELA2A']
['chr1', 'DNTTIP2']
['chr1', 'ATP1A1']
['chr15', 'RSL24D1']
['chr3', 'MRPS25']


number of gene symbol:  61%|██████████▉       | 399/658 [00:10<00:07, 35.84it/s]

['chr3', 'GFM1']
['chr20', 'HM13']
['chr1', 'OR2T4']
This gene does not has any peaks in DHS
['chr21', 'MRPL39']
['chr2', 'LRPPRC']
['chr5', 'SPDL1']
['chr11', 'OR52E8']
This gene does not has any peaks in DHS
['chrX', 'CT45A3']
This gene does not has any peaks in DHS
['chr1', 'RPL11']


number of gene symbol:  62%|███████████▏      | 408/658 [00:10<00:06, 37.73it/s]

['chr11', 'EIF3M']
['chr9', 'GOLGA1']
['chr19', 'OR7D4']
This gene does not has any peaks in DHS
['chr17', 'NUP85']
['chrX', 'OGT']
['chr22', 'PIWIL3']
This gene does not has any peaks in DHS
['chr10', 'NOC3L']
['chr15', 'LEO1']
['chr22', 'RTCB']


number of gene symbol:  64%|███████████▍      | 418/658 [00:11<00:06, 36.97it/s]

['chr5', 'BDP1']
['chr19', 'PGLYRP2']
['chrX', 'CT45A2']
This gene does not has any peaks in DHS
['chr12', 'RACGAP1']
['chr19', 'SNRNP70']
['chr10', 'UPF2']
['chr11', 'TAF6L']


number of gene symbol:  65%|███████████▋      | 429/658 [00:11<00:05, 43.35it/s]

['chr10', 'DRGX']
This gene does not has any peaks in DHS
['chr4', 'TMPRSS11A']
This gene does not has any peaks in DHS
['chr11', 'RPS13']
['chr10', 'PNLIPRP1']
This gene does not has any peaks in DHS
['chr18', 'MEP1B']
This gene does not has any peaks in DHS
['chr9', 'IFNA4']
This gene does not has any peaks in DHS
['chr7', 'PSMC2']
['chr1', 'CELA3B']
This gene does not has any peaks in DHS
['chr6', 'ADGRF2']
['chr2', 'ZSWIM2']
['chr17', 'KRTAP9-3']
This gene does not has any peaks in DHS
['chr17', 'RPL38']


number of gene symbol:  66%|███████████▊      | 434/658 [00:11<00:05, 38.93it/s]

['chr13', 'RNF17']
['chr12', 'SNRPF']
['chr9', 'EXOSC2']
['chr11', 'HINFP']
['chr9', 'FOXD4L4']
This gene does not has any peaks in DHS
['chr11', 'CWF19L2']
['chr12', 'KRT76']
This gene does not has any peaks in DHS
['chr8', 'ATP6V1C1']


number of gene symbol:  67%|████████████▏     | 444/658 [00:11<00:05, 36.51it/s]

['chr1', 'OR2T12']
This gene does not has any peaks in DHS
['chr20', 'SYS1']
['chr20', 'CRNKL1']
['chr12', 'ESPL1']
['chr8', 'UQCRB']
['chr1', 'IARS2']
['chr22', 'RFPL3']


number of gene symbol:  69%|████████████▎     | 452/658 [00:12<00:05, 35.91it/s]

This gene does not has any peaks in DHS
['chr9', 'CDKN2A-DT']
This gene does not has any peaks in DHS
['chr14', 'DICER1']
['chr13', 'SLITRK1']
['chr3', 'AP2M1']
['chr5', 'SRFBP1']
['chr14', 'MNAT1']
['chr3', 'ATR']


number of gene symbol:  69%|████████████▍     | 456/658 [00:12<00:05, 36.21it/s]

['chr12', 'RPLP0']
['chr11', 'CABP2']
This gene does not has any peaks in DHS
['chr2', 'MRPS9']
['chr9', 'POLR1E']
['chr12', 'MED21']
['chr7', 'TNPO3']


number of gene symbol:  70%|████████████▌     | 460/658 [00:12<00:07, 27.45it/s]

['chr11', 'RAB1B']
['chr16', 'CTRB1']
This gene does not has any peaks in DHS
['chr4', 'RPL34']
['chr20', 'PDYN']
This gene does not has any peaks in DHS
['chr9', 'EXOSC3']
['chr12', 'LRRC10']


number of gene symbol:  71%|████████████▊     | 469/658 [00:12<00:05, 31.76it/s]

['chr11', 'OOSP2']
This gene does not has any peaks in DHS
['chr19', 'KASH5']
['chr19', 'NAPA']
['chr12', 'RPS26']
['chr16', 'AQP8']
['chr16', 'NIP7']
This gene does not has any peaks in DHS
['chr2', 'MCM6']
['chr12', 'ATP5F1B']


number of gene symbol:  72%|█████████████     | 477/658 [00:12<00:05, 32.74it/s]

['chr9', 'IFNA5']
This gene does not has any peaks in DHS
['chr10', 'NOLC1']
['chr16', 'E4F1']
['chr7', 'SRRT']
['chr17', 'FTSJ3']
['chr11', 'OR5T2']
This gene does not has any peaks in DHS
['chr15', 'TBC1D21']
['chr6', 'PGK2']


number of gene symbol:  74%|█████████████▎    | 485/658 [00:13<00:05, 34.28it/s]

['chr3', 'COPB2']
['chr6', 'HBS1L']
['chr6', 'MDN1']
['chr5', 'BRIX1']
['chr12', 'NEDD1']
['chr3', 'UBA3']
['chr15', 'CDAN1']


number of gene symbol:  75%|█████████████▍    | 493/658 [00:13<00:05, 29.47it/s]

['chr4', 'SPCS3']
['chr9', 'SEC16A']
['chrX', 'ERCC6L']
['chr3', 'LARS2']
['chr9', 'SURF6']
['chr9', 'NOL6']


number of gene symbol:  76%|█████████████▌    | 498/658 [00:13<00:04, 32.32it/s]

['chr2', 'ATIC']
['chrY', 'RBMY1E']
This gene does not has any peaks in DHS
['chr16', 'POLR2C']
['chr22', 'CENPM']
['chr6', 'MMS22L']
['chr20', 'NKX2-2']
This gene does not has any peaks in DHS
['chr3', 'RNF168']
['chr16', 'MT1B']
This gene does not has any peaks in DHS
['chr4', 'RBM46']


number of gene symbol:  77%|█████████████▊    | 507/658 [00:13<00:04, 32.80it/s]

['chr8', 'EFR3A']
['chr2', 'RPS7']
['chr14', 'SNW1']
['chr10', 'SLC25A28']
['chr17', 'KRT36']
This gene does not has any peaks in DHS
['chr1', 'OR6K6']
This gene does not has any peaks in DHS
['chr15', 'POLR2M']
['chr9', 'PUM3']


number of gene symbol:  79%|██████████████▏   | 517/658 [00:14<00:03, 38.16it/s]

['chr9', 'CYLC2']
This gene does not has any peaks in DHS
['chr19', 'ZIM3']
This gene does not has any peaks in DHS
['chr8', 'MROH6']
['chr2', 'IL36A']
This gene does not has any peaks in DHS
['chr17', 'TBC1D29P']
This gene does not has any peaks in DHS
['chr7', 'COPS6']
['chr10', 'DDX21']
['chr17', 'COPS3']
['chr17', 'CLTC']


number of gene symbol:  79%|██████████████▎   | 521/658 [00:14<00:03, 37.26it/s]

['chr11', 'OR5T3']
This gene does not has any peaks in DHS
['chr17', 'VPS53']
['chrX', 'GPR101']
This gene does not has any peaks in DHS
['chr1', 'MRPL37']
['chr2', 'HJURP']


number of gene symbol:  80%|██████████████▍   | 529/658 [00:14<00:04, 30.76it/s]

['chr5', 'RPS23']
['chr17', 'MRPS23']
['chr8', 'RPS20']
['chr3', 'SLC2A2']
This gene does not has any peaks in DHS
['chr1', 'TCHHL1']
This gene does not has any peaks in DHS
['chr7', 'SLC13A1']
This gene does not has any peaks in DHS
['chr16', 'DHODH']
['chr9', 'RPS6']


number of gene symbol:  82%|██████████████▋   | 538/658 [00:14<00:03, 31.54it/s]

['chr7', 'DDX56']
['chr1', 'ALG14']
['chr4', 'CENPC']
['chr6', 'IGF2R']
['chr19', 'VN1R2']
['chr8', 'VPS28']


number of gene symbol:  83%|██████████████▉   | 546/658 [00:15<00:03, 33.53it/s]

['chr19', 'GPR32']
This gene does not has any peaks in DHS
['chr1', 'PSMD4']
['chr13', 'NUP58']
['chr5', 'HSPA9']
['chr10', 'CYP2C19']
This gene does not has any peaks in DHS
['chr1', 'GTF2B']
['chr8', 'RPL30']
['chr5', 'ZNF131']


number of gene symbol:  84%|███████████████▏  | 555/658 [00:15<00:02, 35.21it/s]

['chr12', 'SNRNP35']
['chr5', 'GARIN3']
This gene does not has any peaks in DHS
['chr9', 'OR5C1']
This gene does not has any peaks in DHS
['chr9', 'TEX10']
['chr3', 'H1-8']
This gene does not has any peaks in DHS
['chr13', 'RFC3']
['chr9', 'VCP']
['chr1', 'PRAMEF1']
This gene does not has any peaks in DHS
['chr8', 'RAB2A']


number of gene symbol:  86%|███████████████▍  | 563/658 [00:15<00:02, 35.98it/s]

['chr7', 'YAE1']
['chr1', 'F13B']
This gene does not has any peaks in DHS
['chr9', 'PPP3R2']
This gene does not has any peaks in DHS
['chr17', 'DHX8']
['chr8', 'RAD21']
['chr15', 'RPL4']
['chr16', 'C16orf78']
This gene does not has any peaks in DHS
['chr1', 'RRP15']
['chr19', 'CNOT3']


number of gene symbol:  87%|███████████████▌  | 571/658 [00:15<00:02, 35.15it/s]

['chr21', 'SOD1']
['chr17', 'DDX52']
['chr19', 'CHMP2A']
['chr4', 'TRIML1']
This gene does not has any peaks in DHS
['chr3', 'RFC4']
['chr3', 'ELP6']
['chr17', 'KRT33A']
This gene does not has any peaks in DHS
['chr8', 'DEFB103B']
This gene does not has any peaks in DHS
['chr8', 'RRS1']


number of gene symbol:  88%|███████████████▊  | 579/658 [00:15<00:02, 34.56it/s]

['chr2', 'FARSB']
['chr22', 'SAMM50']
['chr19', 'FFAR1']
This gene does not has any peaks in DHS
['chr12', 'YEATS4']
['chr3', 'IMPDH2']
['chr13', 'CDX2']
This gene does not has any peaks in DHS
['chr19', 'DNM2']


number of gene symbol:  89%|███████████████▉  | 583/658 [00:16<00:02, 30.99it/s]

['chr15', 'PSMA4']
['chr17', 'COG1']
['chr16', 'RPL13']
['chr8', 'C8orf17']
['chr21', 'PAXBP1']
['chr5', 'NEUROG1']
This gene does not has any peaks in DHS


number of gene symbol:  90%|████████████████▏ | 592/658 [00:16<00:01, 34.73it/s]

['chr4', 'HMX1']
This gene does not has any peaks in DHS
['chr6', 'GPX5']
['chr1', 'MRPS21']
['chr7', 'POLD2']
['chr10', 'PRPF18']
['chr22', 'VPREB1']
This gene does not has any peaks in DHS
['chr18', 'KDSR']
['chr20', 'SNRPB']


number of gene symbol:  91%|████████████████▍ | 600/658 [00:16<00:01, 32.20it/s]

['chr1', 'PRAMEF4']
This gene does not has any peaks in DHS
['chr2', 'SNRNP200']
['chr2', 'EIF5B']
['chr19', 'RPS5']
['chr12', 'CLEC6A']
This gene does not has any peaks in DHS
['chr17', 'CHMP6']


number of gene symbol:  93%|████████████████▋ | 609/658 [00:16<00:01, 35.89it/s]

['chr1', 'SPRR4']
This gene does not has any peaks in DHS
['chr1', 'CATSPER4']
['chr2', 'GKN2']
This gene does not has any peaks in DHS
['chr22', 'ATXN10']
['chrY', 'BPY2']
This gene does not has any peaks in DHS
['chr12', 'IL22']
This gene does not has any peaks in DHS
['chr3', 'RPL35A']
['chr3', 'RPL24']
['chr2', 'GTF3C2']


number of gene symbol:  93%|████████████████▊ | 614/658 [00:16<00:01, 36.62it/s]

['chrX', 'FAM47A']
This gene does not has any peaks in DHS
['chr4', 'TRIM60']
This gene does not has any peaks in DHS
['chr13', 'RPL21']
['chr1', 'ATP6V0B']
['chr3', 'UMPS']
['chr3', 'ANAPC13']
['chr4', 'YTHDC1']
['chr8', 'CPSF1']


number of gene symbol:  95%|█████████████████ | 622/658 [00:17<00:01, 34.22it/s]

['chr11', 'MRPL16']
['chr5', 'GEMIN5']
['chr8', 'MAK16']
['chr4', 'CENPE']
['chrX', 'F9']
This gene does not has any peaks in DHS
['chr19', 'VN1R4']
This gene does not has any peaks in DHS
['chr5', 'RIOK2']
['chr6', 'LTV1']


number of gene symbol:  96%|█████████████████▎| 631/658 [00:17<00:00, 34.39it/s]

['chr10', 'MTPAP']
['chr15', 'FOXB1']
This gene does not has any peaks in DHS
['chr5', 'TCERG1']
['chr10', 'BUB3']
['chr20', 'C20orf173']
['chr1', 'EIF2B3']
['chr6', 'GRM4']


number of gene symbol:  97%|█████████████████▍| 639/658 [00:17<00:00, 32.02it/s]

['chr19', 'NANOS2']
['chr11', 'CCDC86']
['chr3', 'ECT2']
['chr16', 'RPS15A']
['chr5', 'THG1L']
['chr6', 'CDC5L']
['chr1', 'GNL2']


number of gene symbol:  98%|█████████████████▌| 643/658 [00:17<00:00, 33.93it/s]

['chr6', 'SNRNP48']
['chr19', 'UQCRFS1']
['chr12', 'MRPS35']
['chr19', 'FBL']
['chr18', 'AFG3L2']
['chr11', 'CHEK1']
['chr5', 'LARS1']


number of gene symbol:  99%|█████████████████▊| 652/658 [00:18<00:00, 35.29it/s]

['chr11', 'OR52A1']
['chr17', 'MED1']
['chr7', 'NEUROD6']
This gene does not has any peaks in DHS
['chr13', 'RNF113B']
['chr2', 'CARF']
['chr9', 'SMC2']
['chr11', 'MRPL17']
['chr17', 'ZNF830']


number of gene symbol: 100%|██████████████████| 658/658 [00:18<00:00, 35.97it/s]

['chr22', 'BCR']
['chr17', 'NLE1']
['chr11', 'OR4C11']
This gene does not has any peaks in DHS





In [13]:
CRISPRiFiles_DHS_K562['overlap with peak'].value_counts()

overlap with peak
0    123680
1     42944
Name: count, dtype: int64

In [14]:
CRISPRiFiles_DHS_K562.to_csv(f'{peakOverlapOut}K562.csv')

In [16]:
DHS_K562_combined = pd.merge(NewDatasetCombine,CRISPRiFiles_DHS_K562, how = 'inner')

In [17]:
DHS_HCT116_combined = pd.merge(NewDatasetCombine,CRISPRiFiles_DHS_HCT116, how = 'inner')

In [18]:
DHS_A549_combined = pd.merge(NewDatasetCombine,CRISPRiFiles_DHS_A549, how = 'inner')

In [20]:
DHS_K562_combined['DHS'] = 'K562'
DHS_HCT116_combined['DHS'] = 'HCT116'
DHS_A549_combined['DHS'] = 'A549'

In [21]:
DHS_combined = pd.concat([DHS_K562_combined,DHS_HCT116_combined,DHS_A549_combined])


In [22]:
DHS_combined

Unnamed: 0,Target Gene Symbol,chromosome,sgRNA Context Sequence,sgRNA 'Cut' Position,condition,Domain,DataSet,orginal phenotype,overlap with peak,DHS
0,ACTR6,chr12,TGCTTCGAGCACATAGAGGGATCTGGGCCC,100198288.0,K562,Kox1,Nunez,-0.018881,0,K562
1,ACTR6,chr12,CTGCTTCGAGCACATAGAGGGATCTGGGCC,100198289.0,K562,Kox1,Nunez,0.009292,0,K562
2,ACTR6,chr12,GTCCACCTGCTTCGAGCACATAGAGGGATC,100198295.0,K562,Kox1,Nunez,0.000734,0,K562
3,ACTR6,chr12,GGTCCACCTGCTTCGAGCACATAGAGGGAT,100198296.0,K562,Kox1,Nunez,-0.008256,0,K562
4,ACTR6,chr12,GGGCTAGACTCTGTCACCTCCGAGGGGTCC,100198321.0,K562,Kox1,Nunez,0.001314,0,K562
...,...,...,...,...,...,...,...,...,...,...
431090,HMX1,chr4,GTGAGGGCGCGGGGAGGGTGGGTCAGGCGG,8871196.0,HCT116,Zim3,InHouse,-0.721298,0,A549
431091,SNUPN,chr15,GCGTGGGCGGGGCCTGGGGCGGGTCGGACT,75625191.0,HCT116,Zim3,InHouse,-1.293837,1,A549
431092,SNUPN,chr15,GGAGGGTGGAGCGTGGGCGGGGCCTGGGGC,75625201.0,HCT116,Zim3,InHouse,-1.423518,1,A549
431093,NKX2-2,chr20,CAACTCCCCCCTCCCCCGCCCGCCGGGGCA,21513745.0,HCT116,Zim3,InHouse,-1.040580,0,A549


In [23]:
DHS_combined['overlap with peak'].value_counts()

overlap with peak
0    1065460
1     227825
Name: count, dtype: int64

In [24]:
out_path = '../../Data/DHS/'
DHS_combined.to_csv(out_path+'DHS_PeakOverlap.csv', index = False)