## 3. Calculate mAP for bulk and single-cell Perturb-seq data

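Also calculate UMAP embeddings of the single-cell profiles at the same time: one embedding per targeted gene, fit on that gene's cells together with the sampled negative-control cells.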

In [1]:
import pathlib

import umap
import numpy as np
import pandas as pd

from map_utils.map import calculate_map

In [2]:
# Dataset accession and input locations used throughout the notebook.
gse_id = "GSE132080"
perturbseq_data_dir = pathlib.Path("data/")
# Path of the screen-phenotype table, relative to `perturbseq_data_dir`.
perturbseq_screen_phenotypes = (
    "paper_supplement/Table_S16_perturb-seq_screen_phenotypes.txt"
)

# Seed NumPy's global RNG so stochastic steps that draw from it are repeatable
# when the notebook is run top to bottom.
np.random.seed(42)

### Calculate mAP for bulk data

In [3]:
# Load activities results (bulk)
file = perturbseq_data_dir / perturbseq_screen_phenotypes

activity_df = (
    pd.read_csv(file, sep="\t")
    # The first column has no header in the file; it holds the guide id.
    .rename(columns={"Unnamed: 0": "id"})
    # Build "<gene>_<id>" so rows can be matched against the perturbation
    # identifiers used by the mAP results later in the notebook.
    .assign(perturbation=lambda df: df["gene"] + "_" + df["id"])
)

print(activity_df.shape)
activity_df.head()

(128, 8)


Unnamed: 0,id,sequence,gene,gamma_day5,gamma_day10,relative_activity_day5,relative_activity_day10,perturbation
0,ALDOA_+_30077139.23-P1P2_00,GGTCACCAGGACCCCTTCTG,ALDOA,-0.412746,-0.366469,1.0,1.0,ALDOA_ALDOA_+_30077139.23-P1P2_00
1,ALDOA_+_30077139.23-P1P2_06,GGTCACCAGGATCCCTTCTG,ALDOA,-0.396687,-0.348503,0.961091,0.950977,ALDOA_ALDOA_+_30077139.23-P1P2_06
2,ALDOA_+_30077139.23-P1P2_07,GGTCACCAGGCCCCCTTCTG,ALDOA,-0.360892,-0.335059,0.874369,0.914291,ALDOA_ALDOA_+_30077139.23-P1P2_07
3,ALDOA_+_30077139.23-P1P2_13,GGTCACCAGGACCCCTTTTG,ALDOA,0.017063,-0.00022,-0.04134,0.000601,ALDOA_ALDOA_+_30077139.23-P1P2_13
4,ALDOA_+_30077139.23-P1P2_14,GGTCACCAGGACCGCTTCTG,ALDOA,-0.175243,-0.156611,0.424579,0.427353,ALDOA_ALDOA_+_30077139.23-P1P2_14


In [4]:
# Load the bulk analytical profiles for this accession.
bulk_file = perturbseq_data_dir / f"{gse_id}_bulk_final_analytical.tsv.gz"

bulk_df = (
    pd.read_csv(bulk_file, sep="\t")
    # Drop rows whose gene identity is the '*' placeholder.
    .query("Metadata_gene_identity != '*'")
    .reset_index(drop=True)
    # Negative-control rows ("neg") get their own row position as reference
    # index; every other row gets -1.
    .assign(
        Metadata_reference_index=lambda df: np.where(
            df["Metadata_gene_identity"] == "neg", df.index, -1
        )
    )
)
bulk_df

Unnamed: 0,Metadata_guide_identity,Metadata_gene_identity,ABCA1,ABCC3,ABI3BP,AC002331.1,AC002480.3,AC003092.1,AC005616.2,AC006262.5,...,YPEL5,ZBTB38,ZFAS1,ZFP36L1,ZNF365,ZNF43,ZNF483,ZNF556,ZYX,Metadata_reference_index
0,ALDOA_ALDOA_+_30077139.23-P1P2_00,ALDOA,-0.078689,-0.093494,-0.01406,-0.013653,-0.018973,-0.026806,-0.034066,-0.485621,...,-0.632468,-0.539091,-0.170790,0.014776,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229,-1
1,ALDOA_ALDOA_+_30077139.23-P1P2_06,ALDOA,-0.078689,-0.093494,-0.01406,-0.013653,-0.018973,-0.026806,-0.034066,-0.485621,...,-0.632468,-0.539091,-0.045114,0.042050,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229,-1
2,ALDOA_ALDOA_+_30077139.23-P1P2_07,ALDOA,-0.078689,-0.093494,-0.01406,-0.013653,-0.018973,-0.026806,-0.034066,-0.485621,...,-0.632468,-0.539091,-0.101183,0.027887,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229,-1
3,ALDOA_ALDOA_+_30077139.23-P1P2_13,ALDOA,-0.078689,-0.093494,-0.01406,-0.013653,-0.018973,-0.026806,-0.034066,-0.485621,...,-0.632468,-0.539091,-0.050848,-0.107389,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229,-1
4,ALDOA_ALDOA_+_30077139.23-P1P2_14,ALDOA,-0.078689,-0.093494,-0.01406,-0.013653,-0.018973,-0.026806,-0.034066,-0.485621,...,-0.632468,-0.539091,0.150701,0.051095,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133,neg_ctrl_non-targeting_00283,neg,-0.078689,-0.093494,-0.01406,-0.013653,-0.018973,-0.026806,-0.034066,-0.485621,...,-0.632468,-0.539091,-0.115780,-0.179220,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229,133
134,neg_ctrl_non-targeting_00406,neg,-0.078689,-0.093494,-0.01406,-0.013653,-0.018973,-0.026806,-0.034066,-0.485621,...,-0.632468,-0.539091,-0.090275,-0.027992,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229,134
135,neg_ctrl_non-targeting_00527,neg,-0.078689,-0.093494,-0.01406,-0.013653,-0.018973,-0.026806,-0.034066,-0.485621,...,-0.632468,-0.539091,-0.084769,-0.130657,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229,135
136,neg_ctrl_non-targeting_00802,neg,-0.078689,-0.093494,-0.01406,-0.013653,-0.018973,-0.026806,-0.034066,-0.485621,...,-0.632468,-0.539091,-0.007599,-0.058443,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229,136


In [5]:
# Variance of every non-metadata (gene) column across the bulk profiles.
gene_variance = bulk_df.filter(regex="^(?!Metadata_)").var()

# Keep only genes whose variance exceeds 0.001; near-constant genes are dropped.
genes_to_retain = gene_variance.index[gene_variance > 0.001].tolist()
genes_to_retain

['AC006262.5',
 'AC016629.3',
 'AC079466.1',
 'ACP5',
 'ACSM3',
 'ACTB',
 'ACTG1',
 'AHNAK',
 'AIF1',
 'ALAS2',
 'AMHR2',
 'ANKRD10',
 'ANXA2',
 'ANXA2R',
 'ANXA3',
 'ANXA5',
 'APOC1',
 'APOE',
 'AQP10',
 'ARL6IP1',
 'ARMCX3',
 'ASNS',
 'ASPM',
 'ASS1',
 'ATF4',
 'ATF5',
 'ATF7IP2',
 'ATPIF1',
 'AURKA',
 'AURKB',
 'BAG1',
 'BBC3',
 'BCYRN1',
 'BEX2',
 'BEX4',
 'BEX5',
 'BHLHE40',
 'BIRC5',
 'BLVRB',
 'BRIX1',
 'BST2',
 'BTG1',
 'BTG2',
 'BUB1',
 'C10orf128',
 'C10orf54',
 'C1orf61',
 'C6orf48',
 'CACYBP',
 'CALM2',
 'CALR',
 'CARHSP1',
 'CCDC71L',
 'CCNA2',
 'CCNB1',
 'CCNB2',
 'CCNE1',
 'CCNF',
 'CCPG1',
 'CD47',
 'CD52',
 'CD53',
 'CD7',
 'CDC20',
 'CDC42EP1',
 'CDC45',
 'CDCA3',
 'CDCA8',
 'CDK1',
 'CDK2AP2',
 'CDT1',
 'CEBPB',
 'CENPA',
 'CENPE',
 'CENPF',
 'CFD',
 'CHI3L2',
 'CITED2',
 'CKAP2',
 'CKB',
 'CKS1B',
 'CKS2',
 'CLDN6',
 'CLEC2L',
 'CLIC1',
 'CLSPN',
 'CLTA',
 'COL2A1',
 'COLGALT2',
 'CORO1A',
 'CR1L',
 'CRELD2',
 'CREM',
 'CRYAB',
 'CRYM',
 'CSF3R',
 'CTH',
 'CTNNB1',


In [6]:
# Metadata columns identifying the guide and the gene it targets.
barcode_col, gene_col = "Metadata_guide_identity", "Metadata_gene_identity"

replicate_group = dict(profile_col=barcode_col, replicate_group_col=gene_col)

# Guide identifiers of the non-targeting controls (those containing "neg_ctrl").
neg_controls = [
    guide_id for guide_id in bulk_df[barcode_col] if "neg_ctrl" in guide_id
]
neg_controls

['neg_ctrl_non-targeting_00001',
 'neg_ctrl_non-targeting_00028',
 'neg_ctrl_non-targeting_00054',
 'neg_ctrl_non-targeting_00089',
 'neg_ctrl_non-targeting_00217',
 'neg_ctrl_non-targeting_00283',
 'neg_ctrl_non-targeting_00406',
 'neg_ctrl_non-targeting_00527',
 'neg_ctrl_non-targeting_00802',
 'neg_ctrl_non-targeting_01040']

In [7]:
# Pairing rules handed to calculate_map. Only "pos_sameby" and "neg_diffby"
# carry columns; presumably positives must share, and negatives must differ in,
# both the gene identity and the reference index -- confirm against map_utils.
pair_config = dict(
    pos_sameby=dict(
        all=["Metadata_gene_identity", "Metadata_reference_index"],
        any=[],
    ),
    pos_diffby=dict(all=[], any=[]),
    neg_sameby=dict(all=[], any=[]),
    neg_diffby=dict(
        all=["Metadata_gene_identity", "Metadata_reference_index"],
        any=[],
    ),
)

map_config = dict(
    null_size=500000,
    groupby_columns=["Metadata_guide_identity", "Metadata_gene_identity"],
)

# Every column whose name contains "Metadata_", in frame order.
metadata_cols = [col for col in bulk_df.columns if "Metadata_" in col]

# Score only the metadata plus the variance-filtered genes, then relabel the
# identifier columns so they line up with the activity table.
map_results = calculate_map(
    bulk_df[metadata_cols + genes_to_retain], pair_config, map_config
).rename(
    columns={
        "Metadata_guide_identity": "perturbation",
        "Metadata_gene_identity": "group",
    }
)
map_results

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/128 [00:00<?, ?it/s]

Unnamed: 0,perturbation,group,mAP,p_value,corrected_p_value,below_p,p < 0.05,-log10(mAP p-value)
0,ALDOA_ALDOA_+_30077139.23-P1P2_00,ALDOA,0.701923,0.047928,0.074814,True,False,1.126015
1,ALDOA_ALDOA_+_30077139.23-P1P2_06,ALDOA,0.678571,0.062910,0.094735,False,False,1.023490
2,ALDOA_ALDOA_+_30077139.23-P1P2_07,ALDOA,0.678571,0.062910,0.094735,False,False,1.023490
3,ALDOA_ALDOA_+_30077139.23-P1P2_13,ALDOA,0.193515,0.999032,0.999656,False,False,0.000149
4,ALDOA_ALDOA_+_30077139.23-P1P2_14,ALDOA,0.241883,0.916268,0.999656,False,False,0.000149
...,...,...,...,...,...,...,...,...
123,TUBB_TUBB_+_30688126.23-P1_00,TUBB,1.000000,0.000002,0.000005,True,True,5.299331
124,TUBB_TUBB_+_30688126.23-P1_01,TUBB,1.000000,0.000002,0.000005,True,True,5.299331
125,TUBB_TUBB_+_30688126.23-P1_03,TUBB,1.000000,0.000002,0.000005,True,True,5.299331
126,TUBB_TUBB_+_30688126.23-P1_06,TUBB,1.000000,0.000002,0.000005,True,True,5.299331


In [8]:
# Merge with activity results and output file
output_results_file = pathlib.Path(f"results/{gse_id}_map.tsv")

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists (a fresh checkout may not contain it).
output_results_file.parent.mkdir(parents=True, exist_ok=True)

# Inner join on the shared "perturbation" key: only perturbations present in
# both the mAP results and the activity table are written out.
result = map_results.merge(activity_df, on="perturbation")

result.to_csv(output_results_file, sep="\t", index=False)

print(result.shape)
result.head(3)

(128, 15)


Unnamed: 0,perturbation,group,mAP,p_value,corrected_p_value,below_p,p < 0.05,-log10(mAP p-value),id,sequence,gene,gamma_day5,gamma_day10,relative_activity_day5,relative_activity_day10
0,ALDOA_ALDOA_+_30077139.23-P1P2_00,ALDOA,0.701923,0.047928,0.074814,True,False,1.126015,ALDOA_+_30077139.23-P1P2_00,GGTCACCAGGACCCCTTCTG,ALDOA,-0.412746,-0.366469,1.0,1.0
1,ALDOA_ALDOA_+_30077139.23-P1P2_06,ALDOA,0.678571,0.06291,0.094735,False,False,1.02349,ALDOA_+_30077139.23-P1P2_06,GGTCACCAGGATCCCTTCTG,ALDOA,-0.396687,-0.348503,0.961091,0.950977
2,ALDOA_ALDOA_+_30077139.23-P1P2_07,ALDOA,0.678571,0.06291,0.094735,False,False,1.02349,ALDOA_+_30077139.23-P1P2_07,GGTCACCAGGCCCCCTTCTG,ALDOA,-0.360892,-0.335059,0.874369,0.914291


### Calculate mAP for single-cell data

In [9]:
# Load finalized single cell perturbseq data
gene_exp_file = perturbseq_data_dir / f"{gse_id}_final_analytical.tsv.gz"

# NOTE(review): the recorded run of this cell emitted a warning on this
# read_csv call -- presumably pandas' DtypeWarning about mixed-type columns
# (confirm). low_memory=False makes pandas infer each column's dtype from the
# whole column rather than chunk by chunk, which avoids mixed-type columns.
sc_gene_exp_df = pd.read_csv(gene_exp_file, sep="\t", low_memory=False)

# Every column that is not metadata is treated as a gene-expression feature.
gene_features = [x for x in sc_gene_exp_df if not x.startswith("Metadata_")]

print(sc_gene_exp_df.shape)
sc_gene_exp_df

  sc_gene_exp_df = pd.read_csv(gene_exp_file, sep="\t")


(23537, 1012)


Unnamed: 0,Metadata_cell_identity,Metadata_cell_barcode,Metadata_guide_identity,Metadata_read_count,Metadata_UMI_count,Metadata_coverage,Metadata_gemgroup,Metadata_good_coverage,Metadata_number_of_cells,Metadata_gene_identity,...,YPEL4,YPEL5,ZBTB38,ZFAS1,ZFP36L1,ZNF365,ZNF43,ZNF483,ZNF556,ZYX
0,sc_profile_0,AAACCTGAGAGTAATC-1,RAN_RAN_+_131356438.23-P1P2_12,544.0,34.0,16.000000,1.0,True,1.0,RAN,...,-0.09309,-0.632468,-0.539091,-1.633805,-0.876912,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229
1,sc_profile_1,AAACCTGAGGGATCTG-1,neg_ctrl_non-targeting_00089,267.0,19.0,14.052632,1.0,True,1.0,neg,...,-0.09309,-0.632468,-0.539091,1.610220,1.322265,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229
2,sc_profile_2,AAACCTGAGGTCATCT-1,POLR2H_POLR2H_+_184081251.23-P1P2_08,622.0,34.0,18.294118,1.0,True,1.0,POLR2H,...,-0.09309,-0.632468,-0.539091,-0.567252,0.057540,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229
3,sc_profile_3,AAACCTGCAATGGAGC-1,TUBB_TUBB_+_30688126.23-P1_03,433.0,20.0,21.650000,1.0,True,1.0,TUBB,...,-0.09309,1.765279,-0.539091,-1.659420,-0.876912,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229
4,sc_profile_4,AAACCTGCACCAGGCT-1,CDC23_CDC23_-_137548987.23-P1P2_04,136.0,8.0,17.000000,1.0,True,1.0,CDC23,...,-0.09309,0.686753,-0.539091,0.541260,0.934539,-0.012155,-0.017763,-0.026877,-0.0524,1.406718
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23532,sc_profile_23532,TTTGTCAGTTTGACTG-3,CDC23_CDC23_-_137548987.23-P1P2_11,1509.0,106.0,14.235849,3.0,True,3.0,CDC23,...,-0.09309,0.207130,-0.539091,-0.772589,-0.876912,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229
23533,sc_profile_23533,TTTGTCAGTTTGTTGG-3,ATP5E_ATP5E_-_57607036.23-P1P2_16,1207.0,73.0,16.534247,3.0,True,1.0,ATP5E,...,-0.09309,-0.632468,-0.539091,-0.539488,-0.876912,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229
23534,sc_profile_23534,TTTGTCATCACAAACC-3,HSPE1_HSPE1_+_198365089.23-P1P2_01,378.0,25.0,15.120000,3.0,True,1.0,HSPE1,...,-0.09309,-0.632468,0.738755,-0.341440,-0.876912,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229
23535,sc_profile_23535,TTTGTCATCAGCATGT-3,neg_ctrl_non-targeting_00028,856.0,53.0,16.150943,3.0,True,1.0,neg,...,-0.09309,0.379025,0.571873,0.294543,-0.876912,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229


In [10]:
sc_gene_exp_df = (
    sc_gene_exp_df
    # Drop cells whose gene identity is the '*' placeholder ...
    .query("Metadata_gene_identity != '*'")
    # ... and keep only cells flagged as having good coverage.
    .query("Metadata_good_coverage == True")
    .reset_index(drop=True)
    # Negative-control cells ("neg") get their own row position as reference
    # index; every other cell gets -1.
    .assign(
        Metadata_reference_index=lambda df: np.where(
            df["Metadata_gene_identity"] == "neg", df.index, -1
        )
    )
)
sc_gene_exp_df

Unnamed: 0,Metadata_cell_identity,Metadata_cell_barcode,Metadata_guide_identity,Metadata_read_count,Metadata_UMI_count,Metadata_coverage,Metadata_gemgroup,Metadata_good_coverage,Metadata_number_of_cells,Metadata_gene_identity,...,YPEL5,ZBTB38,ZFAS1,ZFP36L1,ZNF365,ZNF43,ZNF483,ZNF556,ZYX,Metadata_reference_index
0,sc_profile_0,AAACCTGAGAGTAATC-1,RAN_RAN_+_131356438.23-P1P2_12,544.0,34.0,16.000000,1.0,True,1.0,RAN,...,-0.632468,-0.539091,-1.633805,-0.876912,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229,-1
1,sc_profile_1,AAACCTGAGGGATCTG-1,neg_ctrl_non-targeting_00089,267.0,19.0,14.052632,1.0,True,1.0,neg,...,-0.632468,-0.539091,1.610220,1.322265,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229,1
2,sc_profile_2,AAACCTGAGGTCATCT-1,POLR2H_POLR2H_+_184081251.23-P1P2_08,622.0,34.0,18.294118,1.0,True,1.0,POLR2H,...,-0.632468,-0.539091,-0.567252,0.057540,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229,-1
3,sc_profile_3,AAACCTGCAATGGAGC-1,TUBB_TUBB_+_30688126.23-P1_03,433.0,20.0,21.650000,1.0,True,1.0,TUBB,...,1.765279,-0.539091,-1.659420,-0.876912,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229,-1
4,sc_profile_4,AAACCTGCACCAGGCT-1,CDC23_CDC23_-_137548987.23-P1P2_04,136.0,8.0,17.000000,1.0,True,1.0,CDC23,...,0.686753,-0.539091,0.541260,0.934539,-0.012155,-0.017763,-0.026877,-0.0524,1.406718,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22256,sc_profile_23532,TTTGTCAGTTTGACTG-3,CDC23_CDC23_-_137548987.23-P1P2_11,1509.0,106.0,14.235849,3.0,True,3.0,CDC23,...,0.207130,-0.539091,-0.772589,-0.876912,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229,-1
22257,sc_profile_23533,TTTGTCAGTTTGTTGG-3,ATP5E_ATP5E_-_57607036.23-P1P2_16,1207.0,73.0,16.534247,3.0,True,1.0,ATP5E,...,-0.632468,-0.539091,-0.539488,-0.876912,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229,-1
22258,sc_profile_23534,TTTGTCATCACAAACC-3,HSPE1_HSPE1_+_198365089.23-P1P2_01,378.0,25.0,15.120000,3.0,True,1.0,HSPE1,...,-0.632468,0.738755,-0.341440,-0.876912,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229,-1
22259,sc_profile_23535,TTTGTCATCAGCATGT-3,neg_ctrl_non-targeting_00028,856.0,53.0,16.150943,3.0,True,1.0,neg,...,0.379025,0.571873,0.294543,-0.876912,-0.012155,-0.017763,-0.026877,-0.0524,-0.452229,22259


In [11]:
# Draw a fixed 20% sample of the negative-control cells.
# random_state is pinned so the same control cells are selected no matter how
# much of NumPy's global RNG earlier cells consumed, and so re-running this
# cell on its own does not silently change the control set.
sc_neg_controls_df = sc_gene_exp_df.query(
    "Metadata_guide_identity in @neg_controls"
).sample(frac=0.2, random_state=42)

# Cell identifiers of the sampled controls. The frame above is already
# restricted to negative-control guides, so no further filtering is needed.
sc_neg_controls = sc_neg_controls_df.Metadata_cell_identity.tolist()

replicate_group = {
    "profile_col": "Metadata_cell_identity",
    "replicate_group_col": "Metadata_guide_identity",
}

In [12]:
# Pairing rules for the single-cell runs, keyed on guide identity instead of
# gene identity. Only "pos_sameby" and "neg_diffby" carry columns.
pair_config = dict(
    pos_sameby=dict(
        all=["Metadata_guide_identity", "Metadata_reference_index"],
        any=[],
    ),
    pos_diffby=dict(all=[], any=[]),
    neg_sameby=dict(all=[], any=[]),
    neg_diffby=dict(
        all=["Metadata_guide_identity", "Metadata_reference_index"],
        any=[],
    ),
)

map_config = dict(
    null_size=500000,
    groupby_columns=["Metadata_guide_identity", "Metadata_cell_identity"],
)

In [13]:
# Accumulators: one mAP result frame per guide, one UMAP embedding frame per gene.
all_sc_map_results = []
all_sc_umap_embeddings = []

# Values of Metadata_gene_identity that do not name a targeted gene.
non_target_labels = ("neg", "*", "nan")

genes = sc_gene_exp_df.Metadata_gene_identity.unique()
for gene in genes:
    # Skip controls / placeholders. pd.isna is used for missing values because
    # `gene in [..., np.nan]` only matches NaN by object identity.
    if pd.isna(gene) or gene in non_target_labels:
        continue

    print(f"Now analyzing {gene}...")
    subset_sc_df = sc_gene_exp_df.query("Metadata_gene_identity == @gene")
    guides = subset_sc_df.Metadata_guide_identity.unique()

    # use the same controls in every experiment
    subset_sc_df = pd.concat([subset_sc_df, sc_neg_controls_df]).reset_index(
        drop=True
    )

    # apply UMAP to single cell profiles (all profiles of one gene + neg controls)
    # random_state is fixed so the embedding is reproducible between runs.
    embedding = umap.UMAP(random_state=42).fit_transform(
        subset_sc_df.loc[:, genes_to_retain]
    )

    # combine results with single cell dataframe (metadata columns only)
    embedding_df = pd.concat(
        [
            subset_sc_df.drop(gene_features, axis="columns").reset_index(drop=True),
            pd.DataFrame(embedding, columns=["umap_0", "umap_1"]),
        ],
        axis="columns",
    )
    all_sc_umap_embeddings.append(embedding_df.assign(map_gene=gene))

    # Each per-guide frame below is built from the same two sources as
    # subset_sc_df, so it has the same columns; compute the list once per gene.
    metadata_cols = subset_sc_df.filter(regex="Metadata_").columns.tolist()

    # now calculate sc-mAP per guide
    for guide in guides:
        # Cells carrying this guide plus the shared negative-control sample.
        subset_guide_df = pd.concat(
            [
                subset_sc_df.query("Metadata_guide_identity == @guide"),
                sc_neg_controls_df,
            ]
        ).reset_index(drop=True)

        # Named differently from the bulk `map_results` so that the bulk table
        # is not overwritten by the last guide processed here.
        guide_map_results = calculate_map(
            subset_guide_df.loc[:, metadata_cols + genes_to_retain],
            pair_config,
            map_config,
        )
        all_sc_map_results.append(
            guide_map_results.assign(map_gene=gene, map_guide=guide)
        )

Now analyzing RAN...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/112 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/259 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/120 [00:00<?, ?it/s]

Now analyzing POLR2H...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/90 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/98 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/102 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/91 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/77 [00:00<?, ?it/s]

Now analyzing TUBB...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/95 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/159 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/108 [00:00<?, ?it/s]

Now analyzing CDC23...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/139 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/149 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/115 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/309 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/84 [00:00<?, ?it/s]

Now analyzing DUT...


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/440 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/304 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/246 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/142 [00:00<?, ?it/s]

Now analyzing HSPA5...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/87 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/163 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/133 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/226 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/108 [00:00<?, ?it/s]

Now analyzing MTOR...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/144 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/98 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/398 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/145 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/151 [00:00<?, ?it/s]

Now analyzing GATA1...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/103 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/112 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/60 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/121 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/41 [00:00<?, ?it/s]

Now analyzing GINS1...


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/315 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/208 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/169 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/127 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/46 [00:00<?, ?it/s]

Now analyzing HSPE1...


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/249 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/194 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/176 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/130 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/204 [00:00<?, ?it/s]

Now analyzing RPS14...


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/202 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/197 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/66 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/140 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

Now analyzing EIF2S1...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/144 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/144 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/105 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/122 [00:00<?, ?it/s]

Now analyzing DBR1...


  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/353 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/182 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/231 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/121 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

Now analyzing CAD...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/133 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/227 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/244 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/114 [00:00<?, ?it/s]

Now analyzing SEC61A1...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/111 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/121 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/131 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/213 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/217 [00:00<?, ?it/s]

Now analyzing RPL9...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/110 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/107 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/121 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

Now analyzing HSPA9...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/85 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/108 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/77 [00:00<?, ?it/s]

Now analyzing RPS18...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/123 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/194 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/102 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Now analyzing ALDOA...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/102 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/81 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/165 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/108 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/151 [00:00<?, ?it/s]

Now analyzing RPS15...


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/103 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/134 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/223 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/78 [00:00<?, ?it/s]

Now analyzing POLR1D...


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/247 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/384 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/167 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/198 [00:00<?, ?it/s]

Now analyzing ATP5E...


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/205 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/163 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/111 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/156 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/161 [00:00<?, ?it/s]

Now analyzing COX11...


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/296 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/206 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/138 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/107 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/149 [00:00<?, ?it/s]

Now analyzing BCR...


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/245 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/125 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/93 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/130 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/140 [00:00<?, ?it/s]

Now analyzing GNB2L1...


  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/19 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/561 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/111 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/133 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  ap_scores = np.add.reduceat(pr_k * rel_k_list, cutoffs) / num_pos


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

In [14]:
# Collapse the UMAP embedding frames accumulated earlier in the notebook into a
# single table. This cell rebinds the name from a collection of frames to one
# DataFrame, so the concat is guarded: on a re-run the name already holds a
# DataFrame, and pd.concat raises a TypeError when handed a bare DataFrame.
if not isinstance(all_sc_umap_embeddings, pd.DataFrame):
    all_sc_umap_embeddings = pd.concat(all_sc_umap_embeddings).reset_index(drop=True)

output_results_file = pathlib.Path(
    f"results/{gse_id}_single_cell_umap_embeddings.tsv.gz"
)
# Ensure the output directory exists so the write also works on a fresh checkout.
output_results_file.parent.mkdir(parents=True, exist_ok=True)

# Gzipped, tab-separated output; the row index is dropped because it was just reset.
all_sc_umap_embeddings.to_csv(
    output_results_file, sep="\t", compression="gzip", index=False
)

print(all_sc_umap_embeddings.shape)
all_sc_umap_embeddings.head()

(31701, 16)


Unnamed: 0,Metadata_cell_identity,Metadata_cell_barcode,Metadata_guide_identity,Metadata_read_count,Metadata_UMI_count,Metadata_coverage,Metadata_gemgroup,Metadata_good_coverage,Metadata_number_of_cells,Metadata_gene_identity,Metadata_barcode,Metadata_sequence,Metadata_reference_index,umap_0,umap_1,map_gene
0,sc_profile_0,AAACCTGAGAGTAATC-1,RAN_RAN_+_131356438.23-P1P2_12,544.0,34.0,16.0,1.0,True,1.0,RAN,AAACCTGAGAGTAATC-1,AAACCTGAGAGTAATC,-1,-1.786611,5.079599,RAN
1,sc_profile_165,AACGTTGAGAGTAATC-1,RAN_RAN_+_131356438.23-P1P2_00,625.0,36.0,17.361111,1.0,True,2.0,RAN,AACGTTGAGAGTAATC-1,AACGTTGAGAGTAATC,-1,-3.055113,7.493356,RAN
2,sc_profile_264,AACTTTCTCTAAGCCA-1,RAN_RAN_+_131356438.23-P1P2_04,334.0,19.0,17.578947,1.0,True,1.0,RAN,AACTTTCTCTAAGCCA-1,AACTTTCTCTAAGCCA,-1,-1.668168,7.734231,RAN
3,sc_profile_311,AAGGAGCCATGCGCAC-1,RAN_RAN_+_131356438.23-P1P2_02,405.0,22.0,18.409091,1.0,True,1.0,RAN,AAGGAGCCATGCGCAC-1,AAGGAGCCATGCGCAC,-1,-1.807011,4.466313,RAN
4,sc_profile_314,AAGGAGCTCCTGTAGA-1,RAN_RAN_+_131356438.23-P1P2_04,926.0,41.0,22.585366,1.0,True,1.0,RAN,AAGGAGCTCCTGTAGA-1,AAGGAGCTCCTGTAGA,-1,-1.323373,7.639339,RAN


In [15]:
# Collapse the mAP result frames accumulated earlier in the notebook into a
# single table. This cell rebinds the name from a collection of frames to one
# DataFrame, so the concat is guarded: on a re-run the name already holds a
# DataFrame, and pd.concat raises a TypeError when handed a bare DataFrame.
if not isinstance(all_sc_map_results, pd.DataFrame):
    all_sc_map_results = pd.concat(all_sc_map_results).reset_index(drop=True)

# Output file
output_results_file = pathlib.Path(f"results/{gse_id}_single_cell_map.tsv.gz")
# Ensure the output directory exists so the write also works on a fresh checkout.
output_results_file.parent.mkdir(parents=True, exist_ok=True)

# Gzipped, tab-separated output; the row index is dropped because it was just reset.
all_sc_map_results.to_csv(
    output_results_file, sep="\t", compression="gzip", index=False
)

print(all_sc_map_results.shape)
all_sc_map_results.head()

(19901, 10)


Unnamed: 0,Metadata_guide_identity,Metadata_cell_identity,mAP,p_value,corrected_p_value,below_p,p < 0.05,-log10(mAP p-value),map_gene,map_guide
0,RAN_RAN_+_131356438.23-P1P2_12,sc_profile_0,0.289215,0.616595,0.973306,False,False,0.011751,RAN,RAN_RAN_+_131356438.23-P1P2_12
1,RAN_RAN_+_131356438.23-P1P2_12,sc_profile_10030,0.281967,0.765502,0.973306,False,False,0.011751,RAN,RAN_RAN_+_131356438.23-P1P2_12
2,RAN_RAN_+_131356438.23-P1P2_12,sc_profile_10094,0.294351,0.502929,0.973306,False,False,0.011751,RAN,RAN_RAN_+_131356438.23-P1P2_12
3,RAN_RAN_+_131356438.23-P1P2_12,sc_profile_10164,0.292132,0.552037,0.973306,False,False,0.011751,RAN,RAN_RAN_+_131356438.23-P1P2_12
4,RAN_RAN_+_131356438.23-P1P2_12,sc_profile_10280,0.295541,0.476729,0.973306,False,False,0.011751,RAN,RAN_RAN_+_131356438.23-P1P2_12
