# Genetables

In [1]:
import pandas as pd
import os
import numpy as np
import warnings
from tqdm.notebook import tqdm

In [18]:
def get_cgc_synonyms(cgc_df):

    cgc_df["Gene Symbol"] = cgc_df.apply(lambda x: x["Gene Symbol"] if pd.isna(x["Synonyms"]) 
                                         else f'{x["Gene Symbol"]},{x["Synonyms"]}', axis=1)
    cgc_genes = []
    for _, row in cgc_df.iterrows():
        gene_list = row["Gene Symbol"].split(',')
        for gene in gene_list:
            cgc_genes.append(gene)

    return cgc_genes


def load_3dclust(path_c_3dclust):
    """
    Load gene results from the 3d clustering method.
    """
    
    #cohort_3dclust_df = pd.read_csv(path_c_3dclust)
    cohort_3dclust_df = pd.read_csv(path_c_3dclust)
    
    # if "pval" not in cohort_3dclust_df.columns:
    #     cohort_3dclust_df["pval"] = np.nan
    cohort_3dclust_df= cohort_3dclust_df[["Gene", "Uniprot_ID", "Status", "pval", "qval"]].reset_index()
    
    return cohort_3dclust_df.rename(columns = {"Status" : "o3d_status", "index" : "o3d_rank",
                                               "pval" : "o3d_pval", "qval" : "o3d_qval"})


def load_cbase(path_c_cbase):
    """
    Load gene results from the cbase method.
    """
    
    cohort_cbase_df = pd.read_csv(path_c_cbase, sep="\t")
    cohort_cbase_df = cohort_cbase_df.sort_values("p_pos")
    cohort_cbase_df = cohort_cbase_df[["gene", "p_pos", "q_pos"]].reset_index(drop=True).reset_index()

    return cohort_cbase_df.rename(columns = {"index": "cbase_rank", "gene" : "Gene",
                                             "p_pos" : "cbase_pval", "q_pos" : "cbase_qval"})


def load_dndscv(path_c_dndscv):
    """
    Load gene results from the dndscv method.
    """
    
    cohort_dndscv_df = pd.read_csv(path_c_dndscv, sep = "\t")
    cohort_dndscv_df = cohort_dndscv_df[["gene_name", "pallsubs_cv", "qallsubs_cv"]]
    cohort_dndscv_df = cohort_dndscv_df.sort_values("pallsubs_cv").reset_index(drop=True).reset_index()
    
    return cohort_dndscv_df.rename(columns = {"index" : "dndscv_rank", "gene_name" : "Gene",
                                              "pallsubs_cv" : "dndscv_pval", "qallsubs_cv" : "dndscv_qval"})       


def load_hotmaps(path_c_hotmaps):
    """
    Load gene results from the hotmaps method.
    """
    
    cohort_hotmaps_df = pd.read_csv(path_c_hotmaps, sep = "\t")
    cohort_hotmaps_df = cohort_hotmaps_df.sort_values("Min p-value").reset_index(drop=True)
    cohort_hotmaps_df = cohort_hotmaps_df[["GENE", "Min p-value", "q-value"]].reset_index(drop=True).reset_index()
    
    return cohort_hotmaps_df.rename(columns = {"index" : "hotmaps_rank", "GENE" : "Gene",
                                               "Min p-value" : "hotmaps_pval",
                                               "q-value" : "hotmaps_qval"})   


def load_mutpanning(path_c_mutpanning):
    """
    Load gene results from the mutpanning method. 
    Genes are already sorted from most significant to least.
    If the same gene appears multiple times, the p-val and q-val 
    of the lowest rank is selected.
    """
    
    cohort_mutpanning_df = pd.read_csv(path_c_mutpanning, sep = "\t")
    cohort_mutpanning_df = cohort_mutpanning_df[~cohort_mutpanning_df["Significance"].isna()]
    cohort_mutpanning_df = cohort_mutpanning_df[["Name", "Significance", "FDR"]].reset_index()

    # Keep the lowest rank if there is a gene with multiple q-value
    idx = cohort_mutpanning_df.groupby('Name')['index'].idxmin()
    cohort_mutpanning_df = cohort_mutpanning_df.loc[idx].sort_values("index").drop(columns=["index"])
    cohort_mutpanning_df = cohort_mutpanning_df.reset_index(drop=True).reset_index()
    
    return cohort_mutpanning_df.rename(columns = {"index" : "mutpan_rank", "Name" : "Gene",
                                                  "Significance" : "mutpan_pval",
                                                  "FDR" : "mutpan_qval"})


def load_smreg(path_c_smreg):
    """
    Load gene results from smregions method. If the same gene
    appears multiple times, the row with minimum p-value is selected.
    """
    
    cohort_smreg_df = pd.read_csv(path_c_smreg, sep = "\t")
    cohort_smreg_df = cohort_smreg_df[~cohort_smreg_df["P_VALUE"].isna()]
    cohort_smreg_df = cohort_smreg_df[["HUGO_SYMBOL", "P_VALUE", "Q_VALUE"]]
    
    # Keep the lowest p-val if there is a gene with multiple p-value
    idx = cohort_smreg_df.groupby('HUGO_SYMBOL')['Q_VALUE'].idxmin()
    cohort_smreg_df = cohort_smreg_df.loc[idx].sort_values(["Q_VALUE", "P_VALUE"])
    cohort_smreg_df = cohort_smreg_df.reset_index(drop=True).reset_index()
    
    return cohort_smreg_df.rename(columns = {"index" : "smreg_rank",
                                             "HUGO_SYMBOL" : "Gene", 
                                             "P_VALUE" : "smreg_pval",
                                             "Q_VALUE" : "smreg_qval"})


def load_clustl(path_c_clustl):
    """ 
    Load gene results from OncodriveCLUSTL method.
    """
    
    cohort_clustl_df = pd.read_csv(path_c_clustl, sep = "\t")
    cohort_clustl_df = cohort_clustl_df[["SYMBOL", "P_ANALYTICAL", "Q_ANALYTICAL"]].sort_values("P_ANALYTICAL")
    cohort_clustl_df = cohort_clustl_df.reset_index(drop=True).reset_index()
    
    return cohort_clustl_df.rename(columns={"index" : "clustl_rank", "SYMBOL" : "Gene",
                                            "P_ANALYTICAL" : "clustl_pval",
                                            "Q_ANALYTICAL" : "clustl_qval"})


def load_fml(path_c_fml):
    """
    Load gene results from OncodriveFML method.
    """
    
    cohort_fml_df = pd.read_csv(path_c_fml, sep = "\t")
    cohort_fml_df = cohort_fml_df[["SYMBOL" , "P_VALUE" , "Q_VALUE"]]
    cohort_fml_df = cohort_fml_df.reset_index(drop=True).sort_values("P_VALUE").reset_index()
    
    return cohort_fml_df.rename(columns={"index" : "fml_rank", "SYMBOL" : "Gene",
                                         "P_VALUE" : "fml_pval", "Q_VALUE" : "fml_qval"})


def get_all_methods_results(cohort,
                            input_datasets,
                            o3d_output,
                            intogen_output,
                            cgc_df,
                            fishy_df,
                            ctype=None,
                            ch_genes=None,
                            aml_genes=None):
    """
    Load the result of each method and get the sorted gene ranks, the p-values and q-values.
    """
    
    # Load CGC and fishy genes
    cgc_genes = get_cgc_synonyms(cgc_df)

    # Paths to methods output
    path_3dclust = f"{o3d_output}/{cohort}/{cohort}.3d_clustering_genes.csv"
    path_cbase = f"{intogen_output}/cbase/{cohort}.cbase.tsv.gz"
    path_dndscv = f"{intogen_output}/dndscv/{cohort}.dndscv.tsv.gz"
    path_hotmaps = f"{intogen_output}/hotmaps/{cohort}.out.gz"
    path_mutpan = f"{intogen_output}/mutpanning/Significance{cohort}.txt"
    path_clustl = f"{intogen_output}/oncodriveclustl/{cohort}.elements_results.txt"
    path_fml = f"{intogen_output}/oncodrivefml/{cohort}-oncodrivefml.tsv.gz"
    path_smreg = f"{intogen_output}/smregions/{cohort}.smregions.tsv.gz"

    # Load all methods results
    
    cohort_df = load_3dclust(path_3dclust)

    if os.path.exists(path_cbase):
        cohort_cbase_df = load_cbase(path_cbase)
        cohort_df = cohort_df.merge(cohort_cbase_df, how="outer", on="Gene")
    else:
        print(f"Cant load cbase in {path_cbase}")

    if os.path.exists(path_dndscv):
        cohort_dndscv_df = load_dndscv(path_dndscv)
        cohort_df = cohort_df.merge(cohort_dndscv_df, how="outer", on="Gene")
    else:
        print(f"Cant load dndscv in {path_dndscv}")

    if os.path.exists(path_hotmaps):
        cohort_hotmaps_df = load_hotmaps(path_hotmaps)
        cohort_df = cohort_df.merge(cohort_hotmaps_df, how="outer", on="Gene")
    else:
        print(f"Cant load hotmaps in {path_hotmaps}")
        
    if os.path.exists(path_mutpan):
        cohort_mutpan_df = load_mutpanning(path_mutpan)
        cohort_df = cohort_df.merge(cohort_mutpan_df, how="outer", on="Gene")
    else:
        print(f"Cant load mutpanning in {path_mutpan}")
        
    if os.path.exists(path_clustl):
        cohort_clustl_df = load_clustl(path_clustl)
        cohort_df = cohort_df.merge(cohort_clustl_df, how="outer", on="Gene")
    else:
        print(f"Cant load oncrodriveclustl in {path_clustl}")
        
    if os.path.exists(path_fml):
        cohort_fml_df = load_fml(path_fml)
        cohort_df = cohort_df.merge(cohort_fml_df, how="outer", on="Gene")
    else:
        print(f"Cant load oncodrive fml in {path_fml}")
        
    if os.path.exists(path_smreg):
        cohort_smreg_df = load_smreg(path_smreg)
        cohort_df = cohort_df.merge(cohort_smreg_df, how="outer", on="Gene") 
    else:
        print(f"Cant load smregion in {path_smreg}")
        
    # Add CGC
    cgc = cohort_df["Gene"].isin(cgc_genes).astype(int)
    cohort_df.insert(3, "CGC", cgc)
    
    # Add fishy (Gene in Fishy list and not in CGC)
    fishy = np.nan
    if ctype:
        if ctype in fishy_df["Cancer_Type"].values:
            fishy = fishy_df[fishy_df["Cancer_Type"] == ctype].Fish.values[0].split(",")
            fishy = pd.concat((cohort_df["Gene"].isin(fishy), 
                               ~cohort_df["Gene"].isin(cgc_genes)), axis=1).all(1).astype(int)
        else:
            fishy = 0
    
    if ch_genes is not None and aml_genes is not None:
        # Add CH
        ch = cohort_df["Gene"].isin(ch_genes).astype(int)  
        cohort_df.insert(3, "CH", ch)
        aml = cohort_df["Gene"].isin(aml_genes).astype(int)  
        cohort_df.insert(4, "AML", aml)
        # Add fishy
        if ctype:
            fishy = fishy_df[fishy_df["Cancer_Type"] == "AML"].Fish.values[0].split(",")
            fishy = cohort_df["Gene"].isin(fishy).astype(int)
    
    cohort_df.insert(5, "Fish", fishy)
    cohort_df.insert(5, "o3d_rank", cohort_df.pop("o3d_rank"))
    cohort_df.insert(2, "CGC", cohort_df.pop("CGC"))
    
    return cohort_df


def get_genetable(cohort_df,
                  o3d_output,
                  intogen_output,
                  input_datasets,
                  cgc_df,
                  fishy_df,
                  filter_icgc=False,
                  ch_genes=None,
                  aml_genes=None,
                  genetable_output=None, 
                  filename = None,
                  save = False):
    """
    Get the sorted rank of each method, p-value, q-value. 
    Also, annotate genes with CGC and fishy label.
    """
    
    genetable_df = pd.DataFrame()

    for cohort in cohort_df["COHORT"]:
        print(cohort)

        if filter_icgc:
            if cohort.startswith("ICGC"):
                continue
            
        maf = f"{input_datasets}/maf/{cohort}.in.maf"
        mut_profile = f"{input_datasets}/mut_profile/{cohort}.sig.json"
        ctype = cohort_df[cohort_df["COHORT"] == cohort].CANCER_TYPE.values[0]

        if os.path.isfile(maf) and os.path.isfile(mut_profile):

            try:
                tmp_df = get_all_methods_results(cohort=cohort,
                                                 input_datasets=input_datasets, 
                                                 o3d_output=o3d_output,
                                                 intogen_output=intogen_output,
                                                 cgc_df=cgc_df,
                                                 fishy_df=fishy_df,
                                                 ctype=ctype, 
                                                 ch_genes=ch_genes,
                                                 aml_genes=aml_genes)
                tmp_df["Cancer"] = ctype
                tmp_df["Cohort"] = cohort
                genetable_df = pd.concat((genetable_df, tmp_df))
            except Exception as e:
                warnings.warn(f"{cohort} could not be processed")
                warnings.warn(f"Error: {e}")

        else:
            print(f"MAF or Mut rate not found for {cohort}")
            
    # Save
    if save and genetable_output and filename:
        output_path = f"{genetable_output}/{filename}.genetable.tsv"
        genetable_df.dropna(subset="Gene").to_csv(output_path, sep = "\t", index = False, header = True)
        print(f"Genetable saved in: {output_path}")

    return genetable_df

datasets = f"/workspace/projects/clustering_3d/o3d_analysys/datasets/"
cgc_df = pd.read_csv(f"{datasets}input/cancer_202404/CGC_list_v99.tsv", sep = "\t")
fishy_df = pd.read_csv(f"{datasets}/input/cancer_202404/negative_gene_set.tsv", sep = '\t', header = None)
fishy_df = fishy_df.rename(columns={0 : "Cancer_Type", 1 : "Fish"})

## Cancer

In [21]:
SUBDIR = "human_mane_raw"
RUN = "run_2024-07-01_16-04-14"

data = "/workspace/projects/clustering_3d/o3d_analysys/datasets"
datasets = f"{data}/input/cancer_202404"
input_path = f"{data}/input/cancer_202404"
cohort_df = pd.read_csv(f"{input_path}/cohorts.tsv", sep="\t")
o3d_output = f"{data}/output/cancer_202404/o3d_output/{SUBDIR}/{RUN}"
intogen_output = f"{data}/output/cancer_202404/intogen_output"
genetable_output = f"results/{SUBDIR}"

In [24]:
df = get_genetable(cohort_df=cohort_df,
                   o3d_output=o3d_output,
                   intogen_output=intogen_output,
                   input_datasets = input_path,
                   cgc_df=cgc_df,
                   fishy_df=fishy_df,
                   filter_icgc=True,
                   ch_genes=None,
                   genetable_output=genetable_output, 
                   filename = RUN,
                   save = True)
df

HARTWIG_WGS_BLCA_2023
HARTWIG_WGS_EGC_2023
HARTWIG_WGS_LMS_2023
CBIOP_WGS_STAD_ONCOSG_2018
CBIOP_WXS_CM_VALLEN_2018
CBIOP_WXS_BLCA_VALLEN_2018
STJUDE_WGS_D_AML_2018
HARTWIG_WGS_HCC_2023
HARTWIG_WGS_UCEC_2023
STJUDE_WGS_D_EPD_2018
HARTWIG_WGS_COADREAD_2023
HARTWIG_WGS_GB_2023
HARTWIG_WGS_ANSC_2023
HARTWIG_WGS_RCC_2023
STJUDE_WGS_M_OS_2018
STJUDE_WGS_D_OS_2018
STJUDE_WGS_D_CM_2018
CBIOP_WXS_BCC_UNIGE_2016_TREAT
STJUDE_WGS_D_RHBDS_2018
CBIOP_WGS_PRAD_EURUROL_2017
STJUDE_WGS_D_ACC_2018
HARTWIG_WGS_WDTC_2023
STJUDE_WGS_D_EWS_2018
HARTWIG_WGS_UTUC_2023
HARTWIG_WGS_GIST_2023
HARTWIG_WGS_LIPO_2023
STJUDE_WGS_D_MB_2018
CBIOP_WXS_ANGS_TREATED_2020
HARTWIG_WGS_GBC_2023
STJUDE_WGS_R_ALL_2018
HARTWIG_WGS_LUNG_2023
CBIOP_WXS_BRCA_MBCP_PRY_NOTREAT_2020
STJUDE_WGS_D_RB_2018
CBIOP_WXS_BRCA_MBCP_PRY_TREAT_2020
CBIOP_WXS_CLL_IUOPA_2015
CBIOP_WXS_BRCA_MBCP_MET_TREAT_2020
CBIOP_WXS_BRCA_MBCP_MET_NOTREAT_2020
HARTWIG_WGS_BLADDER_2023
HARTWIG_WGS_SCLC_2023
HARTWIG_WGS_NSCLC_2023
HARTWIG_WGS_STOMACH_2023
HART

Unnamed: 0,Gene,Uniprot_ID,CGC,o3d_status,Fish,o3d_rank,o3d_pval,o3d_qval,cbase_rank,cbase_pval,...,clustl_pval,clustl_qval,fml_rank,fml_pval,fml_qval,smreg_rank,smreg_pval,smreg_qval,Cancer,Cohort
0,PIK3CA,P42336,1,Processed,0,0.0,0.0,0.0,10.0,0.000002,...,0.000499,0.069226,15.0,0.000410,0.251111,1.0,0.0,0.000621,BLCA,HARTWIG_WGS_BLCA_2023
1,RXRA,F1D8Q5,0,Processed,0,1.0,0.0,0.0,215.0,0.004537,...,0.000041,0.011417,452.0,0.023990,0.655941,3.0,0.000102,0.089822,BLCA,HARTWIG_WGS_BLCA_2023
2,FGFR3,P22607,1,Processed,0,2.0,0.0,0.0,17.0,0.000032,...,0.001757,0.121905,13.0,0.000400,0.251111,,,,BLCA,HARTWIG_WGS_BLCA_2023
3,FBXW7,Q969H0,1,Processed,0,3.0,0.0,0.0,414.0,0.012032,...,0.004323,0.171394,244.0,0.012830,0.628007,2.0,0.000007,0.009846,BLCA,HARTWIG_WGS_BLCA_2023
4,TP53,K7PPA8,1,Processed,0,4.0,0.0,0.0,1.0,0.000000,...,0.018683,0.213306,0.0,0.000001,0.001837,0.0,0.0,0.0,BLCA,HARTWIG_WGS_BLCA_2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21501,,,0,,0,,,,,,...,,,,,,,,,PGNG,TCGA_WXS_PGNG
21502,,,0,,0,,,,,,...,,,,,,,,,PGNG,TCGA_WXS_PGNG
21503,,,0,,0,,,,,,...,,,,,,,,,PGNG,TCGA_WXS_PGNG
21504,,,0,,0,,,,,,...,,,,,,,,,PGNG,TCGA_WXS_PGNG


In [25]:
# Substitute SOFT_TISSUE WITH SARC in TCGA
genetable_path = f"/workspace/projects/clustering_3d/o3d_analysys/o3d_paper/data/results/{SUBDIR}/{RUN}.genetable.tsv"
genetable = pd.read_csv(genetable_path, sep="\t", dtype={1 : str, 2 : str, 3 : int})
genetable.Cohort = genetable.Cohort.replace("TCGA_WXS_SOFT_TISSUE", "TCGA_WXS_SARC")
genetable.to_csv(f"results/{SUBDIR}/{RUN}.genetable.tsv", sep = "\t", index = False, header = True)

FileNotFoundError: [Errno 2] No such file or directory: '/workspace/projects/clustering_3d/o3d_analysys/o3d_paper/data/results/human_mane_raw/run_2024-07-01_16-04-14.genetable.tsv'

### Fishy genes table

In [23]:
cgc_genes = get_cgc_synonyms(cgc_df)
fishy_df["Fish"] = fishy_df["Fish"].apply(lambda genes: ",".join(
    [gene for gene in genes.split(",") if gene not in cgc_genes]))
fishy_df.to_csv("../tables/t2_fishy_genes.tsv", index=False, sep="\t")
fishy_df

Unnamed: 0,Cancer_Type,Fish
0,ACC,"DEFB130A,LINC00114,TTTY21,SNORD116-14,OR2U1P,H..."
1,AML,"CCL26,DEFB130A,LINC00114,TTTY21,SNORD116-14,OR..."
2,BLCA,"DEFB130A,LINC00114,TTTY21,SNORD116-14,MRAP,OR2..."
3,BRCA,"DEFB130A,LINC00114,TTTY21,SNORD116-14,OR2U1P,H..."
4,CCRCC,"DEFB130A,LINC00114,TTTY21,SNORD116-14,MRAP,OR2..."
5,CESC,"DEFB130A,LINC00114,TTTY21,SNORD116-14,MRAP,OR2..."
6,CHOL,"DEFB130A,LINC00114,TTTY21,SNORD116-14,MRAP,OR2..."
7,CHRCC,"DEFB130A,LINC00114,TTTY21,SNORD116-14,MRAP,OR2..."
8,COAD,"DEFB130A,TTTY21,SNORD116-14,MRAP,OR2U1P,HSFY2,..."
9,COADREAD,"DEFB130A,TTTY21,SNORD116-14,MRAP,OR2U1P,HSFY2,..."


## CH

In [40]:
RUN = "run_2024-11-04_13-09-49"
SUBDIR = "ch"
genetable_path = f"/workspace/projects/clustering_3d/o3d_analysys/o3d_paper/data/results/{SUBDIR}/{RUN}.genetable.tsv"

datasets = "/workspace/projects/clustering_3d/o3d_analysys/datasets"
o3d_output = f"{datasets}/output/ch/o3d_output/run_20241004_ch/{RUN}"
intogen_output = f"{datasets}/output/ch/intogen_output"
input_path = f"{datasets}/input/ch"
genetable_output = "/workspace/projects/clustering_3d/o3d_analysys/o3d_paper/data/results/ch"

ch_genes = pd.read_csv(f"{datasets}/ch_genes.tsv", sep = '\t')
ch_genes = ch_genes["Gene"].values

In [41]:
cohort_df = pd.DataFrame({"COHORT" : ["OTHER_WGS_HMF_FULL", 
                                      "OTHER_WXS_CH_IMPACT_PANEL", 
                                      "OTHER_WXS_TCGA_FULL"], 
                          "CANCER_TYPE" : ["CH", "CH", "CH"]})
cohort_df

Unnamed: 0,COHORT,CANCER_TYPE
0,OTHER_WGS_HMF_FULL,CH
1,OTHER_WXS_CH_IMPACT_PANEL,CH
2,OTHER_WXS_TCGA_FULL,CH


In [42]:
ch_genes = pd.read_csv(f"{datasets}/ch_genes.tsv", sep = '\t')
aml_genes = ch_genes[ch_genes["Myeloid"] == "YES"].Gene.values
ch_genes = ch_genes[ch_genes["CH"] == "YES"].Gene.values

In [77]:
df = get_genetable(cohort_df=cohort_df,
                    o3d_output=o3d_output,
                    intogen_output=intogen_output,
                    input_datasets = input_path,
                    cgc_df=cgc_df,
                    fishy_df=fishy_df,
                    ch_genes=ch_genes,
                    aml_genes=aml_genes,
                    genetable_output=genetable_output,
                    filename = RUN,
                    save = True)
df

OTHER_WGS_HMF_FULL
OTHER_WXS_CH_IMPACT_PANEL
OTHER_WXS_TCGA_FULL
Genetable saved in: /workspace/projects/clustering_3d/o3d_analysys/o3d_paper/data/results/ch/run_2024-11-04_13-09-49.genetable.tsv


Unnamed: 0,Gene,Uniprot_ID,CGC,CH,AML,Fish,o3d_rank,o3d_status,o3d_pval,o3d_qval,...,clustl_pval,clustl_qval,fml_rank,fml_pval,fml_qval,smreg_rank,smreg_pval,smreg_qval,Cancer,Cohort
0,RASAL3,Q86YV0,0,0,0,0,0.0,Processed,0.0,0.0,...,4.478998e-07,6.096414e-06,0.0,0.000001,0.00071,,,,CH,OTHER_WGS_HMF_FULL
1,COL8A2,P25067,0,0,0,0,1.0,Processed,0.0,0.0,...,1.110223e-19,3.885781e-18,11794.0,1.000000,1.00000,,,,CH,OTHER_WGS_HMF_FULL
2,SCART1,Q4G0T1,0,0,0,0,2.0,Processed,0.0,0.0,...,1.110223e-19,3.885781e-18,7.0,0.000001,0.00071,1.0,3.291521e-36,4.432034e-33,CH,OTHER_WGS_HMF_FULL
3,PLEC,Q15149,0,0,0,0,3.0,Processed,0.0,0.0,...,1.110223e-19,3.885781e-18,5.0,0.000001,0.00071,60.0,2.467413e-03,9.232579e-02,CH,OTHER_WGS_HMF_FULL
4,ZMAT1,Q5H9K5,0,0,0,0,4.0,Processed,0.0,0.0,...,1.110223e-19,3.885781e-18,11796.0,1.000000,1.00000,,,,CH,OTHER_WGS_HMF_FULL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20976,FAM230A,,0,0,0,0,,,,,...,,,,,,,,,CH,OTHER_WXS_TCGA_FULL
20977,KLRC4-KLRK1,,0,0,0,0,,,,,...,,,,,,,,,CH,OTHER_WXS_TCGA_FULL
20978,NPIPB12,,0,0,0,0,,,,,...,,,,,,,,,CH,OTHER_WXS_TCGA_FULL
20979,TCAF2C,,0,0,0,0,,,,,...,,,,,,,,,CH,OTHER_WXS_TCGA_FULL
