In [1]:
import torch
import numpy as np
import json
import os
import re
import pandas as pd

from datasets import Dataset, load_from_disk, concatenate_datasets, DatasetDict

seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

  from .autonotebook import tqdm as notebook_tqdm


helper functions and constants

In [2]:
def dataset_for_prompt(df):
    # number of rows to sample
    num_rows = min(len(df), np.random.randint(5, 25))
    df = df.sample(num_rows)
    #number of genes to sample
    num_genes = 2000 // num_rows

    use_d = True
    if len(df['diseases'].unique()) == 1:
        use_d = False

    s = ""
    sampled_indices = []
    for index, row in df.iterrows():
        cur_num_genes = min(num_genes+np.random.randint(-20, 20), 1000)
        cell_sentence = ' '.join(row[f'gene_{i}'] for i in range(1, cur_num_genes))
        if use_d:
            s += f"Cell Type: {row['cell_type']}, Tissue: {row['tissue']}, Disease: {row['diseases']}, Gene Expression: {cell_sentence}\n"
        else:
            s += f"Cell Type: {row['cell_type']}, Tissue: {row['tissue']}, Gene Expression: {cell_sentence}\n"
        sampled_indices.append(index)
        
    return s, sampled_indices

def prepare_geneset(geneset_data):
    disease_cell_combinations = geneset_data.groupby(['Disease', 'Cell Type']).size().reset_index()[['Disease', 'Cell Type']]
    num_gene_sets = 1500 // len(disease_cell_combinations)

    gene_sets = ""
    for _, row in disease_cell_combinations.iterrows():
        selected_gene_sets = geneset_data[(geneset_data['Disease'] == row['Disease']) 
                                          & (geneset_data['Cell Type'] == row['Cell Type']) 
                                          & (geneset_data['Rank Type'] == "Top")]["Gene Program"].head(num_gene_sets)
        selected_gene_sets = [gs.strip() for gs in selected_gene_sets]
        gene_sets += f"{row['Disease']} {row['Cell Type']}: "
        gene_sets += ", ".join(selected_gene_sets)
        gene_sets += "\n"
        
    return gene_sets

geneset_filtering_prompt = """I'm a researcher performing gene set enrichment analysis of a scRNA seq dataset. Here is my result of top enriched gene sets in each cell with different disease conditions and cell types ranked by enrichment level. Based on what you know about these gene sets, are these results accurate or reasonable? Review my results and point out any thing that looks incorrect in a section called review. Give me the filtered version of the gene sets, rank gene set in the same order as it is given. Only remove gene sets that you believe are absolutely wrong and do not add new gene sets not given. Format strictly like this without any other word:
Review: {review content}
{cell type}: {Gene set name}, {Gene set name}...
{cell type}: {Gene set name}, {Gene set name}...
...
"""

prompt_prefix = """This is a scientific manuscript, an example of the single-cell RNA sequencing dataset it is associated with, and results of Gene Set Enrichment Analysis to that dataset. The example dataset contains representative cells from each type and the corresponding cell sentence(ranked gene names by expression). The gene set enrichment analysis results are the top enriched gene sets for each cell type in the dataset.
You need to create question-answer pairs for Large Language Models to learn reasoning and analyzing gene expression datasets when given pieces of data converted into cell sentences. I would like questions and answers and reasonings about the given cell sentence in terms of cells, tissues, disease, or other relevant biological information. 
The questions must be answerable by looking at the given cell sentences alone. When creating questions, you should first read the given manuscript, consider what the analysis the researchers did in the manuscript and the biological context of this study. Then look at the given cell sentences(or a part of them), think about what question the researchers asked or would ask about the cell sentence. Use the gene set enrichment and the information indicated by those gene sets as evidence or analysis steps in the reasoning, also immitate the logic progression and conclusion made by the researchers in the manuscript. Then answer the question. Don't directly use or quote or mention the manuscript in question reasoinings, or answer. Avoid questions about experiment design or procedures, avoid vague questions, avoid obvious questions that do not require multiple steps of reasonings. Vary the questions, answers as much as possible to cover a diverse range of topics. Include both open ended questions and yes or no questions.
Provide Context for the question, this is only the essential background information required to answer the question, do not directly summarize or describe the dataset. Include detailed reasoning and analysis, you must reason with the gene set enrichment results. Provide Answer no less than 20 words and Reasonings much longer (even as long as 100 words). Refer to specific cell sentences as "{type} cell gene expression". Provide Keywords of the answer being the most essential parts of the correct answer. The keywords should be yes or no(must include if it is a yes or no question), or important gene set names, gene names, or biological information in question. These keywords will be used during my training to validate my model's response.
Give 30 question answer pairs as one list, don't give any other word. Strictly format like this: 
<|Context|>the context<|Question|>the content of the question<|Chain of Thoughts|>the intermediate reasonings<|Answer|>the content of the answer<|Keyword|>the answer keywords
<|Context|>the context<|Question|>the content of the question<|Chain of Thoughts|>the intermediate reasonings<|Answer|>the content of the answer<|Keyword|>the answer keywords
...
"""

loads all datasets and meta data \
assumes summary datasets are in a directory as csv files, assuming the format processed by Harry \
all files I'm looking at can be found in my C2S-RL dev github repo branch

In [3]:
# TODO: change to your own file path
datasets_directory = "/users/zlyu12/Desktop/c2s-RL/Dec19_dataset" # all summary datasets
meta_data_path = "/users/zlyu12/Desktop/c2s-RL/Create_Dataset/meta_data.json" # dataset name, filename, url etc.
geneset_data_directory = "/users/zlyu12/Desktop/c2s-RL/gene_set_data" # where all gene set data are stored
hf_dataset_output_path = "/users/zlyu12/Desktop/c2s-RL/Create_Dataset/temp_hf_dataset_new" # temporary output directory

datasets_files_paths = [os.path.join(datasets_directory, f) for f in os.listdir(datasets_directory) if f.endswith('.csv')]

subdirs = [d for d in os.listdir(geneset_data_directory) if os.path.isdir(os.path.join(geneset_data_directory, d))]
geneset_dataset_numbers = [int(re.search(r'\((\d+)\)', f).group(1)) for f in subdirs]
geneset_dataset_numbers = np.sort(geneset_dataset_numbers)

meta_data = json.load(open(meta_data_path))

try:
    hf_dataset = load_from_disk(hf_dataset_output_path)
except:
    hf_dataset = Dataset.from_dict({})

print(f"all dataset indeices: {geneset_dataset_numbers}")
print(hf_dataset)

all dataset indeices: [191 192 471 474 479 603 612 636 637 638 726 770 778]
Dataset({
    features: [],
    num_rows: 0
})


first filter the geneset dataset with LLMs \
this prepares the prompt \
copy paste, add the publication text at the end and run our favorite LLM \
run only once for a row, we rely on some variables defined here in later cells

In [71]:
# TODO: manually set the dataset index
dataset_index = 778

if not dataset_index in geneset_dataset_numbers:
    print("no gene set data for dataset: ", dataset_index)
    exit()
    
geneset_data = pd.read_csv(f"/users/zlyu12/Desktop/c2s-RL/gene_set_data/local({dataset_index})/top_gene_programs.csv")
geneset = prepare_geneset(geneset_data)

print(geneset_filtering_prompt + geneset)

I'm a researcher performing gene set enrichment analysis of a scRNA seq dataset. Here is my result of top enriched gene sets in each cell with different disease conditions and cell types ranked by enrichment level. Based on what you know about these gene sets, are these results accurate or reasonable? Review my results and point out any thing that looks incorrect in a section called review. Give me the filtered version of the gene sets, rank gene set in the same order as it is given. Only remove gene sets that you believe are absolutely wrong and do not add new gene sets not given. Format strictly like this without any other word:
Review: {review content}
{cell type}: {Gene set name}, {Gene set name}...
{cell type}: {Gene set name}, {Gene set name}...
...
COVID-19 basal cell of epithelium of bronchus: KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION, LAKE_ADULT_KIDNEY_C18_COLLECTING_DUCT_PRINCIPAL_CELLS_MEDULLA, FAN_OVARY_CL12_T_LYMPHOCYTE_NK_CELL_2, DLX6_TARGET_GENES, TAF9B_TARGET_GENES,

In [72]:
# TODO: past the filtered output here
filtered_geneset = """
COVID-19 basal cell of epithelium of bronchus: KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION, LAKE_ADULT_KIDNEY_C18_COLLECTING_DUCT_PRINCIPAL_CELLS_MEDULLA, FAN_OVARY_CL12_T_LYMPHOCYTE_NK_CELL_2, DLX6_TARGET_GENES, TAF9B_TARGET_GENES, LAKE_ADULT_KIDNEY_C10_THIN_ASCENDING_LIMB, MENON_FETAL_KIDNEY_8_CONNECTING_TUBULE_CELLS, MXD1_TARGET_GENES, MENON_FETAL_KIDNEY_4_PODOCYTES, GSE19888_ADENOSINE_A3R_ACT_VS_TCELL_MEMBRANES_ACT_IN_MAST_CELL_DN, ATXN7L3_TARGET_GENES, IRX2_TARGET_GENES, FAN_OVARY_CL16_LYMPHATIC_ENDOTHELIAL_CELL, TRAVAGLINI_LUNG_CAPILLARY_INTERMEDIATE_2_CELL, ZHENG_CORD_BLOOD_C2_PUTATIVE_BASOPHIL_EOSINOPHIL_MAST_CELL_PROGENITOR, ZHENG_CORD_BLOOD_C7_PUTATIVE_LYMPHOID_PRIMED_MULTIPOTENT_PROGENITOR_1, GSE4984_UNTREATED_VS_GALECTIN1_TREATED_DC_UP, GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS, BUSSLINGER_DUODENAL_DIFFERENTIATING_STEM_CELLS, LAKE_ADULT_KIDNEY_C8_DECENDING_THIN_LIMB, BRACHAT_RESPONSE_TO_CISPLATIN, KLF7_TARGET_GENES, CHANDRAN_METASTASIS_UP, STAMBOLSKY_RESPONSE_TO_VITAMIN_D3_UP, TFCP2_TARGET_GENES, GSE27786_LIN_NEG_VS_BCELL_DN, MAP2K1_TARGET_GENES, GSE15330_WT_VS_IKAROS_KO_MEGAKARYOCYTE_ERYTHROID_PROGENITOR_DN, GAVISH_3CA_METAPROGRAM_EPITHELIAL_V_ATPASE, PARK_HSC_VS_MULTIPOTENT_PROGENITORS_UP, CHICAS_RB1_TARGETS_LOW_SERUM, TRAVAGLINI_LUNG_CD8_MEMORY_EFFECTOR_T_CELL, TRAVAGLINI_LUNG_PROXIMAL_BASAL_CELL, CHANDRAN_METASTASIS_TOP50_DN, RODRIGUES_NTN1_AND_DCC_TARGETS, LAKE_ADULT_KIDNEY_C19_COLLECTING_DUCT_INTERCALATED_CELLS_TYPE_A_MEDULLA, BUSSLINGER_DUODENAL_STEM_CELLS, GOBP_POSITIVE_REGULATION_OF_MYOBLAST_PROLIFERATION, MARSON_FOXP3_TARGETS_UP, CAVARD_LIVER_CANCER_MALIGNANT_VS_BENIGN, GSE22589_HEALTHY_VS_SIV_INFECTED_DC_UP, OLSSON_E2F3_TARGETS_UP, GSE27786_LSK_VS_NEUTROPHIL_DN, CEBPE_TARGET_GENES, STAMBOLSKY_TARGETS_OF_MUTATED_TP53_UP, GAVISH_3CA_METAPROGRAM_MACROPHAGES_STRESS_HSP, SOX11_TARGET_GENES, GSE16450_CTRL_VS_IFNA_6H_STIM_MATURE_NEURON_CELL_LINE_DN, HOWLIN_CITED1_TARGETS_1_UP, RUBENSTEIN_SKELETAL_MUSCLE_FAP_CELLS, GSE27786_LSK_VS_CD4_TCELL_DN, KMT2D_TARGET_GENES, MCBRYAN_PUBERTAL_BREAST_4_5WK_DN, GSE5589_WT_VS_IL10_KO_LPS_STIM_MACROPHAGE_45MIN_DN, GAVISH_3CA_MALIGNANT_METAPROGRAM_10_PROTEIN_MATURATION, LAKE_ADULT_KIDNEY_C9_THIN_ASCENDING_LIMB, GSE7509_FCGRIIB_VS_TNFA_IL1B_IL6_PGE_STIM_DC_DN, ZSCAN5C_TARGET_GENES, GAVISH_3CA_METAPROGRAM_FIBROBLASTS_CAF_4, GSE8685_IL2_STARVED_VS_IL15_ACT_IL2_STARVED_CD4_TCELL_UP, ZNF236_TARGET_GENES, HOWLIN_CITED1_TARGETS_2_UP, HERNANDEZ_MITOTIC_ARREST_BY_DOCETAXEL_1_UP, SARRIO_EPITHELIAL_MESENCHYMAL_TRANSITION_DN, GSE37416_CTRL_VS_48H_F_TULARENSIS_LVS_NEUTROPHIL_DN, GAVISH_3CA_MALIGNANT_METAPROGRAM_41_UNASSIGNED, NFE2L1_TARGET_GENES, BRUINS_UVC_RESPONSE_VIA_TP53_GROUP_C, ZFHX3_TARGET_GENES, GSE25087_FETAL_VS_ADULT_TCONV_DN, GSE5099_UNSTIM_VS_MCSF_TREATED_MONOCYTE_DAY3_UP, GSE14308_TH1_VS_TH17_DN, LAKE_ADULT_KIDNEY_C12_THICK_ASCENDING_LIMB, FURUKAWA_DUSP6_TARGETS_PCI35_UP, GSE2770_TGFB_AND_IL4_VS_TGFB_AND_IL12_TREATED_ACT_CD4_TCELL_48H_UP, PECE_MAMMARY_STEM_CELL_UP, BUSSLINGER_DUODENAL_TRANSIT_AMPLIFYING_CELLS, GSE27786_LSK_VS_BCELL_DN, GLI4_TARGET_GENES, GSE40273_EOS_KO_VS_WT_TREG_UP, IWANAGA_CARCINOGENESIS_BY_KRAS_DN, BUSSLINGER_GASTRIC_LYZ_POSITIVE_CELLS, GSE41867_DAY6_VS_DAY8_LCMV_CLONE13_EFFECTOR_CD8_TCELL_DN, GSE23308_CTRL_VS_CORTICOSTERONE_TREATED_MACROPHAGE_MINERALCORTICOID_REC_KO_DN, GSE13493_DP_VS_CD4INTCD8POS_THYMOCYTE_UP, COATES_MACROPHAGE_M1_VS_M2_UP, ZHENG_CORD_BLOOD_C10_MULTILYMPHOID_PROGENITOR, WHITFIELD_CELL_CYCLE_G2, GSE5542_UNTREATED_VS_IFNA_AND_IFNG_TREATED_EPITHELIAL_CELLS_24H_UP, PKCA_DN.V1_DN, GSE11924_TH1_VS_TH17_CD4_TCELL_UP, FAN_OVARY_CL3_MATURE_CUMULUS_GRANULOSA_CELL_1, GSE17721_CTRL_VS_POLYIC_1H_BMDC_DN

COVID-19 bronchial goblet cell: KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION, FAN_OVARY_CL12_T_LYMPHOCYTE_NK_CELL_2, DLX6_TARGET_GENES, TAF9B_TARGET_GENES, LAKE_ADULT_KIDNEY_C18_COLLECTING_DUCT_PRINCIPAL_CELLS_MEDULLA, MENON_FETAL_KIDNEY_8_CONNECTING_TUBULE_CELLS, MENON_FETAL_KIDNEY_4_PODOCYTES, IRX2_TARGET_GENES, ATXN7L3_TARGET_GENES, ZHENG_CORD_BLOOD_C2_PUTATIVE_BASOPHIL_EOSINOPHIL_MAST_CELL_PROGENITOR, MXD1_TARGET_GENES, GSE19888_ADENOSINE_A3R_ACT_VS_TCELL_MEMBRANES_ACT_IN_MAST_CELL_DN, ZHENG_CORD_BLOOD_C7_PUTATIVE_LYMPHOID_PRIMED_MULTIPOTENT_PROGENITOR_1, FAN_OVARY_CL16_LYMPHATIC_ENDOTHELIAL_CELL, BRACHAT_RESPONSE_TO_CISPLATIN, GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS, CHANDRAN_METASTASIS_UP, GSE4984_UNTREATED_VS_GALECTIN1_TREATED_DC_UP, STAMBOLSKY_RESPONSE_TO_VITAMIN_D3_UP, CHICAS_RB1_TARGETS_LOW_SERUM, GSE27786_LIN_NEG_VS_BCELL_DN, TFCP2_TARGET_GENES, GAVISH_3CA_METAPROGRAM_EPITHELIAL_V_ATPASE, MARSON_FOXP3_TARGETS_UP, GSE15330_WT_VS_IKAROS_KO_MEGAKARYOCYTE_ERYTHROID_PROGENITOR_DN, RODRIGUES_NTN1_AND_DCC_TARGETS, PARK_HSC_VS_MULTIPOTENT_PROGENITORS_UP, TRAVAGLINI_LUNG_CD8_MEMORY_EFFECTOR_T_CELL, OLSSON_E2F3_TARGETS_UP, CHANDRAN_METASTASIS_TOP50_DN, MAP2K1_TARGET_GENES, CAVARD_LIVER_CANCER_MALIGNANT_VS_BENIGN, GOBP_POSITIVE_REGULATION_OF_MYOBLAST_PROLIFERATION, GSE22589_HEALTHY_VS_SIV_INFECTED_DC_UP, STAMBOLSKY_TARGETS_OF_MUTATED_TP53_UP, GSE16450_CTRL_VS_IFNA_6H_STIM_MATURE_NEURON_CELL_LINE_DN, GAVISH_3CA_METAPROGRAM_MACROPHAGES_STRESS_HSP, KLF7_TARGET_GENES, GAVISH_3CA_METAPROGRAM_FIBROBLASTS_CAF_4, GSE27786_LSK_VS_CD4_TCELL_DN, ZNF236_TARGET_GENES, HOWLIN_CITED1_TARGETS_2_UP, GSE27786_LSK_VS_NEUTROPHIL_DN, SOX11_TARGET_GENES, ZSCAN5C_TARGET_GENES, GAVISH_3CA_MALIGNANT_METAPROGRAM_41_UNASSIGNED, CEBPE_TARGET_GENES, GSE7509_FCGRIIB_VS_TNFA_IL1B_IL6_PGE_STIM_DC_DN, GSE8685_IL2_STARVED_VS_IL15_ACT_IL2_STARVED_CD4_TCELL_UP, GSE5589_WT_VS_IL10_KO_LPS_STIM_MACROPHAGE_45MIN_DN, HOWLIN_CITED1_TARGETS_1_UP, GAVISH_3CA_MALIGNANT_METAPROGRAM_10_PROTEIN_MATURATION, LAKE_ADULT_KIDNEY_C10_THIN_ASCENDING_LIMB, GSE37416_CTRL_VS_48H_F_TULARENSIS_LVS_NEUTROPHIL_DN, MCBRYAN_PUBERTAL_BREAST_4_5WK_DN, RUBENSTEIN_SKELETAL_MUSCLE_FAP_CELLS, GSE14308_TH1_VS_TH17_DN, BRUINS_UVC_RESPONSE_VIA_TP53_GROUP_C, HERNANDEZ_MITOTIC_ARREST_BY_DOCETAXEL_1_UP, WHITFIELD_CELL_CYCLE_G2, IWANAGA_CARCINOGENESIS_BY_KRAS_DN, GSE2770_TGFB_AND_IL4_VS_TGFB_AND_IL12_TREATED_ACT_CD4_TCELL_48H_UP, GSE25087_FETAL_VS_ADULT_TCONV_DN, GSE5099_UNSTIM_VS_MCSF_TREATED_MONOCYTE_DAY3_UP, GSE17721_CTRL_VS_POLYIC_1H_BMDC_DN, GSE27786_LSK_VS_BCELL_DN, TRAVAGLINI_LUNG_PROXIMAL_CILIATED_CELL, FURUKAWA_DUSP6_TARGETS_PCI35_UP, GSE40273_EOS_KO_VS_WT_TREG_UP, GSE23308_CTRL_VS_CORTICOSTERONE_TREATED_MACROPHAGE_MINERALCORTICOID_REC_KO_DN, GSE41867_DAY6_VS_DAY8_LCMV_CLONE13_EFFECTOR_CD8_TCELL_DN, KMT2D_TARGET_GENES, OSMAN_BLADDER_CANCER_UP, GSE5542_UNTREATED_VS_IFNA_AND_IFNG_TREATED_EPITHELIAL_CELLS_24H_UP, PKCA_DN.V1_DN, TRAVAGLINI_LUNG_CAPILLARY_INTERMEDIATE_2_CELL, SARRIO_EPITHELIAL_MESENCHYMAL_TRANSITION_DN, LAKE_ADULT_KIDNEY_C8_DECENDING_THIN_LIMB, GSE13493_DP_VS_CD4INTCD8POS_THYMOCYTE_UP, COATES_MACROPHAGE_M1_VS_M2_UP, TRAVAGLINI_LUNG_CD8_NAIVE_T_CELL, FAN_OVARY_CL3_MATURE_CUMULUS_GRANULOSA_CELL_1, GAVIN_FOXP3_TARGETS_CLUSTER_P3, GSE17721_POLYIC_VS_GARDIQUIMOD_16H_BMDC_UP, GSE11924_TH1_VS_TH17_CD4_TCELL_UP, ZHENG_CORD_BLOOD_C10_MULTILYMPHOID_PROGENITOR, GLI4_TARGET_GENES, NAKAYA_PBMC_FLUARIX_FLUVIRIN_AGE_18_50YO_CORRELATED_WITH_HAI_28DY_RESPONSE_AT_3DY_NEGATIVE, MCBRYAN_PUBERTAL_BREAST_5_6WK_DN, GSE32901_NAIVE_VS_TH17_ENRICHED_CD4_TCELL_UP, CIC_TARGET_GENES, GSE42724_B1_BCELL_VS_PLASMABLAST_UP, GAUTAM_EYE_IRIS_CILIARY_BODY_PUTATIVE_STEM_CELLS

COVID-19 brush cell of bronchus: KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION, FAN_OVARY_CL12_T_LYMPHOCYTE_NK_CELL_2, DLX6_TARGET_GENES, LAKE_ADULT_KIDNEY_C18_COLLECTING_DUCT_PRINCIPAL_CELLS_MEDULLA, TAF9B_TARGET_GENES, MXD1_TARGET_GENES, GSE19888_ADENOSINE_A3R_ACT_VS_TCELL_MEMBRANES_ACT_IN_MAST_CELL_DN, ZHENG_CORD_BLOOD_C7_PUTATIVE_LYMPHOID_PRIMED_MULTIPOTENT_PROGENITOR_1, ATXN7L3_TARGET_GENES, IRX2_TARGET_GENES, MENON_FETAL_KIDNEY_4_PODOCYTES, STAMBOLSKY_RESPONSE_TO_VITAMIN_D3_UP, MENON_FETAL_KIDNEY_8_CONNECTING_TUBULE_CELLS, CHANDRAN_METASTASIS_UP, ZHENG_CORD_BLOOD_C2_PUTATIVE_BASOPHIL_EOSINOPHIL_MAST_CELL_PROGENITOR, BRACHAT_RESPONSE_TO_CISPLATIN, TRAVAGLINI_LUNG_CAPILLARY_INTERMEDIATE_2_CELL, KLF7_TARGET_GENES, LAKE_ADULT_KIDNEY_C10_THIN_ASCENDING_LIMB, BUSSLINGER_DUODENAL_DIFFERENTIATING_STEM_CELLS, GSE4984_UNTREATED_VS_GALECTIN1_TREATED_DC_UP, GSE27786_LIN_NEG_VS_BCELL_DN, GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS, BUSSLINGER_DUODENAL_STEM_CELLS, TRAVAGLINI_LUNG_CD8_MEMORY_EFFECTOR_T_CELL, FAN_OVARY_CL16_LYMPHATIC_ENDOTHELIAL_CELL, CHICAS_RB1_TARGETS_LOW_SERUM, GAVISH_3CA_METAPROGRAM_EPITHELIAL_V_ATPASE, TFCP2_TARGET_GENES, MAP2K1_TARGET_GENES, CHANDRAN_METASTASIS_TOP50_DN, PARK_HSC_VS_MULTIPOTENT_PROGENITORS_UP, RODRIGUES_NTN1_AND_DCC_TARGETS, GSE22589_HEALTHY_VS_SIV_INFECTED_DC_UP, CAVARD_LIVER_CANCER_MALIGNANT_VS_BENIGN, GOBP_POSITIVE_REGULATION_OF_MYOBLAST_PROLIFERATION, GSE15330_WT_VS_IKAROS_KO_MEGAKARYOCYTE_ERYTHROID_PROGENITOR_DN, OLSSON_E2F3_TARGETS_UP, GSE27786_LSK_VS_CD4_TCELL_DN, MARSON_FOXP3_TARGETS_UP, LAKE_ADULT_KIDNEY_C8_DECENDING_THIN_LIMB, STAMBOLSKY_TARGETS_OF_MUTATED_TP53_UP, GAVISH_3CA_METAPROGRAM_FIBROBLASTS_CAF_4, GSE27786_LSK_VS_NEUTROPHIL_DN, GSE16450_CTRL_VS_IFNA_6H_STIM_MATURE_NEURON_CELL_LINE_DN, HOWLIN_CITED1_TARGETS_1_UP, SOX11_TARGET_GENES, ZSCAN5C_TARGET_GENES, KMT2D_TARGET_GENES, ZNF236_TARGET_GENES, GSE7509_FCGRIIB_VS_TNFA_IL1B_IL6_PGE_STIM_DC_DN, GSE5589_WT_VS_IL10_KO_LPS_STIM_MACROPHAGE_45MIN_DN, HOWLIN_CITED1_TARGETS_2_UP, ZFHX3_TARGET_GENES, GAVISH_3CA_METAPROGRAM_MACROPHAGES_STRESS_HSP, LAKE_ADULT_KIDNEY_C19_COLLECTING_DUCT_INTERCALATED_CELLS_TYPE_A_MEDULLA, CEBPE_TARGET_GENES, GSE8685_IL2_STARVED_VS_IL15_ACT_IL2_STARVED_CD4_TCELL_UP, MCBRYAN_PUBERTAL_BREAST_4_5WK_DN, HERNANDEZ_MITOTIC_ARREST_BY_DOCETAXEL_1_UP, NFE2L1_TARGET_GENES, GSE14308_TH1_VS_TH17_DN, GAVISH_3CA_MALIGNANT_METAPROGRAM_10_PROTEIN_MATURATION, GAVISH_3CA_MALIGNANT_METAPROGRAM_41_UNASSIGNED, SARRIO_EPITHELIAL_MESENCHYMAL_TRANSITION_DN, GSE37416_CTRL_VS_48H_F_TULARENSIS_LVS_NEUTROPHIL_DN, GSE25087_FETAL_VS_ADULT_TCONV_DN, BRUINS_UVC_RESPONSE_VIA_TP53_GROUP_C, GSE5099_UNSTIM_VS_MCSF_TREATED_MONOCYTE_DAY3_UP, IWANAGA_CARCINOGENESIS_BY_KRAS_DN, GSE5542_UNTREATED_VS_IFNA_AND_IFNG_TREATED_EPITHELIAL_CELLS_24H_UP, WHITFIELD_CELL_CYCLE_G2, RUBENSTEIN_SKELETAL_MUSCLE_FAP_CELLS, GLI4_TARGET_GENES, GSE2770_TGFB_AND_IL4_VS_TGFB_AND_IL12_TREATED_ACT_CD4_TCELL_48H_UP, GSE27786_LSK_VS_BCELL_DN, GSE41867_DAY6_VS_DAY8_LCMV_CLONE13_EFFECTOR_CD8_TCELL_DN, GSE17721_CTRL_VS_POLYIC_1H_BMDC_DN, FURUKAWA_DUSP6_TARGETS_PCI35_UP, PKCA_DN.V1_DN, GSE23308_CTRL_VS_CORTICOSTERONE_TREATED_MACROPHAGE_MINERALCORTICOID_REC_KO_DN, GSE13493_DP_VS_CD4INTCD8POS_THYMOCYTE_UP, TRAVAGLINI_LUNG_CD8_NAIVE_T_CELL, COATES_MACROPHAGE_M1_VS_M2_UP, GSE40273_EOS_KO_VS_WT_TREG_UP, NAKAYA_PBMC_FLUARIX_FLUVIRIN_AGE_18_50YO_CORRELATED_WITH_HAI_28DY_RESPONSE_AT_7DY_NEGATIVE, BUSSLINGER_DUODENAL_TRANSIT_AMPLIFYING_CELLS, NAKAYA_PBMC_FLUARIX_FLUVIRIN_AGE_18_50YO_CORRELATED_WITH_HAI_28DY_RESPONSE_AT_3DY_NEGATIVE, MCBRYAN_PUBERTAL_BREAST_5_6WK_DN, OSMAN_BLADDER_CANCER_UP, TRAVAGLINI_LUNG_PROXIMAL_BASAL_CELL, GSE11924_TH1_VS_TH17_CD4_TCELL_UP, ZHENG_CORD_BLOOD_C10_MULTILYMPHOID_PROGENITOR

COVID-19 ciliated cell of the bronchus: TRAVAGLINI_LUNG_PROXIMAL_CILIATED_CELL, KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION, LAKE_ADULT_KIDNEY_C18_COLLECTING_DUCT_PRINCIPAL_CELLS_MEDULLA, TRAVAGLINI_LUNG_CILIATED_CELL, TRAVAGLINI_LUNG_CAPILLARY_INTERMEDIATE_2_CELL, LAKE_ADULT_KIDNEY_C10_THIN_ASCENDING_LIMB, LAKE_ADULT_KIDNEY_C8_DECENDING_THIN_LIMB, LAKE_ADULT_KIDNEY_C19_COLLECTING_DUCT_INTERCALATED_CELLS_TYPE_A_MEDULLA, LAKE_ADULT_KIDNEY_C9_THIN_ASCENDING_LIMB, DODD_NASOPHARYNGEAL_CARCINOMA_UP, SENGUPTA_NASOPHARYNGEAL_CARCINOMA_DN, FAN_OVARY_CL12_T_LYMPHOCYTE_NK_CELL_2, DURANTE_ADULT_OLFACTORY_NEUROEPITHELIUM_RESPIRATORY_CILIATED_CELLS, MENON_FETAL_KIDNEY_8_CONNECTING_TUBULE_CELLS, DESCARTES_FETAL_EYE_CORNEAL_AND_CONJUNCTIVAL_EPITHELIAL_CELLS, LAKE_ADULT_KIDNEY_C12_THICK_ASCENDING_LIMB, AIZARANI_LIVER_C24_EPCAM_POS_BILE_DUCT_CELLS_3, MENON_FETAL_KIDNEY_4_PODOCYTES, FAN_OVARY_CL16_LYMPHATIC_ENDOTHELIAL_CELL, RUBENSTEIN_SKELETAL_MUSCLE_MYELOID_CELLS, BUSSLINGER_GASTRIC_ISTHMUS_CELLS, DESCARTES_FETAL_LUNG_CILIATED_EPITHELIAL_CELLS, DLX6_TARGET_GENES, HSIAO_HOUSEKEEPING_GENES, TRAVAGLINI_LUNG_PROLIFERATING_MACROPHAGE_CELL, GAVISH_3CA_METAPROGRAM_EPITHELIAL_SECRETED, LAKE_ADULT_KIDNEY_C7_PROXIMAL_TUBULE_EPITHELIAL_CELLS_S3, GSE16450_CTRL_VS_IFNA_6H_STIM_MATURE_NEURON_CELL_LINE_DN, DURANTE_ADULT_OLFACTORY_NEUROEPITHELIUM_SUSTENTACULAR_CELLS, ATXN7L3_TARGET_GENES, ZHENG_CORD_BLOOD_C2_PUTATIVE_BASOPHIL_EOSINOPHIL_MAST_CELL_PROGENITOR, MENON_FETAL_KIDNEY_6_COLLECTING_DUCT_CELLS, MXD1_TARGET_GENES, BUSSLINGER_DUODENAL_DIFFERENTIATING_STEM_CELLS, IRX2_TARGET_GENES, TAF9B_TARGET_GENES, GSE15330_WT_VS_IKAROS_KO_MEGAKARYOCYTE_ERYTHROID_PROGENITOR_DN, FAN_OVARY_CL10_PUTATIVE_EARLY_ATRESIA_GRANULOSA_CELL, GAVISH_3CA_MALIGNANT_METAPROGRAM_23_SECRETED_2, GSE4984_UNTREATED_VS_GALECTIN1_TREATED_DC_UP, TRAVAGLINI_LUNG_MESOTHELIAL_CELL, MARSON_FOXP3_TARGETS_UP, ZFHX3_TARGET_GENES, RUBENSTEIN_SKELETAL_MUSCLE_FBN1_FAP_CELLS, RUBENSTEIN_SKELETAL_MUSCLE_SATELLITE_CELLS, DESCARTES_FETAL_STOMACH_CILIATED_EPITHELIAL_CELLS, BUSSLINGER_GASTRIC_IMMATURE_PIT_CELLS, FAN_OVARY_CL4_T_LYMPHOCYTE_NK_CELL_1, BUSSLINGER_DUODENAL_STEM_CELLS, PECE_MAMMARY_STEM_CELL_UP, BUSSLINGER_GASTRIC_MATURE_PIT_CELLS, RUBENSTEIN_SKELETAL_MUSCLE_FAP_CELLS, GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS, GSE37416_CTRL_VS_48H_F_TULARENSIS_LVS_NEUTROPHIL_DN, NFE2L1_TARGET_GENES, GAVISH_3CA_METAPROGRAM_EPITHELIAL_EPI_2, GSE32901_NAIVE_VS_TH17_ENRICHED_CD4_TCELL_UP, MODULE_83, BUSSLINGER_DUODENAL_TRANSIT_AMPLIFYING_CELLS, CAVARD_LIVER_CANCER_MALIGNANT_VS_BENIGN, GSE27786_LIN_NEG_VS_BCELL_DN, OLSSON_E2F3_TARGETS_UP, KLF7_TARGET_GENES, GAVISH_3CA_METAPROGRAM_EPITHELIAL_V_ATPASE, TRAVAGLINI_LUNG_PLATELET_MEGAKARYOCYTE_CELL, LAKE_ADULT_KIDNEY_C23_ENDOTHELIAL_CELLS_AVR, FAN_OVARY_CL2_PUTATIVE_EARLY_ATRETIC_FOLLICLE_THECAL_CELL_1, CHICAS_RB1_TARGETS_LOW_SERUM, GLI4_TARGET_GENES, BRACHAT_RESPONSE_TO_CISPLATIN, BUSSLINGER_GASTRIC_LYZ_POSITIVE_CELLS, TFCP2_TARGET_GENES, OSMAN_BLADDER_CANCER_UP, GAVISH_3CA_MALIGNANT_METAPROGRAM_14_EMT_3, FAN_OVARY_CL3_MATURE_CUMULUS_GRANULOSA_CELL_1, GAO_ESOPHAGUS_25W_C1_CILIATED_EPITHELIAL_CELLS, TRAVAGLINI_LUNG_CD8_MEMORY_EFFECTOR_T_CELL, HE_LIM_SUN_FETAL_LUNG_C1_PROXIMAL_SECRETORY_1_CELL, GSE19888_ADENOSINE_A3R_ACT_VS_TCELL_MEMBRANES_ACT_IN_MAST_CELL_DN, WAMUNYOKOLI_OVARIAN_CANCER_LMP_UP, PARK_HSC_VS_MULTIPOTENT_PROGENITORS_UP, ZHENG_CORD_BLOOD_C7_PUTATIVE_LYMPHOID_PRIMED_MULTIPOTENT_PROGENITOR_1, GSE22589_HEALTHY_VS_SIV_INFECTED_DC_UP, GAVISH_3CA_METAPROGRAM_MACROPHAGES_STRESS_HSP, GSE40273_EOS_KO_VS_WT_TREG_UP, RUBENSTEIN_SKELETAL_MUSCLE_NK_CELLS, FAN_OVARY_CL13_MONOCYTE_MACROPHAGE, RODRIGUES_NTN1_AND_DCC_TARGETS, CREIGHTON_ENDOCRINE_THERAPY_RESISTANCE_2, BUSSLINGER_GASTRIC_PPP1R1B_POSITIVE_CELLS, GAVIN_FOXP3_TARGETS_CLUSTER_P3, CHANDRAN_METASTASIS_TOP50_DN, GSE2770_TGFB_AND_IL4_VS_TGFB_AND_IL12_TREATED_ACT_CD4_TCELL_48H_UP

COVID-19 club cell: KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION, TRAVAGLINI_LUNG_CAPILLARY_INTERMEDIATE_2_CELL, GAVISH_3CA_METAPROGRAM_EPITHELIAL_SECRETED, DESCARTES_FETAL_EYE_CORNEAL_AND_CONJUNCTIVAL_EPITHELIAL_CELLS, DURANTE_ADULT_OLFACTORY_NEUROEPITHELIUM_SUSTENTACULAR_CELLS, GAVISH_3CA_MALIGNANT_METAPROGRAM_23_SECRETED_2, TRAVAGLINI_LUNG_MUCOUS_CELL, HE_LIM_SUN_FETAL_LUNG_C1_PROXIMAL_SECRETORY_1_CELL, LAKE_ADULT_KIDNEY_C18_COLLECTING_DUCT_PRINCIPAL_CELLS_MEDULLA, HE_LIM_SUN_FETAL_LUNG_C1_CLUB_CELL, LAKE_ADULT_KIDNEY_C10_THIN_ASCENDING_LIMB, HE_LIM_SUN_FETAL_LUNG_C1_PROXIMAL_SECRETORY_3_CELL, HE_LIM_SUN_FETAL_LUNG_C1_PROXIMAL_SECRETORY_2_CELL, LAKE_ADULT_KIDNEY_C9_THIN_ASCENDING_LIMB, LIU_PROSTATE_CANCER_DN, GSE32901_NAIVE_VS_TH17_ENRICHED_CD4_TCELL_UP, LAKE_ADULT_KIDNEY_C8_DECENDING_THIN_LIMB, SENGUPTA_NASOPHARYNGEAL_CARCINOMA_DN, DURANTE_ADULT_OLFACTORY_NEUROEPITHELIUM_BOWMANS_GLAND, FAN_OVARY_CL12_T_LYMPHOCYTE_NK_CELL_2, MENON_FETAL_KIDNEY_8_CONNECTING_TUBULE_CELLS, GAVISH_3CA_METAPROGRAM_EPITHELIAL_ALVEOLAR, TAKEDA_TARGETS_OF_NUP98_HOXA9_FUSION_16D_DN, GAVISH_3CA_MALIGNANT_METAPROGRAM_31_ALVEOLAR, MODULE_164, LAKE_ADULT_KIDNEY_C19_COLLECTING_DUCT_INTERCALATED_CELLS_TYPE_A_MEDULLA, AIZARANI_LIVER_C24_EPCAM_POS_BILE_DUCT_CELLS_3, GSE17580_UNINFECTED_VS_S_MANSONI_INF_TREG_UP, DURANTE_ADULT_OLFACTORY_NEUROEPITHELIUM_RESPIRATORY_EPITHELIAL_CELLS, HE_LIM_SUN_FETAL_LUNG_C1_SMG_CELL, LAKE_ADULT_KIDNEY_C12_THICK_ASCENDING_LIMB, LOPES_METHYLATED_IN_COLON_CANCER_UP, SCHLINGEMANN_SKIN_CARCINOGENESIS_TPA_UP, HE_LIM_SUN_FETAL_LUNG_C1_MID_BASAL_CELL, MENON_FETAL_KIDNEY_4_PODOCYTES, GSE19888_ADENOSINE_A3R_INH_VS_TCELL_MEMBRANES_ACT_MAST_CELL_DN, GOBP_REGULATION_OF_SYNCYTIUM_FORMATION_BY_PLASMA_MEMBRANE_FUSION, GOBP_POSITIVE_REGULATION_OF_MYOBLAST_FUSION, ZHENG_CORD_BLOOD_C2_PUTATIVE_BASOPHIL_EOSINOPHIL_MAST_CELL_PROGENITOR, GOBP_POSITIVE_REGULATION_OF_SYNCYTIUM_FORMATION_BY_PLASMA_MEMBRANE_FUSION, GSE15330_WT_VS_IKAROS_KO_MEGAKARYOCYTE_ERYTHROID_PROGENITOR_DN, FAN_OVARY_CL16_LYMPHATIC_ENDOTHELIAL_CELL, SENGUPTA_NASOPHARYNGEAL_CARCINOMA_WITH_LMP1_DN, HE_LIM_SUN_FETAL_LUNG_C1_PROXIMAL_SECRETORY_PROGENITORS_CELL, GSE24671_CTRL_VS_BAKIMULC_INFECTED_MOUSE_SPLENOCYTES_DN, OLSSON_E2F3_TARGETS_UP, GAVISH_3CA_METAPROGRAM_EPITHELIAL_EPI_2, GSE4984_UNTREATED_VS_GALECTIN1_TREATED_DC_UP, DESCARTES_FETAL_ADRENAL_VASCULAR_ENDOTHELIAL_CELLS, GSE6681_DELETED_FOXP3_VS_WT_TREG_UP, GSE23502_BM_VS_COLON_TUMOR_HDC_KO_MYELOID_DERIVED_SUPPRESSOR_CELL_UP, GSE29617_CTRL_VS_DAY3_TIV_FLU_VACCINE_PBMC_2008_UP, ATXN7L3_TARGET_GENES, GSE23502_WT_VS_HDC_KO_MYELOID_DERIVED_SUPPRESSOR_CELL_COLON_TUMOR_DN, DLX6_TARGET_GENES, GOBP_REGULATION_OF_MYOBLAST_FUSION, GSE3203_WT_VS_IFNAR1_KO_INFLUENZA_INFECTED_LN_BCELL_UP, MODULE_433, TRAVAGLINI_LUNG_PROLIFERATING_MACROPHAGE_CELL, GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS, HAY_BONE_MARROW_MONOCYTE, GSE5099_MONOCYTE_VS_ALTERNATIVE_M2_MACROPHAGE_UP, RUBENSTEIN_SKELETAL_MUSCLE_MYELOID_CELLS, MENON_FETAL_KIDNEY_6_COLLECTING_DUCT_CELLS, GSE17974_0H_VS_0.5H_IN_VITRO_ACT_CD4_TCELL_UP, GSE19888_ADENOSINE_A3R_INH_VS_ACT_WITH_INHIBITOR_PRETREATMENT_IN_MAST_CELL_DN, GSE20366_CD103_KLRG1_DP_VS_DN_TREG_UP, GSE17721_0.5H_VS_24H_LPS_BMDC_UP, GSE17974_CTRL_VS_ACT_IL4_AND_ANTI_IL12_72H_CD4_TCELL_UP, WP_PROSTAGLANDIN_SYNTHESIS_AND_REGULATION, GSE40273_EOS_KO_VS_WT_TREG_UP, GOMF_PHOSPHOLIPASE_INHIBITOR_ACTIVITY, MODULE_418, GSE17721_CTRL_VS_LPS_24H_BMDC_DN, GSE17721_PAM3CSK4_VS_CPG_2H_BMDC_DN, BUSSLINGER_GASTRIC_LYZ_POSITIVE_CELLS, NAKAMURA_BRONCHIAL_AND_BRONCHIOLAR_EPITHELIA, GSE3982_DC_VS_CENT_MEMORY_CD4_TCELL_DN, GSE17580_UNINFECTED_VS_S_MANSONI_INF_TEFF_DN, IRX2_TARGET_GENES, GSE17721_LPS_VS_PAM3CSK4_2H_BMDC_UP, MARSON_FOXP3_TARGETS_UP, GOMF_PHOSPHOLIPASE_A2_INHIBITOR_ACTIVITY, BRUINS_UVC_RESPONSE_VIA_TP53_GROUP_D, GSE13306_TREG_RA_VS_TCONV_RA_DN, MXD1_TARGET_GENES, GSE16450_CTRL_VS_IFNA_6H_STIM_MATURE_NEURON_CELL_LINE_DN, TAF9B_TARGET_GENES, DESCARTES_FETAL_HEART_ELF3_AGBL2_POSITIVE_CELLS, DODD_NASOPHARYNGEAL_CARCINOMA_UP, TFCP2_TARGET_GENES, NAKAMURA_LUNG_CANCER_DIFFERENTIATION_MARKERS, MAP2K1_TARGET_GENES

COVID-19 lung neuroendocrine cell: FAN_OVARY_CL12_T_LYMPHOCYTE_NK_CELL_2, KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION, MENON_FETAL_KIDNEY_8_CONNECTING_TUBULE_CELLS, MENON_FETAL_KIDNEY_4_PODOCYTES, ZHENG_CORD_BLOOD_C2_PUTATIVE_BASOPHIL_EOSINOPHIL_MAST_CELL_PROGENITOR, FAN_OVARY_CL16_LYMPHATIC_ENDOTHELIAL_CELL, MARSON_FOXP3_TARGETS_UP, GSE15330_WT_VS_IKAROS_KO_MEGAKARYOCYTE_ERYTHROID_PROGENITOR_DN, IRX2_TARGET_GENES, BRACHAT_RESPONSE_TO_CISPLATIN, ATXN7L3_TARGET_GENES, TRAVAGLINI_LUNG_PROXIMAL_CILIATED_CELL, OLSSON_E2F3_TARGETS_UP, GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS, GAVISH_3CA_METAPROGRAM_EPITHELIAL_V_ATPASE, TFCP2_TARGET_GENES, PARK_HSC_VS_MULTIPOTENT_PROGENITORS_UP, STAMBOLSKY_TARGETS_OF_MUTATED_TP53_UP, CHANDRAN_METASTASIS_TOP50_DN, ZHENG_CORD_BLOOD_C7_PUTATIVE_LYMPHOID_PRIMED_MULTIPOTENT_PROGENITOR_1, GSE19888_ADENOSINE_A3R_ACT_VS_TCELL_MEMBRANES_ACT_IN_MAST_CELL_DN, GOBP_POSITIVE_REGULATION_OF_MYOBLAST_PROLIFERATION, GSE16450_CTRL_VS_IFNA_6H_STIM_MATURE_NEURON_CELL_LINE_DN, CHICAS_RB1_TARGETS_LOW_SERUM, RUBENSTEIN_SKELETAL_MUSCLE_FAP_CELLS, CAVARD_LIVER_CANCER_MALIGNANT_VS_BENIGN, RODRIGUES_NTN1_AND_DCC_TARGETS, CHANDRAN_METASTASIS_UP, MAP2K1_TARGET_GENES, TAF9B_TARGET_GENES, GSE27786_LIN_NEG_VS_BCELL_DN, GAVISH_3CA_MALIGNANT_METAPROGRAM_41_UNASSIGNED, HOWLIN_CITED1_TARGETS_2_UP, ZNF236_TARGET_GENES, CEBPE_TARGET_GENES, GSE4984_UNTREATED_VS_GALECTIN1_TREATED_DC_UP, GAVISH_3CA_MALIGNANT_METAPROGRAM_10_PROTEIN_MATURATION, GAVISH_3CA_METAPROGRAM_FIBROBLASTS_CAF_4, STAMBOLSKY_RESPONSE_TO_VITAMIN_D3_UP, BRUINS_UVC_RESPONSE_VIA_TP53_GROUP_C, GSE27786_LSK_VS_NEUTROPHIL_DN, TRAVAGLINI_LUNG_CD8_MEMORY_EFFECTOR_T_CELL, GAVISH_3CA_METAPROGRAM_MACROPHAGES_STRESS_HSP, GSE8685_IL2_STARVED_VS_IL15_ACT_IL2_STARVED_CD4_TCELL_UP, GSE17721_CTRL_VS_POLYIC_1H_BMDC_DN, SOX11_TARGET_GENES, GSE22589_HEALTHY_VS_SIV_INFECTED_DC_UP, GSE27786_LSK_VS_CD4_TCELL_DN, DLX6_TARGET_GENES, GSE5589_WT_VS_IL10_KO_LPS_STIM_MACROPHAGE_45MIN_DN, FURUKAWA_DUSP6_TARGETS_PCI35_UP, ZSCAN5C_TARGET_GENES, GSE2770_TGFB_AND_IL4_VS_TGFB_AND_IL12_TREATED_ACT_CD4_TCELL_48H_UP, GSE7509_FCGRIIB_VS_TNFA_IL1B_IL6_PGE_STIM_DC_DN, GSE40273_EOS_KO_VS_WT_TREG_UP, GAVIN_FOXP3_TARGETS_CLUSTER_P3, IWANAGA_CARCINOGENESIS_BY_KRAS_DN, WHITFIELD_CELL_CYCLE_G2, HOWLIN_CITED1_TARGETS_1_UP, GSE27786_LSK_VS_BCELL_DN, HERNANDEZ_MITOTIC_ARREST_BY_DOCETAXEL_1_UP, GSE5099_UNSTIM_VS_MCSF_TREATED_MONOCYTE_DAY3_UP, GSE14308_TH1_VS_TH17_DN, GSE37416_CTRL_VS_48H_F_TULARENSIS_LVS_NEUTROPHIL_DN, MXD1_TARGET_GENES, GSE23308_CTRL_VS_CORTICOSTERONE_TREATED_MACROPHAGE_MINERALCORTICOID_REC_KO_DN, MCBRYAN_PUBERTAL_BREAST_4_5WK_DN, PKCA_DN.V1_DN, TRAVAGLINI_LUNG_CD8_NAIVE_T_CELL, GSE41867_DAY6_VS_DAY8_LCMV_CLONE13_EFFECTOR_CD8_TCELL_DN, FAN_OVARY_CL17_PUTATIVE_APOPTOTIC_SMOOTH_MUSCLE_CELL, GSE42724_B1_BCELL_VS_PLASMABLAST_UP, GSE17721_POLYIC_VS_GARDIQUIMOD_16H_BMDC_UP, GSE5542_UNTREATED_VS_IFNA_AND_IFNG_TREATED_EPITHELIAL_CELLS_24H_UP, OSMAN_BLADDER_CANCER_UP, FAN_OVARY_CL3_MATURE_CUMULUS_GRANULOSA_CELL_1, GSE11924_TH1_VS_TH17_CD4_TCELL_UP, GSE5589_LPS_AND_IL10_VS_LPS_AND_IL6_STIM_IL10_KO_MACROPHAGE_45MIN_UP, GSE17721_LPS_VS_POLYIC_4H_BMDC_DN, GSE25087_FETAL_VS_ADULT_TCONV_DN, GSE32986_UNSTIM_VS_GMCSF_AND_CURDLAN_HIGHDOSE_STIM_DC_DN, EPC1_TARGET_GENES, GSE13493_DP_VS_CD4INTCD8POS_THYMOCYTE_UP, GAUTAM_EYE_IRIS_CILIARY_BODY_PUTATIVE_STEM_CELLS, LAKE_ADULT_KIDNEY_C18_COLLECTING_DUCT_PRINCIPAL_CELLS_MEDULLA, GSE32901_NAIVE_VS_TH17_ENRICHED_CD4_TCELL_UP, ZHENG_CORD_BLOOD_C10_MULTILYMPHOID_PROGENITOR, GSE17721_LPS_VS_POLYIC_1H_BMDC_DN, CIC_TARGET_GENES, COATES_MACROPHAGE_M1_VS_M2_UP, NAKAYA_PBMC_FLUARIX_FLUVIRIN_AGE_18_50YO_CORRELATED_WITH_HAI_28DY_RESPONSE_AT_3DY_NEGATIVE, SNF5_DN.V1_DN, RUBENSTEIN_SKELETAL_MUSCLE_PERICYTES

COVID-19 pulmonary ionocyte: KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION, FAN_OVARY_CL12_T_LYMPHOCYTE_NK_CELL_2, MENON_FETAL_KIDNEY_4_PODOCYTES, MENON_FETAL_KIDNEY_8_CONNECTING_TUBULE_CELLS, IRX2_TARGET_GENES, ZHENG_CORD_BLOOD_C2_PUTATIVE_BASOPHIL_EOSINOPHIL_MAST_CELL_PROGENITOR, TAF9B_TARGET_GENES, BRACHAT_RESPONSE_TO_CISPLATIN, DLX6_TARGET_GENES, GSE19888_ADENOSINE_A3R_ACT_VS_TCELL_MEMBRANES_ACT_IN_MAST_CELL_DN, ZHENG_CORD_BLOOD_C7_PUTATIVE_LYMPHOID_PRIMED_MULTIPOTENT_PROGENITOR_1, ATXN7L3_TARGET_GENES, CHANDRAN_METASTASIS_UP, OLSSON_E2F3_TARGETS_UP, FAN_OVARY_CL16_LYMPHATIC_ENDOTHELIAL_CELL, CHICAS_RB1_TARGETS_LOW_SERUM, GAVISH_3CA_METAPROGRAM_EPITHELIAL_V_ATPASE, PARK_HSC_VS_MULTIPOTENT_PROGENITORS_UP, MARSON_FOXP3_TARGETS_UP, RODRIGUES_NTN1_AND_DCC_TARGETS, TFCP2_TARGET_GENES, STAMBOLSKY_RESPONSE_TO_VITAMIN_D3_UP, GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS, STAMBOLSKY_TARGETS_OF_MUTATED_TP53_UP, CHANDRAN_METASTASIS_TOP50_DN, GSE27786_LIN_NEG_VS_BCELL_DN, TRAVAGLINI_LUNG_CD8_MEMORY_EFFECTOR_T_CELL, GOBP_POSITIVE_REGULATION_OF_MYOBLAST_PROLIFERATION, MAP2K1_TARGET_GENES, CAVARD_LIVER_CANCER_MALIGNANT_VS_BENIGN, GSE15330_WT_VS_IKAROS_KO_MEGAKARYOCYTE_ERYTHROID_PROGENITOR_DN, MXD1_TARGET_GENES, GSE4984_UNTREATED_VS_GALECTIN1_TREATED_DC_UP, ZNF236_TARGET_GENES, GSE22589_HEALTHY_VS_SIV_INFECTED_DC_UP, GAVISH_3CA_METAPROGRAM_FIBROBLASTS_CAF_4, GAVISH_3CA_METAPROGRAM_MACROPHAGES_STRESS_HSP, GAVISH_3CA_MALIGNANT_METAPROGRAM_41_UNASSIGNED, HOWLIN_CITED1_TARGETS_2_UP, GSE27786_LSK_VS_CD4_TCELL_DN, SOX11_TARGET_GENES, GSE8685_IL2_STARVED_VS_IL15_ACT_IL2_STARVED_CD4_TCELL_UP, GSE27786_LSK_VS_NEUTROPHIL_DN, BRUINS_UVC_RESPONSE_VIA_TP53_GROUP_C, ZSCAN5C_TARGET_GENES, GSE7509_FCGRIIB_VS_TNFA_IL1B_IL6_PGE_STIM_DC_DN, CEBPE_TARGET_GENES, GSE16450_CTRL_VS_IFNA_6H_STIM_MATURE_NEURON_CELL_LINE_DN, GSE5589_WT_VS_IL10_KO_LPS_STIM_MACROPHAGE_45MIN_DN, GAVISH_3CA_MALIGNANT_METAPROGRAM_10_PROTEIN_MATURATION, HOWLIN_CITED1_TARGETS_1_UP, GSE14308_TH1_VS_TH17_DN, RUBENSTEIN_SKELETAL_MUSCLE_FAP_CELLS, GSE17721_CTRL_VS_POLYIC_1H_BMDC_DN, WHITFIELD_CELL_CYCLE_G2, IWANAGA_CARCINOGENESIS_BY_KRAS_DN, MCBRYAN_PUBERTAL_BREAST_4_5WK_DN, GSE37416_CTRL_VS_48H_F_TULARENSIS_LVS_NEUTROPHIL_DN, HERNANDEZ_MITOTIC_ARREST_BY_DOCETAXEL_1_UP, GSE2770_TGFB_AND_IL4_VS_TGFB_AND_IL12_TREATED_ACT_CD4_TCELL_48H_UP, GSE40273_EOS_KO_VS_WT_TREG_UP, FURUKAWA_DUSP6_TARGETS_PCI35_UP, GSE27786_LSK_VS_BCELL_DN, TRAVAGLINI_LUNG_PROXIMAL_CILIATED_CELL, GSE23308_CTRL_VS_CORTICOSTERONE_TREATED_MACROPHAGE_MINERALCORTICOID_REC_KO_DN, GSE5099_UNSTIM_VS_MCSF_TREATED_MONOCYTE_DAY3_UP, PKCA_DN.V1_DN, GSE41867_DAY6_VS_DAY8_LCMV_CLONE13_EFFECTOR_CD8_TCELL_DN, KLF7_TARGET_GENES, GSE5542_UNTREATED_VS_IFNA_AND_IFNG_TREATED_EPITHELIAL_CELLS_24H_UP, GSE25087_FETAL_VS_ADULT_TCONV_DN, OSMAN_BLADDER_CANCER_UP, TRAVAGLINI_LUNG_CD8_NAIVE_T_CELL, FAN_OVARY_CL17_PUTATIVE_APOPTOTIC_SMOOTH_MUSCLE_CELL, GSE13493_DP_VS_CD4INTCD8POS_THYMOCYTE_UP, GSE11924_TH1_VS_TH17_CD4_TCELL_UP, ZHENG_CORD_BLOOD_C10_MULTILYMPHOID_PROGENITOR, GSE17721_POLYIC_VS_GARDIQUIMOD_16H_BMDC_UP, COATES_MACROPHAGE_M1_VS_M2_UP, GSE42724_B1_BCELL_VS_PLASMABLAST_UP, FAN_OVARY_CL3_MATURE_CUMULUS_GRANULOSA_CELL_1, GAVIN_FOXP3_TARGETS_CLUSTER_P3, EPC1_TARGET_GENES, NAKAYA_PBMC_FLUARIX_FLUVIRIN_AGE_18_50YO_CORRELATED_WITH_HAI_28DY_RESPONSE_AT_3DY_NEGATIVE, GSE17721_LPS_VS_POLYIC_4H_BMDC_DN, BLANCO_MELO_COVID19_SARS_COV_2_LOW_MOI_INFECTION_A594_ACE2_EXPRESSING_CELLS_UP, CIC_TARGET_GENES, GAUTAM_EYE_IRIS_CILIARY_BODY_PUTATIVE_STEM_CELLS, GSE5099_DAY3_VS_DAY7_MCSF_TREATED_MACROPHAGE_DN, GSE5589_LPS_AND_IL10_VS_LPS_AND_IL6_STIM_IL10_KO_MACROPHAGE_45MIN_UP, GSE32901_NAIVE_VS_TH17_ENRICHED_CD4_TCELL_UP, GSE32986_UNSTIM_VS_GMCSF_AND_CURDLAN_HIGHDOSE_STIM_DC_DN, MCBRYAN_PUBERTAL_BREAST_5_6WK_DN

COVID-19 somatic cell: KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION, TRAVAGLINI_LUNG_CAPILLARY_INTERMEDIATE_2_CELL, LAKE_ADULT_KIDNEY_C18_COLLECTING_DUCT_PRINCIPAL_CELLS_MEDULLA, LAKE_ADULT_KIDNEY_C10_THIN_ASCENDING_LIMB, DESCARTES_FETAL_EYE_CORNEAL_AND_CONJUNCTIVAL_EPITHELIAL_CELLS, LAKE_ADULT_KIDNEY_C8_DECENDING_THIN_LIMB, GAVISH_3CA_METAPROGRAM_EPITHELIAL_SECRETED, LAKE_ADULT_KIDNEY_C19_COLLECTING_DUCT_INTERCALATED_CELLS_TYPE_A_MEDULLA, LAKE_ADULT_KIDNEY_C9_THIN_ASCENDING_LIMB, DURANTE_ADULT_OLFACTORY_NEUROEPITHELIUM_SUSTENTACULAR_CELLS, GAVISH_3CA_MALIGNANT_METAPROGRAM_23_SECRETED_2, LAKE_ADULT_KIDNEY_C12_THICK_ASCENDING_LIMB, RUBENSTEIN_SKELETAL_MUSCLE_MYELOID_CELLS, FAN_OVARY_CL12_T_LYMPHOCYTE_NK_CELL_2, HE_LIM_SUN_FETAL_LUNG_C1_PROXIMAL_SECRETORY_1_CELL, BUSSLINGER_DUODENAL_DIFFERENTIATING_STEM_CELLS, BUSSLINGER_DUODENAL_STEM_CELLS, HE_LIM_SUN_FETAL_LUNG_C1_PROXIMAL_SECRETORY_3_CELL, LAKE_ADULT_KIDNEY_C7_PROXIMAL_TUBULE_EPITHELIAL_CELLS_S3, HE_LIM_SUN_FETAL_LUNG_C1_PROXIMAL_SECRETORY_2_CELL, TRAVAGLINI_LUNG_PROXIMAL_BASAL_CELL, HE_LIM_SUN_FETAL_LUNG_C1_CLUB_CELL, TRAVAGLINI_LUNG_MUCOUS_CELL, BUSSLINGER_GASTRIC_LYZ_POSITIVE_CELLS, MENON_FETAL_KIDNEY_8_CONNECTING_TUBULE_CELLS, DLX6_TARGET_GENES, RUBENSTEIN_SKELETAL_MUSCLE_SATELLITE_CELLS, TRAVAGLINI_LUNG_MESOTHELIAL_CELL, AIZARANI_LIVER_C24_EPCAM_POS_BILE_DUCT_CELLS_3, HSIAO_HOUSEKEEPING_GENES, BUSSLINGER_GASTRIC_ISTHMUS_CELLS, BUSSLINGER_DUODENAL_TRANSIT_AMPLIFYING_CELLS, BUSSLINGER_GASTRIC_PPP1R1B_POSITIVE_CELLS, LAKE_ADULT_KIDNEY_C23_ENDOTHELIAL_CELLS_AVR, SENGUPTA_NASOPHARYNGEAL_CARCINOMA_DN, PECE_MAMMARY_STEM_CELL_UP, FAN_OVARY_CL2_PUTATIVE_EARLY_ATRETIC_FOLLICLE_THECAL_CELL_1, MENON_FETAL_KIDNEY_6_COLLECTING_DUCT_CELLS, ZFHX3_TARGET_GENES, TIEN_INTESTINE_PROBIOTICS_24HR_DN, TRAVAGLINI_LUNG_PROXIMAL_CILIATED_CELL, GAVISH_3CA_METAPROGRAM_EPITHELIAL_EPI_2, MXD1_TARGET_GENES, TAF9B_TARGET_GENES, FAN_OVARY_CL16_LYMPHATIC_ENDOTHELIAL_CELL, GSE24671_CTRL_VS_BAKIMULC_INFECTED_MOUSE_SPLENOCYTES_DN, GNF2_TPT1, TIEN_INTESTINE_PROBIOTICS_6HR_UP, LIU_PROSTATE_CANCER_DN, MENON_FETAL_KIDNEY_4_PODOCYTES, BUSSLINGER_ESOPHAGEAL_LATE_SUPRABASAL_CELLS, DODD_NASOPHARYNGEAL_CARCINOMA_UP, GSE32901_NAIVE_VS_TH17_ENRICHED_CD4_TCELL_UP, MORF_ACTG1, BUSSLINGER_GASTRIC_IMMATURE_PIT_CELLS, MODULE_164, ATXN7L3_TARGET_GENES, GSE4984_UNTREATED_VS_GALECTIN1_TREATED_DC_UP, GSE3982_DC_VS_CENT_MEMORY_CD4_TCELL_DN, RUBENSTEIN_SKELETAL_MUSCLE_PCV_ENDOTHELIAL_CELLS, MODULE_83, TRAVAGLINI_LUNG_PROLIFERATING_MACROPHAGE_CELL, ZHENG_CORD_BLOOD_C2_PUTATIVE_BASOPHIL_EOSINOPHIL_MAST_CELL_PROGENITOR, GSE15330_WT_VS_IKAROS_KO_MEGAKARYOCYTE_ERYTHROID_PROGENITOR_DN, GAVISH_3CA_METAPROGRAM_EPITHELIAL_ALVEOLAR, MORF_NPM1, DURANTE_ADULT_OLFACTORY_NEUROEPITHELIUM_BOWMANS_GLAND, BUSSLINGER_GASTRIC_MATURE_PIT_CELLS, TRAVAGLINI_LUNG_DIFFERENTIATING_BASAL_CELL, RUBENSTEIN_SKELETAL_MUSCLE_NK_CELLS, GAVISH_3CA_MALIGNANT_METAPROGRAM_31_ALVEOLAR, NFE2L1_TARGET_GENES, RUBENSTEIN_SKELETAL_MUSCLE_T_CELLS, TRAVAGLINI_LUNG_CLUB_CELL, NAKAMURA_TUMOR_ZONE_PERIPHERAL_VS_CENTRAL_DN, RUBENSTEIN_SKELETAL_MUSCLE_FBN1_FAP_CELLS, RUBENSTEIN_SKELETAL_MUSCLE_B_CELLS, TAKEDA_TARGETS_OF_NUP98_HOXA9_FUSION_16D_DN, IRX2_TARGET_GENES, MENON_FETAL_KIDNEY_10_IMMUNE_CELLS, GSE2405_0H_VS_24H_A_PHAGOCYTOPHILUM_STIM_NEUTROPHIL_UP, GSE16450_CTRL_VS_IFNA_6H_STIM_MATURE_NEURON_CELL_LINE_DN, GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS, FAN_OVARY_CL0_XBP1_SELK_HIGH_STROMAL_CELL, NKX2_2_TARGET_GENES, MORF_TPT1, GAVISH_3CA_MALIGNANT_METAPROGRAM_14_EMT_3, DURANTE_ADULT_OLFACTORY_NEUROEPITHELIUM_RESPIRATORY_EPITHELIAL_CELLS, MODULE_114, HE_LIM_SUN_FETAL_LUNG_C1_MID_BASAL_CELL, AIZARANI_LIVER_C7_EPCAM_POS_BILE_DUCT_CELLS_2, MANNO_MIDBRAIN_NEUROTYPES_BASAL, FAN_OVARY_CL10_PUTATIVE_EARLY_ATRESIA_GRANULOSA_CELL

normal basal cell of epithelium of bronchus: KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION, LAKE_ADULT_KIDNEY_C18_COLLECTING_DUCT_PRINCIPAL_CELLS_MEDULLA, FAN_OVARY_CL12_T_LYMPHOCYTE_NK_CELL_2, DLX6_TARGET_GENES, TRAVAGLINI_LUNG_CAPILLARY_INTERMEDIATE_2_CELL, LAKE_ADULT_KIDNEY_C10_THIN_ASCENDING_LIMB, TAF9B_TARGET_GENES, LAKE_ADULT_KIDNEY_C8_DECENDING_THIN_LIMB, MXD1_TARGET_GENES, LAKE_ADULT_KIDNEY_C19_COLLECTING_DUCT_INTERCALATED_CELLS_TYPE_A_MEDULLA, IRX2_TARGET_GENES, MENON_FETAL_KIDNEY_4_PODOCYTES, MENON_FETAL_KIDNEY_8_CONNECTING_TUBULE_CELLS, PECE_MAMMARY_STEM_CELL_UP, ATXN7L3_TARGET_GENES, LAKE_ADULT_KIDNEY_C9_THIN_ASCENDING_LIMB, GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS, ZHENG_CORD_BLOOD_C7_PUTATIVE_LYMPHOID_PRIMED_MULTIPOTENT_PROGENITOR_1, ZHENG_CORD_BLOOD_C2_PUTATIVE_BASOPHIL_EOSINOPHIL_MAST_CELL_PROGENITOR, GSE19888_ADENOSINE_A3R_ACT_VS_TCELL_MEMBRANES_ACT_IN_MAST_CELL_DN, GSE4984_UNTREATED_VS_GALECTIN1_TREATED_DC_UP, FAN_OVARY_CL16_LYMPHATIC_ENDOTHELIAL_CELL, RODRIGUES_NTN1_AND_DCC_TARGETS, CHANDRAN_METASTASIS_UP, KLF7_TARGET_GENES, GAVISH_3CA_METAPROGRAM_EPITHELIAL_V_ATPASE, RUBENSTEIN_SKELETAL_MUSCLE_FAP_CELLS, CAVARD_LIVER_CANCER_MALIGNANT_VS_BENIGN, BRACHAT_RESPONSE_TO_CISPLATIN, GSE16450_CTRL_VS_IFNA_6H_STIM_MATURE_NEURON_CELL_LINE_DN, TRAVAGLINI_LUNG_CD8_MEMORY_EFFECTOR_T_CELL, BUSSLINGER_DUODENAL_STEM_CELLS, CHICAS_RB1_TARGETS_LOW_SERUM, GSE37416_CTRL_VS_48H_F_TULARENSIS_LVS_NEUTROPHIL_DN, STAMBOLSKY_RESPONSE_TO_VITAMIN_D3_UP, TFCP2_TARGET_GENES, BUSSLINGER_DUODENAL_DIFFERENTIATING_STEM_CELLS, MAP2K1_TARGET_GENES, PARK_HSC_VS_MULTIPOTENT_PROGENITORS_UP, GSE27786_LIN_NEG_VS_BCELL_DN, OLSSON_E2F3_TARGETS_UP, GAVISH_3CA_METAPROGRAM_FIBROBLASTS_CAF_4, MARSON_FOXP3_TARGETS_UP, CHANDRAN_METASTASIS_TOP50_DN, GSE15330_WT_VS_IKAROS_KO_MEGAKARYOCYTE_ERYTHROID_PROGENITOR_DN, GOBP_POSITIVE_REGULATION_OF_MYOBLAST_PROLIFERATION, GAVISH_3CA_MALIGNANT_METAPROGRAM_10_PROTEIN_MATURATION, GSE27786_LSK_VS_CD4_TCELL_DN, ZNF236_TARGET_GENES, MCBRYAN_PUBERTAL_BREAST_4_5WK_DN, LAKE_ADULT_KIDNEY_C12_THICK_ASCENDING_LIMB, GSE27786_LSK_VS_NEUTROPHIL_DN, GSE2770_TGFB_AND_IL4_VS_TGFB_AND_IL12_TREATED_ACT_CD4_TCELL_48H_UP, GSE22589_HEALTHY_VS_SIV_INFECTED_DC_UP, NAKAMURA_TUMOR_ZONE_PERIPHERAL_VS_CENTRAL_DN, GAVISH_3CA_MALIGNANT_METAPROGRAM_41_UNASSIGNED, STAMBOLSKY_TARGETS_OF_MUTATED_TP53_UP, TIEN_INTESTINE_PROBIOTICS_24HR_DN, GSE5589_WT_VS_IL10_KO_LPS_STIM_MACROPHAGE_45MIN_DN, LAKE_ADULT_KIDNEY_C23_ENDOTHELIAL_CELLS_AVR, CEBPE_TARGET_GENES, KMT2D_TARGET_GENES, HOWLIN_CITED1_TARGETS_1_UP, SOX11_TARGET_GENES, ZSCAN5C_TARGET_GENES, HERNANDEZ_MITOTIC_ARREST_BY_DOCETAXEL_1_UP, GSE7509_FCGRIIB_VS_TNFA_IL1B_IL6_PGE_STIM_DC_DN, HOWLIN_CITED1_TARGETS_2_UP, SARRIO_EPITHELIAL_MESENCHYMAL_TRANSITION_DN, GAVISH_3CA_METAPROGRAM_MACROPHAGES_STRESS_HSP, TRAVAGLINI_LUNG_PROXIMAL_BASAL_CELL, GSE40273_EOS_KO_VS_WT_TREG_UP, BRUINS_UVC_RESPONSE_VIA_TP53_GROUP_C, LAKE_ADULT_KIDNEY_C7_PROXIMAL_TUBULE_EPITHELIAL_CELLS_S3, GSE8685_IL2_STARVED_VS_IL15_ACT_IL2_STARVED_CD4_TCELL_UP, BUSSLINGER_GASTRIC_LYZ_POSITIVE_CELLS, GSE5099_UNSTIM_VS_MCSF_TREATED_MONOCYTE_DAY3_UP, FURUKAWA_DUSP6_TARGETS_PCI35_UP, GSE25087_FETAL_VS_ADULT_TCONV_DN, CIC_TARGET_GENES, GSE27786_LSK_VS_BCELL_DN, MCBRYAN_PUBERTAL_BREAST_5_6WK_DN, NFE2L1_TARGET_GENES, GSE14308_TH1_VS_TH17_DN, GSE11924_TH1_VS_TH17_CD4_TCELL_UP, GSE13493_DP_VS_CD4INTCD8POS_THYMOCYTE_UP, COATES_MACROPHAGE_M1_VS_M2_UP, HSIAO_HOUSEKEEPING_GENES, GLI4_TARGET_GENES, IWANAGA_CARCINOGENESIS_BY_KRAS_DN, OSMAN_BLADDER_CANCER_UP, PKCA_DN.V1_DN, GSE17721_CTRL_VS_POLYIC_1H_BMDC_DN

normal bronchial goblet cell: KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION, FAN_OVARY_CL12_T_LYMPHOCYTE_NK_CELL_2, DLX6_TARGET_GENES, TAF9B_TARGET_GENES, IRX2_TARGET_GENES, ZHENG_CORD_BLOOD_C7_PUTATIVE_LYMPHOID_PRIMED_MULTIPOTENT_PROGENITOR_1, MENON_FETAL_KIDNEY_4_PODOCYTES, GSE19888_ADENOSINE_A3R_ACT_VS_TCELL_MEMBRANES_ACT_IN_MAST_CELL_DN, RODRIGUES_NTN1_AND_DCC_TARGETS, CHANDRAN_METASTASIS_UP, ATXN7L3_TARGET_GENES, ZHENG_CORD_BLOOD_C2_PUTATIVE_BASOPHIL_EOSINOPHIL_MAST_CELL_PROGENITOR, MXD1_TARGET_GENES, BRACHAT_RESPONSE_TO_CISPLATIN, GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS, MENON_FETAL_KIDNEY_8_CONNECTING_TUBULE_CELLS, LAKE_ADULT_KIDNEY_C18_COLLECTING_DUCT_PRINCIPAL_CELLS_MEDULLA, GAVISH_3CA_METAPROGRAM_EPITHELIAL_V_ATPASE, STAMBOLSKY_RESPONSE_TO_VITAMIN_D3_UP, OLSSON_E2F3_TARGETS_UP, CHICAS_RB1_TARGETS_LOW_SERUM, GSE4984_UNTREATED_VS_GALECTIN1_TREATED_DC_UP, TRAVAGLINI_LUNG_CD8_MEMORY_EFFECTOR_T_CELL, CAVARD_LIVER_CANCER_MALIGNANT_VS_BENIGN, PARK_HSC_VS_MULTIPOTENT_PROGENITORS_UP, TFCP2_TARGET_GENES, MAP2K1_TARGET_GENES, GAVISH_3CA_METAPROGRAM_FIBROBLASTS_CAF_4, GOBP_POSITIVE_REGULATION_OF_MYOBLAST_PROLIFERATION, ZNF236_TARGET_GENES, CHANDRAN_METASTASIS_TOP50_DN, GSE27786_LIN_NEG_VS_BCELL_DN, FAN_OVARY_CL16_LYMPHATIC_ENDOTHELIAL_CELL, MARSON_FOXP3_TARGETS_UP, STAMBOLSKY_TARGETS_OF_MUTATED_TP53_UP, MCBRYAN_PUBERTAL_BREAST_4_5WK_DN, GSE27786_LSK_VS_CD4_TCELL_DN, GAVISH_3CA_MALIGNANT_METAPROGRAM_41_UNASSIGNED, GSE22589_HEALTHY_VS_SIV_INFECTED_DC_UP, RUBENSTEIN_SKELETAL_MUSCLE_FAP_CELLS, GAVISH_3CA_MALIGNANT_METAPROGRAM_10_PROTEIN_MATURATION, GSE37416_CTRL_VS_48H_F_TULARENSIS_LVS_NEUTROPHIL_DN, GSE27786_LSK_VS_NEUTROPHIL_DN, GSE16450_CTRL_VS_IFNA_6H_STIM_MATURE_NEURON_CELL_LINE_DN, GSE15330_WT_VS_IKAROS_KO_MEGAKARYOCYTE_ERYTHROID_PROGENITOR_DN, SOX11_TARGET_GENES, ZSCAN5C_TARGET_GENES, CEBPE_TARGET_GENES, GSE5589_WT_VS_IL10_KO_LPS_STIM_MACROPHAGE_45MIN_DN, GSE2770_TGFB_AND_IL4_VS_TGFB_AND_IL12_TREATED_ACT_CD4_TCELL_48H_UP, GSE7509_FCGRIIB_VS_TNFA_IL1B_IL6_PGE_STIM_DC_DN, HOWLIN_CITED1_TARGETS_2_UP, HOWLIN_CITED1_TARGETS_1_UP, HERNANDEZ_MITOTIC_ARREST_BY_DOCETAXEL_1_UP, GAVISH_3CA_METAPROGRAM_MACROPHAGES_STRESS_HSP, KLF7_TARGET_GENES, GSE8685_IL2_STARVED_VS_IL15_ACT_IL2_STARVED_CD4_TCELL_UP, BRUINS_UVC_RESPONSE_VIA_TP53_GROUP_C, GSE40273_EOS_KO_VS_WT_TREG_UP, GSE14308_TH1_VS_TH17_DN, IWANAGA_CARCINOGENESIS_BY_KRAS_DN, GSE17721_CTRL_VS_POLYIC_1H_BMDC_DN, GSE27786_LSK_VS_BCELL_DN, GSE5099_UNSTIM_VS_MCSF_TREATED_MONOCYTE_DAY3_UP, FURUKAWA_DUSP6_TARGETS_PCI35_UP, PKCA_DN.V1_DN, GSE5542_UNTREATED_VS_IFNA_AND_IFNG_TREATED_EPITHELIAL_CELLS_24H_UP, GSE41867_DAY6_VS_DAY8_LCMV_CLONE13_EFFECTOR_CD8_TCELL_DN, GSE13493_DP_VS_CD4INTCD8POS_THYMOCYTE_UP, WHITFIELD_CELL_CYCLE_G2, GSE25087_FETAL_VS_ADULT_TCONV_DN, CIC_TARGET_GENES, MCBRYAN_PUBERTAL_BREAST_5_6WK_DN, GSE23308_CTRL_VS_CORTICOSTERONE_TREATED_MACROPHAGE_MINERALCORTICOID_REC_KO_DN, GSE11924_TH1_VS_TH17_CD4_TCELL_UP, KMT2D_TARGET_GENES, TRAVAGLINI_LUNG_CD8_NAIVE_T_CELL, COATES_MACROPHAGE_M1_VS_M2_UP, OSMAN_BLADDER_CANCER_UP, SARRIO_EPITHELIAL_MESENCHYMAL_TRANSITION_DN, ZHENG_CORD_BLOOD_C10_MULTILYMPHOID_PROGENITOR, GSE43863_DAY6_EFF_VS_DAY150_MEM_LY6C_INT_CXCR5POS_CD4_TCELL_UP, GSE42724_B1_BCELL_VS_PLASMABLAST_UP, NAKAYA_PBMC_FLUARIX_FLUVIRIN_AGE_18_50YO_CORRELATED_WITH_HAI_28DY_RESPONSE_AT_3DY_NEGATIVE, NCOA6_TARGET_GENES, GSE17721_POLYIC_VS_GARDIQUIMOD_16H_BMDC_UP, BLANCO_MELO_COVID19_SARS_COV_2_LOW_MOI_INFECTION_A594_ACE2_EXPRESSING_CELLS_UP, GSE5099_DAY3_VS_DAY7_MCSF_TREATED_MACROPHAGE_DN, GSE17721_LPS_VS_POLYIC_4H_BMDC_DN, GAVIN_FOXP3_TARGETS_CLUSTER_P3, TAKEDA_TARGETS_OF_NUP98_HOXA9_FUSION_3D_UP, GSE32901_NAIVE_VS_TH17_ENRICHED_CD4_TCELL_UP, FAN_OVARY_CL3_MATURE_CUMULUS_GRANULOSA_CELL_1

normal brush cell of bronchus: KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION, FAN_OVARY_CL12_T_LYMPHOCYTE_NK_CELL_2, DLX6_TARGET_GENES, TAF9B_TARGET_GENES, LAKE_ADULT_KIDNEY_C18_COLLECTING_DUCT_PRINCIPAL_CELLS_MEDULLA, MXD1_TARGET_GENES, ZHENG_CORD_BLOOD_C7_PUTATIVE_LYMPHOID_PRIMED_MULTIPOTENT_PROGENITOR_1, GSE19888_ADENOSINE_A3R_ACT_VS_TCELL_MEMBRANES_ACT_IN_MAST_CELL_DN, IRX2_TARGET_GENES, MENON_FETAL_KIDNEY_4_PODOCYTES, ATXN7L3_TARGET_GENES, CHANDRAN_METASTASIS_UP, ZHENG_CORD_BLOOD_C2_PUTATIVE_BASOPHIL_EOSINOPHIL_MAST_CELL_PROGENITOR, STAMBOLSKY_RESPONSE_TO_VITAMIN_D3_UP, BRACHAT_RESPONSE_TO_CISPLATIN, GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS, MENON_FETAL_KIDNEY_8_CONNECTING_TUBULE_CELLS, RODRIGUES_NTN1_AND_DCC_TARGETS, TRAVAGLINI_LUNG_CD8_MEMORY_EFFECTOR_T_CELL, GSE4984_UNTREATED_VS_GALECTIN1_TREATED_DC_UP, CHICAS_RB1_TARGETS_LOW_SERUM, GAVISH_3CA_METAPROGRAM_EPITHELIAL_V_ATPASE, GSE27786_LIN_NEG_VS_BCELL_DN, TFCP2_TARGET_GENES, KLF7_TARGET_GENES, MAP2K1_TARGET_GENES, PARK_HSC_VS_MULTIPOTENT_PROGENITORS_UP, CAVARD_LIVER_CANCER_MALIGNANT_VS_BENIGN, FAN_OVARY_CL16_LYMPHATIC_ENDOTHELIAL_CELL, CHANDRAN_METASTASIS_TOP50_DN, OLSSON_E2F3_TARGETS_UP, GOBP_POSITIVE_REGULATION_OF_MYOBLAST_PROLIFERATION, GAVISH_3CA_METAPROGRAM_FIBROBLASTS_CAF_4, GSE22589_HEALTHY_VS_SIV_INFECTED_DC_UP, GSE27786_LSK_VS_CD4_TCELL_DN, ZNF236_TARGET_GENES, MARSON_FOXP3_TARGETS_UP, TRAVAGLINI_LUNG_CAPILLARY_INTERMEDIATE_2_CELL, STAMBOLSKY_TARGETS_OF_MUTATED_TP53_UP, GSE27786_LSK_VS_NEUTROPHIL_DN, GSE15330_WT_VS_IKAROS_KO_MEGAKARYOCYTE_ERYTHROID_PROGENITOR_DN, MCBRYAN_PUBERTAL_BREAST_4_5WK_DN, ZSCAN5C_TARGET_GENES, SOX11_TARGET_GENES, GSE16450_CTRL_VS_IFNA_6H_STIM_MATURE_NEURON_CELL_LINE_DN, GSE5589_WT_VS_IL10_KO_LPS_STIM_MACROPHAGE_45MIN_DN, HOWLIN_CITED1_TARGETS_1_UP, GAVISH_3CA_MALIGNANT_METAPROGRAM_41_UNASSIGNED, GAVISH_3CA_MALIGNANT_METAPROGRAM_10_PROTEIN_MATURATION, HOWLIN_CITED1_TARGETS_2_UP, GSE7509_FCGRIIB_VS_TNFA_IL1B_IL6_PGE_STIM_DC_DN, CEBPE_TARGET_GENES, GSE37416_CTRL_VS_48H_F_TULARENSIS_LVS_NEUTROPHIL_DN, GAVISH_3CA_METAPROGRAM_MACROPHAGES_STRESS_HSP, HERNANDEZ_MITOTIC_ARREST_BY_DOCETAXEL_1_UP, GSE8685_IL2_STARVED_VS_IL15_ACT_IL2_STARVED_CD4_TCELL_UP, LAKE_ADULT_KIDNEY_C10_THIN_ASCENDING_LIMB, RUBENSTEIN_SKELETAL_MUSCLE_FAP_CELLS, KMT2D_TARGET_GENES, GSE14308_TH1_VS_TH17_DN, GSE2770_TGFB_AND_IL4_VS_TGFB_AND_IL12_TREATED_ACT_CD4_TCELL_48H_UP, BRUINS_UVC_RESPONSE_VIA_TP53_GROUP_C, BUSSLINGER_DUODENAL_STEM_CELLS, GSE25087_FETAL_VS_ADULT_TCONV_DN, SARRIO_EPITHELIAL_MESENCHYMAL_TRANSITION_DN, IWANAGA_CARCINOGENESIS_BY_KRAS_DN, GSE5099_UNSTIM_VS_MCSF_TREATED_MONOCYTE_DAY3_UP, GSE5542_UNTREATED_VS_IFNA_AND_IFNG_TREATED_EPITHELIAL_CELLS_24H_UP, GSE17721_CTRL_VS_POLYIC_1H_BMDC_DN, GSE27786_LSK_VS_BCELL_DN, FURUKAWA_DUSP6_TARGETS_PCI35_UP, BUSSLINGER_DUODENAL_DIFFERENTIATING_STEM_CELLS, PKCA_DN.V1_DN, GSE40273_EOS_KO_VS_WT_TREG_UP, GSE13493_DP_VS_CD4INTCD8POS_THYMOCYTE_UP, GSE41867_DAY6_VS_DAY8_LCMV_CLONE13_EFFECTOR_CD8_TCELL_DN, WHITFIELD_CELL_CYCLE_G2, TRAVAGLINI_LUNG_CD8_NAIVE_T_CELL, GSE23308_CTRL_VS_CORTICOSTERONE_TREATED_MACROPHAGE_MINERALCORTICOID_REC_KO_DN, MCBRYAN_PUBERTAL_BREAST_5_6WK_DN, LAKE_ADULT_KIDNEY_C8_DECENDING_THIN_LIMB, COATES_MACROPHAGE_M1_VS_M2_UP, GSE11924_TH1_VS_TH17_CD4_TCELL_UP, CIC_TARGET_GENES, OSMAN_BLADDER_CANCER_UP, ZFHX3_TARGET_GENES, LAKE_ADULT_KIDNEY_C19_COLLECTING_DUCT_INTERCALATED_CELLS_TYPE_A_MEDULLA, NAKAYA_PBMC_FLUARIX_FLUVIRIN_AGE_18_50YO_CORRELATED_WITH_HAI_28DY_RESPONSE_AT_7DY_NEGATIVE, NAKAYA_PBMC_FLUARIX_FLUVIRIN_AGE_18_50YO_CORRELATED_WITH_HAI_28DY_RESPONSE_AT_3DY_NEGATIVE, GLI4_TARGET_GENES, ZHENG_CORD_BLOOD_C10_MULTILYMPHOID_PROGENITOR, GSE42724_B1_BCELL_VS_PLASMABLAST_UP, BLANCO_MELO_COVID19_SARS_COV_2_LOW_MOI_INFECTION_A594_ACE2_EXPRESSING_CELLS_UP

normal ciliated cell of the bronchus: KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION, TRAVAGLINI_LUNG_PROXIMAL_CILIATED_CELL, LAKE_ADULT_KIDNEY_C18_COLLECTING_DUCT_PRINCIPAL_CELLS_MEDULLA, TRAVAGLINI_LUNG_CAPILLARY_INTERMEDIATE_2_CELL, LAKE_ADULT_KIDNEY_C10_THIN_ASCENDING_LIMB, LAKE_ADULT_KIDNEY_C8_DECENDING_THIN_LIMB, LAKE_ADULT_KIDNEY_C19_COLLECTING_DUCT_INTERCALATED_CELLS_TYPE_A_MEDULLA, TRAVAGLINI_LUNG_CILIATED_CELL, LAKE_ADULT_KIDNEY_C9_THIN_ASCENDING_LIMB, FAN_OVARY_CL12_T_LYMPHOCYTE_NK_CELL_2, DODD_NASOPHARYNGEAL_CARCINOMA_UP, SENGUPTA_NASOPHARYNGEAL_CARCINOMA_DN, LAKE_ADULT_KIDNEY_C12_THICK_ASCENDING_LIMB, MENON_FETAL_KIDNEY_8_CONNECTING_TUBULE_CELLS, DESCARTES_FETAL_EYE_CORNEAL_AND_CONJUNCTIVAL_EPITHELIAL_CELLS, HSIAO_HOUSEKEEPING_GENES, RUBENSTEIN_SKELETAL_MUSCLE_MYELOID_CELLS, DLX6_TARGET_GENES, BUSSLINGER_GASTRIC_ISTHMUS_CELLS, PECE_MAMMARY_STEM_CELL_UP, LAKE_ADULT_KIDNEY_C7_PROXIMAL_TUBULE_EPITHELIAL_CELLS_S3, MENON_FETAL_KIDNEY_4_PODOCYTES, DURANTE_ADULT_OLFACTORY_NEUROEPITHELIUM_RESPIRATORY_CILIATED_CELLS, AIZARANI_LIVER_C24_EPCAM_POS_BILE_DUCT_CELLS_3, BUSSLINGER_DUODENAL_STEM_CELLS, FAN_OVARY_CL16_LYMPHATIC_ENDOTHELIAL_CELL, TIEN_INTESTINE_PROBIOTICS_24HR_DN, RUBENSTEIN_SKELETAL_MUSCLE_FBN1_FAP_CELLS, BUSSLINGER_DUODENAL_DIFFERENTIATING_STEM_CELLS, LAKE_ADULT_KIDNEY_C23_ENDOTHELIAL_CELLS_AVR, MXD1_TARGET_GENES, GSE16450_CTRL_VS_IFNA_6H_STIM_MATURE_NEURON_CELL_LINE_DN, RUBENSTEIN_SKELETAL_MUSCLE_SATELLITE_CELLS, TAF9B_TARGET_GENES, GAVISH_3CA_METAPROGRAM_EPITHELIAL_SECRETED, ATXN7L3_TARGET_GENES, IRX2_TARGET_GENES, TRAVAGLINI_LUNG_MESOTHELIAL_CELL, ZHENG_CORD_BLOOD_C2_PUTATIVE_BASOPHIL_EOSINOPHIL_MAST_CELL_PROGENITOR, DURANTE_ADULT_OLFACTORY_NEUROEPITHELIUM_SUSTENTACULAR_CELLS, GSE4984_UNTREATED_VS_GALECTIN1_TREATED_DC_UP, BUSSLINGER_GASTRIC_IMMATURE_PIT_CELLS, MENON_FETAL_KIDNEY_6_COLLECTING_DUCT_CELLS, FAN_OVARY_CL4_T_LYMPHOCYTE_NK_CELL_1, ZFHX3_TARGET_GENES, BUSSLINGER_GASTRIC_LYZ_POSITIVE_CELLS, FAN_OVARY_CL2_PUTATIVE_EARLY_ATRETIC_FOLLICLE_THECAL_CELL_1, TRAVAGLINI_LUNG_PROLIFERATING_MACROPHAGE_CELL, GSE37416_CTRL_VS_48H_F_TULARENSIS_LVS_NEUTROPHIL_DN, RUBENSTEIN_SKELETAL_MUSCLE_FAP_CELLS, BUSSLINGER_DUODENAL_TRANSIT_AMPLIFYING_CELLS, FAN_OVARY_CL10_PUTATIVE_EARLY_ATRESIA_GRANULOSA_CELL, BUSSLINGER_GASTRIC_PPP1R1B_POSITIVE_CELLS, GSE15330_WT_VS_IKAROS_KO_MEGAKARYOCYTE_ERYTHROID_PROGENITOR_DN, MODULE_83, GAVISH_3CA_METAPROGRAM_EPITHELIAL_EPI_2, MARSON_FOXP3_TARGETS_UP, GAVISH_3CA_MALIGNANT_METAPROGRAM_23_SECRETED_2, GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS, BUSSLINGER_GASTRIC_MATURE_PIT_CELLS, TIEN_INTESTINE_PROBIOTICS_6HR_UP, NFE2L1_TARGET_GENES, FAN_OVARY_CL13_MONOCYTE_MACROPHAGE, CAVARD_LIVER_CANCER_MALIGNANT_VS_BENIGN, KLF7_TARGET_GENES, NAKAMURA_TUMOR_ZONE_PERIPHERAL_VS_CENTRAL_DN, DESCARTES_FETAL_LUNG_CILIATED_EPITHELIAL_CELLS, GAVISH_3CA_METAPROGRAM_EPITHELIAL_V_ATPASE, RUBENSTEIN_SKELETAL_MUSCLE_PCV_ENDOTHELIAL_CELLS, GAVISH_3CA_MALIGNANT_METAPROGRAM_14_EMT_3, TRAVAGLINI_LUNG_PROXIMAL_BASAL_CELL, CHICAS_RB1_TARGETS_LOW_SERUM, BUSSLINGER_ESOPHAGEAL_LATE_SUPRABASAL_CELLS, OLSSON_E2F3_TARGETS_UP, GSE27786_LIN_NEG_VS_BCELL_DN, MORF_ACTG1, TRAVAGLINI_LUNG_CD8_MEMORY_EFFECTOR_T_CELL, ZHENG_CORD_BLOOD_C7_PUTATIVE_LYMPHOID_PRIMED_MULTIPOTENT_PROGENITOR_1, GNF2_TPT1, RODRIGUES_NTN1_AND_DCC_TARGETS, GSE19888_ADENOSINE_A3R_ACT_VS_TCELL_MEMBRANES_ACT_IN_MAST_CELL_DN, GSE2770_TGFB_AND_IL4_VS_TGFB_AND_IL12_TREATED_ACT_CD4_TCELL_48H_UP, GLI4_TARGET_GENES, FAN_OVARY_CL0_XBP1_SELK_HIGH_STROMAL_CELL, TFCP2_TARGET_GENES, BRACHAT_RESPONSE_TO_CISPLATIN, HE_LIM_SUN_FETAL_LUNG_C1_PROXIMAL_SECRETORY_3_CELL, RUBENSTEIN_SKELETAL_MUSCLE_NK_CELLS, CHANDRAN_METASTASIS_UP, MENON_FETAL_KIDNEY_10_IMMUNE_CELLS, GSE2405_0H_VS_24H_A_PHAGOCYTOPHILUM_STIM_NEUTROPHIL_UP, OSMAN_BLADDER_CANCER_UP, TRAVAGLINI_LUNG_MUCOUS_CELL

normal club cell: KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION, TRAVAGLINI_LUNG_CAPILLARY_INTERMEDIATE_2_CELL, DESCARTES_FETAL_EYE_CORNEAL_AND_CONJUNCTIVAL_EPITHELIAL_CELLS, GAVISH_3CA_METAPROGRAM_EPITHELIAL_SECRETED, DURANTE_ADULT_OLFACTORY_NEUROEPITHELIUM_SUSTENTACULAR_CELLS, LAKE_ADULT_KIDNEY_C18_COLLECTING_DUCT_PRINCIPAL_CELLS_MEDULLA, GAVISH_3CA_MALIGNANT_METAPROGRAM_23_SECRETED_2, TRAVAGLINI_LUNG_MUCOUS_CELL, HE_LIM_SUN_FETAL_LUNG_C1_PROXIMAL_SECRETORY_1_CELL, LAKE_ADULT_KIDNEY_C10_THIN_ASCENDING_LIMB, HE_LIM_SUN_FETAL_LUNG_C1_CLUB_CELL, HE_LIM_SUN_FETAL_LUNG_C1_PROXIMAL_SECRETORY_3_CELL, LAKE_ADULT_KIDNEY_C8_DECENDING_THIN_LIMB, MODULE_164, LAKE_ADULT_KIDNEY_C9_THIN_ASCENDING_LIMB, SENGUPTA_NASOPHARYNGEAL_CARCINOMA_DN, HE_LIM_SUN_FETAL_LUNG_C1_PROXIMAL_SECRETORY_2_CELL, FAN_OVARY_CL12_T_LYMPHOCYTE_NK_CELL_2, DURANTE_ADULT_OLFACTORY_NEUROEPITHELIUM_RESPIRATORY_EPITHELIAL_CELLS, LAKE_ADULT_KIDNEY_C19_COLLECTING_DUCT_INTERCALATED_CELLS_TYPE_A_MEDULLA, GSE17580_UNINFECTED_VS_S_MANSONI_INF_TREG_UP, GSE3982_DC_VS_CENT_MEMORY_CD4_TCELL_DN, LIU_PROSTATE_CANCER_DN, GSE24671_CTRL_VS_BAKIMULC_INFECTED_MOUSE_SPLENOCYTES_DN, SCHLINGEMANN_SKIN_CARCINOGENESIS_TPA_UP, WP_PROSTAGLANDIN_SYNTHESIS_AND_REGULATION, GOMF_PHOSPHOLIPASE_INHIBITOR_ACTIVITY, MODULE_433, GOMF_PHOSPHOLIPASE_A2_INHIBITOR_ACTIVITY, MODULE_418, NAKAMURA_BRONCHIAL_AND_BRONCHIOLAR_EPITHELIA, GSE5099_MONOCYTE_VS_ALTERNATIVE_M2_MACROPHAGE_UP, NAKAMURA_LUNG_CANCER_DIFFERENTIATION_MARKERS, LAKE_ADULT_KIDNEY_C12_THICK_ASCENDING_LIMB, DESCARTES_FETAL_HEART_ELF3_AGBL2_POSITIVE_CELLS, MENON_FETAL_KIDNEY_8_CONNECTING_TUBULE_CELLS, GOBP_RESPONSE_TO_OZONE, GSE22033_WT_VS_PPARG_KO_MEF_DN, TRAVAGLINI_LUNG_EREG_DENDRITIC_CELL, MODULE_448, MODULE_404, KRAS.50_UP.V1_DN, GSE21380_NON_TFH_VS_TFH_CD4_TCELL_DN, GOBP_NEGATIVE_REGULATION_OF_INTERLEUKIN_4_PRODUCTION, GSE22229_UNTREATED_VS_IMMUNOSUPP_THERAPY_RENAL_TRANSPLANT_PATIENT_PBMC_UP, DURANTE_ADULT_OLFACTORY_NEUROEPITHELIUM_RESPIRATORY_SECRETORY_CELLS, GSE37532_VISCERAL_ADIPOSE_TISSUE_VS_LN_DERIVED_TREG_CD4_TCELL_UP, GOMF_ENZYME_INHIBITOR_ACTIVITY, GSE3982_DC_VS_MAC_UP, GSE6674_UNSTIM_VS_ANTI_IGM_STIM_BCELL_DN, RUBENSTEIN_SKELETAL_MUSCLE_MYELOID_CELLS, GSE24574_BCL6_LOW_TFH_VS_NAIVE_CD4_TCELL_DN, DLX6_TARGET_GENES, PID_HNF3A_PATHWAY, CTIP_DN.V1_DN, HSIAO_HOUSEKEEPING_GENES, GSE24574_BCL6_HIGH_TFH_VS_TFH_CD4_TCELL_UP, GSE15624_CTRL_VS_6H_HALOFUGINONE_TREATED_CD4_TCELL_UP, GSE36392_EOSINOPHIL_VS_NEUTROPHIL_IL25_TREATED_LUNG_UP, GSE19512_NAUTRAL_VS_INDUCED_TREG_UP, MENON_FETAL_KIDNEY_4_PODOCYTES, GSE5679_CTRL_VS_PPARG_LIGAND_ROSIGLITAZONE_AND_RARA_AGONIST_AM580_TREATED_DC_DN, GSE28737_WT_VS_BCL6_KO_MARGINAL_ZONE_BCELL_UP, GSE22025_UNTREATED_VS_PROGESTERONE_TREATED_CD4_TCELL_UP, LAKE_ADULT_KIDNEY_C23_ENDOTHELIAL_CELLS_AVR, ZHENG_CORD_BLOOD_C2_PUTATIVE_BASOPHIL_EOSINOPHIL_MAST_CELL_PROGENITOR, GSE46606_DAY1_VS_DAY3_CD40L_IL2_IL5_STIMULATED_IRF4_KO_BCELL_UP, AIZARANI_LIVER_C24_EPCAM_POS_BILE_DUCT_CELLS_3, LAKE_ADULT_KIDNEY_C7_PROXIMAL_TUBULE_EPITHELIAL_CELLS_S3, GSE14000_4H_VS_16H_LPS_DC_UP, GSE35543_IN_VIVO_NTREG_VS_CONVERTED_EX_ITREG_DN, KRAS.LUNG_UP.V1_DN, KRAS.300_UP.V1_DN, GSE36476_YOUNG_VS_OLD_DONOR_MEMORY_CD4_TCELL_16H_TSST_ACT_UP, TAKEDA_TARGETS_OF_NUP98_HOXA9_FUSION_16D_DN, GSE41176_UNSTIM_VS_ANTI_IGM_STIM_TAK1_KO_BCELL_6H_DN, DODD_NASOPHARYNGEAL_CARCINOMA_UP, GSE42021_CD24INT_TREG_VS_CD24INT_TCONV_THYMUS_DN, GSE19401_PAM2CSK4_VS_RETINOIC_ACID_AND_PAM2CSK4_STIM_FOLLICULAR_DC_UP, HALLMARK_PEROXISOME, GOMF_MOLECULAR_FUNCTION_INHIBITOR_ACTIVITY, YAMASHITA_METHYLATED_IN_PROSTATE_CANCER, GSE9650_EXHAUSTED_VS_MEMORY_CD8_TCELL_UP, GSE2585_CTEC_VS_THYMIC_MACROPHAGE_DN, GSE42021_TCONV_PLN_VS_CD24LO_TCONV_THYMUS_UP, MODULE_6, TTCYRGAA_UNKNOWN, GSE42021_TCONV_PLN_VS_CD24INT_TCONV_THYMUS_UP, ATXN7L3_TARGET_GENES, GAVISH_3CA_METAPROGRAM_EPITHELIAL_EPI_2, GSE4984_UNTREATED_VS_GALECTIN1_TREATED_DC_UP, GOBP_RESPONSE_TO_FIBROBLAST_GROWTH_FACTOR, GOBP_EMBRYO_IMPLANTATION

normal lung neuroendocrine cell: KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION, FAN_OVARY_CL12_T_LYMPHOCYTE_NK_CELL_2, MENON_FETAL_KIDNEY_8_CONNECTING_TUBULE_CELLS, MENON_FETAL_KIDNEY_4_PODOCYTES, ZHENG_CORD_BLOOD_C2_PUTATIVE_BASOPHIL_EOSINOPHIL_MAST_CELL_PROGENITOR, IRX2_TARGET_GENES, BRACHAT_RESPONSE_TO_CISPLATIN, MARSON_FOXP3_TARGETS_UP, FAN_OVARY_CL16_LYMPHATIC_ENDOTHELIAL_CELL, GSE15330_WT_VS_IKAROS_KO_MEGAKARYOCYTE_ERYTHROID_PROGENITOR_DN, OLSSON_E2F3_TARGETS_UP, ATXN7L3_TARGET_GENES, GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS, GAVISH_3CA_METAPROGRAM_EPITHELIAL_V_ATPASE, TFCP2_TARGET_GENES, GSE19888_ADENOSINE_A3R_ACT_VS_TCELL_MEMBRANES_ACT_IN_MAST_CELL_DN, ZHENG_CORD_BLOOD_C7_PUTATIVE_LYMPHOID_PRIMED_MULTIPOTENT_PROGENITOR_1, PARK_HSC_VS_MULTIPOTENT_PROGENITORS_UP, CHANDRAN_METASTASIS_TOP50_DN, STAMBOLSKY_TARGETS_OF_MUTATED_TP53_UP, CHICAS_RB1_TARGETS_LOW_SERUM, GOBP_POSITIVE_REGULATION_OF_MYOBLAST_PROLIFERATION, TAF9B_TARGET_GENES, RODRIGUES_NTN1_AND_DCC_TARGETS, CAVARD_LIVER_CANCER_MALIGNANT_VS_BENIGN, CHANDRAN_METASTASIS_UP, RUBENSTEIN_SKELETAL_MUSCLE_FAP_CELLS, MAP2K1_TARGET_GENES, GSE16450_CTRL_VS_IFNA_6H_STIM_MATURE_NEURON_CELL_LINE_DN, GAVISH_3CA_MALIGNANT_METAPROGRAM_41_UNASSIGNED, GSE27786_LIN_NEG_VS_BCELL_DN, GAVISH_3CA_METAPROGRAM_FIBROBLASTS_CAF_4, GSE4984_UNTREATED_VS_GALECTIN1_TREATED_DC_UP, ZNF236_TARGET_GENES, DLX6_TARGET_GENES, STAMBOLSKY_RESPONSE_TO_VITAMIN_D3_UP, TRAVAGLINI_LUNG_CD8_MEMORY_EFFECTOR_T_CELL, HOWLIN_CITED1_TARGETS_2_UP, CEBPE_TARGET_GENES, GSE27786_LSK_VS_NEUTROPHIL_DN, GAVISH_3CA_MALIGNANT_METAPROGRAM_10_PROTEIN_MATURATION, BRUINS_UVC_RESPONSE_VIA_TP53_GROUP_C, GAVISH_3CA_METAPROGRAM_MACROPHAGES_STRESS_HSP, GSE8685_IL2_STARVED_VS_IL15_ACT_IL2_STARVED_CD4_TCELL_UP, GSE22589_HEALTHY_VS_SIV_INFECTED_DC_UP, GSE17721_CTRL_VS_POLYIC_1H_BMDC_DN, GSE27786_LSK_VS_CD4_TCELL_DN, SOX11_TARGET_GENES, GSE5589_WT_VS_IL10_KO_LPS_STIM_MACROPHAGE_45MIN_DN, ZSCAN5C_TARGET_GENES, GSE7509_FCGRIIB_VS_TNFA_IL1B_IL6_PGE_STIM_DC_DN, GSE40273_EOS_KO_VS_WT_TREG_UP, GSE2770_TGFB_AND_IL4_VS_TGFB_AND_IL12_TREATED_ACT_CD4_TCELL_48H_UP, FURUKAWA_DUSP6_TARGETS_PCI35_UP, IWANAGA_CARCINOGENESIS_BY_KRAS_DN, HOWLIN_CITED1_TARGETS_1_UP, MXD1_TARGET_GENES, GSE37416_CTRL_VS_48H_F_TULARENSIS_LVS_NEUTROPHIL_DN, WHITFIELD_CELL_CYCLE_G2, GSE27786_LSK_VS_BCELL_DN, HERNANDEZ_MITOTIC_ARREST_BY_DOCETAXEL_1_UP, GSE5099_UNSTIM_VS_MCSF_TREATED_MONOCYTE_DAY3_UP, GSE14308_TH1_VS_TH17_DN, TRAVAGLINI_LUNG_PROXIMAL_CILIATED_CELL, GAVIN_FOXP3_TARGETS_CLUSTER_P3, GSE23308_CTRL_VS_CORTICOSTERONE_TREATED_MACROPHAGE_MINERALCORTICOID_REC_KO_DN, MCBRYAN_PUBERTAL_BREAST_4_5WK_DN, PKCA_DN.V1_DN, GSE41867_DAY6_VS_DAY8_LCMV_CLONE13_EFFECTOR_CD8_TCELL_DN, TRAVAGLINI_LUNG_CD8_NAIVE_T_CELL, GSE5542_UNTREATED_VS_IFNA_AND_IFNG_TREATED_EPITHELIAL_CELLS_24H_UP, FAN_OVARY_CL17_PUTATIVE_APOPTOTIC_SMOOTH_MUSCLE_CELL, GSE42724_B1_BCELL_VS_PLASMABLAST_UP, GSE17721_POLYIC_VS_GARDIQUIMOD_16H_BMDC_UP, OSMAN_BLADDER_CANCER_UP, GSE11924_TH1_VS_TH17_CD4_TCELL_UP, GSE5589_LPS_AND_IL10_VS_LPS_AND_IL6_STIM_IL10_KO_MACROPHAGE_45MIN_UP, GSE17721_LPS_VS_POLYIC_4H_BMDC_DN, GSE25087_FETAL_VS_ADULT_TCONV_DN, GSE32986_UNSTIM_VS_GMCSF_AND_CURDLAN_HIGHDOSE_STIM_DC_DN, EPC1_TARGET_GENES, GSE13493_DP_VS_CD4INTCD8POS_THYMOCYTE_UP, GAUTAM_EYE_IRIS_CILIARY_BODY_PUTATIVE_STEM_CELLS, LAKE_ADULT_KIDNEY_C18_COLLECTING_DUCT_PRINCIPAL_CELLS_MEDULLA, GSE32901_NAIVE_VS_TH17_ENRICHED_CD4_TCELL_UP, ZHENG_CORD_BLOOD_C10_MULTILYMPHOID_PROGENITOR, GSE17721_LPS_VS_POLYIC_1H_BMDC_DN, CIC_TARGET_GENES, COATES_MACROPHAGE_M1_VS_M2_UP, NAKAYA_PBMC_FLUARIX_FLUVIRIN_AGE_18_50YO_CORRELATED_WITH_HAI_28DY_RESPONSE_AT_3DY_NEGATIVE, MCBRYAN_PUBERTAL_BREAST_5_6WK_DN, GSE17721_LPS_VS_POLYIC_1H_BMDC_DN, LAKE_ADULT_KIDNEY_C18_COLLECTING_DUCT_PRINCIPAL_CELLS_MEDULLA, BLANCO_MELO_COVID19_SARS_COV_2_LOW_MOI_INFECTION_A594_ACE2_EXPRESSING_CELLS_UP

normal pulmonary ionocyte: KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION, FAN_OVARY_CL12_T_LYMPHOCYTE_NK_CELL_2, ZHENG_CORD_BLOOD_C7_PUTATIVE_LYMPHOID_PRIMED_MULTIPOTENT_PROGENITOR_1, BRACHAT_RESPONSE_TO_CISPLATIN, ZHENG_CORD_BLOOD_C2_PUTATIVE_BASOPHIL_EOSINOPHIL_MAST_CELL_PROGENITOR, IRX2_TARGET_GENES, MENON_FETAL_KIDNEY_4_PODOCYTES, GSE19888_ADENOSINE_A3R_ACT_VS_TCELL_MEMBRANES_ACT_IN_MAST_CELL_DN, RODRIGUES_NTN1_AND_DCC_TARGETS, CHANDRAN_METASTASIS_UP, OLSSON_E2F3_TARGETS_UP, TAF9B_TARGET_GENES, GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS, GAVISH_3CA_METAPROGRAM_EPITHELIAL_V_ATPASE, PARK_HSC_VS_MULTIPOTENT_PROGENITORS_UP, GAVISH_3CA_METAPROGRAM_FIBROBLASTS_CAF_4, ZNF236_TARGET_GENES, TFCP2_TARGET_GENES, CHICAS_RB1_TARGETS_LOW_SERUM, DLX6_TARGET_GENES, GAVISH_3CA_MALIGNANT_METAPROGRAM_41_UNASSIGNED, STAMBOLSKY_TARGETS_OF_MUTATED_TP53_UP, CAVARD_LIVER_CANCER_MALIGNANT_VS_BENIGN, ATXN7L3_TARGET_GENES, GOBP_POSITIVE_REGULATION_OF_MYOBLAST_PROLIFERATION, MAP2K1_TARGET_GENES, CHANDRAN_METASTASIS_TOP50_DN, STAMBOLSKY_RESPONSE_TO_VITAMIN_D3_UP, TRAVAGLINI_LUNG_CD8_MEMORY_EFFECTOR_T_CELL, MARSON_FOXP3_TARGETS_UP, MENON_FETAL_KIDNEY_8_CONNECTING_TUBULE_CELLS, GSE27786_LIN_NEG_VS_BCELL_DN, GSE27786_LSK_VS_NEUTROPHIL_DN, GSE27786_LSK_VS_CD4_TCELL_DN, GAVISH_3CA_MALIGNANT_METAPROGRAM_10_PROTEIN_MATURATION, SOX11_TARGET_GENES, MCBRYAN_PUBERTAL_BREAST_4_5WK_DN, GSE22589_HEALTHY_VS_SIV_INFECTED_DC_UP, RUBENSTEIN_SKELETAL_MUSCLE_FAP_CELLS, HOWLIN_CITED1_TARGETS_2_UP, BRUINS_UVC_RESPONSE_VIA_TP53_GROUP_C, ZSCAN5C_TARGET_GENES, GSE4984_UNTREATED_VS_GALECTIN1_TREATED_DC_UP, GSE8685_IL2_STARVED_VS_IL15_ACT_IL2_STARVED_CD4_TCELL_UP, CEBPE_TARGET_GENES, GSE7509_FCGRIIB_VS_TNFA_IL1B_IL6_PGE_STIM_DC_DN, GSE5589_WT_VS_IL10_KO_LPS_STIM_MACROPHAGE_45MIN_DN, GSE15330_WT_VS_IKAROS_KO_MEGAKARYOCYTE_ERYTHROID_PROGENITOR_DN, GAVISH_3CA_METAPROGRAM_MACROPHAGES_STRESS_HSP, GSE17721_CTRL_VS_POLYIC_1H_BMDC_DN, GSE2770_TGFB_AND_IL4_VS_TGFB_AND_IL12_TREATED_ACT_CD4_TCELL_48H_UP, GSE37416_CTRL_VS_48H_F_TULARENSIS_LVS_NEUTROPHIL_DN, MXD1_TARGET_GENES, GSE40273_EOS_KO_VS_WT_TREG_UP, IWANAGA_CARCINOGENESIS_BY_KRAS_DN, FAN_OVARY_CL16_LYMPHATIC_ENDOTHELIAL_CELL, HOWLIN_CITED1_TARGETS_1_UP, HERNANDEZ_MITOTIC_ARREST_BY_DOCETAXEL_1_UP, GSE16450_CTRL_VS_IFNA_6H_STIM_MATURE_NEURON_CELL_LINE_DN, GSE14308_TH1_VS_TH17_DN, GSE27786_LSK_VS_BCELL_DN, FURUKAWA_DUSP6_TARGETS_PCI35_UP, PKCA_DN.V1_DN, WHITFIELD_CELL_CYCLE_G2, GSE5542_UNTREATED_VS_IFNA_AND_IFNG_TREATED_EPITHELIAL_CELLS_24H_UP, GSE5099_UNSTIM_VS_MCSF_TREATED_MONOCYTE_DAY3_UP, GSE41867_DAY6_VS_DAY8_LCMV_CLONE13_EFFECTOR_CD8_TCELL_DN, GSE23308_CTRL_VS_CORTICOSTERONE_TREATED_MACROPHAGE_MINERALCORTICOID_REC_KO_DN, GSE13493_DP_VS_CD4INTCD8POS_THYMOCYTE_UP, TRAVAGLINI_LUNG_CD8_NAIVE_T_CELL, GSE11924_TH1_VS_TH17_CD4_TCELL_UP, CIC_TARGET_GENES, GSE42724_B1_BCELL_VS_PLASMABLAST_UP, MCBRYAN_PUBERTAL_BREAST_5_6WK_DN, FAN_OVARY_CL17_PUTATIVE_APOPTOTIC_SMOOTH_MUSCLE_CELL, ZHENG_CORD_BLOOD_C10_MULTILYMPHOID_PROGENITOR, NCOA6_TARGET_GENES, GSE25087_FETAL_VS_ADULT_TCONV_DN, BLANCO_MELO_COVID19_SARS_COV_2_LOW_MOI_INFECTION_A594_ACE2_EXPRESSING_CELLS_UP, COATES_MACROPHAGE_M1_VS_M2_UP, GSE43863_DAY6_EFF_VS_DAY150_MEM_LY6C_INT_CXCR5POS_CD4_TCELL_UP, GSE17721_LPS_VS_POLYIC_4H_BMDC_DN, GSE17721_POLYIC_VS_GARDIQUIMOD_16H_BMDC_UP, OSMAN_BLADDER_CANCER_UP, KLF7_TARGET_GENES, GSE32901_NAIVE_VS_TH17_ENRICHED_CD4_TCELL_UP, GAVIN_FOXP3_TARGETS_CLUSTER_P3, NAKAYA_PBMC_FLUARIX_FLUVIRIN_AGE_18_50YO_CORRELATED_WITH_HAI_28DY_RESPONSE_AT_3DY_NEGATIVE, GSE5589_LPS_AND_IL10_VS_LPS_AND_IL6_STIM_IL10_KO_MACROPHAGE_45MIN_UP, GSE32986_UNSTIM_VS_GMCSF_AND_CURDLAN_HIGHDOSE_STIM_DC_DN, EPC1_TARGET_GENES, GSE5099_DAY3_VS_DAY7_MCSF_TREATED_MACROPHAGE_DN, STARK_PREFRONTAL_CORTEX_22Q11_DELETION_UP

normal somatic cell: KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION, TRAVAGLINI_LUNG_CAPILLARY_INTERMEDIATE_2_CELL, LAKE_ADULT_KIDNEY_C18_COLLECTING_DUCT_PRINCIPAL_CELLS_MEDULLA, LAKE_ADULT_KIDNEY_C10_THIN_ASCENDING_LIMB, LAKE_ADULT_KIDNEY_C8_DECENDING_THIN_LIMB, LAKE_ADULT_KIDNEY_C19_COLLECTING_DUCT_INTERCALATED_CELLS_TYPE_A_MEDULLA, LAKE_ADULT_KIDNEY_C9_THIN_ASCENDING_LIMB, DESCARTES_FETAL_EYE_CORNEAL_AND_CONJUNCTIVAL_EPITHELIAL_CELLS, LAKE_ADULT_KIDNEY_C12_THICK_ASCENDING_LIMB, RUBENSTEIN_SKELETAL_MUSCLE_MYELOID_CELLS, HSIAO_HOUSEKEEPING_GENES, FAN_OVARY_CL12_T_LYMPHOCYTE_NK_CELL_2, PECE_MAMMARY_STEM_CELL_UP, LAKE_ADULT_KIDNEY_C7_PROXIMAL_TUBULE_EPITHELIAL_CELLS_S3, GAVISH_3CA_METAPROGRAM_EPITHELIAL_SECRETED, TIEN_INTESTINE_PROBIOTICS_24HR_DN, BUSSLINGER_DUODENAL_STEM_CELLS, LAKE_ADULT_KIDNEY_C23_ENDOTHELIAL_CELLS_AVR, DURANTE_ADULT_OLFACTORY_NEUROEPITHELIUM_SUSTENTACULAR_CELLS, DLX6_TARGET_GENES, BUSSLINGER_GASTRIC_ISTHMUS_CELLS, BUSSLINGER_DUODENAL_DIFFERENTIATING_STEM_CELLS, BUSSLINGER_GASTRIC_LYZ_POSITIVE_CELLS, RUBENSTEIN_SKELETAL_MUSCLE_SATELLITE_CELLS, RUBENSTEIN_SKELETAL_MUSCLE_FBN1_FAP_CELLS, TRAVAGLINI_LUNG_MESOTHELIAL_CELL, TRAVAGLINI_LUNG_PROXIMAL_BASAL_CELL, GAVISH_3CA_MALIGNANT_METAPROGRAM_23_SECRETED_2, AIZARANI_LIVER_C24_EPCAM_POS_BILE_DUCT_CELLS_3, BUSSLINGER_GASTRIC_PPP1R1B_POSITIVE_CELLS, SENGUPTA_NASOPHARYNGEAL_CARCINOMA_DN, MENON_FETAL_KIDNEY_8_CONNECTING_TUBULE_CELLS, BUSSLINGER_ESOPHAGEAL_LATE_SUPRABASAL_CELLS, HE_LIM_SUN_FETAL_LUNG_C1_PROXIMAL_SECRETORY_3_CELL, TRAVAGLINI_LUNG_MUCOUS_CELL, MODULE_164, BUSSLINGER_GASTRIC_IMMATURE_PIT_CELLS, GNF2_TPT1, GSE3982_DC_VS_CENT_MEMORY_CD4_TCELL_DN, HE_LIM_SUN_FETAL_LUNG_C1_PROXIMAL_SECRETORY_1_CELL, BUSSLINGER_DUODENAL_TRANSIT_AMPLIFYING_CELLS, FAN_OVARY_CL2_PUTATIVE_EARLY_ATRETIC_FOLLICLE_THECAL_CELL_1, TIEN_INTESTINE_PROBIOTICS_6HR_UP, HE_LIM_SUN_FETAL_LUNG_C1_CLUB_CELL, GAVISH_3CA_METAPROGRAM_EPITHELIAL_EPI_2, MENON_FETAL_KIDNEY_6_COLLECTING_DUCT_CELLS, MXD1_TARGET_GENES, RUBENSTEIN_SKELETAL_MUSCLE_PCV_ENDOTHELIAL_CELLS, TRAVAGLINI_LUNG_CLUB_CELL, NAKAMURA_TUMOR_ZONE_PERIPHERAL_VS_CENTRAL_DN, DODD_NASOPHARYNGEAL_CARCINOMA_UP, MENON_FETAL_KIDNEY_4_PODOCYTES, GSE24671_CTRL_VS_BAKIMULC_INFECTED_MOUSE_SPLENOCYTES_DN, HE_LIM_SUN_FETAL_LUNG_C1_PROXIMAL_SECRETORY_2_CELL, TAF9B_TARGET_GENES, MODULE_83, MORF_ACTG1, FAN_OVARY_CL0_XBP1_SELK_HIGH_STROMAL_CELL, TRAVAGLINI_LUNG_EREG_DENDRITIC_CELL, RUBENSTEIN_SKELETAL_MUSCLE_T_CELLS, DESCARTES_FETAL_LUNG_SQUAMOUS_EPITHELIAL_CELLS, ATXN7L3_TARGET_GENES, TRAVAGLINI_LUNG_PROXIMAL_CILIATED_CELL, MORF_NPM1, TRAVAGLINI_LUNG_DIFFERENTIATING_BASAL_CELL, GSE16450_CTRL_VS_IFNA_6H_STIM_MATURE_NEURON_CELL_LINE_DN, BUSSLINGER_GASTRIC_MATURE_PIT_CELLS, ZFHX3_TARGET_GENES, GSE4984_UNTREATED_VS_GALECTIN1_TREATED_DC_UP, GAVISH_3CA_MALIGNANT_METAPROGRAM_14_EMT_3, MURARO_PANCREAS_ACINAR_CELL, FAN_OVARY_CL13_MONOCYTE_MACROPHAGE, FAN_OVARY_CL16_LYMPHATIC_ENDOTHELIAL_CELL, ZHENG_CORD_BLOOD_C2_PUTATIVE_BASOPHIL_EOSINOPHIL_MAST_CELL_PROGENITOR, DURANTE_ADULT_OLFACTORY_NEUROEPITHELIUM_RESPIRATORY_COLUMNAR_CELLS, MENON_FETAL_KIDNEY_10_IMMUNE_CELLS, FAN_OVARY_CL4_T_LYMPHOCYTE_NK_CELL_1, RUBENSTEIN_SKELETAL_MUSCLE_B_CELLS, IRX2_TARGET_GENES, GSE2405_0H_VS_24H_A_PHAGOCYTOPHILUM_STIM_NEUTROPHIL_UP, GSE15930_STIM_VS_STIM_AND_TRICHOSTATINA_48H_CD8_T_CELL_UP, GSE14000_TRANSLATED_RNA_VS_MRNA_4H_LPS_DC_DN, DURANTE_ADULT_OLFACTORY_NEUROEPITHELIUM_RESPIRATORY_EPITHELIAL_CELLS, WP_PROSTAGLANDIN_SYNTHESIS_AND_REGULATION, NFE2L1_TARGET_GENES, GSE37416_CTRL_VS_48H_F_TULARENSIS_LVS_NEUTROPHIL_DN, GAVISH_3CA_MALIGNANT_METAPROGRAM_19_EPITHELIAL_SENESCENCE, GNATENKO_PLATELET_SIGNATURE, RUBENSTEIN_SKELETAL_MUSCLE_FAP_CELLS, GOMF_ENZYME_INHIBITOR_ACTIVITY, MORF_TPT1, MODULE_114, RUBENSTEIN_SKELETAL_MUSCLE_NK_CELLS, GAVISH_3CA_MALIGNANT_METAPROGRAM_14_EMT_3, DURANTE_ADULT_OLFACTORY_NEUROEPITHELIUM_RESPIRATORY_EPITHELIAL_CELLS, MODULE_114, HE_LIM_SUN_FETAL_LUNG_C1_MID_BASAL_CELL, AIZARANI_LIVER_C7_EPCAM_POS_BILE_DUCT_CELLS_2, MANNO_MIDBRAIN_NEUROTYPES_BASAL, FAN_OVARY_CL10_PUTATIVE_EARLY_ATRESIA_GRANULOSA_CELL

"""

In [73]:
dataset_name = [k for k,v in meta_data.items() if f'({dataset_index})' in v.get('filename', '')]

if len(dataset_name) != 1:
    print("dataset index: ", dataset_index)
    print(dataset_name)
    print(f"dataset \"{dataset_name}\" not in meta_data!")
else:
    dataset_name = dataset_name[0]
    cur_url = meta_data[dataset_name]['url']
    dataset_file_path = [path for path in datasets_files_paths if f"({dataset_index})" in path][0]
    dataset_df = pd.read_csv(dataset_file_path)
    dataset_in_prompt, sampled_indices = dataset_for_prompt(dataset_df)
    print("Dataset Index: ", dataset_index)
    print("\nDataset Name: ", dataset_name)
    print("\nPublication URL: ", cur_url)
    print("\nPrompt:\n", prompt_prefix + "Dataset: \n" + dataset_in_prompt + "Geneset Enrichment Results:\n" + filtered_geneset + "Manuscript:\n")

Dataset Index:  778

Dataset Name:  Single-cell longitudinal analysis of SARS-CoV-2 infection in human bronchial epithelial cells

Publication URL:  https://cellxgene.cziscience.com/collections/2a79d190-a41e-4408-88c8-ac5c4d03c0fc

Prompt:
 This is a scientific manuscript, an example of the single-cell RNA sequencing dataset it is associated with, and results of Gene Set Enrichment Analysis to that dataset. The example dataset contains representative cells from each type and the corresponding cell sentence(ranked gene names by expression). The gene set enrichment analysis results are the top enriched gene sets for each cell type in the dataset.
You need to create question-answer pairs for Large Language Models to learn reasoning and analyzing gene expression datasets when given pieces of data converted into cell sentences. I would like questions and answers and reasonings about the given cell sentence in terms of cells, tissues, disease, or other relevant biological information. 
The q

Just copy paste the output into the next cell, shouldn't need to do additional processing

In [74]:
output = """
<|Context|>In {ciliated cell of the bronchus gene expression} with COVID-19, numerous mitochondrial and ribosomal genes appear highly ranked. Some enriched gene sets include TRAVAGLINI_LUNG_CILIATED_CELL and KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION. <|Question|>How might the observed high expression of ribosomal genes in these infected ciliated cells relate to viral replication? <|Chain of Thoughts|>These ciliated cells show strong expression of ribosomal transcripts, aligning with a gene set promoting translation initiation. This could enhance the formation of viral proteins. Because KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION points to active protein biosynthesis, cells that upregulate ribosomal machinery may facilitate robust viral replication. <|Answer|>Infected ciliated cells with heightened ribosomal gene expression are more capable of generating viral proteins, promoting efficient replication of the pathogen. By leveraging an active protein synthesis environment, viral components can be rapidly produced, supporting robust infection cycles within these cells. <|Keyword|>ribosomal genes, KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION

<|Context|>In {pulmonary ionocyte cell gene expression} from a COVID-19 sample, genes like MT-ND2 and MT-ND1 appear among the top. Gene sets such as IRX2_TARGET_GENES are enriched. <|Question|>Could increased mitochondrial gene expression in these pulmonary ionocytes indicate heightened metabolic activity supporting cellular stress responses? <|Chain of Thoughts|>Mitochondrial genes like MT-ND2 and MT-ND1 suggest robust oxidative phosphorylation. IRX2_TARGET_GENES enrichment implies possible regulatory pathways. If metabolic processes rise, the cell may respond more vigorously to infection-induced stress, potentially influencing ion transport functions central to ionocytes. <|Answer|>Yes. Elevated mitochondrial gene expression indicates a surge in ATP production and energy metabolism, likely aiding stress responses and sustaining the ionocyte’s specialized roles in ion homeostasis. This metabolic boost may be critical for adapting to the demands of viral infection. <|Keyword|>yes, MT-ND2, IRX2_TARGET_GENES

<|Context|>In {bronchial goblet cell gene expression} classified as normal, numerous ribosomal proteins (RPL41, RPS27) rank highly. The gene set GSE27786_LIN_NEG_VS_BCELL_DN is also enriched. <|Question|>Do these normal goblet cells show any indication of heightened immune signaling based on the expression of pro-inflammatory mediators? <|Chain of Thoughts|>This subset predominantly shows housekeeping ribosomal transcripts. GSE27786_LIN_NEG_VS_BCELL_DN references lymphocyte dynamics but does not strongly imply an active immune response. No major cytokines like IL-6 or TNF appear at high rank. Hence, minimal inflammatory signaling is likely. <|Answer|>No. The goblet cells here mainly exhibit structural and housekeeping gene expression patterns, with no marked pro-inflammatory transcripts. This implies a baseline or resting state rather than an immune-alert condition in normal goblet cells. <|Keyword|>no, goblet cells, ribosomal proteins

<|Context|>In {lung neuroendocrine cell gene expression} under normal conditions, transcripts like TFF3 and IGFBP5 dominate. Gene sets like TAF9B_TARGET_GENES are enriched. <|Question|>What could the high presence of TFF3 suggest about the protective or secretory role of these neuroendocrine cells in normal airway tissue? <|Chain of Thoughts|>TFF3 is often linked to mucosal protection and repair. Neuroendocrine cells expressing TFF3 may help maintain airway lining integrity. TAF9B_TARGET_GENES might modulate transcription, supporting secretory functions. Together, these factors imply a supportive role in homeostasis. <|Answer|>Elevated TFF3 expression in neuroendocrine cells can reinforce barrier stability and foster protective secretory functions. By releasing factors that stabilize mucosal surfaces, these cells contribute to the airway’s normal defense and repair mechanisms. <|Keyword|>TFF3, neuroendocrine, secretory function

<|Context|>In {somatic cell gene expression} with COVID-19, we see S100A6, SCGB1A1, and cilia-related transcripts. Gene sets like GAVISH_3CA_METAPROGRAM_EPITHELIAL_SECRETED show enrichment. <|Question|>Might SCGB1A1 upregulation in these somatic cells correlate with secretory processes aiding mucosal protection during infection? <|Chain of Thoughts|>SCGB1A1 (secretoglobin) is known for immunomodulatory roles. GAVISH_3CA_METAPROGRAM_EPITHELIAL_SECRETED suggests heavy secreted protein activity. This could strengthen antimicrobial defenses or modulate inflammation in COVID-19 airway cells, aiding epithelial barrier defense. <|Answer|>Yes. Increased SCGB1A1 aligns with enhanced secretory and immunoregulatory capabilities, potentially helping the epithelial surface maintain protective barriers under viral threat. This molecule’s known anti-inflammatory properties may help mitigate mucosal damage. <|Keyword|>yes, SCGB1A1, GAVISH_3CA_METAPROGRAM_EPITHELIAL_SECRETED

<|Context|>The {bronchial goblet cell gene expression} with COVID-19 includes MALAT1 and S100A6, with KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION listed among top gene sets. <|Question|>Why might these goblet cells have concurrent high S100A6 and translation-initiation signals under viral infection? <|Chain of Thoughts|>S100A6, a calcium-binding protein, can be upregulated in stress or cell-cycle events. Coupled with KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION, it indicates robust protein synthesis demands. Likely the cell is ramping up production of secretory or stress-related proteins as part of an antiviral or repair process. <|Answer|>Goblet cells may be reacting to viral stress by elevating protein synthesis pathways. S100A6, often linked to cell survival, may complement heightened translation to produce protective mediators or accelerate cellular regeneration during infection. <|Keyword|>S100A6, KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION

<|Context|>In {lung neuroendocrine cell gene expression} with COVID-19, TFF3, GRP, and CALCA stand out. The gene set FAN_OVARY_CL16_LYMPHATIC_ENDOTHELIAL_CELL is enriched unexpectedly. <|Question|>Could the presence of GRP and CALCA in lung neuroendocrine cells indicate a role in modulating vascular or immune interactions during COVID-19? <|Chain of Thoughts|>GRP (gastrin-releasing peptide) and CALCA (calcitonin-related) are neuropeptides that may influence local blood flow or inflammatory cell recruitment. The enriched gene set referencing lymphatic or endothelial contexts suggests crosstalk with vascular compartments, potentially shaping local immune infiltration. <|Answer|>Yes. These neuropeptides may orchestrate tissue perfusion and inflammation, helping regulate immune cell movement and vascular tone. Their expression could be key in bridging neural signals with immune responses in the infected airway environment. <|Keyword|>yes, GRP, CALCA, FAN_OVARY_CL16_LYMPHATIC_ENDOTHELIAL_CELL

<|Context|>In {brush cell of bronchus gene expression} with COVID-19, the top genes include MALAT1, TMSB4X, and SOX4. A major enriched set is DLX6_TARGET_GENES. <|Question|>Does high TMSB4X expression in these infected brush cells point to changes in actin organization possibly linked to secretory modifications? <|Chain of Thoughts|>TMSB4X helps sequester actin monomers. Brush cells rely on specialized microvilli for chemosensory function. DLX6_TARGET_GENES might guide differentiation signals. Elevated TMSB4X could enhance cytoskeletal restructuring during infection, influencing brush cell secretions or sensory responses. <|Answer|>Yes. Increased TMSB4X potentially drives cytoskeletal remodeling, which in turn may affect how brush cells perform their specialized sensory or secretory functions under COVID-19 stress. <|Keyword|>yes, TMSB4X, DLX6_TARGET_GENES

<|Context|>In {ciliated cell of the bronchus gene expression} with normal status, MALAT1 and FTH1 appear frequently, with TRAVAGLINI_LUNG_CILIATED_CELL and HSIAO_HOUSEKEEPING_GENES enriched. <|Question|>What does the consistent presence of FTH1 imply for iron regulation in normal ciliated cells? <|Chain of Thoughts|>FTH1 encodes ferritin heavy chain, crucial for iron storage. TRAVAGLINI_LUNG_CILIATED_CELL set confirms typical ciliated features. FTH1 expression suggests that normal ciliated cells tightly regulate iron homeostasis, possibly preventing oxidative damage and sustaining ciliary integrity. <|Answer|>Normal ciliated cells appear to maintain careful iron storage via FTH1, likely to control oxidative stress and preserve healthy ciliary function vital for airway clearance. <|Keyword|>FTH1, iron regulation, ciliated cells

<|Context|>Within {club cell gene expression} in COVID-19, SCGB3A1 and SLPI stand out, while GAVISH_3CA_METAPROGRAM_EPITHELIAL_SECRETED is enriched. <|Question|>Could SLPI (secretory leukocyte protease inhibitor) elevation suggest an enhanced anti-inflammatory response among these infected club cells? <|Chain of Thoughts|>Club cells secrete protective molecules, and SLPI can neutralize proteolytic enzymes, reducing tissue damage. GAVISH_3CA_METAPROGRAM_EPITHELIAL_SECRETED indicates robust secretory pathways. Thus, increased SLPI might be a protective measure against inflammatory damage, consistent with an anti-inflammatory role. <|Answer|>Yes. Heightened SLPI in club cells likely bolsters the local airway defense, limiting excessive proteolysis and curbing uncontrolled inflammation, thereby safeguarding epithelial integrity during infection. <|Keyword|>yes, SLPI, anti-inflammatory response

<|Context|>In {pulmonary ionocyte cell gene expression} with normal tissue, FTH1 and TMSB4X rank high. Among enriched sets is DLX6_TARGET_GENES. <|Question|>Why might pulmonary ionocytes exhibit both high TMSB4X and strong iron storage signals through FTH1 in a normal environment? <|Chain of Thoughts|>Ionocytes regulate ion transport, requiring stable intracellular conditions. TMSB4X can support cytoskeletal stability. FTH1 ensures iron homeostasis. Together with DLX6_TARGET_GENES, this hints at a specialized strategy to preserve cellular integrity and possibly modulate oxygen or metal balance in normal airways. <|Answer|>Their elevated iron storage and cytoskeletal organization likely promote stable ionic regulation and protect cells from oxidative stress, ensuring a finely tuned environment necessary for normal ionocyte function. <|Keyword|>TMSB4X, FTH1, ionocyte

<|Context|>The {ciliated cell of the bronchus gene expression} from a COVID-19 sample includes S100A11, KRT19, and strong hits in GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS. <|Question|>Does upregulated S100A11 in these ciliated cells imply they are undergoing a heightened stress or damage response? <|Chain of Thoughts|>S100A11 modulates cytoskeletal interactions and is often linked to cellular damage or stress. With GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS enriched, these ciliated cells likely respond to infection by elevating S100A11 to mitigate or signal injury, possibly adjusting ciliary function. <|Answer|>Yes. The elevated S100A11 suggests a stress response aimed at stabilizing the epithelial barrier, indicating an attempt by ciliated cells to counteract infection-induced damage. <|Keyword|>yes, S100A11, GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS

<|Context|>Examining {somatic cell gene expression} under normal conditions reveals strong expression of S100A6 and SCGB1A1, plus GNF2_TPT1 enrichment. <|Question|>What significance could TPT1 hold in these normal somatic cells’ maintenance or secretory roles? <|Chain of Thoughts|>TPT1 (translationally controlled tumor protein) is associated with cell growth and stress tolerance. GNF2_TPT1 suggests it’s actively regulated, possibly stabilizing intracellular processes. Coupled with SCGB1A1’s secretory function, TPT1 may aid in fundamental housekeeping or protective responses. <|Answer|>Elevated TPT1 likely supports core metabolic and stress regulation, helping normal somatic cells maintain stable secretory capabilities and robust cellular housekeeping processes. <|Keyword|>TPT1, secretory roles, normal somatic cell

<|Context|>In {basal cell of epithelium of bronchus gene expression} with COVID-19, we see S100A2, KRT19, and gene sets like CHANDRAN_METASTASIS_UP enriched. <|Question|>Could the presence of S100A2 and KRT19 reflect remodeling of basal cells during bronchial epithelial injury in COVID-19? <|Chain of Thoughts|>S100A2 can modulate cell growth, and KRT19 is a structural keratin. CHANDRAN_METASTASIS_UP might denote reorganization signals. These factors together hint at basal cells transitioning or repairing damage, possibly indicative of a regenerative or altered state. <|Answer|>Yes. Their combined expression aligns with structural changes and reparative behaviors, suggesting basal cells shift toward regeneration or adaptive remodeling under viral assault. <|Keyword|>yes, S100A2, KRT19

<|Context|>Among {bronchial goblet cell gene expression} in normal tissue, MUC5AC and MUC5B are highly ranked, with the gene set GSE22589_HEALTHY_VS_SIV_INFECTED_DC_UP. <|Question|>What role might these abundant mucin transcripts play in protecting the bronchial surface in a normal, non-infected state? <|Chain of Thoughts|>MUC5AC and MUC5B form mucus layers that trap particles and pathogens. Enrichment of immune-related gene sets suggests these goblet cells also coordinate protective responses. In normal conditions, ample mucins maintain a barrier that prevents infiltration of harmful agents. <|Answer|>They create a robust mucus barrier, trapping debris and microbes, thereby preserving epithelial integrity and supporting healthy airway function when no active infection is present. <|Keyword|>MUC5AC, MUC5B, goblet cells

<|Context|>For {brush cell of bronchus gene expression} in normal tissue, we see high MALAT1, FOXJ1, and an enrichment of IRX2_TARGET_GENES. <|Question|>Given FOXJ1 typically marks ciliated cells, why might a brush cell express it in normal airways? <|Chain of Thoughts|>Brush cells can share certain transcription factors with ciliated cells, possibly reflecting partial overlap or transitional states. IRX2_TARGET_GENES might shape epithelial identity. Low-level FOXJ1 in brush cells may suggest a minor ciliary feature or an early transitional marker. <|Answer|>FOXJ1 expression in brush cells can indicate a partial ciliary program or transitional identity. These cells may share certain structural or regulatory traits with ciliated cells, underscoring epithelial diversity. <|Keyword|>FOXJ1, brush cells, transitional marker

<|Context|>For {lung neuroendocrine cell gene expression} under COVID-19, TFF3 and SST appear alongside GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS. <|Question|>Could somatostatin (SST) be related to modulating inflammation in these neuroendocrine cells under viral stress? <|Chain of Thoughts|>SST can suppress certain inflammatory processes. With GAVISH_3CA_METAPROGRAM_EPITHELIAL_STRESS enriched, these neuroendocrine cells may release SST to dampen excess immune activity and stabilize local homeostasis during the infection. <|Answer|>Yes. SST produced by neuroendocrine cells can help restrain inflammatory signals, possibly limiting tissue damage during COVID-19 and contributing to a controlled immune environment. <|Keyword|>yes, SST, neuroendocrine, inflammation

<|Context|>In {pulmonary ionocyte cell gene expression} from COVID-19 data, we see S100A6, TPT1, with enriched sets like CHICAS_RB1_TARGETS_LOW_SERUM. <|Question|>Does elevated S100A6 point to higher cell-cycle or stress-related roles in these infected ionocytes? <|Chain of Thoughts|>S100A6 can associate with proliferative or stress pathways. Coupled with CHICAS_RB1_TARGETS_LOW_SERUM, it suggests that Rb1-related signals might be modulated. These ionocytes could be shifting cell-cycle controls to handle infection-induced stress or partial proliferation cues. <|Answer|>Yes. S100A6’s presence may signal cellular adjustments to stress or proliferation demands, indicating the ionocytes are actively modulating growth pathways under viral challenges. <|Keyword|>yes, S100A6, CHICAS_RB1_TARGETS_LOW_SERUM

<|Context|>Within {somatic cell gene expression} in COVID-19, genes such as TPT1, KRT4, KRT13 appear. A relevant gene set is GAVISH_3CA_METAPROGRAM_MACROPHAGES_STRESS_HSP. <|Question|>What might the co-expression of these keratins with heat shock program enrichment imply about epithelial resilience? <|Chain of Thoughts|>KRT4 and KRT13 are often linked to epithelial barrier properties. GAVISH_3CA_METAPROGRAM_MACROPHAGES_STRESS_HSP references stress proteins like HSPs, hinting at protective responses. This synergy suggests stronger epithelial resilience, possibly to reduce cell damage or maintain structure. <|Answer|>The combination of keratin reinforcement and heat shock-related pathways likely strengthens epithelial integrity under viral duress. These cells appear better equipped to maintain structure and limit injury. <|Keyword|>KRT13, GAVISH_3CA_METAPROGRAM_MACROPHAGES_STRESS_HSP

<|Context|>In {club cell gene expression} for normal tissue, SCGB1A1, CYP2F2, and LYPD2 are common, with LAKE_ADULT_KIDNEY_C10_THIN_ASCENDING_LIMB also enriched. <|Question|>Why might there be a kidney-related gene set enrichment in typical club cells of the bronchus? <|Chain of Thoughts|>LAKE_ADULT_KIDNEY_C10_THIN_ASCENDING_LIMB suggests a shared transport or channel expression pattern. Club cells secrete and regulate fluid composition, akin to certain renal tubule processes. This parallel might reflect convergent expression of ion transport genes. <|Answer|>They appear to share transport or channel gene expression with kidney epithelial segments, underscoring functional parallels in fluid and solute regulation across organ systems. <|Keyword|>club cells, LAKE_ADULT_KIDNEY_C10_THIN_ASCENDING_LIMB, transport

<|Context|>Focusing on {ciliated cell of the bronchus gene expression} under COVID-19, there is notable expression of S100P, DYNLL1, and TUBA1A, with TRAVAGLINI_LUNG_PROXIMAL_CILIATED_CELL set. <|Question|>Could the presence of TUBA1A and DYNLL1 in infected cells reflect cytoskeletal reorganization important for viral assembly or release? <|Chain of Thoughts|>TUBA1A is part of the microtubule network, and DYNLL1 is a dynein light chain. Together, they could facilitate intracellular trafficking. By being enriched in these ciliated cells under infection, their upregulation might favor virion movement or egress. <|Answer|>Yes. Heightened microtubule- and dynein-related genes can enhance viral movement through the cell, possibly optimizing viral trafficking and contributing to more effective viral release. <|Keyword|>yes, TUBA1A, DYNLL1

<|Context|>Looking at {bronchial goblet cell gene expression} in COVID-19 conditions, MALAT1, TPT1, and KRT8 are high, with IRX2_TARGET_GENES enriched. <|Question|>In these goblet cells, does IRX2_TARGET_GENES enrichment suggest any shift in epithelial identity during infection? <|Chain of Thoughts|>IRX2 influences epithelial lineage decisions. High KRT8 is typical of simple epithelia but can also appear in stress contexts. Thus, IRX2_TARGET_GENES might indicate partial reprogramming or adaptive changes in goblet cells responding to infection challenges. <|Answer|>Yes. IRX2-related pathways may guide these cells toward altered states, blending their secretory identity with stress-induced epithelial remodeling in the face of COVID-19. <|Keyword|>yes, IRX2_TARGET_GENES, goblet cell remodeling

<|Context|>Within {brush cell of bronchus gene expression} in COVID-19, TPT1, TXNIP, and ANXA1 rank high. Gene set GSE23308_CTRL_VS_CORTICOSTERONE_TREATED_MACROPHAGE_MINERALCORTICOID_REC_KO_DN shows enrichment. <|Question|>Why might brush cells express TXNIP strongly under infection, and does it imply a redox regulatory function? <|Chain of Thoughts|>TXNIP modulates thioredoxin, integral in redox balance. Infection stresses can alter ROS levels. Coupled with an immune-associated gene set, this suggests brush cells manage oxidative stress, potentially mitigating damage while orchestrating local epithelial defense. <|Answer|>Yes. Elevated TXNIP likely helps these brush cells maintain redox homeostasis under infection stress, preventing excessive oxidative injury and preserving bronchial function. <|Keyword|>yes, TXNIP, redox regulation

<|Context|>Regarding {club cell gene expression} under COVID-19, SCGB3A1, MT-ND3, and KRT19 appear. The set MAP2K1_TARGET_GENES is enriched. <|Question|>Could MT-ND3 elevation signify a mitochondrial enhancement possibly contributing to MAP2K1-driven signaling in these club cells? <|Chain of Thoughts|>MT-ND3 is part of the respiratory chain, indicating boosted energy metabolism. MAP2K1_TARGET_GENES connect to MAPK cascades requiring ATP. A synergy between improved mitochondrial function and MAPK signaling may help club cells respond to damage or infection. <|Answer|>Yes. Club cells seemingly ramp up their metabolic capacity, potentially powering MAPK-related pathways that govern cell survival, repair, or inflammatory responses under COVID-19 stress. <|Keyword|>yes, MT-ND3, MAP2K1_TARGET_GENES

<|Context|>In {lung neuroendocrine cell gene expression} from normal tissue, TFF3 and CPE appear frequently. The set ZFHX3_TARGET_GENES is enriched. <|Question|>How might TFF3 and CPE co-expression support neuroendocrine cell function in normal airway physiology? <|Chain of Thoughts|>TFF3 helps maintain mucosal surfaces, and CPE (carboxypeptidase E) processes neuropeptides. Co-expression suggests these cells secrete protective or regulatory molecules. The ZFHX3_TARGET_GENES hint at transcriptional regulation of such neuroendocrine secretory pathways. <|Answer|>Together, they promote stable peptide processing and mucosal protection, reinforcing the specialized neuroendocrine role of modulating airway secretions and local homeostasis in healthy lungs. <|Keyword|>TFF3, CPE, neuroendocrine function

<|Context|>Within {pulmonary ionocyte cell gene expression} in normal tissue, an enrichment of GAVISH_3CA_METAPROGRAM_FIBROBLASTS_CAF_4 unexpectedly appears, with gene ranks like GNAS and S100A11. <|Question|>Does the presence of fibroblast-related gene set enrichment imply any paracrine or structural collaboration with surrounding stroma in normal ionocytes? <|Chain of Thoughts|>Ionocytes typically manage ion exchange but might share signaling factors with fibroblasts. GAVISH_3CA_METAPROGRAM_FIBROBLASTS_CAF_4 suggests overlapping expression of extracellular remodeling or paracrine signals. This cross-talk could be crucial for basal membrane or local environment shaping. <|Answer|>Yes. Ionocytes may release or respond to factors paralleling fibroblast programs, hinting at close epithelial-stromal interactions for ion transport and tissue integrity in the normal airway. <|Keyword|>yes, fibroblast, GAVISH_3CA_METAPROGRAM_FIBROBLASTS_CAF_4

<|Context|>Examining {somatic cell gene expression} in COVID-19, S100A2, TACSTD2, and ID1 stand out, with GOBP_POSITIVE_REGULATION_OF_MYOBLAST_PROLIFERATION enriched. <|Question|>Why might a gene set tied to myoblast proliferation be enriched in these infected somatic cells of the bronchus? <|Chain of Thoughts|>GOBP_POSITIVE_REGULATION_OF_MYOBLAST_PROLIFERATION might indicate a shared proliferative or repair trait. Even though it’s specialized for muscle precursors, overlapping genes could be upregulated to enhance epithelial regeneration or remodeling under infection-induced damage. <|Answer|>It suggests these cells adopt or engage proliferative signals analogous to muscle repair pathways, likely to support epithelial renewal or wound repair during viral infection stress. <|Keyword|>myoblast proliferation, GOBP_POSITIVE_REGULATION_OF_MYOBLAST_PROLIFERATION, somatic cell

<|Context|>In {basal cell of epithelium of bronchus gene expression} for normal tissue, KRT5, TP63 are present, with GSE2770_TGFB_AND_IL4_VS_TGFB_AND_IL12_TREATED_ACT_CD4_TCELL_48H_UP also enriched. <|Question|>Could basal cells show immune-related gene set overlaps because they modulate local T-cell recruitment even in a normal state? <|Chain of Thoughts|>Basal cells can release cytokines affecting T-cell function. The T-cell–focused gene set suggests an unexpected link to immune signaling. Even in a normal condition, basal cells might harbor capacity to attract or instruct local immune cells, though not as strongly as during infection. <|Answer|>Yes. Their baseline expression could coordinate T-cell behavior, indicating these progenitor-like epithelial cells contribute to airway immunosurveillance, even under non-inflamed conditions. <|Keyword|>yes, basal cells, T-cell recruitment

<|Context|>Looking at {bronchial goblet cell gene expression} in normal tissue, MUC5B, RPS24, and RPL34 are high, plus STAMBOLSKY_RESPONSE_TO_VITAMIN_D3_UP is enriched. <|Question|>How might vitamin D3–linked responses influence mucus production in these goblet cells? <|Chain of Thoughts|>Vitamin D3 can modulate epithelial defense and reduce excessive inflammation. STAMBOLSKY_RESPONSE_TO_VITAMIN_D3_UP suggests these cells might adjust their mucus properties or amounts, balancing protective secretion with controlled inflammatory responses. <|Answer|>They may fine-tune mucus release under vitamin D3 signaling, ensuring robust airway protection without exacerbating thick mucus accumulation, thus maintaining a healthier epithelial lining. <|Keyword|>vitamin D3, STAMBOLSKY_RESPONSE_TO_VITAMIN_D3_UP, mucus

<|Context|>In {brush cell of bronchus gene expression} from normal conditions, we see RPL39, RPS6, plus an enriched GSE13493_DP_VS_CD4INTCD8POS_THYMOCYTE_UP set. <|Question|>Might brush cells exhibit broad immunoregulatory potential, hinted by T-cell–related gene sets, in addition to their chemosensory function? <|Chain of Thoughts|>Brush cells can detect luminal stimuli and possibly communicate with immune cells. The T-cell–linked gene set indicates they may produce signals or mediators influencing T-cell activation or trafficking, expanding beyond mere chemosensory tasks. <|Answer|>Yes. Brush cells likely integrate sensory input with immunomodulatory signals, indicating they can shape T-cell–related pathways and local immune responses in normal airway contexts. <|Keyword|>yes, brush cell, T-cell

<|Context|>Among {ciliated cell of the bronchus gene expression} in normal tissue, we see MLF1, LZTFL1, and LAKE_ADULT_KIDNEY_C9_THIN_ASCENDING_LIMB enriched. <|Question|>How could kidney-associated gene sets appear in ciliated bronchial cells? <|Chain of Thoughts|>LAKE_ADULT_KIDNEY_C9_THIN_ASCENDING_LIMB involves transporters for salt balance. Airway ciliated cells also manage fluid surface levels. The overlap may reflect shared solute transporter or channel genes used in epithelial fluid regulation in both lung and kidney. <|Answer|>The ciliated cells and renal epithelium may share transport regulatory genes, highlighting a conservation of fluid management functions across these organ systems. <|Keyword|>ciliated cells, LAKE_ADULT_KIDNEY_C9_THIN_ASCENDING_LIMB, fluid regulation

<|Context|>Within {club cell gene expression} for normal tissue, SCGB1A1, BTF3, and TSPAN1 stand out, with NAKAMURA_BRONCHIAL_AND_BRONCHIOLAR_EPITHELIA. <|Question|>Does BTF3 upregulation in club cells imply a broader transcriptional support function to maintain stable secretory phenotypes? <|Chain of Thoughts|>BTF3 helps regulate transcription initiation. Club cells secrete protective proteins (SCGB1A1). TSPAN1 also supports membrane trafficking. Thus, BTF3 might facilitate continuous production of secreted proteins that preserve the airway surface. <|Answer|>Yes. Enhancing fundamental transcription mechanisms ensures these club cells can sustain and adapt their protective secretory output in the normal bronchial environment. <|Keyword|>yes, BTF3, club cell

<|Context|>Exploring {lung neuroendocrine cell gene expression} in normal conditions, we find AVIL and TRPM5, with GSE32901_NAIVE_VS_TH17_ENRICHED_CD4_TCELL_UP enriched. <|Question|>Why might these neuroendocrine cells overlap with a T-cell–related signature, and do they potentially regulate T-helper cell dynamics? <|Chain of Thoughts|>AVIL and TRPM5 link to specialized sensory roles. Neuroendocrine cells can also secrete signals that shape T-cell activity. The T-helper–focused set suggests possible paracrine messaging that influences Th17 or naive T-cell responses in the airway. <|Answer|>They likely produce signals modulating T-helper cell behavior, illustrating a cross-talk between neuroendocrine secretion and adaptive immune responses, even under non-inflamed conditions. <|Keyword|>Th17, AVIL, TRPM5

<|Context|>In {pulmonary ionocyte cell gene expression} from COVID-19, transcripts like GNAS and RPS29 predominate, with an enriched gene set PKCA_DN.V1_DN. <|Question|>Could reduction of PKC alpha–associated activity in these ionocytes affect ion transport or fluid regulation under infection? <|Chain of Thoughts|>PKC alpha can influence epithelial transport channels. PKCA_DN.V1_DN suggests decreased PKC alpha–related signals. During infection, dampening PKC alpha might modulate fluid or ion flux, possibly adjusting alveolar surface fluid to respond to viral insults. <|Answer|>Yes. Lower PKC alpha–like signaling could reshape channel activity or fluid handling in the infected ionocytes, potentially influencing mucus hydration or airway surface tension responses. <|Keyword|>yes, PKCA_DN.V1_DN, ion transport

<|Context|>Observing {somatic cell gene expression} in normal tissue, KRT17, LCN2, and DUSP1 appear, with GOMF_ENZYME_INHIBITOR_ACTIVITY enriched. <|Question|>Do these molecules imply that normal somatic cells hold the capacity to regulate local protease or enzymatic activity in the bronchus? <|Chain of Thoughts|>LCN2 can sequester iron and modulate enzymes. DUSP1 inactivates MAP kinases. GOMF_ENZYME_INHIBITOR_ACTIVITY suggests they counteract proteolysis or excessive kinase signaling. Combined, they likely preserve epithelial balance, preventing undue tissue damage. <|Answer|>Yes. By expressing these inhibitory factors, somatic cells help keep enzymatic processes in check, promoting a stable environment and limiting injury within the airway. <|Keyword|>yes, LCN2, DUSP1, enzyme inhibitor



"""

run the following cell to convert the output into a hf dataset

In [75]:
qa_dict_list = []
for QA_pair in output.split("<|Context|>"):
    if not QA_pair:
        continue
    try:
        context = QA_pair.split("<|Question|>")[0].strip()
        question = QA_pair.split("<|Question|>")[1].split("<|Chain of Thoughts|>")[0].strip()
        chain_of_thoughts = QA_pair.split("<|Chain of Thoughts|>")[1].split("<|Answer|>")[0].strip()
        answer = QA_pair.split("<|Answer|>")[1].split("<|Keyword|>")[0].strip()
        label = QA_pair.split("<|Keyword|>")[1].strip()
    except:
        print("Error in the following QA pair:")
        print(QA_pair)
        continue

    entry = {
        "Context": context,
        "Summary_Dataset": dataset_in_prompt,
        "Question": question, 
        "Chain of Thoughts": chain_of_thoughts,
        "Answer": answer,
        "Keyword": label,
        "full_QA_pair": "<|Question|>"+QA_pair.strip(), 
        "Dataset_Name": dataset_name,
        "Publication_URL": cur_url,
        "Dataset_Index": dataset_index,
        "Used_Rows": sampled_indices,
    }
    qa_dict_list.append(entry)

qa_dict = {key: [d[key] for d in qa_dict_list] for key in qa_dict_list[0].keys()} # list of dicts to dict of lists

new_hf_dataset = Dataset.from_dict(qa_dict)
print("Example:")
new_hf_dataset[0]

Error in the following QA pair:


Example:


{'Context': 'In {ciliated cell of the bronchus gene expression} with COVID-19, numerous mitochondrial and ribosomal genes appear highly ranked. Some enriched gene sets include TRAVAGLINI_LUNG_CILIATED_CELL and KEGG_MEDICUS_REFERENCE_TRANSLATION_INITIATION.',
 'Summary_Dataset': 'Cell Type: pulmonary ionocyte, Tissue: bronchial epithelial cell (cell culture), Disease: COVID-19, Gene Expression: MALAT1 MT-CO2 MT-ND3 MT-CO1 MT-ATP6 MT-CYB MT-CO3 S100A6 EEF1A1 MT-ND4 RPL41 RPS27 H3-3B TMSB4X FTH1 RPS12 RPL34 RPL39 RPS24 PTMA ACTG1 TPT1 CD9 RPLP1 RPS28 MT-ND2 RPL12 RPS15A RPL13 RPS27A RPS18 RPS3A MT-ND1 RPL28 H3-3A ACTB TMSB10 CD24 RPL11 RPS8 RPL37 RPL10 B2M RPL7A RPL30 RPS4X RPS19 RPL3 RPL36 RPS29 RPL15 RPS23 IGFBP5 RPL6 GNAS RPL32 RPL37A RPL19 RPS15 ATP5F1E RPL35A FOS EIF1 SERF2 NEAT1 RPS3 RPS6 RPS13 RPS7 RPL5 RPL8 CALM2 KRT19 RPS14 SOX4 RPL21 RPL18 MYL6 KRT8 RPL29 RPL24 RPL9 RPL26 SAT1 MT-ND5 KRT7 RPS9 NACA SRP14 RPL18A RPL14 KRT18 RPS25 COX7C SCGB3A1 FTL DDX5 LDHB RACK1 JUN GAPDH GSTP1 

run the following cell every time to save updates \
sometimes save_to_disk will fail because it doesn't automatically overwrites, just delete the old file adn run it again

In [76]:
hf_dataset = concatenate_datasets([hf_dataset, new_hf_dataset])
hf_dataset


  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


Dataset({
    features: ['Context', 'Summary_Dataset', 'Question', 'Chain of Thoughts', 'Answer', 'Keyword', 'full_QA_pair', 'Dataset_Name', 'Publication_URL', 'Dataset_Index', 'Used_Rows'],
    num_rows: 335
})

In [77]:
hf_dataset.save_to_disk("geneset_dataset192-all_3-3-25")
print("current hf_dataset: ")
hf_dataset

Saving the dataset (1/1 shards): 100%|██████████| 335/335 [00:00<00:00, 13539.66 examples/s]

current hf_dataset: 





Dataset({
    features: ['Context', 'Summary_Dataset', 'Question', 'Chain of Thoughts', 'Answer', 'Keyword', 'full_QA_pair', 'Dataset_Name', 'Publication_URL', 'Dataset_Index', 'Used_Rows'],
    num_rows: 335
})

In [78]:
dataset1 = load_from_disk("/users/zlyu12/Desktop/c2s-RL/Create_Dataset/geneset_dataset192-all_3-3-25")
dataset2 = load_from_disk("/users/zlyu12/Desktop/c2s-RL/Create_Dataset/geneset_dataset191_3-1-25")

combined_dataset = concatenate_datasets([dataset1, dataset2])
print("Combined dataset:")
combined_dataset

combined_dataset.save_to_disk("/users/zlyu12/Desktop/c2s-RL/Create_Dataset/geneset_dataset_3-3-25")


Combined dataset:


Saving the dataset (1/1 shards): 100%|██████████| 365/365 [00:00<00:00, 10051.55 examples/s]


In [79]:
# Load all individual datasets
dataset3 = load_from_disk("/users/zlyu12/Desktop/c2s-RL/Create_Dataset/geneset_dataset191_2-11-25")
dataset4 = load_from_disk("/users/zlyu12/Desktop/c2s-RL/Create_Dataset/geneset_dataset192_2-13-25")
dataset5 = load_from_disk("/users/zlyu12/Desktop/c2s-RL/Create_Dataset/geneset_dataset471-603_2-13-25")
dataset6 = load_from_disk("/users/zlyu12/Desktop/c2s-RL/Create_Dataset/geneset_dataset612-636_2-13-25")
dataset7 = load_from_disk("/users/zlyu12/Desktop/c2s-RL/Create_Dataset/geneset_dataset637-778_2-13-25")

# Combine all datasets including the previously combined one
final_combined_dataset = concatenate_datasets([combined_dataset, dataset3, dataset4, dataset5, dataset6, dataset7])
print("Final combined dataset:")
final_combined_dataset

# Save the final combined dataset
final_combined_dataset.save_to_disk("/users/zlyu12/Desktop/c2s-RL/Create_Dataset/geneset_dataset_3-3+2-13-25")


  table = cls._concat_blocks(blocks, axis=0)


Final combined dataset:


Saving the dataset (1/1 shards): 100%|██████████| 784/784 [00:00<00:00, 15514.45 examples/s]


In [80]:
import os
import glob

# Get all dataset paths that start with 'dataset' in the specified directory
dataset_paths = glob.glob("/users/zlyu12/Desktop/c2s-RL/Create_Dataset/dataset*")
print(dataset_paths)

# Load all datasets
datasets = []
for path in dataset_paths:
    try:
        dataset = load_from_disk(path)
        datasets.append(dataset)
        print(f"Loaded dataset from: {path}")
    except Exception as e:
        print(f"Error loading dataset from {path}: {e}")

# Combine all datasets if any were loaded successfully
if datasets:
    all_combined_dataset = concatenate_datasets(datasets)
    print("\nFinal combined dataset:")
    print(all_combined_dataset)
else:
    print("No datasets were loaded successfully")


['/users/zlyu12/Desktop/c2s-RL/Create_Dataset/dataset0-50_12-22-24_hf_dataset', '/users/zlyu12/Desktop/c2s-RL/Create_Dataset/dataset201-250_1-10-25_hf_dataset', '/users/zlyu12/Desktop/c2s-RL/Create_Dataset/dataset351-400_1-13-25_hf_dataset', '/users/zlyu12/Desktop/c2s-RL/Create_Dataset/dataset451-488_1-15-25_hf_dataset', '/users/zlyu12/Desktop/c2s-RL/Create_Dataset/dataset489-514_1-24-25_hf_dataset', '/users/zlyu12/Desktop/c2s-RL/Create_Dataset/dataset515-536_1-24-25_hf_dataset']
Loaded dataset from: /users/zlyu12/Desktop/c2s-RL/Create_Dataset/dataset0-50_12-22-24_hf_dataset
Loaded dataset from: /users/zlyu12/Desktop/c2s-RL/Create_Dataset/dataset201-250_1-10-25_hf_dataset
Loaded dataset from: /users/zlyu12/Desktop/c2s-RL/Create_Dataset/dataset351-400_1-13-25_hf_dataset
Loaded dataset from: /users/zlyu12/Desktop/c2s-RL/Create_Dataset/dataset451-488_1-15-25_hf_dataset
Loaded dataset from: /users/zlyu12/Desktop/c2s-RL/Create_Dataset/dataset489-514_1-24-25_hf_dataset
Loaded dataset from: /

  table = cls._concat_blocks(blocks, axis=0)


In [81]:
# Load the previously saved final combined dataset
previous_combined = load_from_disk("/users/zlyu12/Desktop/c2s-RL/Create_Dataset/geneset_dataset_3-3+2-13-25")

# Combine with the all_combined_dataset
complete_dataset = concatenate_datasets([previous_combined, all_combined_dataset])
print("\nComplete combined dataset:")
print(complete_dataset)

# Save the complete combined dataset
complete_dataset.save_to_disk("/users/zlyu12/Desktop/c2s-RL/Create_Dataset/_dataset_allCoT")


ValueError: The features can't be aligned because the key Dataset_Index of features {'Context': Value(dtype='string', id=None), 'Summary_Dataset': Value(dtype='string', id=None), 'Question': Value(dtype='string', id=None), 'Answer': Value(dtype='string', id=None), 'Keyword': Value(dtype='string', id=None), 'full_QA_pair': Value(dtype='string', id=None), 'Dataset_Name': Value(dtype='string', id=None), 'Publication_URL': Value(dtype='string', id=None), 'Dataset_Index': Value(dtype='string', id=None), 'Used_Rows': Sequence(feature=Value(dtype='int64', id=None), length=-1, id=None), 'Chain of Thoughts': Value(dtype='string', id=None)} has unexpected type - Value(dtype='string', id=None) (expected either Value(dtype='int64', id=None) or Value("null").