In [None]:
def annotation_cluster(sample, gene_lists, cluster_name='PAGODA_hc', alpha=0.01,
                       csv_file_path='../analysis.d/result/csv/result.csv'):
    """Test, per cluster, whether gene-signature scores exceed those of all other cells.

    For every entry of ``gene_lists`` a signature score is computed with
    ``sc.tl.score_genes`` and stored in ``sample.obs`` under the gene-list name.
    Each cluster's scores are then compared against the scores of every cell
    outside that cluster with a one-sided Mann-Whitney U test
    (``alternative='greater'``). Within each gene list the p-values are
    corrected across clusters with the Benjamini-Hochberg procedure.

    Relies on ``sc`` (presumably scanpy) and ``pd`` (pandas) being available
    in the enclosing module/notebook namespace.

    Parameters
    ----------
    sample
        Annotated data object exposing an ``obs`` table that contains the
        ``cluster_name`` column. It is modified in place: one score column per
        gene list is added to ``sample.obs``.
    gene_lists
        Mapping of gene-list name -> collection of gene identifiers.
    cluster_name
        Name of the ``sample.obs`` column holding the cluster labels.
    alpha
        Significance level passed to the multiple-testing correction.
    csv_file_path
        Where the result table is written as CSV. Pass ``None`` to skip
        writing the file.

    Returns
    -------
    pandas.DataFrame
        Indexed by cluster label (index name ``'cluster'``), with the columns
        ``<name>_pval``, ``<name>_FDR`` and ``<name>_reject`` for every gene
        list ``<name>``.
    """
    from scipy.stats import mannwhitneyu
    from statsmodels.stats.multitest import multipletests

    labels = sample.obs[cluster_name]
    annotation = labels.unique().tolist()

    results = {}
    for gene_list in gene_lists:
        score_name = str(gene_list)
        sc.tl.score_genes(sample, gene_lists[gene_list], score_name=score_name, random_state=42)
        scores = sample.obs[score_name]

        pvals = []
        for label in annotation:
            # Compare against the label itself rather than str(label): with
            # non-string labels (e.g. integer cluster ids) the string form
            # matches nothing and the "in cluster" group would be empty.
            in_cluster = labels == label
            _, pvalue = mannwhitneyu(
                scores[in_cluster],
                scores[~in_cluster],
                alternative='greater',
                nan_policy='omit',
            )
            pvals.append(pvalue)

        # Benjamini-Hochberg correction across the clusters of this gene list.
        reject, pvals_corrected = multipletests(pvals, alpha=alpha, method='fdr_bh')[:2]

        results[f"{gene_list}_pval"] = pvals
        results[f"{gene_list}_FDR"] = pvals_corrected
        results[f"{gene_list}_reject"] = reject

    # Supplying the index at construction also keeps the empty-gene_lists case
    # valid (a frame with one row per cluster and no columns).
    df = pd.DataFrame(results, index=pd.Index(annotation, name='cluster'))

    # save the csv file
    if csv_file_path is not None:
        df.to_csv(csv_file_path, index=True)

    return df
