# BDSO Correlation
Compares the source taxonomies of the BDS Ontology with the CxG integrated MOp (primary motor cortex) dataset to evaluate their compatibility.

In [1]:
## Import dependencies

import os
import pandas as pd
import anndata as ad
import numpy as np
import urllib.request as request
import zipfile

## Read h5ad (ann_data)

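Please manually download the h5ad file from https://cellxgene.cziscience.com/collections/ae1420fe-6630-46ed-8b3d-cc6056a66467 and save it as `h5ad-downloads/an-integrated-transcriptomic-cellxgene.h5ad` (the path read by the next cell).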

In [2]:
# Open the manually downloaded CxG dataset with backed="r".
h5ad_path = "h5ad-downloads/an-integrated-transcriptomic-cellxgene.h5ad"
ann_data = ad.read_h5ad(h5ad_path, backed="r")

In [3]:
# Per-cell metadata table: print its dimensions, then preview the first rows.
cell_metadata = ann_data.obs
print(cell_metadata.shape)
cell_metadata.head(4)

(406187, 29)


Unnamed: 0,BICCN_cluster_id,QC,BICCN_cluster_label,BICCN_subclass_label,BICCN_class_label,cluster_color,size,temp_class_label,BICCN_ontology_term_id,assay_ontology_term_id,...,donor_id,suspension_type,cell_type,assay,disease,organism,sex,tissue,self_reported_ethnicity,development_stage
SM-D9D8O_S03_E1-50,48.0,1201,L6 CT Grp_1,L6 CT,Glutamatergic,#197A6E,781.0,GlutamatergicL6 CT,ILX:0770162,EFO:0008930,...,304659,cell,glutamatergic neuron,Smart-seq,normal,Mus musculus,male,primary motor cortex,na,early adult stage
SM-DAIH5_S47_E1-50,48.0,3678,L6 CT Grp_1,L6 CT,Glutamatergic,#197A6E,781.0,GlutamatergicL6 CT,ILX:0770162,EFO:0008930,...,319137,cell,glutamatergic neuron,Smart-seq,normal,Mus musculus,male,primary motor cortex,na,early adult stage
SM-DD44L_S43_E1-50,4.0,4495,Lamp5 Slc35d3,Lamp5,GABAergic,#FFA388,194.0,GABAergicLamp5,ILX:0770149,EFO:0008930,...,298355,cell,GABAergic neuron,Smart-seq,normal,Mus musculus,male,primary motor cortex,na,early adult stage
SM-D9E5O_S40_E1-50,49.0,1499,L6 CT Grp_2,L6 CT,Glutamatergic,#358459,24.0,GlutamatergicL6 CT,ILX:0770162,EFO:0008930,...,306909,cell,glutamatergic neuron,Smart-seq,normal,Mus musculus,female,primary motor cortex,na,early adult stage


## Read cell_to_cell_set_assignments (c2c_data)

Downloads, unzips and loads the cell-to-cell-set assignments data.

In [4]:
c2c_csv_name = "cell_to_cell_set_assignments_CCN202002013.csv"
c2c_zip_name = "cell_to_cell_set_assignments_CCN202002013.zip"
c2c_zip_url = "https://github.com/AllenInstitute/MOp_taxonomies_ontology/blob/main/mouseMOp_CCN202002013/cell_to_cell_set_assignments_CCN202002013.zip?raw=true"

# Fetch and unpack the archive only when the extracted CSV is not already
# present in the working directory.
csv_already_extracted = os.path.exists(os.path.join(os.getcwd(), c2c_csv_name))
if not csv_already_extracted:
    request.urlretrieve(c2c_zip_url, c2c_zip_name)
    with zipfile.ZipFile(c2c_zip_name, 'r') as archive:
        archive.extractall(os.getcwd())

# Load to DataFrame
c2c_data = pd.read_csv(c2c_csv_name)

print(c2c_data.shape)
c2c_data.head(5)

(159738, 258)


Unnamed: 0,sample_name,CS202002013_81,CS202002013_1,CS202002013_2,CS202002013_3,CS202002013_4,CS202002013_5,CS202002013_6,CS202002013_7,CS202002013_8,...,CS202002013_248,CS202002013_249,CS202002013_250,CS202002013_251,CS202002013_252,CS202002013_253,CS202002013_254,CS202002013_255,CS202002013_256,CS202002013_257
0,pBICCNsMMrMOpRAiF003d190318_AAACCCAAGGCCTTGC,1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,pBICCNsMMrMOpRAiF003d190318_AAACCCAAGGCTAAAT,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,pBICCNsMMrMOpRAiF003d190318_AAACCCATCTGAGCAT,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,pBICCNsMMrMOpRAiF003d190318_AAACGAAAGGGCGAGA,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,pBICCNsMMrMOpRAiF003d190318_AAACGAACAGAGATTA,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Report the differences between c2c_data and ann_data

List all c2c_data sample names that do not exist in ann_data.

In [5]:
# Cell identifiers present in each dataset.
adata_keys = set(ann_data.obs.index.tolist())
cdata_keys = set(c2c_data["sample_name"].unique().tolist())

# c2c sample names that have no matching observation in ann_data.
diff = [sample for sample in cdata_keys if sample not in adata_keys]
diff

[]

The list is empty, so every c2c_data sample is also present in ann_data: c2c_data is a subset of ann_data.

Filter ann_data to keep only the matching samples. 

In [6]:
# Keep only the observations whose identifier also appears in c2c_data.
present_in_c2c = ann_data.obs.index.isin(cdata_keys)
adata_subset = ann_data.obs.loc[present_in_c2c]
adata_subset.shape

(159738, 29)

## Report the differences between ann_data and c2c_data

Ann_data has 406187 rows while c2c_data has only 159738 rows. So check if we need all the ann_data for comparison.

In [7]:
# Distinct assay values among the cells shared with c2c_data.
adata_subset["assay"].unique()

['10x 3' v3']
Categories (3, object): ['Smart-seq', '10x 3' v2', '10x 3' v3']

In [8]:
# Distinct suspension types among the cells shared with c2c_data.
adata_subset["suspension_type"].unique()

['nucleus']
Categories (2, object): ['cell', 'nucleus']

Seems we are only using assay="10x 3' v3" and suspension_type="nucleus" !

In [9]:
# Narrow the observations in two steps: first by assay, then by suspension type.
is_10x_v3 = ann_data.obs["assay"] == "10x 3' v3"
adata_assay = ann_data.obs.loc[is_10x_v3]

is_nucleus = adata_assay["suspension_type"] == "nucleus"
adata_filtered = adata_assay.loc[is_nucleus]
adata_filtered.shape

(199904, 29)

List all filtered ann_data rows that do not exist in c2c_data.

In [10]:
# Identifiers of the filtered observations, and those among them absent from c2c_data.
adata_filtered_keys = set(adata_filtered.index.tolist())
diff2 = [key for key in adata_filtered_keys if key not in cdata_keys]

# Rows of the filtered observations that have no c2c_data counterpart.
missing_from_c2c = adata_filtered.index.isin(diff2)
only_anndata = adata_filtered.loc[missing_from_c2c]

print(only_anndata.shape)
only_anndata.head(5)

(40166, 29)


Unnamed: 0,BICCN_cluster_id,QC,BICCN_cluster_label,BICCN_subclass_label,BICCN_class_label,cluster_color,size,temp_class_label,BICCN_ontology_term_id,assay_ontology_term_id,...,donor_id,suspension_type,cell_type,assay,disease,organism,sex,tissue,self_reported_ethnicity,development_stage
AAACCCAAGCTCTTCC-1L8TX_181211_01_A02,59.0,1.0,L6b Shisa6,L6b,Glutamatergic,#2B9880,247.0,GlutamatergicL6b,ILX:0770163,EFO:0009922,...,427311,nucleus,glutamatergic neuron,10x 3' v3,normal,Mus musculus,female,primary motor cortex,na,early adult stage
AAACCCAAGTCGAATA-1L8TX_181211_01_A02,54.0,2.0,L6 CT Brinp3,L6 CT,Glutamatergic,#338C5E,3970.0,GlutamatergicL6 CT,ILX:0770162,EFO:0009922,...,427311,nucleus,glutamatergic neuron,10x 3' v3,normal,Mus musculus,female,primary motor cortex,na,early adult stage
AAACCCACAACCCTAA-1L8TX_181211_01_A02,11.0,3.0,Vip Chat,Vip,GABAergic,#FF00FF,519.0,GABAergicVip,ILX:0770151,EFO:0009922,...,427311,nucleus,GABAergic neuron,10x 3' v3,normal,Mus musculus,female,primary motor cortex,na,early adult stage
AAACCCAGTATAGCTC-1L8TX_181211_01_A02,34.0,4.0,L5 IT Rspo1_3,L5 IT,Glutamatergic,#3CBC45,1838.0,GlutamatergicL5 IT,ILX:0770157,EFO:0009922,...,427311,nucleus,glutamatergic neuron,10x 3' v3,normal,Mus musculus,female,primary motor cortex,na,early adult stage
AAACCCAGTCAGACTT-1L8TX_181211_01_A02,30.0,5.0,L2/3 IT_2,L2/3 IT,Glutamatergic,#7C8169,2105.0,GlutamatergicL2/3 IT,ILX:0770156,EFO:0009922,...,427311,nucleus,glutamatergic neuron,10x 3' v3,normal,Mus musculus,female,primary motor cortex,na,early adult stage


# Compare Clusterings
Test whether clustering is the same between the two datasets

Download mouse nomenclature table to find accession ids

In [11]:
nomenclature_csv_name = "nomenclature_table_CCN202002013.csv"
nomenclature_csv_url = "https://raw.githubusercontent.com/obophenotype/brain_data_standards_ontologies/master/src/dendrograms/nomenclature_table_CCN202002013.csv"

# Download the table only when it is not already in the working directory.
table_already_downloaded = os.path.exists(os.path.join(os.getcwd(), nomenclature_csv_name))
if not table_already_downloaded:
    request.urlretrieve(nomenclature_csv_url, nomenclature_csv_name)

# Load to DataFrame, keyed by the cell set accession id
nomenclature_table = pd.read_csv(nomenclature_csv_name, index_col="cell_set_accession")

print(nomenclature_table.shape)
nomenclature_table.head(4)

(257, 16)


Unnamed: 0_level_0,cell_set_preferred_alias,original_label,cell_set_label,cell_set_aligned_alias,cell_set_additional_aliases,cell_set_alias_assignee,cell_set_alias_citation,cell_set_structure,cell_set_ontology_tag,species,modality,taxonomy_id,taxonomy_description,child_cell_set_accessions,cell_type_card,cell_set_color
cell_set_accession,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
CS202002013_1,Lamp5-like Pax6,Lamp5 Pax6,RNAseq 001,Lamp5_4,Lamp5 Pax6,Zizhen Yao|Nikolas Jorstad|Zizhen Yao,PMID:34616066|PMID:34616062|PMID:34616066,primary motor cortex,UBERON:0001384,Mouse,RNAseq,CCN202002013,Mouse MOp BICCN taxonomy using multiple RNA-Se...,,Cell Type,#DDACC9
CS202002013_2,Lamp5-like Egln3_1,Lamp5 Egln3_1,RNAseq 002,Sncg_4,Alpha7 cell,Zizhen Yao|Nikolas Jorstad|Federico Scala,PMID:34616066|PMID:34616062|PMID:33184512,primary motor cortex,UBERON:0001384,Mouse,RNAseq,CCN202002013,Mouse MOp BICCN taxonomy using multiple RNA-Se...,,Cell Type,#FF88AD
CS202002013_3,Lamp5 Egln3_2,Lamp5 Egln3_2,RNAseq 003,,Canopy cell,Zizhen Yao|Federico Scala|Zizhen Yao,PMID:34616066|PMID:33184512|PMID:34616066,primary motor cortex,UBERON:0001384,Mouse,RNAseq,CCN202002013,Mouse MOp BICCN taxonomy using multiple RNA-Se...,,Cell Type,#DD8091
CS202002013_4,Lamp5 Egln3_3,Lamp5 Egln3_3,RNAseq 004,,Lamp5 Egln3_3,Zizhen Yao,PMID:34616066,primary motor cortex,UBERON:0001384,Mouse,RNAseq,CCN202002013,Mouse MOp BICCN taxonomy using multiple RNA-Se...,,Cell Type,#F08E98


In [12]:
# Preview the first five filtered observations.
adata_filtered.iloc[:5]

Unnamed: 0,BICCN_cluster_id,QC,BICCN_cluster_label,BICCN_subclass_label,BICCN_class_label,cluster_color,size,temp_class_label,BICCN_ontology_term_id,assay_ontology_term_id,...,donor_id,suspension_type,cell_type,assay,disease,organism,sex,tissue,self_reported_ethnicity,development_stage
AAACCCAAGCTCTTCC-1L8TX_181211_01_A02,59.0,1.0,L6b Shisa6,L6b,Glutamatergic,#2B9880,247.0,GlutamatergicL6b,ILX:0770163,EFO:0009922,...,427311,nucleus,glutamatergic neuron,10x 3' v3,normal,Mus musculus,female,primary motor cortex,na,early adult stage
AAACCCAAGTCGAATA-1L8TX_181211_01_A02,54.0,2.0,L6 CT Brinp3,L6 CT,Glutamatergic,#338C5E,3970.0,GlutamatergicL6 CT,ILX:0770162,EFO:0009922,...,427311,nucleus,glutamatergic neuron,10x 3' v3,normal,Mus musculus,female,primary motor cortex,na,early adult stage
AAACCCACAACCCTAA-1L8TX_181211_01_A02,11.0,3.0,Vip Chat,Vip,GABAergic,#FF00FF,519.0,GABAergicVip,ILX:0770151,EFO:0009922,...,427311,nucleus,GABAergic neuron,10x 3' v3,normal,Mus musculus,female,primary motor cortex,na,early adult stage
AAACCCAGTATAGCTC-1L8TX_181211_01_A02,34.0,4.0,L5 IT Rspo1_3,L5 IT,Glutamatergic,#3CBC45,1838.0,GlutamatergicL5 IT,ILX:0770157,EFO:0009922,...,427311,nucleus,glutamatergic neuron,10x 3' v3,normal,Mus musculus,female,primary motor cortex,na,early adult stage
AAACCCAGTCAGACTT-1L8TX_181211_01_A02,30.0,5.0,L2/3 IT_2,L2/3 IT,Glutamatergic,#7C8169,2105.0,GlutamatergicL2/3 IT,ILX:0770156,EFO:0009922,...,427311,nucleus,glutamatergic neuron,10x 3' v3,normal,Mus musculus,female,primary motor cortex,na,early adult stage


In [14]:
# Accession ids of every nomenclature row whose cell_type_card is not "No".
has_cell_type_card = nomenclature_table["cell_type_card"] != "No"
nomenclature_accessions = nomenclature_table.loc[has_cell_type_card].index.unique().tolist()
len(nomenclature_accessions)

138

Create an inverted mapping (cell_set_to_cell_assignments) and check whether the clustering in the nomenclature is the same as the clustering in ann_data.
All cells that belong to the same cluster in the nomenclature should have the same BICCN_class/BICCN_subclass/BICCN_cluster labels.

In [19]:
# Build the inverted mapping: one row per (cell set accession, member cell) pair,
# annotated with the BICCN labels that ann_data assigns to that cell.
label_columns = ["BICCN_class_label", "BICCN_subclass_label", "BICCN_cluster_label"]
report_columns = ["accession_id", "type", "sample_name"] + label_columns

records = list()
for accession in nomenclature_accessions:
    nomenclature_type = nomenclature_table.loc[accession].cell_type_card
    # Cells flagged with 1 in this accession's column of c2c_data.
    samples_in_same_cluster = c2c_data.loc[c2c_data[accession] == 1, "sample_name"].unique()

    # One vectorised lookup per cell set instead of one .loc call per cell.
    # A sample missing from adata_subset still raises KeyError.
    # NOTE(review): assumes adata_subset has a unique index (one row per sample) - confirm.
    labels = adata_subset.loc[samples_in_same_cluster, label_columns]

    # astype(object) keeps plain Python values rather than categorical columns.
    accession_rows = labels.astype(object).reset_index(drop=True)
    accession_rows.insert(0, "sample_name", samples_in_same_cluster)
    accession_rows.insert(0, "type", nomenclature_type)
    accession_rows.insert(0, "accession_id", accession)
    records.append(accession_rows)

# pd.concat rejects an empty list, so fall back to an empty frame with the same columns.
if records:
    df = pd.concat(records, ignore_index=True)
else:
    df = pd.DataFrame(columns=report_columns)
df.head(10)

Unnamed: 0,accession_id,type,sample_name,BICCN_class_label,BICCN_subclass_label,BICCN_cluster_label
0,CS202002013_1,Cell Type,pBICCNsMMrMOpRAiF003d190318_ATCACTTTCATTATCC,GABAergic,Sncg,Sncg Col14a1_3
1,CS202002013_1,Cell Type,pBICCNsMMrMOpRAiF004d190318_AGATCGTTCATACAGC,GABAergic,Lamp5,Lamp5 Pax6
2,CS202002013_1,Cell Type,pBICCNsMMrMOpRAiF004d190318_GGTTGTACAGAACCGA,GABAergic,Lamp5,Lamp5 Pax6
3,CS202002013_1,Cell Type,pBICCNsMMrMOpRAiF005d190318_TTTGATCTCTCTCTTC,GABAergic,Lamp5,Lamp5 Pax6
4,CS202002013_1,Cell Type,pBICCNsMMrMOpRAiF007d190314_CATCAAGGTATCACGT,GABAergic,Lamp5,Lamp5 Pax6
5,CS202002013_1,Cell Type,pBICCNsMMrMOpRAiF007d190314_CCGTGAGCAGTGGTGA,GABAergic,Lamp5,Lamp5 Pax6
6,CS202002013_1,Cell Type,pBICCNsMMrMOpRAiF007d190314_CGTAAGTTCACTACGA,GABAergic,Lamp5,Lamp5 Pax6
7,CS202002013_1,Cell Type,pBICCNsMMrMOpRAiF007d190314_CTCCAACGTGACACAG,GABAergic,Lamp5,Lamp5 Pax6
8,CS202002013_1,Cell Type,pBICCNsMMrMOpRAiF007d190314_GAGAGGTAGTCGTTAC,GABAergic,Lamp5,Lamp5 Pax6
9,CS202002013_1,Cell Type,pBICCNsMMrMOpRAiF007d190314_GCCATGGCATTCGATG,GABAergic,Lamp5,Lamp5 Pax6


Save inverted mapping to file cell_set_to_cell_assignments_report.csv

In [32]:
# Write the inverted mapping next to the notebook (current working directory).
report_path = os.path.join(os.getcwd(), "cell_set_to_cell_assignments_report.csv")
df.to_csv(report_path)

Report the clusters that are different in the nomenclature and anndata

In [20]:
# For each nomenclature rank: the df column it is compared against, and the
# name used for that column in the printed report line.
rank_to_label_column = {
    "Class": ("BICCN_class_label", "BICCN_class"),
    "Subclass": ("BICCN_subclass_label", "BICCN_subclass"),
    "Cell Type": ("BICCN_cluster_label", "BICCN_clusters"),
}

# df already holds one row per (accession, cell), so group it once instead of
# re-deriving each cell set's members from c2c_data and scanning df per accession.
cells_by_accession = df.groupby("accession_id", sort=False)

for accession in nomenclature_accessions:
    nomenclature_record = nomenclature_table.loc[accession]
    nomenclature_type = nomenclature_record.cell_type_card
    nomenclature_label = nomenclature_record.cell_set_preferred_alias

    # Only the three ranks above are reported; a cell set with no member cells
    # has no group and nothing to report.
    if nomenclature_type not in rank_to_label_column:
        continue
    if accession not in cells_by_accession.groups:
        continue

    label_column, label_name = rank_to_label_column[nomenclature_type]
    # Distinct labels, in order of first appearance among this cell set's own rows.
    labels = cells_by_accession.get_group(accession)[label_column].unique().tolist()
    if len(labels) > 1:
        # str() keeps the join from failing if a label is missing (NaN).
        print("'" + nomenclature_label + "' " + accession + "(" + nomenclature_type + ") is represented in multiple " + label_name + ":" + ", ".join(map(str, labels)))

'Lamp5-like Pax6' CS202002013_1(Cell Type) is represented in multiple BICCN_clusters:Sncg Col14a1_3, Lamp5 Pax6, Lamp5 Egln3_3, Lamp5 Egln3_2, Lamp5 Slc35d3, Sncg Slc17a8
'Lamp5-like Egln3_1' CS202002013_2(Cell Type) is represented in multiple BICCN_clusters:Lamp5 Egln3_3, Lamp5 Egln3_2, Lamp5 Pax6, Sncg Col14a1_2, OPC Pdgfra_3
'Lamp5 Egln3_2' CS202002013_3(Cell Type) is represented in multiple BICCN_clusters:Lamp5 Egln3_2, Lamp5 Egln3_3, Lamp5 Pdlim5, Lamp5 Slc35d3, Vip Igfbp6_1
'Lamp5 Egln3_3' CS202002013_4(Cell Type) is represented in multiple BICCN_clusters:Prog_2, CR, Lamp5 Egln3_1, Lamp5 Egln3_2, Lamp5 Pdlim5, Vip Igfbp6_1, Lamp5 Egln3_3, Sncg Col14a1_2, Sst Myh8_2
'Lamp5 Pdlim5_1' CS202002013_5(Cell Type) is represented in multiple BICCN_clusters:Lamp5 Pdlim5, Lamp5 Egln3_1
'Lamp5 Pdlim5_2' CS202002013_6(Cell Type) is represented in multiple BICCN_clusters:Lamp5 Pdlim5, Lamp5 Slc35d3
'Lamp5 Slc35d3' CS202002013_7(Cell Type) is represented in multiple BICCN_clusters:Lamp5 Slc35d3

'L5 IT_3' CS202002013_65(Cell Type) is represented in multiple BICCN_clusters:L5 IT S100b, L2/3 IT_2, L5 IT Pld5, L5 IT Rspo1, L5 ET_4, L2/3 IT_1, L6 IT Sulf1_2, L6 IT Sulf1_1
'L5 IT_4' CS202002013_66(Cell Type) is represented in multiple BICCN_clusters:L5 IT Pld5, L6 IT Sulf1_1, L5 IT S100b, L2/3 IT_1, L6 IT Sulf1_2, L6 IT Sulf1 Fos, L2/3 IT_2, L5 IT Rspo1
'L6 IT_1' CS202002013_67(Cell Type) is represented in multiple BICCN_clusters:L6 IT Sulf1_2, L6 IT Sulf1 Fos, L6 IT Sulf1_1, L5 IT S100b, L5 IT Pld5, L2/3 IT_1, L5 IT Rspo1, L2/3 IT_2, Pvalb Prdm8_2
'L6 IT_2' CS202002013_68(Cell Type) is represented in multiple BICCN_clusters:L6 IT Sulf1_1, L6 IT Sulf1 Fos, L5 IT Pld5, L2/3 IT_1, L2/3 IT_2
'L5 ET_1' CS202002013_70(Cell Type) is represented in multiple BICCN_clusters:L5 ET_4, L5 ET_2, L5 ET_1, L5 ET_5, L6 CT Cpa6, L5 ET_6, L5 IT S100b, CR, Prog_2
'L5 ET_2' CS202002013_71(Cell Type) is represented in multiple BICCN_clusters:L5 ET_3, L5 ET_4, L5 ET_5, L5 ET_1, L5 IT S100b, L5 ET_2
'L5 

## Labels compatibility

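Check whether the labels used in the nomenclature table match the labels used in ann_data, for each rank (class, subclass, cell type).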

In [42]:
# Nomenclature ranks and the ann_data column each one is compared against
# (paired by position).
nomenclature_types = ["Class", "Subclass", "Cell Type"]
anndata_types = ["BICCN_class_label", "BICCN_subclass_label", "BICCN_cluster_label"]

# The original loop read an undefined `target_types`, which only worked through
# leftover kernel state; it now iterates over `nomenclature_types` defined above.
for nomenclature_type, anndata_type in zip(nomenclature_types, anndata_types):
    print("Analyzing type: " + nomenclature_type)
    named_nodes = nomenclature_table[nomenclature_table["cell_type_card"] == nomenclature_type]

    # Labels are compared case-insensitively; the "-like" marker is removed from
    # nomenclature aliases before comparison.
    nomenclature_labels = {
        str(label).lower().replace("-like", "")
        for label in named_nodes.cell_set_preferred_alias.unique().tolist()
    }
    anndata_labels = {
        str(label).lower()
        for label in adata_filtered[anndata_type].unique().tolist()
    }

    # Sorted so the printed lists are identical from run to run.
    matching = sorted(nomenclature_labels & anndata_labels)
    only_in_nomenclature = sorted(nomenclature_labels - anndata_labels)
    only_in_anndata = sorted(anndata_labels - nomenclature_labels)

    print("Matching label count: " + str(len(matching)))
    print("Labels only in nomenclature: " + str(len(only_in_nomenclature)))
    print("Samples: " + str(only_in_nomenclature))
    print("Labels only in anndata: " + str(len(only_in_anndata)))
    print("Samples: " + str(only_in_anndata))
    print("")

Analyzing type: Class
Matching label count: 3
Labels only in nomenclature: 1
Samples: ['non-neural']
Labels only in anndata: 0
Samples: []

Analyzing type: Subclass
Matching label count: 15
Labels only in nomenclature: 3
Samples: ['meis2', 'l4', 'micro-pvm']
Labels only in anndata: 7
Samples: ['cr', 'smc', 'macrophage', 'prog/ip', 'opc', 'endo', 'peri']

Analyzing type: Cell Type
Matching label count: 55
Labels only in nomenclature: 55
Samples: ['l5 it_1', 'l6b shisa6_1', 'sncg calb1_2', 'sst penk', 'pvm_1', 'vip c1ql1', 'lamp5 pdlim5_2', 'vip sncg', 'l6 it_2', 'sst etv1', 'pvalb kank4', 'vip mybpc1_2', 'sst th_1', 'l6b shisa6_2', 'sst th_3', 'l6b col6a1', 'l4/5 it_1', 'sst pvalb etv1', 'vip serpinf1_3', 'sst htr1a', 'l5 it_3', 'pvalb calb1_2', 'pvalb vipr2_1', 'vip mybpc1_3', 'sst pvalb calb2', 'l6 ct kit_2', 'l4/5 it_2', 'micro', 'l6 ct grp', 'sst crhr2_2', 'l6 ct kit_1', 'sncg calb1_1', 'pvalb calb1_1', 'sst crhr2_1', 'pvalb vipr2_2', 'meis2 (cluster)', 'l5 it_2', 'sst pappa', 'pvm_