From 6e4b86d07376c1d256d76fe3a3fd32f366f5ba43 Mon Sep 17 00:00:00 2001 From: Christian Busse Date: Fri, 15 Feb 2019 19:57:11 +0100 Subject: [PATCH] Include recent MiAIRR refinements The recent changes introduced via the `miairr_refinement` branch (PR #155) will potentially conflict with the changes in CRWG-API. Resolve conflicts. --- AIRR_Minimal_Standard_Data_Elements.tsv | 30 ++++++++++++------------- specs/airr-schema.yaml | 12 +++++----- specs/miairr.yaml | 30 ++++++++++++------------- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/AIRR_Minimal_Standard_Data_Elements.tsv b/AIRR_Minimal_Standard_Data_Elements.tsv index d73538d71..d97d8db42 100644 --- a/AIRR_Minimal_Standard_Data_Elements.tsv +++ b/AIRR_Minimal_Standard_Data_Elements.tsv @@ -1,25 +1,25 @@ MiAIRR data set / subset MiAIRR field designation Data type Content format MiAIRR content definition Field value example AIRR Formats WG field name 1 / study Study string Free text Unique ID assigned by study registry PRJNA001 study_id 1 / study Study title string Free text Descriptive study title Effects of sun light exposure of the Treg repertoire study_title -1 / study Study type string Controlled vocabulary Generic study design Placebo controlled phase 3 clinical trial study_description +1 / study Study type string {"ontology": "NCIT", "top_node": "Study Design", "draft": true} Generic study design Case-Control Study study_description 1 / study Study inclusion/exclusion criteria string Free text List of criteria for inclusion/exclusion for the study Include: Clinical P. falciparum infection; Exclude: Seropositive for HIV inclusion_exclusion_criteria 1 / study Grant funding agency string Free text Funding agencies and grant numbers NIH, award number R01GM987654 grants -1 / study Contact information (data collection) string Free text Full contact information of the corresponding author, i.e. who is legally responsible for data collection and release. This should include an e-mail address. 
p.stibbons@unseenu.edu collected_by -1 / study Lab name string Free text Department of corresponding author Stibbons Lab lab_name -1 / study Lab address string Free text Institutional address of corresponding author School of Medicine, Unseen University, Ankh-Morpork, Disk World lab_address -1 / study Contact information (data deposition) string Free text Full contact information of the submitter, i.e. the person deposition the data Dr. P. Stibbons submitted_by +1 / study Contact information (data collection) string Free text Full contact information of the data collector, i.e. the person who is legally responsible for data collection and release. This should include an e-mail address. Dr. P. Stibbons, p.stibbons@unseenu.edu collected_by +1 / study Lab name string Free text Department of data collector Department for Planar Immunology lab_name +1 / study Lab address string Free text Institution and institutional address of data collector School of Medicine, Unseen University, Ankh-Morpork, Disk World lab_address +1 / study Contact information (data deposition) string Free text Full contact information of the data depositor, i.e. the person submitting the data to a repository. This is supposed to be a short-lived and technical role until the submission is released. Adrian Turnipseed, a.turnipseed@unseenu.edu submitted_by 1 / study Relevant publications string Valid PubMed ID Publications describing the rationale and/or outcome of the study PMID85642 pub_ids 1 / subject Subject ID string Free text Subject ID assigned by submitter, unique within study SUB856413 subject_id 1 / subject Synthetic library boolean TRUE/FALSE TRUE for libraries in which the diversity has been synthetically generated (e.g. 
phage display) FALSE synthetic -1 / subject Organism string Controlled vocabulary Binomial designation of subject's species Homo sapiens organism -1 / subject Sex string /(male|female|pooled|hermaphrodite|intersex|not collected|not applicable)/ Biological sex of subject female sex +1 / subject Organism string {"ontology": "NCBITAXON", "top_node": "Gnathostomata", "draft": false} Species of subject (using binomial nomenclature) Homo sapiens organism +1 / subject Sex string {"controlled_vocabulary": ["male", "female", "pooled", "hermaphrodite", "intersex", "not collected", "not applicable"]} Biological sex of subject female sex 1 / subject Age string Time duration and unit Absolute age of subject at time point `Age event` 65 a age -1 / subject Age event string Free text Event in the study schedule to which `Age` refers to. For NCBI BioSample this MUST be `sampling`. For other implementations submitters need to be aware that there is currently no mechanism to encode to potential delta between `Age event` and `Sample collection time`, hence the chosen events should be in temporal proximity. enrollment age_event +1 / subject Age event string Free text Event in the study schedule to which `Age` refers. For NCBI BioSample this MUST be `sampling`. For other implementations submitters need to be aware that there is currently no mechanism to encode the potential delta between `Age event` and `Sample collection time`, hence the chosen events should be in temporal proximity. 
enrollment age_event 1 / subject Ancestry population string Free text Broad geographic origin of ancestry (continent) list of continents, mixed or unknown ancestry_population 1 / subject Ethnicity string Free text Ethnic group of subject (defined as cultural/language-based membership) English, Kurds, Manchu, Yakuts (and other fields from Wikipedia) ethnicity 1 / subject Race string Free text Racial group of subject (as defined by NIH) White, American Indian or Alaska Native, Black, Asian, Native Hawaiian or Other Pacific Islander, Other race 1 / subject Strain name string Free text Non-human: designation of the strain or breed of animal used C57BL/6J strain_name -1 / subject Relation to other subjects string Free text Subject ID to which `Relation type` refers to SUB1355648 linked_subjects +1 / subject Relation to other subjects string Free text Subject ID to which `Relation type` refers SUB1355648 linked_subjects 1 / subject Relation type string Free text Relation between subject and `linked_subjects`, can be genetic or environmental (e.g.exposure) father, daughter, household link_type 1 / diag. & intervent. Study group description string Free text Designation of study arm to which the subject is assigned to control study_group_description 1 / diag. & intervent. 
Diagnosis string Free text Diagnosis of subject Multiple myeloma disease_diagnosis @@ -38,26 +38,26 @@ MiAIRR data set / subset MiAIRR field designation Data type Content format MiAIR 2 / sample Collection time event string Free text Event in the study schedule to which `Sample collection time` relates to Primary vaccination collection_time_point_reference 2 / sample Biomaterial provider string Free text Name and address of the entity providing the sample Tissues-R-Us, Tampa, FL, USA biomaterial_provider 3 / process (cell) Tissue processing string Free text Enzymatic digestion and/or physical methods used to isolate cells from sample Collagenase A/Dnase I digested, followed by Percoll gradient tissue_processing -3 / process (cell) Cell subset string Controlled vocabulary Commonly-used designation of isolated cell population Class-switched Memory B cells cell_subset +3 / process (cell) Cell subset string {"ontology": "CL", "top_node": "lymphocyte", "draft": true} Commonly-used designation of isolated cell population class switched memory B cell cell_subset 3 / process (cell) Cell subset phenotype string Free text List of cellular markers and their expression levels used to isolate the cell population CD19+ CD38+ CD27+ IgM- IgD- cell_phenotype 3 / process (cell) Single-cell sort boolean TRUE/FALSE TRUE if single cells were isolated into separate compartments FALSE single_cell 3 / process (cell) Number of cells in experiment integer Number Total number of cells that went into the experiment 1000000 cell_number -3 / process (cell) Number of cells per sequencing reaction integer Number Number of cells for each biological repicate 50000 cells_per_reaction +3 / process (cell) Number of cells per sequencing reaction integer Number Number of cells for each biological replicate 50000 cells_per_reaction 3 / process (cell) Cell storage boolean TRUE/FALSE TRUE if cells were cryo-preserved between isolation and further processing TRUE cell_storage 3 / process (cell) Cell quality 
string Free text Relative amount of viable cells after preparation and (if applicable) thawing 90% viability as determined by 7-AAD cell_quality 3 / process (cell) Cell isolation / enrichment procedure string Free text Description of the procedure used for marker-based isolation or enrich cells Cells were stained with fluorochrome labeled antibodies and then sorted on a FlowMerlin (CE) cytometer cell_isolation 3 / process (cell) Processing protocol string Free text Description of the methods applied to the sample including cell preparation/ isolation/enrichment and nucleic acid extraction. This should closely mirror the Materials and methods section in the manuscript Stimulated wih anti-CD3/anti-CD28 cell_processing_protocol -3 / process (nucl. acid) Target substrate string /(DNA|RNA)/ The class of nucleic acid that was used as primary starting material for the following procedures RNA template_class +3 / process (nucl. acid) Target substrate string {"controlled_vocabulary": ["DNA", "RNA"]} The class of nucleic acid that was used as primary starting material for the following procedures RNA template_class 3 / process (nucl. acid) Target substrate quality string Free text Description and results of the quality control performed on the template material RIN 9.2 template_quality 3 / process (nucl. acid) Template amount string Free text Amount of template that went into the process 1000 ng template_amount -3 / process (nucl. acid) Library generation method string Controlled vocabulary Generic type of library generation Oligo-dT primed 5' RACE library_generation_method +3 / process (nucl. 
acid) Library generation method string {"controlled_vocabulary": ["PCR", "RT(RHP)+PCR", "RT(oligo-dT)+PCR", "RT(oligo-dT)+TS+PCR", "RT(oligo-dT)+TS(UMI)+PCR", "RT(specific)+PCR", "RT(specific)+TS+PCR", "RT(specific)+TS(UMI)+PCR", "RT(specific+UMI)+PCR", "RT(specific+UMI)+TS+PCR", "RT(specific)+TS", "other"]} Generic type of library generation RT(oligo-dT)+PCR library_generation_method 3 / process (nucl. acid) Library generation protocol string Free text Description of processes applied to substrate to obtain a library that is ready for sequencing cDNA was generated using library_generation_protocol 3 / process (nucl. acid) Protocol IDs string Free text When using a library generation protocol from a commercial provider, provide the protocol version number v2.1 (2016-09-15) library_generation_kit_version 3 / process (nucl. acid [pcr]) Target locus for PCR string Free text Designation of the target locus according to standard gene nomencleature Constant region vs. V region amplification pcr_target_locus 3 / process (nucl. acid [pcr]) Forward PCR primer target location string Free text Position of the most distal nucleotide templated by the forward primer or primer mix IGHV, +23 forward_pcr_primer_target_location 3 / process (nucl. acid [pcr]) Reverse PCR primer target location string Free text Position of the most proximal nucleotide templated by the reverse primer or primer mix IGHG, +57 reverse_pcr_primer_target_location -3 / process (nucl. acid) Complete sequences string /(partial|complete|complete+untemplated)/ To be considered `complete`, the procedure used for library construction MUST generate sequences that 1) include the first V segment codon that encodes the mature polypeptide chain (i.e. after the leader sequence) and 2) include the last complete codon of the J segment (i.e. 1 bp 5' of the J->C splice site) and 3) provide sequence information for all positions between 1) and 2). 
To be considered `complete & untemplated`, the sections of the sequences defined in points 1) to 3) of the previous sentence MUST be untemplated, i.e. MUST NOT overlap with the primers used in library preparation. partial complete_sequences -3 / process (nucl. acid) Physical linkage of different loci string Controlled vocabulary Describes the mode of linkage if a method was used which physically links nucleic acids derived from distinct loci in a single-cell context IGH-IGK/IGL-head/head physical_linkage +3 / process (nucl. acid) Complete sequences string {"controlled_vocabulary": ["partial", "complete", "complete+untemplated"]} To be considered `complete`, the procedure used for library construction MUST generate sequences that 1) include the first V segment codon that encodes the mature polypeptide chain (i.e. after the leader sequence) and 2) include the last complete codon of the J segment (i.e. 1 bp before the J->C splice site) and 3) provide sequence information for all positions between 1) and 2). To be considered `complete & untemplated`, the sections of the sequences defined in points 1) to 3) of the previous sentence MUST be untemplated, i.e. MUST NOT overlap with the primers used in library preparation. partial complete_sequences +3 / process (nucl. 
acid) Physical linkage of different loci string {"controlled_vocabulary": ["none", "hetero_head-head"]} Describes the mode of linkage if a method was used which physically links nucleic acids derived from distinct loci in a single-cell context hetero_head-head physical_linkage 3 / process (sequencing) Total reads passing QC filter integer Number Number of usable reads for analysis 10365118 total_reads_passing_qc_filter 3 / process (sequencing) Sequencing platform string Free text Designation of sequencing instrument used Alumina LoSeq 1000 sequencing_platform 3 / process (sequencing) Read lengths integer Array of numbers Read length in bases for each direction [300,300] read_length diff --git a/specs/airr-schema.yaml b/specs/airr-schema.yaml index 2d4680699..d8f7c6b05 100644 --- a/specs/airr-schema.yaml +++ b/specs/airr-schema.yaml @@ -39,19 +39,19 @@ Study: x-miairr: true collected_by: type: string - description: Full contact information of the corresponding author, i.e. who is legally responsible for data collection and release. This should include an e-mail address. + description: Full contact information of the data collector, i.e. the person who is legally responsible for data collection and release. This should include an e-mail address. x-miairr: true lab_name: type: string - description: Department of corresponding author + description: Department of data collector x-miairr: true lab_address: type: string - description: Institutional address of corresponding author + description: Institution and institutional address of data collector x-miairr: true submitted_by: type: string - description: Full contact information of the submitter, i.e. the person deposition the data + description: Full contact information of the data depositor, i.e. the person submitting the data to a repository. This is supposed to be a short-lived and technical role until the submission is released. 
x-miairr: true pub_ids: type: string @@ -94,7 +94,7 @@ Subject: x-miairr: true age_event: type: string - description: Event in the study schedule to which `Age` refers to. For NCBI BioSample this MUST be `sampling`. For other implementations submitters need to be aware that there is currently no mechanism to encode to potential delta between `Age event` and `Sample collection time`, hence the chosen events should be in temporal proximity. + description: Event in the study schedule to which `Age` refers. For NCBI BioSample this MUST be `sampling`. For other implementations submitters need to be aware that there is currently no mechanism to encode the potential delta between `Age event` and `Sample collection time`, hence the chosen events should be in temporal proximity. x-miairr: true ancestry_population: type: string @@ -114,7 +114,7 @@ Subject: x-miairr: true linked_subjects: type: string - description: Subject ID to which `Relation type` refers to + description: Subject ID to which `Relation type` refers x-miairr: true link_type: type: string diff --git a/specs/miairr.yaml b/specs/miairr.yaml index cc1bfaa58..809ec13d8 100644 --- a/specs/miairr.yaml +++ b/specs/miairr.yaml @@ -14,8 +14,8 @@ study_description: miairr_set: 1 miairr_subset: study miairr_name: Study type - format: controlled vocabulary - example: Placebo controlled phase 3 clinical trial + format: {"ontology": "NCIT", "top_node": "Study Design", "draft": true} + example: Case-Control Study inclusion_exclusion_criteria: miairr_set: 1 miairr_subset: study @@ -33,13 +33,13 @@ collected_by: miairr_subset: study miairr_name: Contact information (data collection) format: free text - example: p.stibbons@unseenu.edu + example: Dr. P. 
Stibbons, p.stibbons@unseenu.edu lab_name: miairr_set: 1 miairr_subset: study miairr_name: Lab name format: free text - example: Department of Planar Immunology + example: Department for Planar Immunology lab_address: miairr_set: 1 miairr_subset: study @@ -51,7 +51,7 @@ submitted_by: miairr_subset: study miairr_name: Contact information (data deposition) format: free text - example: Dr. P. Stibbons + example: Adrian Turnipseed, a.turnipseed@unseenu.edu pub_ids: miairr_set: 1 miairr_subset: study @@ -74,13 +74,13 @@ organism: miairr_set: 1 miairr_subset: subject miairr_name: Organism - format: controlled vocabulary + format: {"ontology": "NCBITAXON", "top_node": "Gnathostomata", "draft": false} example: Homo sapiens sex: miairr_set: 1 miairr_subset: subject miairr_name: Sex - format: /(male|female|pooled|hermaphrodite|intersex|not collected|not applicable)/ + format: {"controlled_vocabulary": ["male", "female", "pooled", "hermaphrodite", "intersex", "not collected", "not applicable"]} example: female age: miairr_set: 1 @@ -236,8 +236,8 @@ cell_subset: miairr_set: 3 miairr_subset: process (cell) miairr_name: Cell subset - format: controlled vocabulary - example: Class-switched Memory B cells + format: {"ontology": "CL", "top_node": "lymphocyte", "draft": true} + example: class switched memory B cell cell_phenotype: miairr_set: 3 miairr_subset: process (cell) @@ -290,7 +290,7 @@ template_class: miairr_set: 3 miairr_subset: process (nucleic acid) miairr_name: Target substrate - format: /(DNA|RNA)/ + format: {"controlled_vocabulary": ["DNA", "RNA"]} example: RNA template_quality: miairr_set: 3 @@ -308,8 +308,8 @@ library_generation_method: miairr_set: 3 miairr_subset: process (nucleic acid) miairr_name: Library generation method - format: controlled vocabulary - example: Oligo-dT primed 5' RACE + format: {"controlled_vocabulary": ["PCR", "RT(RHP)+PCR", "RT(oligo-dT)+PCR", "RT(oligo-dT)+TS+PCR", "RT(oligo-dT)+TS(UMI)+PCR", "RT(specific)+PCR", "RT(specific)+TS+PCR", 
"RT(specific)+TS(UMI)+PCR", "RT(specific+UMI)+PCR", "RT(specific+UMI)+TS+PCR", "RT(specific)+TS", "other"]} + example: RT(oligo-dT)+TS(UMI)+PCR library_generation_protocol: miairr_set: 3 miairr_subset: process (nucleic acid) @@ -344,14 +344,14 @@ complete_sequences: miairr_set: 3 miairr_subset: process (nucleic acid) miairr_name: Complete sequences - format: /(partial|complete|complete+untemplated)/ + format: {"controlled_vocabulary": ["partial", "complete", "complete+untemplated"]} example: partial physical_linkage: miairr_set: 3 miairr_subset: process (nucleic acid) miairr_name: Physical linkage of different loci - format: controlled vocabulary - example: IGH-IGK/IGL-head/head + format: {"controlled_vocabulary": ["none", "hetero_head-head"]} + example: hetero_head-head total_reads_passing_qc_filter: miairr_set: 3 miairr_subset: process (nucleic acid)