---
#
# Schema definitions for AIRR standards objects
#
Info:
  # Metadata describing this schema document itself.
  title: AIRR Schema
  description: Schema definitions for AIRR standards objects
  # Quoted so the version stays a string rather than a float.
  version: '1.4'
  contact:
    name: AIRR Community
    url: 'https://github.com/airr-community'
  license:
    name: Creative Commons Attribution 4.0 International
    url: 'https://creativecommons.org/licenses/by/4.0/'
# Properties that are based upon an ontology use this
# standard schema definition
Ontology:
  discriminator: AIRR
  type: object
  properties:
    # Identifier of the ontology concept, given as a CURIE.
    id:
      type: string
      description: CURIE of the concept, encoding the ontology and the local ID
    # Human-readable label that accompanies the identifier.
    label:
      type: string
      description: Label of the concept in the respective ontology
CURIEResolution:
  # Each entry pairs one CURIE prefix with the list of IRI prefixes given for it.
  - curie_prefix: NCBITAXON
    iri_prefix:
      - 'http://purl.obolibrary.org/obo/NCBITaxon_'
      - 'http://purl.bioontology.org/ontology/NCBITAXON/'
  - curie_prefix: NCIT
    iri_prefix:
      - 'http://purl.obolibrary.org/obo/NCIT_'
      # Must stay quoted: the trailing '#' would otherwise start a comment.
      - 'http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#'
  - curie_prefix: UO
    iri_prefix:
      - 'http://purl.obolibrary.org/obo/UO_'
  - curie_prefix: DOID
    iri_prefix:
      - 'http://purl.obolibrary.org/obo/DOID_'
  - curie_prefix: UBERON
    iri_prefix:
      - 'http://purl.obolibrary.org/obo/UBERON_'
  - curie_prefix: CL
    iri_prefix:
      - 'http://purl.obolibrary.org/obo/CL_'
# AIRR specification extensions
#
# The schema definitions for AIRR standards objects are extended to
# provide a number of AIRR specific attributes. This schema definition
# specifies the structure, property names and data types. These
# attributes are attached to an AIRR field with the x-airr property.
Attributes:
  discriminator: AIRR
  type: object
  properties:
    miairr:
      type: string
      description: MiAIRR requirement level.
      enum:
        - essential
        - important
        - defined
      # NOTE(review): the default below is not one of the enum values listed
      # above - confirm that this is intentional.
      default: useful
    identifier:
      type: boolean
      description: >
        True if the field is an identifier required to link metadata and/or individual
        sequence records across objects in the complete AIRR Data Model and ADC API.
      default: false
    adc-query-support:
      type: boolean
      description: >
        True if an ADC API implementation must support queries on the field.
        If false, query support for the field in ADC API implementations is optional.
      default: false
    nullable:
      type: boolean
      description: True if the field may have a null value.
      default: true
    # The three deprecated* attributes describe a field's deprecation status.
    deprecated:
      type: boolean
      description: True if the field has been deprecated from the schema.
      default: false
    deprecated-description:
      type: string
      description: Information regarding the deprecation of the field.
    deprecated-replaced-by:
      type: array
      items:
        type: string
      description: The deprecated field is replaced by this list of fields.
    # MiAIRR set / subset / name attributes.
    set:
      type: integer
      description: MiAIRR set
    subset:
      type: string
      description: MiAIRR subset
    name:
      type: string
      description: MiAIRR name
    format:
      type: string
      description: Field format. If null then assume the full range of the field data type
      enum:
        - ontology
        - controlled vocabulary
        - physical quantity
    ontology:
      type: object
      description: Ontology definition for field
      properties:
        draft:
          type: boolean
          description: Indicates if ontology definition is a draft
        top_node:
          type: object
          description: >
            Concept to use as top node for ontology. Note that this must have the same CURIE namespace
            as the actually annotated concept.
          properties:
            id:
              type: string
              description: CURIE for the top node term
            label:
              type: string
              description: Ontology name for the top node term
# AIRR Data File
#
# A JSON data file that holds Repertoire metadata, data processing
# analysis objects, or any object in the AIRR Data Model.
#
# It is presumed that the objects gathered together in an AIRR Data File are related
# or relevant to each other, e.g. part of the same study; thus, the ID fields can be
# internally resolved unless the ID contains an external PID. This implies that AIRR
# Data Files cannot be merged simply by concatenating arrays; any merge program
# would need to manage duplicate or conflicting ID values.
# While the properties in an AIRR Data File are not required, if one is provided then
# the value should not be null.
DataFile:
  discriminator: AIRR
  type: object
  properties:
    # No property is listed as required, but each one is marked
    # nullable: false, so a property that is present must carry a value.
    Info:
      $ref: '#/InfoObject'
      x-airr:
        nullable: false
    Repertoire:
      type: array
      description: List of Repertoire metadata
      items:
        $ref: '#/Repertoire'
      x-airr:
        nullable: false
    RepertoireGroup:
      type: array
      description: List of Repertoire groups
      items:
        $ref: '#/RepertoireGroup'
      x-airr:
        nullable: false
    DataProcessing:
      type: array
      description: List of data processing workflows
      items:
        $ref: '#/DataProcessing'
      x-airr:
        nullable: false
    Cell:
      type: array
      description: List of cells
      items:
        $ref: '#/Cell'
      x-airr:
        nullable: false
    Rearrangement:
      type: array
      description: List of rearrangement records
      items:
        $ref: '#/Rearrangement'
      x-airr:
        nullable: false
    Clone:
      type: array
      description: List of clones
      items:
        $ref: '#/Clone'
      x-airr:
        nullable: false
    Tree:
      type: array
      description: List of trees
      items:
        $ref: '#/Tree'
      x-airr:
        nullable: false
# AIRR Info object, should be similar to openapi
# should we point to an openapi schema?
InfoObject:
  discriminator: AIRR
  type: object
  description: Provides information about data and API responses.
  # Only title and version are mandatory.
  required:
    - title
    - version
  properties:
    title:
      type: string
      x-airr:
        nullable: false
    version:
      type: string
      x-airr:
        nullable: false
    description:
      type: string
    contact:
      type: object
      properties:
        name:
          type: string
        url:
          type: string
        email:
          type: string
    license:
      type: object
      # When a license object is given, its name is mandatory.
      required:
        - name
      properties:
        name:
          type: string
          x-airr:
            nullable: false
        url:
          type: string
# A time point
TimePoint:
  discriminator: AIRR
  description: Time point at which an observation or other action was performed.
  type: object
  properties:
    label:
      type: string
      description: Informative label for the time point
      example: Pre-operative sampling of cancer tissue
      x-airr:
        nullable: true
        adc-query-support: true
    value:
      type: number
      description: Value of the time point
      example: -5.0
      x-airr:
        nullable: true
        adc-query-support: true
    # The unit is an ontology term; its top node is the UO "time unit" concept.
    unit:
      $ref: '#/Ontology'
      description: Unit of the time point
      title: Unit of immunization schedule
      example:
        id: 'UO:0000033'
        label: day
      x-airr:
        nullable: true
        adc-query-support: true
        format: ontology
        ontology:
          draft: false
          top_node:
            id: 'UO:0000003'
            label: time unit
#
# Germline gene schema
#
# The GeneDescription object will be introduced at a later point here. Until
# then the term "gene description" below can be considered to be equivalent
# to "gene symbol".
#
# Gene descriptions from the same class organized together as a set
GermlineClassSet:
  discriminator: AIRR
  type: object
  properties:
    germline_class_id:
      type: string
      description: A unique identifier for this Germline Class Set.
    germline_class:
      type: string
      enum:
        - IGH
        - IGI
        - IGK
        - IGL
        - TRA
        - TRB
        - TRD
        - TRG
      example: IGH
      x-airr:
        nullable: true
        adc-query-support: true
        format: controlled vocabulary
    germline_alleles:
      type: array
      description: Array of gene descriptions
      items:
        type: string
        description: >-
          Gene description for a germline allele. If referring to a known reference sequence in a
          database the relevant gene/allele nomenclature should be followed (e.g., IGHV4-59*01 if
          using IMGT/GENE-DB).
        # NOTE(review): x-airr is placed on the array items here; the nesting
        # level could not be confirmed - verify it belongs here and not on
        # germline_alleles itself.
        x-airr:
          nullable: true
          adc-query-support: true
    germline_process:
      type: string
      enum:
        - genomic_sequencing
        - repertoire_sequencing
      description: Information on how the germline was acquired. Controlled vocabulary.
      title: Germline acquisition process
      example: repertoire_sequencing
      x-airr:
        nullable: true
        adc-query-support: true
        format: controlled vocabulary
# List of germline class sets used for analysis or to describe
# a subject's genotype
GermlineSet:
  discriminator: AIRR
  type: object
  properties:
    germline_set_id:
      type: string
      description: A unique identifier for this Germline Set.
    # Each list element is a GermlineClassSet object.
    germline_class_list:
      description: List of classes included in this germline set.
      type: array
      items:
        $ref: '#/GermlineClassSet'
MHCGermlineClassSet:
  discriminator: AIRR
  type: object
  properties:
    germline_class_id:
      type: string
      description: A unique identifier for this Germline Set, assumed to be unique in the context of the study.
    # MHC is the only class permitted for this set.
    germline_class:
      type: string
      enum:
        - MHC
      example: MHC
      x-airr:
        nullable: true
        adc-query-support: true
        format: controlled vocabulary
    germline_alleles:
      type: array
      description: Array of gene descriptions
      items:
        type: string
        # The example allele name uses a plain ASCII hyphen so that it can be
        # copied and searched verbatim.
        description: >-
          Gene description for a germline allele. If referring to a known reference sequence in a
          database the relevant gene/allele nomenclature should be followed (e.g., HLA-C*07:29).
        # NOTE(review): x-airr is placed on the array items here; the nesting
        # level could not be confirmed - verify it belongs here and not on
        # germline_alleles itself.
        x-airr:
          nullable: true
          adc-query-support: true
    germline_process:
      type: string
      enum:
        - genomic_sequencing
        - repertoire_sequencing
      description: Information on how the germline was acquired. Controlled vocabulary.
      title: Germline acquisition process
      example: repertoire_sequencing
      x-airr:
        nullable: true
        adc-query-support: true
        format: controlled vocabulary
# List of germline class sets used for analysis or to describe
# a subject's genotype
MHCGermlineSet:
  discriminator: AIRR
  type: object
  properties:
    germline_set_id:
      type: string
      description: A unique identifier for this Germline Set.
    # Each list element is an MHCGermlineClassSet object.
    germline_class_list:
      description: List of classes included in this germline set.
      type: array
      items:
        $ref: '#/MHCGermlineClassSet'
#
# Repertoire metadata schema
#
# The overall study with a globally unique study_id
Study:
  discriminator: AIRR
  type: object
  # These keys must be present; most of them are nullable (see x-airr).
  required:
    - study_id
    - study_title
    - study_type
    - inclusion_exclusion_criteria
    - grants
    - collected_by
    - lab_name
    - lab_address
    - submitted_by
    - pub_ids
    - keywords_study
  properties:
    study_id:
      type: string
      description: Unique ID assigned by study registry
      title: Study ID
      example: PRJNA001
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: study
        name: Study ID
    study_title:
      type: string
      description: Descriptive study title
      title: Study title
      example: Effects of sun light exposure of the Treg repertoire
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: study
        name: Study title
    study_type:
      $ref: '#/Ontology'
      description: Type of study design
      title: Study type
      example:
        id: 'NCIT:C15197'
        label: Case-Control Study
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: study
        name: Study type
        format: ontology
        ontology:
          draft: false
          top_node:
            id: 'NCIT:C63536'
            label: Study
    study_description:
      type: string
      description: Generic study description
      title: Study description
      example: Longer description
      x-airr:
        nullable: true
        name: Study description
        adc-query-support: true
    inclusion_exclusion_criteria:
      type: string
      description: List of criteria for inclusion/exclusion for the study
      title: Study inclusion/exclusion criteria
      # Quoted because the value contains ': '.
      example: "Include: Clinical P. falciparum infection; Exclude: Seropositive for HIV"
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: study
        name: Study inclusion/exclusion criteria
    grants:
      type: string
      description: Funding agencies and grant numbers
      title: Grant funding agency
      example: NIH, award number R01GM987654
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: study
        name: Grant funding agency
    study_contact:
      type: string
      description: >
        Full contact information of the contact persons for this study. This should include an
        e-mail address and a persistent identifier such as an ORCID ID.
      title: Contact information (study)
      example: Dr. P. Stibbons, p.stibbons@unseenu.edu, https://orcid.org/0000-0002-1825-0097
      x-airr:
        nullable: true
        adc-query-support: true
        name: Contact information (study)
    collected_by:
      type: string
      description: >
        Full contact information of the data collector, i.e. the person who is legally responsible for
        data collection and release. This should include an e-mail address.
      title: Contact information (data collection)
      example: Dr. P. Stibbons, p.stibbons@unseenu.edu
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: study
        name: Contact information (data collection)
    lab_name:
      type: string
      description: Department of data collector
      title: Lab name
      example: Department for Planar Immunology
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: study
        name: Lab name
    lab_address:
      type: string
      description: Institution and institutional address of data collector
      title: Lab address
      example: School of Medicine, Unseen University, Ankh-Morpork, Disk World
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: study
        name: Lab address
    submitted_by:
      type: string
      description: >
        Full contact information of the data depositor, i.e. the person submitting the data to a repository.
        This is supposed to be a short-lived and technical role until the submission is released.
      title: Contact information (data deposition)
      example: Adrian Turnipseed, a.turnipseed@unseenu.edu
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: study
        name: Contact information (data deposition)
    pub_ids:
      type: string
      description: Publications describing the rationale and/or outcome of the study
      title: Relevant publications
      example: "PMID:85642"
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: study
        name: Relevant publications
    keywords_study:
      type: array
      # The controlled vocabulary applies to each element of the array.
      items:
        type: string
        enum:
          - contains_ig
          - contains_tcr
          - contains_single_cell
          - contains_paired_chain
      description: Keywords describing properties of one or more data sets in a study
      title: Keywords for study
      example:
        - contains_ig
        - contains_paired_chain
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: study
        name: Keywords for study
        format: controlled vocabulary
    adc_publish_date:
      type: string
      format: date-time
      description: >
        Date the study was first published in the AIRR Data Commons.
      title: ADC Publish Date
      # The example is a full date-time so that it matches the declared format.
      example: "2021-02-02T00:00:00Z"
      x-airr:
        nullable: true
        adc-query-support: true
        name: ADC Publish Date
    adc_update_date:
      type: string
      format: date-time
      description: >
        Date the study data was updated in the AIRR Data Commons.
      title: ADC Update Date
      # The example is a full date-time so that it matches the declared format.
      example: "2021-02-02T00:00:00Z"
      x-airr:
        nullable: true
        adc-query-support: true
        name: ADC Update Date
# 1-to-n relationship between a study and its subjects
# subject_id is unique within a study
Subject:
  discriminator: AIRR
  type: object
  # These keys must be present; most of them are nullable (see x-airr).
  required:
    - subject_id
    - synthetic
    - species
    - sex
    - age_min
    - age_max
    - age_unit
    - age_event
    - ancestry_population
    - ethnicity
    - race
    - strain_name
    - linked_subjects
    - link_type
  properties:
    subject_id:
      type: string
      description: Subject ID assigned by submitter, unique within study
      title: Subject ID
      example: SUB856413
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: subject
        name: Subject ID
    synthetic:
      type: boolean
      description: TRUE for libraries in which the diversity has been synthetically generated (e.g. phage display)
      title: Synthetic library
      x-airr:
        miairr: essential
        nullable: false
        adc-query-support: true
        set: 1
        subset: subject
        name: Synthetic library
    species:
      $ref: '#/Ontology'
      description: Binomial designation of subject's species
      title: Organism
      example:
        id: 'NCBITAXON:9606'
        label: Homo sapiens
      x-airr:
        miairr: essential
        nullable: false
        adc-query-support: true
        set: 1
        subset: subject
        name: Organism
        format: ontology
        ontology:
          draft: false
          top_node:
            id: 'NCBITAXON:7776'
            label: Gnathostomata
    # Deprecated: superseded by species.
    organism:
      $ref: '#/Ontology'
      description: Binomial designation of subject's species
      x-airr:
        deprecated: true
        deprecated-description: Field was renamed to species for clarity.
        deprecated-replaced-by:
          - species
    sex:
      type: string
      enum:
        - male
        - female
        - pooled
        - hermaphrodite
        - intersex
        - "not collected"
        - "not applicable"
      description: Biological sex of subject
      title: Sex
      example: female
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: subject
        name: Sex
        format: controlled vocabulary
    age_min:
      type: number
      description: Specific age or lower boundary of age range.
      title: Age minimum
      example: 60
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: subject
        name: Age minimum
    age_max:
      type: number
      description: >
        Upper boundary of age range or equal to age_min for specific age.
        This field should only be null if age_min is null.
      title: Age maximum
      example: 80
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: subject
        name: Age maximum
    age_unit:
      $ref: '#/Ontology'
      description: Unit of age range
      title: Age unit
      example:
        id: 'UO:0000036'
        label: year
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: subject
        name: Age unit
        format: ontology
        ontology:
          draft: false
          top_node:
            id: 'UO:0000003'
            label: time unit
    age_event:
      type: string
      description: >
        Event in the study schedule to which `Age` refers. For NCBI BioSample this MUST be `sampling`. For other
        implementations submitters need to be aware that there is currently no mechanism to encode the potential
        delta between `Age event` and `Sample collection time`, hence the chosen events should be in temporal proximity.
      title: Age event
      example: enrollment
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: subject
        name: Age event
    # Deprecated: superseded by age_min, age_max and age_unit.
    age:
      type: string
      x-airr:
        deprecated: true
        deprecated-description: Split into two fields to specify as an age range.
        deprecated-replaced-by:
          - age_min
          - age_max
          - age_unit
    ancestry_population:
      type: string
      description: Broad geographic origin of ancestry (continent)
      title: Ancestry population
      example: list of continents, mixed or unknown
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: subject
        name: Ancestry population
    ethnicity:
      type: string
      description: Ethnic group of subject (defined as cultural/language-based membership)
      title: Ethnicity
      example: English, Kurds, Manchu, Yakuts (and other fields from Wikipedia)
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: subject
        name: Ethnicity
    race:
      type: string
      description: Racial group of subject (as defined by NIH)
      title: Race
      example: White, American Indian or Alaska Native, Black, Asian, Native Hawaiian or Other Pacific Islander, Other
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: subject
        name: Race
    strain_name:
      type: string
      description: Non-human designation of the strain or breed of animal used
      title: Strain name
      example: C57BL/6J
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: subject
        name: Strain name
    linked_subjects:
      type: string
      description: Subject ID to which `Relation type` refers
      title: Relation to other subjects
      example: SUB1355648
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: subject
        name: Relation to other subjects
    link_type:
      type: string
      description: Relation between subject and `linked_subjects`, can be genetic or environmental (e.g. exposure)
      title: Relation type
      example: father, daughter, household
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: subject
        name: Relation type
    # Each list element is a Diagnosis object.
    diagnosis:
      type: array
      description: Diagnosis information for subject
      items:
        $ref: '#/Diagnosis'
      x-airr:
        nullable: false
        adc-query-support: true
    germline:
      type: object
      description: Germline for this subject, if known, by germline class.
      properties:
        receptor_germline:
          $ref: '#/GermlineSet'
          description: Immune receptor germline set for this subject.
        mhc_germline:
          $ref: '#/MHCGermlineSet'
          description: MHC germline set for this subject.
# 1-to-n relationship between a subject and its diagnoses
Diagnosis:
  discriminator: AIRR
  type: object
  # Every property defined below is listed as required; all are nullable.
  required:
    - study_group_description
    - disease_diagnosis
    - disease_length
    - disease_stage
    - prior_therapies
    - immunogen
    - intervention
    - medical_history
  properties:
    study_group_description:
      type: string
      description: Designation of study arm to which the subject is assigned to
      title: Study group description
      example: control
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: diagnosis and intervention
        name: Study group description
    # Ontology term; the top node is the DOID "disease" concept.
    disease_diagnosis:
      $ref: '#/Ontology'
      description: Diagnosis of subject
      title: Diagnosis
      example:
        id: 'DOID:9538'
        label: multiple myeloma
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: diagnosis and intervention
        name: Diagnosis
        format: ontology
        ontology:
          draft: false
          top_node:
            id: 'DOID:4'
            label: disease
    disease_length:
      type: string
      description: Time duration between initial diagnosis and current intervention
      title: Length of disease
      example: 23 months
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: diagnosis and intervention
        name: Length of disease
        format: physical quantity
    disease_stage:
      type: string
      description: Stage of disease at current intervention
      title: Disease stage
      example: Stage II
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: diagnosis and intervention
        name: Disease stage
    prior_therapies:
      type: string
      description: List of all relevant previous therapies applied to subject for treatment of `Diagnosis`
      title: Prior therapies for primary disease under study
      example: melphalan/prednisone
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: diagnosis and intervention
        name: Prior therapies for primary disease under study
    immunogen:
      type: string
      description: Antigen, vaccine or drug applied to subject at this intervention
      title: Immunogen/agent
      example: bortezomib
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: diagnosis and intervention
        name: Immunogen/agent
    intervention:
      type: string
      description: Description of intervention
      title: Intervention definition
      example: systemic chemotherapy, 6 cycles, 1.25 mg/m2
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: diagnosis and intervention
        name: Intervention definition
    medical_history:
      type: string
      description: Medical history of subject that is relevant to assess the course of disease and/or treatment
      title: Other relevant medical history
      example: MGUS, first diagnosed 5 years prior
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 1
        subset: diagnosis and intervention
        name: Other relevant medical history
# 1-to-n relationship between a subject and its samples
# sample_id is unique within a study
Sample:
  discriminator: AIRR
  type: object
  # Every property defined below is listed as required; all are nullable.
  required:
    - sample_id
    - sample_type
    - tissue
    - anatomic_site
    - disease_state_sample
    - collection_time_point_relative
    - collection_time_point_relative_unit
    - collection_time_point_reference
    - biomaterial_provider
  properties:
    sample_id:
      type: string
      description: Sample ID assigned by submitter, unique within study
      title: Biological sample ID
      example: SUP52415
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 2
        subset: sample
        name: Biological sample ID
    sample_type:
      type: string
      description: The way the sample was obtained, e.g. fine-needle aspirate, organ harvest, peripheral venous puncture
      title: Sample type
      example: Biopsy
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 2
        subset: sample
        name: Sample type
    tissue:
      $ref: '#/Ontology'
      description: The actual tissue sampled, e.g. lymph node, liver, peripheral blood
      title: Tissue
      example:
        id: 'UBERON:0002371'
        label: bone marrow
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 2
        subset: sample
        name: Tissue
        format: ontology
        ontology:
          draft: false
          top_node:
            id: 'UBERON:0010000'
            label: multicellular anatomical structure
    anatomic_site:
      type: string
      description: The anatomic location of the tissue, e.g. Inguinal, femur
      title: Anatomic site
      example: Iliac crest
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 2
        subset: sample
        name: Anatomic site
    disease_state_sample:
      type: string
      description: Histopathologic evaluation of the sample
      title: Disease state of sample
      example: Tumor infiltration
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 2
        subset: sample
        name: Disease state of sample
    collection_time_point_relative:
      type: number
      description: Time point at which sample was taken, relative to `Collection time event`
      title: Sample collection time
      # Unquoted so that the example is a number, matching the declared type.
      example: 14
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 2
        subset: sample
        name: Sample collection time
    collection_time_point_relative_unit:
      $ref: '#/Ontology'
      description: Unit of Sample collection time
      title: Sample collection time unit
      example:
        id: 'UO:0000033'
        label: day
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 2
        subset: sample
        name: Sample collection time unit
        format: ontology
        ontology:
          draft: false
          top_node:
            id: 'UO:0000003'
            label: time unit
    collection_time_point_reference:
      type: string
      description: Event in the study schedule to which `Sample collection time` relates to
      title: Collection time event
      example: Primary vaccination
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 2
        subset: sample
        name: Collection time event
    biomaterial_provider:
      type: string
      description: Name and address of the entity providing the sample
      title: Biomaterial provider
      example: Tissues-R-Us, Tampa, FL, USA
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 2
        subset: sample
        name: Biomaterial provider
# 1-to-n relationship between a sample and processing of its cells
CellProcessing:
  discriminator: AIRR
  type: object
  # cell_species is defined below but is not part of the required list.
  required:
    - tissue_processing
    - cell_subset
    - cell_phenotype
    - single_cell
    - cell_number
    - cells_per_reaction
    - cell_storage
    - cell_quality
    - cell_isolation
    - cell_processing_protocol
  properties:
    tissue_processing:
      type: string
      description: Enzymatic digestion and/or physical methods used to isolate cells from sample
      title: Tissue processing
      example: Collagenase A/Dnase I digested, followed by Percoll gradient
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (cell)
        name: Tissue processing
    cell_subset:
      $ref: '#/Ontology'
      description: Commonly-used designation of isolated cell population
      title: Cell subset
      example:
        id: 'CL:0000972'
        label: class switched memory B cell
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (cell)
        name: Cell subset
        format: ontology
        ontology:
          draft: false
          top_node:
            id: 'CL:0000542'
            label: lymphocyte
    cell_phenotype:
      type: string
      description: List of cellular markers and their expression levels used to isolate the cell population
      title: Cell subset phenotype
      example: CD19+ CD38+ CD27+ IgM- IgD-
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (cell)
        name: Cell subset phenotype
    cell_species:
      $ref: '#/Ontology'
      description: >
        Binomial designation of the species from which the analyzed cells originate. Typically, this value
        should be identical to `species`, in which case it SHOULD NOT be set explicitly. However, there are
        valid experimental setups in which the two might differ, e.g. chimeric animal models. If set, this
        key will overwrite the `species` information for all lower layers of the schema.
      title: Cell species
      example:
        id: 'NCBITAXON:9606'
        label: Homo sapiens
      x-airr:
        miairr: defined
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (cell)
        name: Cell species
        format: ontology
        ontology:
          draft: false
          top_node:
            id: 'NCBITAXON:7776'
            label: Gnathostomata
    single_cell:
      type: boolean
      description: TRUE if single cells were isolated into separate compartments
      title: Single-cell sort
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (cell)
        name: Single-cell sort
    cell_number:
      type: integer
      description: Total number of cells that went into the experiment
      title: Number of cells in experiment
      example: 1000000
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (cell)
        name: Number of cells in experiment
    cells_per_reaction:
      type: integer
      description: Number of cells for each biological replicate
      title: Number of cells per sequencing reaction
      example: 50000
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (cell)
        name: Number of cells per sequencing reaction
    cell_storage:
      type: boolean
      description: TRUE if cells were cryo-preserved between isolation and further processing
      title: Cell storage
      # Canonical lowercase boolean, so the example matches the declared type.
      example: true
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (cell)
        name: Cell storage
    cell_quality:
      type: string
      description: Relative amount of viable cells after preparation and (if applicable) thawing
      title: Cell quality
      example: 90% viability as determined by 7-AAD
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (cell)
        name: Cell quality
    cell_isolation:
      type: string
      description: Description of the procedure used for marker-based isolation or enrich cells
      title: Cell isolation / enrichment procedure
      example: >
        Cells were stained with fluorochrome labeled antibodies and then sorted on a FlowMerlin (CE) cytometer.
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (cell)
        name: Cell isolation / enrichment procedure
    cell_processing_protocol:
      type: string
      description: >
        Description of the methods applied to the sample including cell preparation/ isolation/enrichment and
        nucleic acid extraction. This should closely mirror the Materials and methods section in the manuscript.
      title: Processing protocol
      example: Stimulated with anti-CD3/anti-CD28
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (cell)
        name: Processing protocol
# object for PCR primer targets
PCRTarget:
  discriminator: AIRR
  type: object
  # All three properties must be present; each is nullable.
  required:
    - pcr_target_locus
    - forward_pcr_primer_target_location
    - reverse_pcr_primer_target_location
  properties:
    pcr_target_locus:
      type: string
      enum:
        - IGH
        - IGI
        - IGK
        - IGL
        - TRA
        - TRB
        - TRD
        - TRG
      description: >
        Designation of the target locus. Note that this field uses a controlled vocabulary that is meant to
        provide a generic classification of the locus, not necessarily the correct designation according to
        a specific nomenclature.
      title: Target locus for PCR
      example: IGK
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (nucleic acid [pcr])
        name: Target locus for PCR
        format: controlled vocabulary
    forward_pcr_primer_target_location:
      type: string
      description: Position of the most distal nucleotide templated by the forward primer or primer mix
      title: Forward PCR primer target location
      example: IGHV, +23
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (nucleic acid [pcr])
        name: Forward PCR primer target location
    reverse_pcr_primer_target_location:
      type: string
      description: Position of the most proximal nucleotide templated by the reverse primer or primer mix
      title: Reverse PCR primer target location
      example: IGHG, +57
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (nucleic acid [pcr])
        name: Reverse PCR primer target location
# NucleicAcidProcessing: template material and library preparation metadata.
#
# Generally a 1-to-1 relationship between a CellProcessing and the processing
# of its nucleic acid, but may be 1-to-n for technical replicates.
NucleicAcidProcessing:
  discriminator: AIRR
  type: object
  # Every property defined below except pcr_target is listed as required.
  required:
    - template_class
    - template_quality
    - template_amount
    - template_amount_unit
    - library_generation_method
    - library_generation_protocol
    - library_generation_kit_version
    - complete_sequences
    - physical_linkage
  properties:
    template_class:
      type: string
      enum:
        - DNA
        - RNA
      description: >
        The class of nucleic acid that was used as primary starting material for the following procedures
      title: Target substrate
      example: RNA
      x-airr:
        miairr: essential
        nullable: false
        adc-query-support: true
        set: 3
        subset: process (nucleic acid)
        name: Target substrate
        format: controlled vocabulary
    template_quality:
      type: string
      description: Description and results of the quality control performed on the template material
      title: Target substrate quality
      example: RIN 9.2
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (nucleic acid)
        name: Target substrate quality
    # Numeric amount; its unit is carried separately by template_amount_unit.
    template_amount:
      type: number
      description: Amount of template that went into the process
      title: Template amount
      example: 1000
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (nucleic acid)
        name: Template amount
    # Ontology-coded unit for template_amount. This is a quantity unit
    # (e.g. nanogram), not a time unit, so title/name say "Template amount unit".
    template_amount_unit:
      $ref: "#/Ontology"
      description: Unit of template amount
      title: Template amount unit
      example:
        # CURIEs are quoted so the embedded colon can never be misparsed.
        id: "UO:0000024"
        label: nanogram
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (nucleic acid)
        name: Template amount unit
        format: ontology
        ontology:
          draft: false
          top_node:
            id: "UO:0000002"
            label: physical quantity
    library_generation_method:
      type: string
      # Values contain "+" and parentheses, hence the explicit quoting.
      enum:
        - "PCR"
        - "RT(RHP)+PCR"
        - "RT(oligo-dT)+PCR"
        - "RT(oligo-dT)+TS+PCR"
        - "RT(oligo-dT)+TS(UMI)+PCR"
        - "RT(specific)+PCR"
        - "RT(specific)+TS+PCR"
        - "RT(specific)+TS(UMI)+PCR"
        - "RT(specific+UMI)+PCR"
        - "RT(specific+UMI)+TS+PCR"
        - "RT(specific)+TS"
        - "other"
      description: Generic type of library generation
      title: Library generation method
      example: RT(oligo-dT)+TS(UMI)+PCR
      x-airr:
        miairr: essential
        nullable: false
        adc-query-support: true
        set: 3
        subset: process (nucleic acid)
        name: Library generation method
        format: controlled vocabulary
    library_generation_protocol:
      type: string
      description: Description of processes applied to substrate to obtain a library that is ready for sequencing
      title: Library generation protocol
      example: cDNA was generated using
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (nucleic acid)
        name: Library generation protocol
    library_generation_kit_version:
      type: string
      description: When using a library generation protocol from a commercial provider, provide the protocol version number
      title: Protocol IDs
      example: v2.1 (2016-09-15)
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (nucleic acid)
        name: Protocol IDs
    # Array of PCRTarget objects, one record per targeted locus.
    pcr_target:
      type: array
      description: >
        If a PCR step was performed that specifically targets the IG/TR loci, the target and primer locations
        need to be provided here. This field holds an array of PCRTarget objects, so that multiplex PCR setups
        amplifying multiple loci at the same time can be annotated using one record per locus. PCR setups not
        targeting any specific locus must not annotate this field but select the appropriate
        library_generation_method instead.
      items:
        $ref: "#/PCRTarget"
      x-airr:
        nullable: false
        adc-query-support: true
    complete_sequences:
      type: string
      enum:
        - partial
        - complete
        - "complete+untemplated"
        - mixed
      # The description refers to each enum value by its exact spelling.
      description: >
        To be considered `complete`, the procedure used for library construction MUST generate sequences that
        1) include the first V gene codon that encodes the mature polypeptide chain (i.e. after the
        leader sequence) and 2) include the last complete codon of the J gene (i.e. 1 bp 5' of the J->C
        splice site) and 3) provide sequence information for all positions between 1) and 2). To be considered
        `complete+untemplated`, the sections of the sequences defined in points 1) to 3) of the previous
        sentence MUST be untemplated, i.e. MUST NOT overlap with the primers used in library preparation.
        `mixed` should only be used if the procedure used for library construction will likely produce multiple
        categories of sequences in the given experiment. It SHOULD NOT be used as a replacement of a NULL value.
      title: Complete sequences
      example: partial
      x-airr:
        miairr: essential
        nullable: false
        adc-query-support: true
        set: 3
        subset: process (nucleic acid)
        name: Complete sequences
        format: controlled vocabulary
    physical_linkage:
      type: string
      enum:
        # "none" is the literal string, not a YAML null; quoted to make that explicit.
        - "none"
        - "hetero_head-head"
        - "hetero_tail-head"
        - "hetero_prelinked"
      description: >
        In case an experimental setup is used that physically links nucleic acids derived from distinct
        `Rearrangements` before library preparation, this field describes the mode of that linkage. All
        `hetero_*` terms indicate that in case of paired-read sequencing, the two reads should be expected
        to map to distinct IG/TR loci. `*_head-head` refers to techniques that link the 5' ends of transcripts
        in a single-cell context. `*_tail-head` refers to techniques that link the 3' end of one transcript to
        the 5' end of another one in a single-cell context. This term does not provide any information whether
        a continuous reading-frame between the two is generated. `*_prelinked` refers to constructs in which
        the linkage was already present on the DNA level (e.g. scFv).
      title: Physical linkage of different rearrangements
      example: hetero_head-head
      x-airr:
        miairr: essential
        nullable: false
        adc-query-support: true
        set: 3
        subset: process (nucleic acid)
        name: Physical linkage of different rearrangements
        format: controlled vocabulary
# SequencingRun: metadata describing a single sequencing run.
#
# 1-to-n relationship between a NucleicAcidProcessing and SequencingRun with
# resultant raw sequence file(s).
SequencingRun:
  discriminator: AIRR
  type: object
  # sequencing_files is the only property below not listed as required.
  required:
    - sequencing_run_id
    - total_reads_passing_qc_filter
    - sequencing_platform
    - sequencing_facility
    - sequencing_run_date
    - sequencing_kit
  properties:
    sequencing_run_id:
      type: string
      description: ID of sequencing run assigned by the sequencing facility
      title: Batch number
      example: 160101_M01234
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (sequencing)
        name: Batch number
    total_reads_passing_qc_filter:
      type: integer
      description: Number of usable reads for analysis
      title: Total reads passing QC filter
      example: 10365118
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (sequencing)
        name: Total reads passing QC filter
    sequencing_platform:
      type: string
      description: Designation of sequencing instrument used
      title: Sequencing platform
      example: Alumina LoSeq 1000
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (sequencing)
        name: Sequencing platform
    sequencing_facility:
      type: string
      description: Name and address of sequencing facility
      title: Sequencing facility
      example: Seqs-R-Us, Vancouver, BC, Canada
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (sequencing)
        name: Sequencing facility
    sequencing_run_date:
      type: string
      description: Date of sequencing run
      title: Date of sequencing run
      format: date
      # Quoted: the field is declared as a string, and an unquoted ISO date is
      # resolved to a native date/timestamp object by YAML 1.1 loaders.
      example: "2016-12-16"
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (sequencing)
        name: Date of sequencing run
    sequencing_kit:
      type: string
      description: Name, manufacturer, order and lot numbers of sequencing kit
      title: Sequencing kit
      # Must stay quoted: the value contains " #", which would otherwise start a comment.
      example: "FullSeq 600, Alumina, #M123456C0, 789G1HK"
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 3
        subset: process (sequencing)
        name: Sequencing kit
    # Reference to the RawSequenceData object holding the run's files.
    sequencing_files:
      $ref: "#/RawSequenceData"
      description: Set of sequencing files produced by the sequencing run
      x-airr:
        nullable: false
        adc-query-support: true
# RawSequenceData: resultant raw sequencing files from a SequencingRun.
#
# Describes one file, or a pair of files for paired-read sequencing; the
# paired_* properties describe the second file of the pair.
RawSequenceData:
  discriminator: AIRR
  type: object
  # All properties defined below are listed as required.
  required:
    - file_type
    - filename
    - read_direction
    - read_length
    - paired_filename
    - paired_read_direction
    - paired_read_length
  properties:
    file_type:
      type: string
      description: File format for the raw reads or sequences
      title: Raw sequencing data file type
      enum:
        - fasta
        - fastq
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 4
        subset: data (raw reads)
        name: Raw sequencing data file type
        format: controlled vocabulary
    filename:
      type: string
      description: File name for the raw reads or sequences. The first file in paired-read sequencing.
      title: Raw sequencing data file name
      example: MS10R-NMonson-C7JR9_S1_R1_001.fastq
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 4
        subset: data (raw reads)
        name: Raw sequencing data file name
    read_direction:
      type: string
      description: Read direction for the raw reads or sequences. The first file in paired-read sequencing.
      title: Read direction
      example: forward
      enum:
        - forward
        - reverse
        - mixed
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 4
        subset: data (raw reads)
        name: Read direction
        format: controlled vocabulary
    read_length:
      type: integer
      description: Read length in bases for the first file in paired-read sequencing
      title: Forward read length
      example: 300
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        # Set 3, matching every other "process (sequencing)" element in this
        # schema; set 4 is used with the "data (raw reads)" subset.
        set: 3
        subset: process (sequencing)
        name: Forward read length
    paired_filename:
      type: string
      description: File name for the second file in paired-read sequencing
      title: Paired raw sequencing data file name
      example: MS10R-NMonson-C7JR9_S1_R2_001.fastq
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 4
        subset: data (raw reads)
        name: Paired raw sequencing data file name
    paired_read_direction:
      type: string
      description: Read direction for the second file in paired-read sequencing
      title: Paired read direction
      example: reverse
      enum:
        - forward
        - reverse
        - mixed
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 4
        subset: data (raw reads)
        name: Paired read direction
        format: controlled vocabulary
    paired_read_length:
      type: integer
      description: Read length in bases for the second file in paired-read sequencing
      title: Paired read length
      example: 300
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        # Set 3 for the same reason as read_length.
        set: 3
        subset: process (sequencing)
        name: Paired read length
# DataProcessing: computational processing applied to a repertoire's raw data.
#
# 1-to-n relationship between a repertoire and data processing. Each object
# describes a set of annotated rearrangement sequences produced by data
# processing upon the raw sequence data for a repertoire.
DataProcessing:
  discriminator: AIRR
  type: object
  # The MiAIRR-annotated properties are required; the identifier, flag, file
  # list, germline object and provenance properties are not.
  required:
    - software_versions
    - paired_reads_assembly
    - quality_thresholds
    - primer_match_cutoffs
    - collapsing_method
    - data_processing_protocols
    - germline_database
  properties:
    data_processing_id:
      type: string
      description: Identifier for the data processing object.
      title: Data processing ID
      x-airr:
        nullable: true
        name: Data processing ID
        adc-query-support: true
        identifier: true
    primary_annotation:
      type: boolean
      default: false
      description: >
        If true, indicates this is the primary or default data processing for
        the repertoire and its rearrangements. If false, indicates this is a secondary
        or additional data processing.
      title: Primary annotation
      x-airr:
        nullable: false
        adc-query-support: true
        # NOTE(review): a boolean flag marked as an identifier — confirm this is intended.
        identifier: true
    software_versions:
      type: string
      description: Version number and / or date, include company pipelines
      title: Software tools and version numbers
      example: IgBLAST 1.6
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 5
        subset: process (computational)
        name: Software tools and version numbers
    paired_reads_assembly:
      type: string
      description: How paired end reads were assembled into a single receptor sequence
      title: Paired read assembly
      example: PandaSeq (minimal overlap 50, threshold 0.8)
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 5
        subset: process (computational)
        name: Paired read assembly
    quality_thresholds:
      type: string
      description: How sequences were removed from (4) based on base quality scores
      title: Quality thresholds
      example: Average Phred score >=20
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 5
        subset: process (computational)
        name: Quality thresholds
    primer_match_cutoffs:
      type: string
      description: How primers were identified in the sequences, were they removed/masked/etc?
      title: Primer match cutoffs
      example: Hamming distance <= 2
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 5
        subset: process (computational)
        name: Primer match cutoffs
    collapsing_method:
      type: string
      description: The method used for combining multiple sequences from (4) into a single sequence in (5)
      title: Collapsing method
      example: MUSCLE 3.8.31
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 5
        subset: process (computational)
        name: Collapsing method
    data_processing_protocols:
      type: string
      description: General description of how QC is performed
      title: Data processing protocols
      example: Data was processed using [...]
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 5
        subset: process (computational)
        name: Data processing protocols
    # Plain list of file-name strings.
    data_processing_files:
      type: array
      items:
        type: string
      description: Array of file names for data produced by this data processing.
      title: Processed data file names
      example:
        - "ERR1278153_aa.txz"
        - "ERR1278153_ab.txz"
        - "ERR1278153_ac.txz"
      x-airr:
        nullable: true
        adc-query-support: true
        name: Processed data file names
    # Structured germline references; the free-text counterpart is germline_database.
    data_processing_germline:
      type: object
      description: Germline used for this data processing process
      properties:
        receptor_germline:
          $ref: "#/GermlineSet"
          description: Immune receptor germline set for this data processing process.
        mhc_germline:
          $ref: "#/MHCGermlineSet"
          description: MHC germline set for this data processing process.
    germline_database:
      type: string
      description: Source of germline V(D)J genes with version number or date accessed.
      title: V(D)J germline reference database
      example: ENSEMBL, Homo sapiens build 90, 2017-10-01
      x-airr:
        miairr: important
        nullable: true
        adc-query-support: true
        set: 5
        subset: data (processed sequence)
        name: V(D)J germline reference database
    analysis_provenance_id:
      type: string
      description: Identifier for machine-readable PROV model of analysis provenance
      title: Analysis provenance ID
      x-airr:
        nullable: true
        adc-query-support: true
# SampleProcessing: carries only an identifier for one sample-processing
# record within a repertoire; it declares no required properties.
SampleProcessing:
  discriminator: AIRR
  type: object
  properties:
    sample_processing_id:
      type: string
      description: >
        Identifier for the sample processing object. This field should be unique within the repertoire.
        This field can be used to uniquely identify the combination of sample, cell processing,
        nucleic acid processing and sequencing run information for the repertoire.
      title: Sample processing ID
      x-airr:
        nullable: true
        name: Sample processing ID
        adc-query-support: true
        identifier: true
# Repertoire: the composite schema for the repertoire object.
#
# A repertoire represents a sample repertoire as defined by the study and
# experimentally observed by raw sequence data. It can only be for one
# subject but may include multiple samples.
Repertoire:
  discriminator: AIRR
  type: object
  # The repertoire_* identifier/name/description properties are not required.
  required: [study, subject, sample, data_processing]
  properties:
    repertoire_id:
      type: string
      description: >
        Identifier for the repertoire object. This identifier should be globally unique so that repertoires
        from multiple studies can be combined together without conflict. The repertoire_id is used to link
        other AIRR data to a Repertoire. Specifically, the Rearrangements Schema includes repertoire_id for
        referencing the specific Repertoire for that Rearrangement.
      title: Repertoire ID
      x-airr:
        nullable: true
        adc-query-support: true
        identifier: true
    repertoire_name:
      type: string
      description: Short generic display name for the repertoire
      title: Repertoire name
      x-airr:
        nullable: true
        name: Repertoire name
        adc-query-support: true
    repertoire_description:
      type: string
      description: Generic repertoire description
      title: Repertoire description
      x-airr:
        nullable: true
        name: Repertoire description
        adc-query-support: true
    study:
      $ref: "#/Study"
      description: Study object
      x-airr:
        nullable: false
        adc-query-support: true
    subject:
      $ref: "#/Subject"
      description: Subject object
      x-airr:
        nullable: false
        adc-query-support: true
    # Each array item is the allOf-composition of the five schemas listed below.
    sample:
      type: array
      description: List of Sample objects
      items:
        allOf:
          - $ref: "#/SampleProcessing"
          - $ref: "#/Sample"
          - $ref: "#/CellProcessing"
          - $ref: "#/NucleicAcidProcessing"
          - $ref: "#/SequencingRun"
      x-airr:
        nullable: false
        adc-query-support: true
    data_processing:
      type: array
      description: List of Data Processing objects
      items:
        $ref: "#/DataProcessing"
      x-airr:
        nullable: false
        adc-query-support: true
# RepertoireGroup: a collection of repertoires for analysis purposes,
# including an optional time course (time_point on each member).
RepertoireGroup:
  discriminator: AIRR
  type: object
  required: [repertoire_group_id, repertoires]
  properties:
    repertoire_group_id:
      type: string
      description: Identifier for this repertoire group
    repertoire_group_name:
      type: string
      description: Short display name for this repertoire group
    repertoire_group_description:
      type: string
      description: Repertoire group description
    # Members are inline objects that refer to a repertoire by repertoire_id.
    repertoires:
      type: array
      description: >
        List of repertoires in this group with an associated description and time point designation
      items:
        type: object
        properties:
          repertoire_id:
            type: string
            description: Identifier to the repertoire
            x-airr:
              nullable: false
              adc-query-support: true
          repertoire_description:
            type: string
            description: Description of this repertoire within the group
            x-airr:
              nullable: true
              adc-query-support: true
          time_point:
            $ref: "#/TimePoint"
            description: Time point designation for this repertoire within the group
            x-airr:
              nullable: true
              adc-query-support: true
# Alignment: one alignment of a query sequence against a single gene segment
# (V, D, J or C), with its score, CIGAR string and coordinates.
Alignment:
  discriminator: AIRR
  type: object
  required:
    - sequence_id
    - segment
    - call
    - score
    - cigar
  properties:
    sequence_id:
      type: string
      description: >
        Unique query sequence identifier within the file. Most often this will be the input sequence
        header or a substring thereof, but may also be a custom identifier defined by the tool in
        cases where query sequences have been combined in some fashion prior to alignment.
    segment:
      type: string
      description: >
        The segment for this alignment. One of V, D, J or C.
    rev_comp:
      type: boolean
      description: >
        Alignment result is from the reverse complement of the query sequence.
    call:
      type: string
      description: >
        Gene assignment with allele.
    score:
      type: number
      description: >
        Alignment score.
    identity:
      type: number
      description: >
        Alignment fractional identity.
    support:
      type: number
      description: >
        Alignment E-value, p-value, likelihood, probability or other similar measure of
        support for the gene assignment as defined by the alignment tool.
    cigar:
      type: string
      description: >
        Alignment CIGAR string.
    # All coordinates below are documented as 1-based closed intervals.
    sequence_start:
      type: integer
      description: >
        Start position of the segment in the query sequence (1-based closed interval).
    sequence_end:
      type: integer
      description: >
        End position of the segment in the query sequence (1-based closed interval).
    germline_start:
      type: integer
      description: >
        Alignment start position in the reference sequence (1-based closed interval).
    germline_end:
      type: integer
      description: >
        Alignment end position in the reference sequence (1-based closed interval).
    rank:
      type: integer
      description: >
        Alignment rank.
    # Deprecated: superseded by sequence_id (see x-airr below).
    rearrangement_id:
      type: string
      description: >
        Identifier for the Rearrangement object. May be identical to sequence_id,
        but will usually be a universally unique record locator for database applications.
      x-airr:
        deprecated: true
        deprecated-description: Field has been merged with sequence_id to avoid confusion.
        deprecated-replaced-by:
          - sequence_id
    data_processing_id:
      type: string
      description: >
        Identifier to the data processing object in the repertoire metadata
        for this rearrangement. If this field is empty then the primary data processing object is assumed.
    # Deprecated: moved to DataProcessing (see x-airr below).
    germline_database:
      type: string
      description: Source of germline V(D)J genes with version number or date accessed.
      example: ENSEMBL, Homo sapiens build 90, 2017-10-01
      x-airr:
        deprecated: true
        deprecated-description: Field was moved up to the DataProcessing level to avoid data duplication.
        deprecated-replaced-by:
          - "DataProcessing:germline_database"
# The extended rearrangement object
Rearrangement:
discriminator: AIRR
type: object
required:
- sequence_id
- sequence
- rev_comp
- productive
- v_call
- d_call
- j_call
- sequence_alignment
- germline_alignment
- junction
- junction_aa
- v_cigar
- d_cigar
- j_cigar
properties:
sequence_id:
type: string
description: >
Unique query sequence identifier for the Rearrangement. Most often this will be the input sequence
header or a substring thereof, but may also be a custom identifier defined by the tool in
cases where query sequences have been combined in some fashion prior to alignment. When
downloaded from an AIRR Data Commons repository, this will usually be a universally unique
record locator for linking with other objects in the AIRR Data Model.
x-airr:
adc-query-support: true
identifier: true
sequence:
type: string
description: >
The query nucleotide sequence. Usually, this is the unmodified input sequence, which may be
reverse complemented if necessary. In some cases, this field may contain consensus sequences or
other types of collapsed input sequences if these steps are performed prior to alignment.
quality:
type: string
description: >
The Sanger/Phred quality scores for assessment of sequence quality.
Phred quality scores from 0 to 93 are encoded using ASCII 33 to 126 (Used by Illumina from v1.8.)
sequence_aa:
type: string
description: >
Amino acid translation of the query nucleotide sequence.
rev_comp:
type: boolean
description: >
True if the alignment is on the opposite strand (reverse complemented) with respect to the
query sequence. If True then all output data, such as alignment coordinates and sequences,
are based on the reverse complement of 'sequence'.
productive:
type: boolean
description: >
True if the V(D)J sequence is predicted to be productive.
x-airr:
adc-query-support: true
vj_in_frame:
type: boolean
description: True if the V and J gene alignments are in-frame.
stop_codon:
type: boolean
description: True if the aligned sequence contains a stop codon.
complete_vdj:
type: boolean
description: >
True if the sequence alignment spans the entire V(D)J region. Meaning,
sequence_alignment includes both the first V gene codon that encodes the
mature polypeptide chain (i.e., after the leader sequence) and the last
complete codon of the J gene (i.e., before the J-C splice site).
This does not require an absence of deletions within the internal
FWR and CDR regions of the alignment.
locus:
type: string
enum:
- IGH
- IGI
- IGK
- IGL
- TRA
- TRB
- TRD
- TRG
description: >
Gene locus (chain type). Note that this field uses a controlled vocabulary that is meant to provide a
generic classification of the locus, not necessarily the correct designation according to a specific
nomenclature.
title: Gene locus
example: IGH
x-airr:
nullable: true
adc-query-support: true
name: Gene locus
format: controlled vocabulary
v_call:
type: string
description: >
V gene with allele. If referring to a known reference sequence in a database
the relevant gene/allele nomenclature should be followed (e.g., IGHV4-59*01 if using IMGT/GENE-DB).
title: V gene with allele
example: IGHV4-59*01
x-airr:
miairr: important
nullable: true
adc-query-support: true
set: 6
subset: data (processed sequence)
name: V gene with allele
d_call:
type: string
description: >
First or only D gene with allele. If referring to a known reference sequence in a database
the relevant gene/allele nomenclature should be followed (e.g., IGHD3-10*01 if using IMGT/GENE-DB).
title: D gene with allele
example: IGHD3-10*01
x-airr:
miairr: important
nullable: true
adc-query-support: true
set: 6
subset: data (processed sequence)
name: D gene with allele
d2_call:
type: string
description: >
Second D gene with allele. If referring to a known reference sequence in a database the relevant
gene/allele nomenclature should be followed (e.g., IGHD3-10*01 if using IMGT/GENE-DB).
example: IGHD3-10*01
j_call:
type: string
description: >
J gene with allele. If referring to a known reference sequence in a database the relevant
gene/allele nomenclature should be followed (e.g., IGHJ4*02 if using IMGT/GENE-DB).
title: J gene with allele
example: IGHJ4*02
x-airr:
miairr: important
nullable: true
adc-query-support: true
set: 6
subset: data (processed sequence)
name: J gene with allele
c_call:
type: string
description: >
Constant region gene with allele. If referring to a known reference sequence in a database the
relevant gene/allele nomenclature should be followed (e.g., IGHG1*01 if using IMGT/GENE-DB).
title: C region
example: IGHG1*01
x-airr:
miairr: important
nullable: true
adc-query-support: true
set: 6
subset: data (processed sequence)
name: C region
sequence_alignment:
type: string
description: >
Aligned portion of query sequence, including any indel corrections or numbering spacers,
such as IMGT-gaps. Typically, this will include only the V(D)J region, but that is not
a requirement.
quality_alignment:
type: string
description: >
Sanger/Phred quality scores for assessment of sequence_alignment quality.
Phred quality scores from 0 to 93 are encoded using ASCII 33 to 126 (Used by Illumina from v1.8.)
sequence_alignment_aa:
type: string
description: >
Amino acid translation of the aligned query sequence.
germline_alignment:
type: string
description: >
Assembled, aligned, full-length inferred germline sequence spanning the same region
as the sequence_alignment field (typically the V(D)J region) and including the same set
of corrections and spacers (if any).
germline_alignment_aa:
type: string
description: >
Amino acid translation of the assembled germline sequence.
junction:
type: string
description: >
Junction region nucleotide sequence, where the junction is defined as
the CDR3 plus the two flanking conserved codons.
title: IMGT-JUNCTION nucleotide sequence
example: TGTGCAAGAGCGGGAGTTTACGACGGATATACTATGGACTACTGG
x-airr:
miairr: important
nullable: true
set: 6
subset: data (processed sequence)
name: IMGT-JUNCTION nucleotide sequence
junction_aa:
type: string
description: >
Amino acid translation of the junction.
title: IMGT-JUNCTION amino acid sequence
example: CARAGVYDGYTMDYW
x-airr:
miairr: important
nullable: true
adc-query-support: true
set: 6
subset: data (processed sequence)
name: IMGT-JUNCTION amino acid sequence
np1:
type: string
description: >
Nucleotide sequence of the combined N/P region between the V gene and
first D gene alignment or between the V gene and J gene alignments.
np1_aa:
type: string
description: >
Amino acid translation of the np1 field.
np2:
type: string
description: >
Nucleotide sequence of the combined N/P region between either the first D gene and J gene
alignments or the first D gene and second D gene alignments.
np2_aa:
type: string
description: >
Amino acid translation of the np2 field.
np3:
type: string
description: >
Nucleotide sequence of the combined N/P region between the second D gene
and J gene alignments.
np3_aa:
type: string
description: >
Amino acid translation of the np3 field.
cdr1:
type: string
description: >
Nucleotide sequence of the aligned CDR1 region.
cdr1_aa:
type: string
description: >
Amino acid translation of the cdr1 field.
cdr2:
type: string
description: >
Nucleotide sequence of the aligned CDR2 region.
cdr2_aa:
type: string
description: >
Amino acid translation of the cdr2 field.
cdr3:
type: string
description: >
Nucleotide sequence of the aligned CDR3 region.
cdr3_aa:
type: string
description: >
Amino acid translation of the cdr3 field.
fwr1:
type: string
description: >
Nucleotide sequence of the aligned FWR1 region.
fwr1_aa:
type: string
description: >
Amino acid translation of the fwr1 field.
fwr2:
type: string
description: >
Nucleotide sequence of the aligned FWR2 region.
fwr2_aa:
type: string
description: >
Amino acid translation of the fwr2 field.
fwr3:
type: string
description: >
Nucleotide sequence of the aligned FWR3 region.
fwr3_aa:
type: string
description: >
Amino acid translation of the fwr3 field.
fwr4:
type: string
description: >
Nucleotide sequence of the aligned FWR4 region.
fwr4_aa:
type: string
description: >
Amino acid translation of the fwr4 field.
v_score:
type: number
description: Alignment score for the V gene.
v_identity:
type: number
description: Fractional identity for the V gene alignment.
v_support:
type: number
description: >
V gene alignment E-value, p-value, likelihood, probability or other similar measure of
support for the V gene assignment as defined by the alignment tool.
v_cigar:
type: string
description: CIGAR string for the V gene alignment.
d_score:
type: number
description: Alignment score for the first or only D gene alignment.
d_identity:
type: number
description: Fractional identity for the first or only D gene alignment.
d_support:
type: number
description: >
D gene alignment E-value, p-value, likelihood, probability or other similar measure of
support for the first or only D gene as defined by the alignment tool.
d_cigar:
type: string
description: CIGAR string for the first or only D gene alignment.
d2_score:
type: number
description: Alignment score for the second D gene alignment.
d2_identity:
type: number
description: Fractional identity for the second D gene alignment.
d2_support:
type: number
description: >
D gene alignment E-value, p-value, likelihood, probability or other similar measure of
support for the second D gene as defined by the alignment tool.
d2_cigar:
type: string
description: CIGAR string for the second D gene alignment.
j_score:
type: number
description: Alignment score for the J gene alignment.
j_identity:
type: number
description: Fractional identity for the J gene alignment.
j_support:
type: number
description: >
J gene alignment E-value, p-value, likelihood, probability or other similar measure of
support for the J gene assignment as defined by the alignment tool.
j_cigar:
type: string
description: CIGAR string for the J gene alignment.
c_score:
type: number
description: Alignment score for the C gene alignment.
c_identity:
type: number
description: Fractional identity for the C gene alignment.
c_support:
type: number
description: >
C gene alignment E-value, p-value, likelihood, probability or other similar measure of
support for the C gene assignment as defined by the alignment tool.
c_cigar:
type: string
description: CIGAR string for the C gene alignment.
v_sequence_start:
type: integer
description: >
Start position of the V gene in the query sequence (1-based closed interval).
v_sequence_end:
type: integer
description: >
End position of the V gene in the query sequence (1-based closed interval).
v_germline_start:
type: integer
description: >
Alignment start position in the V gene reference sequence (1-based closed interval).
v_germline_end:
type: integer
description: >
Alignment end position in the V gene reference sequence (1-based closed interval).
v_alignment_start:
type: integer
description: >
Start position of the V gene alignment in both the sequence_alignment and germline_alignment
fields (1-based closed interval).
v_alignment_end:
type: integer
description: >
End position of the V gene alignment in both the sequence_alignment and germline_alignment
fields (1-based closed interval).
d_sequence_start:
type: integer
description: >
Start position of the first or only D gene in the query sequence.
(1-based closed interval).
d_sequence_end:
type: integer
description: >
End position of the first or only D gene in the query sequence.
(1-based closed interval).
d_germline_start:
type: integer
description: >
Alignment start position in the D gene reference sequence for the first or only
D gene (1-based closed interval).
d_germline_end:
type: integer
description: >
Alignment end position in the D gene reference sequence for the first or only
D gene (1-based closed interval).
d_alignment_start:
type: integer
description: >
Start position of the first or only D gene in both the sequence_alignment
and germline_alignment fields (1-based closed interval).
d_alignment_end:
type: integer
description: >
End position of the first or only D gene in both the sequence_alignment
and germline_alignment fields (1-based closed interval).
d2_sequence_start:
type: integer
description: >
Start position of the second D gene in the query sequence (1-based closed interval).
d2_sequence_end:
type: integer
description: >
End position of the second D gene in the query sequence (1-based closed interval).
d2_germline_start:
type: integer
description: >
Alignment start position in the second D gene reference sequence (1-based closed interval).
d2_germline_end:
type: integer
description: >
Alignment end position in the second D gene reference sequence (1-based closed interval).
d2_alignment_start:
type: integer
description: >
Start position of the second D gene alignment in both the sequence_alignment and
germline_alignment fields (1-based closed interval).
d2_alignment_end:
type: integer
description: >
End position of the second D gene alignment in both the sequence_alignment and
germline_alignment fields (1-based closed interval).
j_sequence_start:
type: integer
description: >
Start position of the J gene in the query sequence (1-based closed interval).
j_sequence_end:
type: integer
description: >
End position of the J gene in the query sequence (1-based closed interval).
j_germline_start:
type: integer
description: >
Alignment start position in the J gene reference sequence (1-based closed interval).
j_germline_end:
type: integer
description: >
Alignment end position in the J gene reference sequence (1-based closed interval).
j_alignment_start:
type: integer
description: >
Start position of the J gene alignment in both the sequence_alignment and germline_alignment
fields (1-based closed interval).
j_alignment_end:
type: integer
description: >
End position of the J gene alignment in both the sequence_alignment and germline_alignment
fields (1-based closed interval).
c_sequence_start:
type: integer
description: >
Start position of the C gene in the query sequence (1-based closed interval).
c_sequence_end:
type: integer
description: >
End position of the C gene in the query sequence (1-based closed interval).
c_germline_start:
type: integer
description: >
Alignment start position in the C gene reference sequence (1-based closed interval).
c_germline_end:
type: integer
description: >
Alignment end position in the C gene reference sequence (1-based closed interval).
c_alignment_start:
type: integer
description: >
Start position of the C gene alignment in both the sequence_alignment and germline_alignment
fields (1-based closed interval).
c_alignment_end:
type: integer
description: >
End position of the C gene alignment in both the sequence_alignment and germline_alignment
fields (1-based closed interval).
cdr1_start:
type: integer
description: CDR1 start position in the query sequence (1-based closed interval).
cdr1_end:
type: integer
description: CDR1 end position in the query sequence (1-based closed interval).
cdr2_start:
type: integer
description: CDR2 start position in the query sequence (1-based closed interval).
cdr2_end:
type: integer
description: CDR2 end position in the query sequence (1-based closed interval).
cdr3_start:
type: integer
description: CDR3 start position in the query sequence (1-based closed interval).
cdr3_end:
type: integer
description: CDR3 end position in the query sequence (1-based closed interval).
fwr1_start:
type: integer
description: FWR1 start position in the query sequence (1-based closed interval).
fwr1_end:
type: integer
description: FWR1 end position in the query sequence (1-based closed interval).
fwr2_start:
type: integer
description: FWR2 start position in the query sequence (1-based closed interval).
fwr2_end:
type: integer
description: FWR2 end position in the query sequence (1-based closed interval).
fwr3_start:
type: integer
description: FWR3 start position in the query sequence (1-based closed interval).
fwr3_end:
type: integer
description: FWR3 end position in the query sequence (1-based closed interval).
fwr4_start:
type: integer
description: FWR4 start position in the query sequence (1-based closed interval).
fwr4_end:
type: integer
description: FWR4 end position in the query sequence (1-based closed interval).
v_sequence_alignment:
type: string
description: >
Aligned portion of query sequence assigned to the V gene, including any
indel corrections or numbering spacers.
v_sequence_alignment_aa:
type: string
description: >
Amino acid translation of the v_sequence_alignment field.
d_sequence_alignment:
type: string
description: >
Aligned portion of query sequence assigned to the first or only D gene, including any
indel corrections or numbering spacers.
d_sequence_alignment_aa:
type: string
description: >
Amino acid translation of the d_sequence_alignment field.
d2_sequence_alignment:
type: string
description: >
Aligned portion of query sequence assigned to the second D gene, including any
indel corrections or numbering spacers.
d2_sequence_alignment_aa:
type: string
description: >
Amino acid translation of the d2_sequence_alignment field.
j_sequence_alignment:
type: string
description: >
Aligned portion of query sequence assigned to the J gene, including any
indel corrections or numbering spacers.
j_sequence_alignment_aa:
type: string
description: >
Amino acid translation of the j_sequence_alignment field.
c_sequence_alignment:
type: string
description: >
Aligned portion of query sequence assigned to the constant region, including
any indel corrections or numbering spacers.
c_sequence_alignment_aa:
type: string
description: >
Amino acid translation of the c_sequence_alignment field.
v_germline_alignment:
type: string
description: >
Aligned V gene germline sequence spanning the same region
as the v_sequence_alignment field and including the same set
of corrections and spacers (if any).
v_germline_alignment_aa:
type: string
description: >
Amino acid translation of the v_germline_alignment field.
d_germline_alignment:
type: string
description: >
Aligned D gene germline sequence spanning the same region
as the d_sequence_alignment field and including the same set
of corrections and spacers (if any).
d_germline_alignment_aa:
type: string
description: >
Amino acid translation of the d_germline_alignment field.
d2_germline_alignment:
type: string
description: >
Aligned D gene germline sequence spanning the same region
as the d2_sequence_alignment field and including the same set
of corrections and spacers (if any).
d2_germline_alignment_aa:
type: string
description: >
Amino acid translation of the d2_germline_alignment field.
j_germline_alignment:
type: string
description: >
Aligned J gene germline sequence spanning the same region
as the j_sequence_alignment field and including the same set
of corrections and spacers (if any).
j_germline_alignment_aa:
type: string
description: >
Amino acid translation of the j_germline_alignment field.
c_germline_alignment:
type: string
description: >
Aligned constant region germline sequence spanning the same region
as the c_sequence_alignment field and including the same set
of corrections and spacers (if any).
# Translation counterpart of the c_germline_alignment property.
c_germline_alignment_aa:
    type: string
    description: >
        Amino acid translation of the c_germline_alignment field.
junction_length:
type: integer
description: Number of nucleotides in the junction sequence.
junction_aa_length:
type: integer
description: Number of amino acids in the junction sequence.
x-airr:
adc-query-support: true
np1_length:
type: integer
description: >
Number of nucleotides between the V gene and first D gene alignments or
between the V gene and J gene alignments.
np2_length:
type: integer
description: >
Number of nucleotides between either the first D gene and J gene alignments
or the first D gene and second D gene alignments.
np3_length:
type: integer
description: >
Number of nucleotides between the second D gene and J gene alignments.
n1_length:
type: integer
description: Number of untemplated nucleotides 5' of the first or only D gene alignment.
n2_length:
type: integer
description: Number of untemplated nucleotides 3' of the first or only D gene alignment.
n3_length:
type: integer
description: Number of untemplated nucleotides 3' of the second D gene alignment.
p3v_length:
type: integer
description: Number of palindromic nucleotides 3' of the V gene alignment.
p5d_length:
type: integer
description: Number of palindromic nucleotides 5' of the first or only D gene alignment.
p3d_length:
type: integer
description: Number of palindromic nucleotides 3' of the first or only D gene alignment.
p5d2_length:
type: integer
description: Number of palindromic nucleotides 5' of the second D gene alignment.
p3d2_length:
type: integer
description: Number of palindromic nucleotides 3' of the second D gene alignment.
p5j_length:
type: integer
description: Number of palindromic nucleotides 5' of the J gene alignment.
v_frameshift:
type: boolean
description: >
True if the V gene in the query nucleotide sequence contains a translational
frameshift relative to the frame of the V gene reference sequence.
j_frameshift:
type: boolean
description: >
True if the J gene in the query nucleotide sequence contains a translational
frameshift relative to the frame of the J gene reference sequence.
d_frame:
type: integer
description: >
Numerical reading frame (1, 2, 3) of the first or only D gene in the query nucleotide sequence,
where frame 1 is relative to the first codon of D gene reference sequence.
d2_frame:
type: integer
description: >
Numerical reading frame (1, 2, 3) of the second D gene in the query nucleotide sequence,
where frame 1 is relative to the first codon of D gene reference sequence.
consensus_count:
type: integer
description: >
Number of reads contributing to the (UMI) consensus for this sequence.
For example, the sum of the number of reads for all UMIs that contribute to
the query sequence.
duplicate_count:
type: integer
description: >
Copy number or number of duplicate observations for the query sequence.
For example, the number of UMIs sharing an identical sequence or the number
of identical observations of this sequence absent UMIs.
title: Read count
example: 123
x-airr:
miairr: important
nullable: true
set: 6
subset: data (processed sequence)
name: Read count
cell_id:
type: string
description: >
Identifier defining the cell of origin for the query sequence.
title: Cell index
example: W06_046_091
x-airr:
miairr: important
nullable: true
adc-query-support: true
identifier: true
set: 6
subset: data (processed sequence)
name: Cell index
clone_id:
type: string
description: Clonal cluster assignment for the query sequence.
x-airr:
nullable: true
adc-query-support: true
identifier: true
repertoire_id:
type: string
description: Identifier to the associated repertoire in study metadata.
x-airr:
nullable: true
adc-query-support: true
identifier: true
sample_processing_id:
type: string
description: >
Identifier to the sample processing object in the repertoire metadata
for this rearrangement. If the repertoire has a single sample then
this field may be empty or missing. If the repertoire has multiple samples then
this field may be empty or missing if the sample cannot be differentiated or
the relationship is not maintained by the data processing.
x-airr:
nullable: true
adc-query-support: true
identifier: true
# Link from a rearrangement record to a data processing object; the value may be null.
data_processing_id:
    type: string
    description: >
        Identifier to the data processing object in the repertoire metadata
        for this rearrangement. If this field is empty then the primary data processing object is assumed.
    x-airr:
        nullable: true
        adc-query-support: true
        identifier: true
rearrangement_id:
type: string
description: >
Identifier for the Rearrangement object. May be identical to sequence_id,
but will usually be a universally unique record locator for database applications.
x-airr:
deprecated: true
deprecated-description: Field has been merged with sequence_id to avoid confusion.
deprecated-replaced-by:
- sequence_id
rearrangement_set_id:
type: string
description: >
Identifier for grouping Rearrangement objects.
x-airr:
deprecated: true
deprecated-description: Field has been replaced by other specialized identifiers.
deprecated-replaced-by:
- repertoire_id
- sample_processing_id
- data_processing_id
germline_database:
type: string
description: Source of germline V(D)J genes with version number or date accessed.
example: ENSEMBL, Homo sapiens build 90, 2017-10-01
x-airr:
deprecated: true
deprecated-description: Field was moved up to the DataProcessing level to avoid data duplication.
deprecated-replaced-by:
- "DataProcessing:germline_database"
# A unique inferred clone object that has been constructed within a single data processing
# for a single repertoire and a subset of its sequences and/or rearrangements.
Clone:
    discriminator: AIRR
    type: object
    # Only the clone identifier and the germline alignment are mandatory;
    # every other property below is optional.
    required:
        - clone_id
        - germline_alignment
    properties:
        # --- Identifiers and links to other objects ---
        clone_id:
            type: string
            description: Identifier for the clone.
        repertoire_id:
            type: string
            description: Identifier to the associated repertoire in study metadata.
            x-airr:
                nullable: true
                adc-query-support: true
        data_processing_id:
            type: string
            description: Identifier of the data processing object in the repertoire metadata for this clone.
            x-airr:
                nullable: true
                adc-query-support: true
        sequences:
            type: array
            items:
                type: string
            description: >
                List of sequence_id strings that act as keys to the Rearrangement records for members of the clone.
        # --- Gene calls and junction of the inferred ancestor ---
        v_call:
            type: string
            description: >
                V gene with allele of the inferred ancestor of the clone. For example, IGHV4-59*01.
            example: IGHV4-59*01
        d_call:
            type: string
            description: >
                D gene with allele of the inferred ancestor of the clone. For example, IGHD3-10*01.
            example: IGHD3-10*01
        j_call:
            type: string
            description: >
                J gene with allele of the inferred ancestor of the clone. For example, IGHJ4*02.
            example: IGHJ4*02
        junction:
            type: string
            description: >
                Nucleotide sequence for the junction region of the inferred ancestor of the clone,
                where the junction is defined as the CDR3 plus the two flanking conserved codons.
        junction_aa:
            type: string
            description: >
                Amino acid translation of the junction.
        junction_length:
            type: integer
            description: Number of nucleotides in the junction.
        junction_aa_length:
            type: integer
            description: Number of amino acids in junction_aa.
        # --- Germline alignment and coordinates within it ---
        germline_alignment:
            type: string
            description: >
                Assembled, aligned, full-length inferred ancestor of the clone spanning the same region
                as the sequence_alignment field of nodes (typically the V(D)J region) and including the
                same set of corrections and spacers (if any).
        germline_alignment_aa:
            type: string
            description: >
                Amino acid translation of germline_alignment.
        v_alignment_start:
            type: integer
            description: >
                Start position of the V gene alignment in both the sequence_alignment and germline_alignment
                fields (1-based closed interval).
        v_alignment_end:
            type: integer
            description: >
                End position of the V gene alignment in both the sequence_alignment and germline_alignment
                fields (1-based closed interval).
        d_alignment_start:
            type: integer
            description: >
                Start position of the D gene alignment in both the sequence_alignment and germline_alignment
                fields (1-based closed interval).
        d_alignment_end:
            type: integer
            description: >
                End position of the D gene alignment in both the sequence_alignment and germline_alignment
                fields (1-based closed interval).
        j_alignment_start:
            type: integer
            description: >
                Start position of the J gene alignment in both the sequence_alignment and germline_alignment
                fields (1-based closed interval).
        j_alignment_end:
            type: integer
            description: >
                End position of the J gene alignment in both the sequence_alignment and germline_alignment
                fields (1-based closed interval).
        junction_start:
            type: integer
            description: Junction region start position in the alignment (1-based closed interval).
        junction_end:
            type: integer
            description: Junction region end position in the alignment (1-based closed interval).
        # --- Clone size and seed ---
        sequence_count:
            type: integer
            description: Number of Rearrangement records (sequences) included in this clone.
        clone_abundance:
            type: integer
            description: Non-normalized absolute count of the number of members (immune cells) in this clone.
        seed_id:
            type: string
            description: sequence_id of the seed sequence. Empty string (or null) if there is no seed sequence.
# 1-to-n relationship for a clone to its trees.
Tree:
    discriminator: AIRR
    type: object
    # A tree must name itself, name its clone and carry the Newick topology;
    # the per-node dictionary is optional.
    required:
        - tree_id
        - clone_id
        - newick
    properties:
        tree_id:
            type: string
            description: Identifier for the tree.
        clone_id:
            type: string
            description: Identifier for the clone.
        newick:
            type: string
            description: Newick string of the tree edges.
        nodes:
            type: object
            description: Dictionary of nodes in the tree, keyed by sequence_id string.
            # Every value of the dictionary is validated against the Node schema.
            additionalProperties:
                $ref: '#/Node'
# 1-to-n relationship between a tree and its nodes
Node:
    discriminator: AIRR
    type: object
    # Only the identifier is mandatory; sequence and junction data are optional.
    required:
        - sequence_id
    properties:
        sequence_id:
            type: string
            description: >
                Identifier for this node that matches the identifier in the newick string and, where possible,
                the sequence_id in the source repertoire.
        sequence_alignment:
            type: string
            description: >
                Nucleotide sequence of the node, aligned to the germline_alignment for this clone,
                including any indel corrections or spacers.
        junction:
            type: string
            description: >
                Junction region nucleotide sequence for the node, where the junction is defined as
                the CDR3 plus the two flanking conserved codons.
        junction_aa:
            type: string
            description: >
                Amino acid translation of the junction.
# The cell object acts as point of reference for all data that can be related
# to an individual cell, either by direct observation or inference.
Cell:
    discriminator: AIRR
    type: object
    # Keys listed here must be present on every Cell record; several of them
    # are still marked nullable in their x-airr attributes below.
    required:
        - cell_id  # redefined cell_id > how to centralize it in the yaml
        - rearrangements
        - repertoire_id
        - virtual_pairing
    properties:
        # --- Identifiers and links to other objects ---
        cell_id:
            type: string
            description: >
                Identifier defining the cell of origin for the query sequence.
            title: Cell index
            example: W06_046_091
            x-airr:
                miairr: defined
                nullable: false
                adc-query-support: true
                name: Cell index
        rearrangements:
            type: array
            description: >
                Array of sequence identifiers defined for the Rearrangement object
            title: Cell-associated rearrangements
            items:
                type: string
            example: [id1, id2]  # empty vs NULL?
            x-airr:
                miairr: defined
                nullable: true
                adc-query-support: true
                name: Cell-associated rearrangements
        receptors:
            type: array
            description: >
                Array of receptor identifiers defined for the Receptor object
            title: Cell-associated receptors
            items:
                type: string
            example: [id1, id2]  # empty vs NULL?
            x-airr:
                miairr: defined
                nullable: true
                adc-query-support: true
                name: Cell-associated receptors
        repertoire_id:
            type: string
            description: Identifier to the associated repertoire in study metadata.
            title: Parental repertoire of cell
            x-airr:
                miairr: defined
                nullable: true
                adc-query-support: true
                name: Parental repertoire of cell
        data_processing_id:
            type: string
            description: Identifier of the data processing object in the repertoire metadata for this cell.
            title: Data processing for cell
            x-airr:
                miairr: defined
                nullable: true
                adc-query-support: true
                name: Data processing for cell
        # --- Expression data attached to the cell ---
        expression_study_method:
            type: string
            enum:
                - "flow cytometry"
                - "single-cell transcriptome"
            description: >
                keyword describing the methodology used to assess expression. The values for this field MUST come from a controlled vocabulary
            x-airr:
                miairr: defined
                nullable: true
                adc-api-optional: true
        expression_raw_doi:
            type: string
            description: >
                DOI of raw data set containing the current event
            x-airr:
                miairr: defined
                nullable: true
                adc-api-optional: true
        expression_index:
            type: string
            description: >
                Index addressing the current event within the raw data set.
            x-airr:
                miairr: defined
                nullable: true
                adc-api-optional: true
        expression_tabular:
            type: array
            description: >
                Expression definitions for single-cell
            # Each array element pairs one marker name with one numeric level.
            items:
                type: object
                properties:
                    expression_marker:
                        type: string
                        description: >
                            standardized designation of the transcript or epitope
                        example: CD27
                    expression_value:
                        type: number
                        description: >
                            transformed and normalized expression level.
                        example: 14567.30
        # --- Pairing provenance ---
        virtual_pairing:
            type: boolean
            description: >
                boolean to indicate if pairing was inferred.
            title: Virtual pairing
            x-airr:
                miairr: defined
                nullable: true  # assuming only done for sc experiments, otherwise does not exist
                adc-query-support: true
                name: Virtual pairing