-
Notifications
You must be signed in to change notification settings - Fork 11
/
data_dictionary.json
362 lines (362 loc) · 19.2 KB
/
data_dictionary.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "epiparameter data",
"description": "The database of epidemiological distributions for a variety of infectious diseases and pathogens. Last modified 11/05/2023.",
"type": "array",
"items": {
"type": "object",
"properties": {
"disease": {
"description": "The infectious disease specified in the study.",
"examples": ["ebola", "measles"],
"type": "string"
},
"pathogen": {
"description": "Causative agent of disease specified in study. NA if not specified in study.",
"examples": ["SARS-CoV-2", "monkeypox virus"],
"type": ["string", "null"]
},
"epi_distribution": {
"description": "The name of the epidemiological distribution type. Names should be whitespace separated within each string.",
"examples": ["incubation period", "serial interval", "offspring distribution"],
"type": "string",
"enum": ["incubation period", "onset to hospitalisation", "onset to death", "serial interval", "generation time", "offspring distribution", "hospitalisation to death", "hospitalisation to discharge", "notification to death", "notification to discharge", "onset to discharge", "onset to ventilation"]
},
"probability_distribution": {
"description": "An object containing the name of the probability distribution and its parameters if available.",
"type": "object",
"properties": {
"prob_distribution": {
"description": "The name of the probability distribution, following R language distribution naming. A probability distribution can be null in cases when summary statistics are reported but a distribution is not given.",
"examples": ["lnorm", "gamma"],
"type": ["string", "null"],
"enum": ["lnorm", "gamma", "weibull", "nbinom", "geom", "pois", "norm", null]
},
"parameters": {
"type": "object",
"properties": {
"shape": {
"description": "The shape parameter of either the gamma or Weibull distribution.",
"examples": [2.0, 4.5],
"type": "number"
},
"shape_ci_limits": {
"description": "The confidence interval of the distribution's shape parameter, specified by two numbers in an array.",
"examples": [[0.3, 3.2],[0.5, 0.7]],
"type": "array",
"items": {
"type": "number"
}
},
"shape_ci": {
"description": "The interval of the uncertainty around the shape parameter of the distribution, for example 95% confidence interval would be 95.",
"examples": [95, 90, 80],
"type": "number"
},
"scale": {
"description": "The scale parameter of either the gamma or Weibull distribution.",
"examples": [0.9, 1.1],
"type": "number"
},
"scale_ci_limits": {
"description": "The confidence interval of the distribution's scale parameter, specified by two numbers in an array.",
"examples": [[0.2, 0.5], [1.1, 1.7]],
"type": "array",
"items": {
"type": "number"
}
},
"scale_ci": {
"description": "The interval of the uncertainty around the scale parameter of the distribution, for example 95% confidence interval would be 95.",
"examples": [95, 90, 80],
"type": "number"
},
"meanlog": {
"description": "The meanlog parameter of the lognormal distribution.",
"examples": [4.3, 7],
"type": "number"
},
"meanlog_ci_limits": {
"description": "The confidence interval of the distribution's meanlog parameter, specified by two numbers in an array.",
"examples": [[0.4, 0.9], [1.5, 3.2]],
"type": "array",
"items": {
"type": "number"
}
},
"meanlog_ci": {
"description": "The interval of the uncertainty around the meanlog parameter of the distribution, for example 95% confidence interval would be 95.",
"examples": [95, 90, 80],
"type": "number"
},
"sdlog": {
"description": "The sdlog parameter of the lognormal distribution.",
"examples": [1.2, 1.4],
"type": "number"
},
"sdlog_ci_limits": {
"description": "The confidence interval of the distribution's sdlog parameter, specified by two numbers in an array.",
"examples": [[0.3, 0.8], [1.4, 1.45]],
"type": "array",
"items": {
"type": "number"
}
},
"sdlog_ci": {
"description": "The interval of the uncertainty around the sdlog parameter of the distribution, for example 95% confidence interval would be 95.",
"examples": [95, 90, 80],
"type": ["number", "null"]
},
"dispersion": {
"description": "The dispersion factor of the lognormal distribution. This can be used with the median to calculate the bounds that contain approximately two-thirds of the data.",
"examples": [1.2, 1.4],
"type": "number"
},
"dispersion_ci_limits": {
"description": "The confidence interval of the distribution's dispersion, specified by two numbers in an array.",
"examples": [[0.3, 0.8], [1.4, 1.45]],
"type": ["array", "null"],
"items": {
"type": "number"
}
},
"dispersion_ci": {
"description": "The interval of the uncertainty around the dispersion of the distribution, for example 95% confidence interval would be 95.",
"examples": [95, 90, 80],
"type": "number"
},
"precision": {
"description": "The precision parameter of a distribution. It is common to parameterise a distribution using the precision parameter in Bayesian inference.",
"examples": [1.2, 1.4],
"type": ["number", "null"]
},
"precision_ci_limits": {
"description": "The confidence interval of the distribution's precision parameter, specified by two numbers in an array.",
"examples": [[0.3, 0.8], [1.4, 1.45]],
"type": ["array", "null"],
"items": {
"type": "number"
}
},
"precision_ci": {
"description": "The interval of the uncertainty around the precision parameter, for example 95% confidence interval would be 95.",
"examples": [95, 90, 80],
"type": ["number", "null"]
}
}
}
},
"required": ["prob_distribution"]
},
"summary_statistics": {
"type": "object",
"properties": {
"mean": {
"description": "The mean value (expectation) of the distribution. If the mean is not reported put NA.",
"examples": [5, 3.2],
"type": "number"
},
"mean_ci_limits": {
"description": "The confidence interval of the distribution mean specified by two numbers in an array.",
"examples": [[3.1, 5.5], [2, 4.2]],
"type": "array",
"items": {
"type": "number"
}
},
"mean_ci": {
"description": "The interval of the uncertainty around the mean of the distribution, for example 95% confidence interval would be 95.",
"examples": [95, 90, 80],
"type": ["number", "null"]
},
"sd": {
"description": "The standard deviation of the distribution.",
"examples": [2.3, 0.5],
"type": "number"
},
"sd_ci_limits": {
"description": "The confidence interval of the distribution standard deviation specified by two numbers in an array.",
"examples": [[0.2, 0.9], [0.8, 1.2]],
"type": "array",
"items": {
"type": "number"
}
},
"sd_ci": {
"description": "The interval of the uncertainty around the standard deviation of the distribution, for example 95% confidence interval would be 95.",
"examples": [95, 90, 80],
"type": "number"
},
"median": {
"description": "The 50th quantile (or median) of the distribution.",
"examples": [1.7, 2.5],
"type": ["number", "null"]
},
"median_ci_limits": {
"description": "The confidence interval of the distribution median specified by two numbers in an array.",
"examples": [[2.1, 2.2], [4.5, 5.1]],
"type": "array",
"items": {
"type": "number"
}
},
"median_ci": {
"description": "The interval of the uncertainty around the median of the distribution, for example 95% confidence interval would be 95.",
"examples": [95, 90, 80],
"type": ["number", "null"]
},
"quantile_values": {
"description": "The values at the quantiles of the distribution.",
"examples": [[2.1, 5.5], [4.3, 9.2]],
"type": "array",
"items": {
"type": "number"
}
},
"quantile_names": {
"description": "The names of the quantiles of the distribution, for example 50 is the median and 95 is the 95th quantile of the distribution. These names need to correspond to the quantile values. The first element in quantile_values is named by the first element in quantile_names, etc.",
"examples": [["2.5", "97.5"], ["25", "75"]],
"type": "array",
"items": {
"type": "string"
}
},
"lower_range": {
"description": "The lower bound of the range of the data which is used to infer the parameters of the distribution when they are not specified.",
"examples": [0.2, 0.4],
"type": "number"
},
"upper_range": {
"description": "The upper bound of the range of the data which is used to infer the parameters of the distribution when they are not specified.",
"examples": [4.5, 6.7],
"type": "number"
}
}
},
"citation": {
"type": "object",
"properties": {
"author": {
"type": "array",
"items": {
"type": "object",
"properties": {
"given": {
"description": "A character string with the given name(s) of the author. Middle name, initialised or not, should be included in the given name, not the family name. This is combined with the family name field to create the full name. Give all authors of the publication as separate authors. Following R's person class, give teams, institutes and companies in the given name and not in the family name.",
"examples": ["John", "Amy", "WHO Team"],
"type": "string"
},
"family": {
"description": "A character string with the family name(s) of the author. Middle name, initialised or not, should be included in the given name, not the family name. This is combined with the given name field to create the full name. When organisations or teams are supplied as a given name, the family name is null.",
"examples": ["Smith", "Jones"],
"type": ["string", "null"]
}
},
"required": ["given", "family"]
}
},
"title": {
"description": "The title of the article that published the epidemiological parameters.",
"examples": ["Incubation period of COVID-19", "Serial interval of Ebola"],
"type": "string"
},
"journal": {
"description": "The name of the journal that published the article that published the epidemiological parameters. This can also be a pre-print server, e.g., medRxiv.",
"examples": ["The Lancet", "PLoS One", "medRxiv"],
"type": "string"
},
"year": {
"description": "The year the paper or report was published.",
"examples": [2019, 2020],
"type": "number",
"exclusiveMinimum": 0
},
"pmid": {
"description": "A PubMed unique identifier number assigned to papers to give them a unique identifier within PubMed.",
"examples": [18183754, 23749571],
"type": ["number", "null"]
},
"doi": {
"description": "A Digital Object Identifier (DOI) assigned to papers, which is unique to each paper.",
"examples": ["doi:10.1080/02626667.2018.1560449", "https://doi.org/10.1111/hex.12487", "https://dx.doi.org/10.1080/02626667.2018.1560449", "https://doi.org/10.1016/j.jpsychires.2017.11.014"],
"type": "string"
}
},
"required": ["author", "title", "journal", "year", "doi"]
},
"metadata": {
"type": "object",
"properties": {
"sample_size": {
"description": "The sample size of the data used to fit the delay distribution. This is usually the number of people with data on a primary and possibly secondary event of interest. In cases where the sample size is not stated NA can be used.",
"examples": [25, 150],
"type": ["number", "null"]
},
"region": {
"description": "The geographical location where the data was collected. This can be given at sub-national, national, or continental level. Multiple nested regions can be given and are comma separated. When the region is not specified NA can be given.",
"examples": ["England", "Wuhan, China"],
"type": ["string", "null"]
},
"transmission_mode": {
"description": "A character string defining the mode of transmission of the infectious agent, specific to the data used in the study. This defines whether a pathogen is vector-borne (i.e. is transmitted between humans through an intermediate vector), is transmitted from human-to-human (natural_human_to_human) or other. If multiple modes of transmission are recorded then mixed can be given. In cases where no mode of transmission is known or given in the study unknown can be stated.",
"examples": ["vector_borne", "natural_human_to_human", "unknown"],
"type": "string"
},
"vector": {
"description": "The name of the vector transmitting the vector-borne disease. This can be a common name, or a Latin binomial name of a specific vector species (see examples). Both the common name and taxonomic name can be given with one given in parentheses. When a disease is not vector-borne NA should be given.",
"examples": ["mosquito", "Aedes aegypti", "mosquito (Aedes aegypti)", "Aedes aegypti (mosquito)"],
"type": "string"
},
"extrinsic": {
"description": "A boolean value defining whether the data entry is an extrinsic delay distribution, such as the extrinsic incubation period. This field is required because intrinsic and extrinsic delay distributions are stored as separate entries in the database and can be linked. When the disease is not vector-borne FALSE should be given.",
"examples": [true, false],
"type": "boolean"
},
"inference_method": {
"description": "The type of inference used to fit the delay distribution to the data. Abbreviations of model fitting techniques can be specified as long as they are non-ambiguous. This field is only used to determine whether the uncertainty intervals possibly specified in the other fields are: confidence intervals (in the case of maximum likelihood), or credible intervals (in the case of Bayesian inference). Uncertainty bounds for other types of inference methods, or if the inference method is unstated, are assumed to be confidence intervals. When the inference method is unknown or a disease does not have a probability distribution NA can be given.",
"examples": ["bayesian", "maximum likelihood", "mle"],
"type": ["string", "null"],
"enum": ["mle", "bayesian", null]
}
}
},
"method_assessment": {
"type": "object",
"properties": {
"truncation": {
"description": "The truncation point for the distribution when right-truncation is used to account for the bias in survival data when fitting the distribution.",
"examples": [10, 15.5],
"type": ["number", "null"]
},
"discretised": {
"description": "A boolean indicating whether the distribution fit to the data was discretised. If discretisation is not mentioned in the paper it is assumed the distribution fit to the data was continuous.",
"examples": [true, false],
"type": "boolean"
},
"censored": {
"description": "A boolean indicating whether the distribution fit to the data correctly handled censored data. This is where the timing of events is either not known with precision or is known but recorded at a coarse level.",
"examples": [true, false],
"type": ["boolean", "null"]
},
"right_truncated": {
"description": "A boolean indicating whether the distribution fit to the data correctly handled right-truncation to account for the fact that people in the cohort that experienced a primary event did not experience a secondary event and so shorter delay times are over-represented in the data.",
"examples": [true, false],
"type": ["boolean", "null"]
},
"phase_bias_adjusted": {
"description": "A boolean indicating whether the distribution fit took into account whether the outbreak was in a stage of growth or decline which can bias the results.",
"examples": [true, false],
"type": ["boolean", "null"]
}
}
},
"notes": {
"description": "A notes field to add extra information that can be used to inform the user of the data but could not be recorded in one of the previous fields. When no notes are given use NA.",
"examples": ["No additional notes", "Pooled data from many papers was used to fit distribution"],
"type": ["string", "null"]
}
},
"required": ["disease", "epi_distribution", "summary_statistics", "citation", "metadata", "method_assessment", "notes"],
"additionalProperties": false
}
}