-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
adding NYU mapping into DATS2.2m + tentative transformation pipeline …
…(just a stub for now, need checking with Jeff Grethe), also adding NYU native json, single record and full list
- Loading branch information
1 parent
ee5bf69
commit 8901d8f
Showing
5 changed files
with
18,529 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
NYU JSON schema DATS | ||
id number dataset.identifier | ||
dataset_title string dataset.title | ||
dataset_alt_title [string] dataset.extraproperties. | ||
origin string dataset.extraproperties. | ||
description string dataset.description | ||
dataset_end_date date dataset.dates.date dataset.dates.date.type = "end date" | ||
dataset_start_date date dataset.dates.date dataset.dates.date.type = "start date" | ||
local_experts [person] dataset.creators.Person.roles.Annotation.value="local experts" | ||
authors [person] dataset.creators.Person | ||
person.full_name string dataset.creators.Person.fullName | ||
person.first_name string dataset.creators.Person.firstName | ||
person.last_name string dataset.creators.Person.lastName | ||
person.orcid_id string dataset.creators.Person.identifiers.identifier dataset.creators.Person.identifiers.identifierSource="orcid" | ||
person.bio_url string dataset.creators.Person.extraproperties. | ||
person.email string dataset.creators.Person.email | ||
corresponding_authors [person] dataset.creators.Person.roles.Annotation.value="corresponding author" | ||
date_added date | ||
date_added.date date (ISO8601) dataset.dates.date dataset.dates.date.type = "record addition date" | ||
date_added.timezone_type integer no mapping possible as Date has no link to extraProperties (DATS extension mechanism) | ||
date_added.timezone string no mapping possible as Date has no link to extraProperties (DATS extension mechanism) | ||
dataset_formats [string] dataset.distributions.Distribution.identifier=$count dataset.distributions.Distribution.format=$value | ||
data_types [string] dataset.types.DataType.method=$value | ||
study_types [string] dataset.producedBy.Study.types.Annotation.value=$value | ||
collection_standards [] | ||
measurement_standard_name string dataset.distributions.Distribution.identifiers.identifier=$count dataset.distributions.Distribution.conformsTo.DataStandard.name | ||
measurement_standard_authority string dataset.distributions.Distribution.identifiers.identifier=$count dataset.distributions.Distribution.conformsTo.DataStandard.extraProperties.categoryValuePairs.category="measurement standard authority" dataset.distributions.Distribution.conformsTo.DataStandard.extraProperties.categoryValuePairs.value=$value | ||
awards [award] | ||
awards.award string dataset.acknowledges.Grant.name | ||
awards.award_funder string dataset.acknowledges.Grant.funder.Organisation.name | ||
awards.award_url string dataset.acknowledges.Grant.identifiers.identifier | ||
awards.funder_type string dataset.acknowledges.Grant.extraProperties.categoryValuePairs.category="funder type" dataset.acknowledges.Grant.extraProperties.categoryValuePairs.value=$value | ||
access_restrictions [string] dataset.distributions.Distribution.identifiers.identifier=$count dataset.distributions.Distribution.access.Access.identifiers.identifier=$count dataset.distributions.Distribution.access.Access.authorization. | ||
subject_population_ages [string] dataset.producedBy.Study.studyGroups.StudyGroup.identifiers.identifier=$count dataset.producedBy.Study.studyGroups.StudyGroup.name=$value | ||
subject_geographic_area [] | ||
subject_geographic_area.geographic_area_name string dataset.spatialCoverage.Place.identifiers.identifier=$count dataset.spatialCoverage.Place.name | ||
subject_geographic_area.geographic_area_authority string dataset.spatialCoverage.Place.extraProperties.categoryValuePairs.category="geographic authority" dataset.spatialCoverage.Place.extraProperties.categoryValuePairs.values=$value | ||
subject_domain [subject_domain] | ||
subject_domain.subject_domain string dataset.isAbout.value=$value | ||
subject_domain.mesh_code string dataset.isAbout.ontologyTermIRI=$value | ||
subject_keywords string dataset.keywords.value=$value | ||
publications [citations] | ||
citation string dataset.citations.Publication.title=$value | ||
citation.url string dataset.citations.Publication.identifiers.identifier dataset.citations.Publication.identifiers.identifierSource="doi_url" | ||
citation.doi string dataset.citations.Publication.identifiers.identifier dataset.citations.Publication.identifiers.identifierSource="doi" | ||
publishers [] | ||
publisher_name | ||
publisher_url | ||
subject_of_study [] dataset.isAbout.value=$value | ||
related_software [software] | ||
software_name string dataset.types.DataType.method.value=$value | ||
software_description string | ||
software_url string dataset.types.DataType.method.valueIRI=$value | ||
related_equipment [equipment] | ||
related_equipment string dataset.types.DataType.instrument=$value | ||
equipment_description string | ||
equipment_url string dataset.types.DataType.method.valueIRI=$value | ||
related_datasets [] | ||
related_dataset_uid string dataset.types.DataType.relatedIdentifiers.identifier=$value | ||
relationship_attributes string dataset.types.DataType.relatedIdentifiers.relationType=$value | ||
relationship_notes string dataset.types.DataType.extraProperties.categoryValuePairs.category="relationship notes" dataset.types.DataType.extraProperties.categoryValuePairs.values=$value | ||
parent_dataset_uid string dataset.types.DataType.relatedIdentifiers.identifier=$value dataset.types.DataType.relatedIdentifiers.relationType="parent dataset" | ||
other_resources [resource] | ||
resource_name dataset.types.DataType.extraProperties.categoryValuePairs.category="resource name" dataset.types.DataType.extraProperties.categoryValuePairs.values=$value | ||
resource_description dataset.types.DataType.extraProperties.categoryValuePairs.category="resource description" dataset.types.DataType.extraProperties.categoryValuePairs.values=$value | ||
resource_url dataset.types.DataType.extraProperties.categoryValuePairs.category="resource url" dataset.types.DataType.extraProperties.categoryValuePairs.values=$value | ||
published boolean dataset.extraProperties.CategoryValuePairs. | ||
dataset_size string dataset.distributions.Distribution.identifier=$count dataset.distributions.Distribution.size=$value dataset.distributions.Distribution.unit="" | ||
subscriber string dataset.extraProperties.categoryValuePairs.category="subscriber" dataset.extraProperties.categoryValuePairs.values=$value | ||
access_instructions string dataset.availability=$value | ||
|
||
date_updated | ||
date_updated.date date ISO8601 dataset.dates.date dataset.dates.date.type = "record update date" | ||
date_updated.timezone_type integer no mapping possible | ||
date_updated.timezone string no mapping possible | ||
date_archived | ||
date_archived.date date ISO8601 dataset.dates.date dataset.dates.date.type = "record archival date" | ||
date_archived.timezone_type integer no mapping possible | ||
date_archived.timezone string no mapping possible | ||
archived boolean dataset.extraProperties.categoryValuePairs.category="archived status" dataset.extraProperties.categoryValuePairs.values=$value | ||
archival_notes string dataset.extraProperties.categoryValuePairs.category="archival notes" dataset.extraProperties.categoryValuePairs.values=$value | ||
last_edit_notes string dataset.extraProperties.categoryValuePairs.category="last edit notes" dataset.extraProperties.categoryValuePairs.values=$value | ||
subject_genders [string] dataset.producedBy.Study.input.Annotation.value=$value | ||
data_locations [data location] dataset.distributions.Distribution.identifier=$count | ||
data_location string dataset.distributions.Distribution.access.Access.types.value=$value | ||
location_content string ? unclear nature of the tag and associated data -> did not map | ||
data_access_url string dataset.distributions.Distribution.access.Access.accessURL=$value | ||
accession_number string dataset.distributions.Distribution.access.Access.identifiers.identifier=$value |
32 changes: 32 additions & 0 deletions
32
examples/NYU/DATS-NYU-transformation pipeline - partial - Sheet1.tsv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
DATS | ||
|
||
/* Data Repository Information */ | ||
let "dataRepository.ID" = "SCR:XXXXXX"; | ||
let "dataRepository.name" = "NYU"; | ||
let "dataRepository.homePage" = "https://datacatalog.med.nyu.edu/"; | ||
|
||
/* Organization Information */ | ||
let "organization.ID"= "SCR:YYYYYY"; | ||
let "organization.name" = "NYU Health Sciences Library"; | ||
let "organization.abbreviation" = "HSL NYU"; | ||
let "organization.homePage" = "https://hsl.med.nyu.edu/"; | ||
|
||
/* Dataset Landing Page*/ | ||
transform column "$.'title'" to "access.landingPage" apply {{ result='https://datacatalog.med.nyu.edu/' + value}}; | ||
|
||
/* Dataset Creator */ | ||
let "dataset.creators[0]" = "NULL"; | ||
|
||
/* Dataset Release Date Information */ | ||
let "dataset.dateReleased" = "NULL"; | ||
|
||
/* Dataset Qualifiers */ | ||
transform column $.'access_restrictions' to "dataset.availability" apply {{ result = value }} ; | ||
|
||
transform column $.'data_types' to "dataset.types" apply {{ result = value }} ; | ||
let "dataset.aggregation" = "dataset summary"; | ||
|
||
let "dataset.refinement" = "curated"; | ||
|
||
/* Dataset Distributions */ | ||
let "datasetDistributions[0].format" = "JSON"; |
155 changes: 155 additions & 0 deletions
155
examples/NYU/NYU-native-json-feed/NYU_datacatalog_single_record-10040.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
[ | ||
{ | ||
"id": 10040, | ||
"dataset_title": "CHART New York Smoking-Cessation Interventions for Urban Hospital Patients Dataset", | ||
"dataset_alt_title": [], | ||
"origin": "Internal", | ||
"description": "The Consortium of Hospitals Advancing Research on Tobacco (CHART) is a network of six projects and a coordinating center aimed at assessing the effectiveness and cost-effectiveness of smoking cessation interventions initiated during hospitalizations and continued post-discharge. The CHART New York Smoking-Cessation Interventions for Urban Hospital Patients trial looked at the effectiveness of a pro-active, multi-session telephone counseling program with motivational enhancement as compared to the New York State Quitline. Data was collected from a randomized trial aimed at comparing effectiveness of two approaches of smoking cessation interventions. A total of 1618 adult smokers were enrolled from Bellevue Hospital and the Manhattan VA. The data examines smoking abstinence at baseline, and two-, six-month follow up periods. Measures include socio-demographics, smoking history, health habits, health status, depression, biochemical validation, participant satisfaction and intervention fidelity.", | ||
"dataset_end_date": "2015", | ||
"dataset_start_date": "2011", | ||
"local_experts": [], | ||
"authors": [ | ||
{ | ||
"full_name": "Donna Shelley", | ||
"last_name": null, | ||
"first_name": null, | ||
"orcid_id": null, | ||
"bio_url": "http:\/\/www.med.nyu.edu\/biosketch\/shelld01", | ||
"email": null | ||
}, | ||
{ | ||
"full_name": "Erin Rogers", | ||
"last_name": null, | ||
"first_name": null, | ||
"orcid_id": null, | ||
"bio_url": "http:\/\/www.med.nyu.edu\/biosketch\/rogere02\/", | ||
"email": null | ||
}, | ||
{ | ||
"full_name": "Scott Sherman", | ||
"last_name": null, | ||
"first_name": null, | ||
"orcid_id": "http:\/\/orcid.org\/0000-0003-1752-7303", | ||
"bio_url": "http:\/\/www.med.nyu.edu\/biosketch\/ses2127", | ||
"email": null | ||
} | ||
], | ||
"corresponding_authors": [], | ||
"date_added": { | ||
"date": "2015-05-22 00:00:00.000000", | ||
"timezone_type": 3, | ||
"timezone": "America\/New_York" | ||
}, | ||
"dataset_formats": [], | ||
"data_types": [ | ||
"Surveys", | ||
"Administrative", | ||
"Behavioral" | ||
], | ||
"study_types": [ | ||
"Interventional" | ||
], | ||
"collection_standards": [], | ||
"awards": [ | ||
{ | ||
"award": "1U01HL105229-01", | ||
"award_funder": "NHLBI", | ||
"award_url": "http:\/\/projectreporter.nih.gov\/project_info_description.cfm?aid=8015408&icde=24767540&ddparam=&ddvalue=&ddsub=&cr=6&csb=default&cs=ASC", | ||
"funder_type": "Federal, NIH" | ||
} | ||
], | ||
"access_restrictions": [ | ||
"Author approval required" | ||
], | ||
"subject_population_ages": [ | ||
"Adult (19 years - 64 years)", | ||
"Senior (65 years - 79 years)", | ||
"Aged (80 years and over)" | ||
], | ||
"subject_geographic_area": [ | ||
{ | ||
"geographic_area_name": "New York (City)", | ||
"geographic_area_authority": null | ||
} | ||
], | ||
"subject_geographic_area_details": [ | ||
{ | ||
"geographic_area_detail_name": "New York (State) - New York City", | ||
"geographic_area_detail_authority": null | ||
} | ||
], | ||
"subject_domain": [ | ||
{ | ||
"subject_domain": "Delivery of Health Care", | ||
"mesh_code": "https:\/\/id.nlm.nih.gov\/mesh\/D003695.html" | ||
}, | ||
{ | ||
"subject_domain": "Health Care Costs", | ||
"mesh_code": "https:\/\/id.nlm.nih.gov\/mesh\/D017048.html" | ||
}, | ||
{ | ||
"subject_domain": "Health Status", | ||
"mesh_code": "https:\/\/id.nlm.nih.gov\/mesh\/D006304.html" | ||
}, | ||
{ | ||
"subject_domain": "Risk Factors", | ||
"mesh_code": "https:\/\/id.nlm.nih.gov\/mesh\/D012307.html" | ||
} | ||
], | ||
"subject_keywords": [ | ||
"Epidemiology", | ||
"Health expenditures", | ||
"Health services", | ||
"Outpatients", | ||
"Patient discharge", | ||
"Quitline", | ||
"Randomized trial", | ||
"Smoking", | ||
"Smoking cessation", | ||
"Telephone counseling", | ||
"Tobacco use", | ||
"Underserved population", | ||
"Urban hospitals" | ||
], | ||
"publications": [ | ||
{ | ||
"citation": "Grossman E, Shelley D, Braithwaite RS, Lobach I, Goffin A, Rogers E, et al. Effectiveness of smoking-cessation interventions for urban hospital patients: study protocol for a randomized controlled trial. Trials. 2012;13:126.", | ||
"url": "http:\/\/www.ncbi.nlm.nih.gov\/pmc\/articles\/PMC3502597\/", | ||
"doi": "10.1186\/1745-6215-13-126" | ||
}, | ||
{ | ||
"citation": "Rogers ES, Smelson DA, Gillespie CC, Elbel B, Poole S, Hagedorn HJ, Kalman D, Krebs P, Fang XY, Wang BH, Sherman SE. Telephone Smoking-Cessation Counseling for Smokers in Mental Health Clinics A Patient-Randomized Controlled Trial. Am J Prev Med. 2016 Apr;50(4):518-27.", | ||
"url": "https:\/\/dx.doi.org\/10.1016\/j.amepre.2015.10.004", | ||
"doi": "10.1016\/j.amepre.2015.10.004" | ||
} | ||
], | ||
"publishers": [], | ||
"subject_of_study": [], | ||
"related_software": [], | ||
"related_equipment": [], | ||
"related_datasets": [], | ||
"other_resources": [], | ||
"published": true, | ||
"dataset_size": null, | ||
"subscriber": null, | ||
"access_instructions": "Contact the author.", | ||
"date_updated": { | ||
"date": "2017-06-14 00:00:00.000000", | ||
"timezone_type": 3, | ||
"timezone": "America\/New_York" | ||
}, | ||
"date_archived": null, | ||
"archived": false, | ||
"archival_notes": null, | ||
"last_edit_notes": null, | ||
"subject_genders": [], | ||
"data_locations": [ | ||
{ | ||
"data_location": "Author", | ||
"location_content": null, | ||
"data_access_url": "mailto:scott.sherman@nyumc.org?Subject=NYULMC%20Dataset%20Request&cc=datacatalog@med.nyu.edu", | ||
"accession_number": null | ||
} | ||
] | ||
} | ||
] |
Oops, something went wrong.