Skip to content

Commit

Permalink
adding NYU mapping into DATS2.2m + tentative transformation pipeline …
Browse files Browse the repository at this point in the history
…(just a stub for now, need checking with Jeff Grethe), also adding NYU native json, single record and full list
  • Loading branch information
proccaserra committed Mar 1, 2018
1 parent ee5bf69 commit 8901d8f
Show file tree
Hide file tree
Showing 5 changed files with 18,529 additions and 0 deletions.
88 changes: 88 additions & 0 deletions examples/NYU/DATS-NYU-mapping - Sheet2.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
NYU JSON schema DATS
id number dataset.identifier
dataset_title string dataset.title
dataset_alt_title [string] dataset.extraproperties.
origin string dataset.extraproperties.
description string dataset.description
dataset_end_date date dataset.dates.date dataset.dates.date.type = "end date"
dataset_start_date date dataset.dates.date dataset.dates.date.type = "start date"
local_experts [person] dataset.creators.Person.roles.Annotation.value="local experts"
authors [person] dataset.creators.Person
person.full_name string dataset.creators.Person.fullName
person.first_name string dataset.creators.Person.firstName
person.last_name string dataset.creators.Person.lastName
person.orcid_id string dataset.creators.Person.identifiers.identifier dataset.creators.Person.identifiers.identifierSource="orcid"
person.bio_url string dataset.creators.Person.extraproperties.
person.email string dataset.creators.Person.email
corresponding_authors [person] dataset.creators.Person.roles.Annotation.value="corresponding author"
date_added date
date_added.date date (ISO8601) dataset.dates.date dataset.dates.date.type = "record addition date"
date_added.timezone_type integer no mapping possible because the DATS Date entity has no link to extraProperties (DATS extension mechanism)
date_added.timezone string no mapping possible because the DATS Date entity has no link to extraProperties (DATS extension mechanism)
dataset_formats [string] dataset.distributions.Distribution.identifier=$count dataset.distributions.Distribution.format=$value
data_types [string] dataset.types.DataType.method=$value
study_types [string] dataset.producedBy.Study.types.Annotation.value=$value
collection_standards []
measurement_standard_name string dataset.distributions.Distribution.identifiers.identifier=$count dataset.distributions.Distribution.conformsTo.DataStandard.name
measurement_standard_authority string dataset.distributions.Distribution.identifiers.identifier=$count dataset.distributions.Distribution.conformsTo.DataStandard.extraProperties.categoryValuePairs.category="measurement standard authority" dataset.distributions.Distribution.conformsTo.DataStandard.extraProperties.categoryValuePairs.values=$value
awards [award]
awards.award string dataset.acknowledges.Grant.name
awards.award_funder string dataset.acknowledges.Grant.funder.Organisation.name
awards.award_url string dataset.acknowledges.Grant.identifiers.identifier
awards.funder_type string dataset.acknowledges.Grant.extraProperties.categoryValuePairs.category="funder type" dataset.acknowledges.Grant.extraProperties.categoryValuePairs.values=$value
access_restrictions [string] dataset.distributions.Distribution.identifiers.identifier=$count dataset.distributions.Distribution.access.Access.identifiers.identifier=$count dataset.distributions.Distribution.access.Access.authorization.
subject_population_ages [string] dataset.producedBy.Study.studyGroups.StudyGroup.identifiers.identifier=$count dataset.producedBy.Study.studyGroups.StudyGroup.name=$value
subject_geographic_area []
subject_geographic_area.geographic_area_name string dataset.spatialCoverage.Place.identifiers.identifier=$count dataset.spatialCoverage.Place.name
subject_geographic_area.geographic_area_authority string dataset.spatialCoverage.Place.extraProperties.categoryValuePairs.category="geographic authority" dataset.spatialCoverage.Place.extraProperties.categoryValuePairs.values=$value
subject_domain [subject_domain]
subject_domain.subject_domain string dataset.isAbout.value=$value
subject_domain.mesh_code string dataset.isAbout.ontologyTermIRI=$value
subject_keywords string dataset.keywords.value=$value
publications [citations]
citation string dataset.citations.Publication.title=$value
citation.url string dataset.citations.Publication.identifiers.identifier dataset.citations.Publication.identifiers.identifierSource="doi_url"
citation.doi string dataset.citations.Publication.identifiers.identifier dataset.citations.Publication.identifiers.identifierSource="doi"
publishers []
publisher_name
publisher_url
subject_of_study [] dataset.isAbout.value=$value
related_software [software]
software_name string dataset.types.DataType.method.value=$value
software_description string
software_url string dataset.types.DataType.method.valueIRI=$value
related_equipment [equipment]
related_equipment string dataset.types.DataType.instrument=$value
equipment_description string
equipment_url string dataset.types.DataType.method.valueIRI=$value
related_datasets []
related_dataset_uid string dataset.types.DataType.relatedIdentifiers.identifier=$value
relationship_attributes string dataset.types.DataType.relatedIdentifiers.relationType=$value
relationship_notes string dataset.types.DataType.extraProperties.categoryValuePairs.category="relationship notes" dataset.types.DataType.extraProperties.categoryValuePairs.values=$value
parent_dataset_uid string dataset.types.DataType.relatedIdentifiers.identifier=$value dataset.types.DataType.relatedIdentifiers.relationType="parent dataset"
other_resources [resource]
resource_name dataset.types.DataType.extraProperties.categoryValuePairs.category="resource name" dataset.types.DataType.extraProperties.categoryValuePairs.values=$value
resource_description dataset.types.DataType.extraProperties.categoryValuePairs.category="resource description" dataset.types.DataType.extraProperties.categoryValuePairs.values=$value
resource_url dataset.types.DataType.extraProperties.categoryValuePairs.category="resource url" dataset.types.DataType.extraProperties.categoryValuePairs.values=$value
published boolean dataset.extraProperties.CategoryValuePairs.
dataset_size string dataset.distributions.Distribution.identifier=$count dataset.distributions.Distribution.size=$value dataset.distributions.Distribution.unit=""
subscriber string dataset.extraProperties.categoryValuePairs.category="subscriber" dataset.extraProperties.categoryValuePairs.values=$value
access_instructions string dataset.availability=$value

date_updated
date_updated.date date ISO8601 dataset.dates.date dataset.dates.date.type = "record update date"
date_updated.timezone_type integer no mapping possible
date_updated.timezone string no mapping possible
date_archived
date_archived.date date ISO8601 dataset.dates.date dataset.dates.date.type = "record archival date"
date_archived.timezone_type integer no mapping possible
date_archived.timezone string no mapping possible
archived boolean dataset.extraProperties.categoryValuePairs.category="archived status" dataset.extraProperties.categoryValuePairs.values=$value
archival_notes string dataset.extraProperties.categoryValuePairs.category="archival notes" dataset.extraProperties.categoryValuePairs.values=$value
last_edit_notes string dataset.extraProperties.categoryValuePairs.category="last edit notes" dataset.extraProperties.categoryValuePairs.values=$value
subject_genders [string] dataset.producedBy.Study.input.Annotation.value=$value
data_locations [data location] dataset.distributions.Distribution.identifier=$count
data_location string dataset.distributions.Distribution.access.Access.types.value=$value
location_content string ? unclear nature of the tag and associated data -> did not map
data_access_url string dataset.distributions.Distribution.access.Access.accessURL=$value
accession_number string dataset.distributions.Distribution.access.Access.identifiers.identifier=$value
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
DATS

/*
 * Stub transformation script: NYU Data Catalog JSON records -> DATS.
 *
 * `let` statements assign constant values to DATS target paths;
 * `transform column` statements copy a source field (addressed by a JSONPath
 * expression) to a DATS target path, passing it through the `apply` block,
 * where `value` is the source value and `result` is what gets written.
 */

/* Data Repository Information */
/* NOTE(review): "SCR:XXXXXX" looks like a placeholder identifier - TODO replace with the real one. */
let "dataRepository.ID" = "SCR:XXXXXX";
let "dataRepository.name" = "NYU";
let "dataRepository.homePage" = "https://datacatalog.med.nyu.edu/";

/* Organization Information */
/* NOTE(review): "SCR:YYYYYY" looks like a placeholder identifier - TODO replace with the real one. */
let "organization.ID"= "SCR:YYYYYY";
let "organization.name" = "NYU Health Sciences Library";
let "organization.abbreviation" = "HSL NYU";
let "organization.homePage" = "https://hsl.med.nyu.edu/";

/* Dataset Landing Page*/
/* NOTE(review): the sample NYU record has no 'title' key (it exposes 'dataset_title' and 'id');
   confirm which field should be appended to the catalog base URL to form the landing page. */
transform column "$.'title'" to "access.landingPage" apply {{ result='https://datacatalog.med.nyu.edu/' + value}};

/* Dataset Creator */
let "dataset.creators[0]" = "NULL";

/* Dataset Release Date Information */
let "dataset.dateReleased" = "NULL";

/* Dataset Qualifiers */
/* NOTE(review): the NYU/DATS mapping sheet assigns dataset.availability to 'access_instructions',
   not 'access_restrictions' - confirm which source field is intended here. */
transform column "$.'access_restrictions'" to "dataset.availability" apply {{ result = value }} ;

transform column "$.'data_types'" to "dataset.types" apply {{ result = value }} ;
let "dataset.aggregation" = "dataset summary";

let "dataset.refinement" = "curated";

/* Dataset Distributions */
let "datasetDistributions[0].format" = "JSON";
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
[
{
"id": 10040,
"dataset_title": "CHART New York Smoking-Cessation Interventions for Urban Hospital Patients Dataset",
"dataset_alt_title": [],
"origin": "Internal",
"description": "The Consortium of Hospitals Advancing Research on Tobacco (CHART) is a network of six projects and a coordinating center aimed at assessing the effectiveness and cost-effectiveness of smoking cessation interventions initiated during hospitalizations and continued post-discharge. The CHART New York Smoking-Cessation Interventions for Urban Hospital Patients trial looked at the effectiveness of a pro-active, multi-session telephone counseling program with motivational enhancement as compared to the New York State Quitline. Data was collected from a randomized trial aimed at comparing effectiveness of two approaches of smoking cessation interventions. A total of 1618 adult smokers were enrolled from Bellevue Hospital and the Manhattan VA. The data examines smoking abstinence at baseline, and two-, six-month follow up periods. Measures include socio-demographics, smoking history, health habits, health status, depression, biochemical validation, participant satisfaction and intervention fidelity.",
"dataset_end_date": "2015",
"dataset_start_date": "2011",
"local_experts": [],
"authors": [
{
"full_name": "Donna Shelley",
"last_name": null,
"first_name": null,
"orcid_id": null,
"bio_url": "http:\/\/www.med.nyu.edu\/biosketch\/shelld01",
"email": null
},
{
"full_name": "Erin Rogers",
"last_name": null,
"first_name": null,
"orcid_id": null,
"bio_url": "http:\/\/www.med.nyu.edu\/biosketch\/rogere02\/",
"email": null
},
{
"full_name": "Scott Sherman",
"last_name": null,
"first_name": null,
"orcid_id": "http:\/\/orcid.org\/0000-0003-1752-7303",
"bio_url": "http:\/\/www.med.nyu.edu\/biosketch\/ses2127",
"email": null
}
],
"corresponding_authors": [],
"date_added": {
"date": "2015-05-22 00:00:00.000000",
"timezone_type": 3,
"timezone": "America\/New_York"
},
"dataset_formats": [],
"data_types": [
"Surveys",
"Administrative",
"Behavioral"
],
"study_types": [
"Interventional"
],
"collection_standards": [],
"awards": [
{
"award": "1U01HL105229-01",
"award_funder": "NHLBI",
"award_url": "http:\/\/projectreporter.nih.gov\/project_info_description.cfm?aid=8015408&icde=24767540&ddparam=&ddvalue=&ddsub=&cr=6&csb=default&cs=ASC",
"funder_type": "Federal, NIH"
}
],
"access_restrictions": [
"Author approval required"
],
"subject_population_ages": [
"Adult (19 years - 64 years)",
"Senior (65 years - 79 years)",
"Aged (80 years and over)"
],
"subject_geographic_area": [
{
"geographic_area_name": "New York (City)",
"geographic_area_authority": null
}
],
"subject_geographic_area_details": [
{
"geographic_area_detail_name": "New York (State) - New York City",
"geographic_area_detail_authority": null
}
],
"subject_domain": [
{
"subject_domain": "Delivery of Health Care",
"mesh_code": "https:\/\/id.nlm.nih.gov\/mesh\/D003695.html"
},
{
"subject_domain": "Health Care Costs",
"mesh_code": "https:\/\/id.nlm.nih.gov\/mesh\/D017048.html"
},
{
"subject_domain": "Health Status",
"mesh_code": "https:\/\/id.nlm.nih.gov\/mesh\/D006304.html"
},
{
"subject_domain": "Risk Factors",
"mesh_code": "https:\/\/id.nlm.nih.gov\/mesh\/D012307.html"
}
],
"subject_keywords": [
"Epidemiology",
"Health expenditures",
"Health services",
"Outpatients",
"Patient discharge",
"Quitline",
"Randomized trial",
"Smoking",
"Smoking cessation",
"Telephone counseling",
"Tobacco use",
"Underserved population",
"Urban hospitals"
],
"publications": [
{
"citation": "Grossman E, Shelley D, Braithwaite RS, Lobach I, Goffin A, Rogers E, et al. Effectiveness of smoking-cessation interventions for urban hospital patients: study protocol for a randomized controlled trial. Trials. 2012;13:126.",
"url": "http:\/\/www.ncbi.nlm.nih.gov\/pmc\/articles\/PMC3502597\/",
"doi": "10.1186\/1745-6215-13-126"
},
{
"citation": "Rogers ES, Smelson DA, Gillespie CC, Elbel B, Poole S, Hagedorn HJ, Kalman D, Krebs P, Fang XY, Wang BH, Sherman SE. Telephone Smoking-Cessation Counseling for Smokers in Mental Health Clinics A Patient-Randomized Controlled Trial. Am J Prev Med. 2016 Apr;50(4):518-27.",
"url": "https:\/\/dx.doi.org\/10.1016\/j.amepre.2015.10.004",
"doi": "10.1016\/j.amepre.2015.10.004"
}
],
"publishers": [],
"subject_of_study": [],
"related_software": [],
"related_equipment": [],
"related_datasets": [],
"other_resources": [],
"published": true,
"dataset_size": null,
"subscriber": null,
"access_instructions": "Contact the author.",
"date_updated": {
"date": "2017-06-14 00:00:00.000000",
"timezone_type": 3,
"timezone": "America\/New_York"
},
"date_archived": null,
"archived": false,
"archival_notes": null,
"last_edit_notes": null,
"subject_genders": [],
"data_locations": [
{
"data_location": "Author",
"location_content": null,
"data_access_url": "mailto:scott.sherman@nyumc.org?Subject=NYULMC%20Dataset%20Request&cc=datacatalog@med.nyu.edu",
"accession_number": null
}
]
}
]

0 comments on commit 8901d8f

Please sign in to comment.