In [17]:
import os

# Directory that sits directly under the project root; the mapping JSON used
# later in this notebook is read from inside it via a relative path.
PROJECT_MARKER = 'FIP_Mapping'

# Move up one level only if we are not already at the project root.
# An unconditional os.chdir('..') is not idempotent: every re-run of this cell
# would climb one directory higher and break the relative paths used below.
if not os.path.isdir(PROJECT_MARKER):
    os.chdir('..')

print(os.getcwd())  # Should show the project root


c:\Users\berni\OneDrive - TU Wien\THESIS\Code\DMP-Evaluation


In [19]:
from FIP_Mapping.mapping import load_mapping
from FIP_Mapping.utils  import transform_mapping, get_mapped_status
from Evaluator.evaluator import load_dmp, evaluate_dmp_against_fip, summarize_results


In [20]:
# Location of the FIP -> maDMP mapping JSON, relative to the current working directory
mapping_path = 'FIP_Mapping/fip_madmp_mapping.json'

# Read the mapping file through the project helper
mapping_data = load_mapping(mapping_path)

# Bare final expression: the notebook renders the loaded object as the cell output
mapping_data

{'FIP_maDMP_Mapping': [{'FAIR_principle': 'F1',
   'FIP_question': 'What globally unique, persistent, resolvable identifiers do you use for metadata records?',
   'maDMP_field': 'dataset.dataset_id.identifier',
   'Mapping_status': 'Mapped',
   'Comments': 'Clearly mapped'},
  {'FAIR_principle': 'F1',
   'FIP_question': 'What globally unique, persistent, resolvable identifiers do you use for datasets?',
   'maDMP_field': 'dataset.dataset_id.identifier',
   'Mapping_status': 'Mapped',
   'Comments': "Clearly mapped. But it's strange that always separate datasets and metadata records"},
  {'FAIR_principle': 'F2',
   'FIP_question': 'Which metadata schemas do you use for findability?',
   'maDMP_field': 'dataset.metadata.metadata_standard_id.identifier',
   'Mapping_status': 'Mapped',
   'Comments': 'Could also be dataset.metadata.metadata_standard_id.type'},
  {'FAIR_principle': 'F3',
   'FIP_question': 'What is the technology that links the persistent identifiers of your data to the met

In [21]:
# Convert the loaded mapping into `fip_to_madmp` using the project helper.
# Later cells index `fip_to_madmp` by the full FIP question text and read
# back that question's mapping details.
fip_to_madmp = transform_mapping(mapping_data)
# Bare final expression so the notebook renders the result as the cell output.
fip_to_madmp

{'What globally unique, persistent, resolvable identifiers do you use for metadata records?': {'FAIR_principle': 'F1',
  'maDMP_field': 'dataset.dataset_id.identifier',
  'Mapping_status': 'Mapped',
  'Comments': 'Clearly mapped'},
 'What globally unique, persistent, resolvable identifiers do you use for datasets?': {'FAIR_principle': 'F1',
  'maDMP_field': 'dataset.dataset_id.identifier',
  'Mapping_status': 'Mapped',
  'Comments': "Clearly mapped. But it's strange that always separate datasets and metadata records"},
 'Which metadata schemas do you use for findability?': {'FAIR_principle': 'F2',
  'maDMP_field': 'dataset.metadata.metadata_standard_id.identifier',
  'Mapping_status': 'Mapped',
  'Comments': 'Could also be dataset.metadata.metadata_standard_id.type'},
 'What is the technology that links the persistent identifiers of your data to the metadata description?': {'FAIR_principle': 'F3',
  'maDMP_field': 'dataset.distribution.host.pid_system',
  'Mapping_status': 'Mapped',
  

In [22]:
# FIP question to inspect; the text has to match a key of fip_to_madmp exactly
question_example = "Which metadata schemas do you use for findability?"

# Fetch the mapping entry for that question (raises KeyError if the key is absent)
example_entry = fip_to_madmp[question_example]

# Show the entry
print(example_entry)

{'FAIR_principle': 'F2', 'maDMP_field': 'dataset.metadata.metadata_standard_id.identifier', 'Mapping_status': 'Mapped', 'Comments': 'Could also be dataset.metadata.metadata_standard_id.type'}


In [23]:
# Collect the questions for each mapping status (all three lookups happen before any printing)
mapped_items = get_mapped_status(fip_to_madmp, "Mapped")
partially_mapped_items = get_mapped_status(fip_to_madmp, "Partially Mapped")
not_mapped_items = get_mapped_status(fip_to_madmp, "Not Mapped")

# Heading / collection pairs in display order.
# Headings after the first start with a newline to leave a blank line between sections.
status_sections = [
    ("Mapped Questions:", mapped_items),
    ("\nPartially Mapped Questions:", partially_mapped_items),
    ("\nNot Mapped Questions:", not_mapped_items),
]

# Print each section as a heading followed by one bullet line per question
for heading, section_items in status_sections:
    print(heading)
    for question_text in section_items:
        print(f"- {question_text}")

Mapped Questions:
- What globally unique, persistent, resolvable identifiers do you use for metadata records?
- What globally unique, persistent, resolvable identifiers do you use for datasets?
- Which metadata schemas do you use for findability?
- What is the technology that links the persistent identifiers of your data to the metadata description?
- Which standardized communication protocol do you use for metadata records?
- Which standardized communication protocol do you use for datasets?
- Which models, schema(s) do you use for your metadata records?
- Which models, schema(s) do you use for your datasets?
- Which usage license do you use for your metadata records?
- Which usage license do you use for your datasets?

Partially Mapped Questions:
- Which authentication & authorisation technique do you use for metadata records?
- Which authentication & authorisation technique do you use for datasets?
- Which structured vocabularies do you use to annotate your metadata records?
- Which

In [24]:
# Load the real, long maDMP example and evaluate it against the FIP mapping
dmp_real = load_dmp('examples/ex9-dmp-long.json')
results_real = evaluate_dmp_against_fip(dmp_real, fip_to_madmp)

# Column widths for the report below.
# The status column has to fit the longest mapping status used in this notebook
# ("Partially Mapped", 16 characters); with a narrower column those rows push
# the "Present" column out of alignment.
QUESTION_WIDTH = 60
STATUS_WIDTH = max(len(label) for label in ("Mapped", "Partially Mapped", "Not Mapped"))

# One line per result: question text (cut and padded to QUESTION_WIDTH),
# mapping status (padded to STATUS_WIDTH), and the presence value as returned
for question, mapping_status, present in results_real:
    print(
        f"{question[:QUESTION_WIDTH]:<{QUESTION_WIDTH}} | "
        f"Mapping: {mapping_status:<{STATUS_WIDTH}} | "
        f"Present: {present}"
    )

# Summary -- kept as the final expression so the notebook displays its return value
summarize_results(results_real)


What globally unique, persistent, resolvable identifiers do  | Mapping: Mapped          | Present: Present
What globally unique, persistent, resolvable identifiers do  | Mapping: Mapped          | Present: Present
Which metadata schemas do you use for findability?           | Mapping: Mapped          | Present: Not Present
What is the technology that links the persistent identifiers | Mapping: Mapped          | Present: Present
In which search engines are your metadata records indexed?   | Mapping: Not Mapped      | Present: None
In which search engines are your datasets indexed?           | Mapping: Not Mapped      | Present: None
Which standardized communication protocol do you use for met | Mapping: Mapped          | Present: Present
Which standardized communication protocol do you use for dat | Mapping: Mapped          | Present: Present
Which authentication & authorisation technique do you use fo | Mapping: Partially Mapped | Present: Present
Which authentication & authorisation t

(9, 21)