In [1]:
import json
import jsonschema
import os
import urllib
from zipfile import ZipFile

# Step up one directory so the relative paths used in later cells
# ('src/...', 'examples') resolve against the parent directory.
# NOTE(review): not idempotent - re-running this cell moves up another level.
%cd ..

/Users/cl/Documents/masters/maDMP-rocrates-maDMP


In [2]:
class color:
    """ANSI escape sequences used to style printed text.

    Concatenate one of the style attributes in front of a string and
    append ``END`` afterwards to reset the styling.
    """

    # Foreground colours.
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'

    # Text attributes.
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Resets every colour/attribute set before it.
    END = '\033[0m'

def read_json_from_url(url):
    """Fetch the document at ``url`` and return it decoded from JSON.

    Parameters
    ----------
    url : str
        Any URL that ``urllib.request.urlopen`` can open.

    Returns
    -------
    The Python object ``json.loads`` produces for the response body.

    Raises
    ------
    urllib.error.URLError
        If the URL cannot be opened.
    json.JSONDecodeError
        If the response body is not valid JSON.
    """
    # A plain ``import urllib`` does not load the ``request`` submodule, so
    # import it explicitly instead of relying on another package having
    # imported it as a side effect.
    import urllib.request

    # The context manager closes the response even if decoding fails.
    with urllib.request.urlopen(url) as response:
        return json.loads(response.read())


def read_json_local(path):
    """Load the JSON file at ``path`` and return the decoded object.

    Parameters
    ----------
    path : str or path-like
        Location of the JSON file on disk.

    Returns
    -------
    The Python object ``json.load`` produces for the file content.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    # Read bytes rather than text: ``json`` then detects the encoding itself
    # (UTF-8, UTF-16 or UTF-32, with or without a byte-order mark) instead of
    # depending on the platform's locale encoding.
    with open(path, 'rb') as in_file:
        data = json.load(in_file)
    return data


def check_valid_dmp(dmp, schema):
    """Validate ``dmp`` against ``schema`` and print the outcome.

    Parameters
    ----------
    dmp :
        Decoded JSON instance to validate.
    schema :
        JSON schema passed to ``jsonschema.validate``.

    Returns
    -------
    int
        ``1`` when validation succeeds, ``0`` when any exception is raised
        (the exception is printed, not re-raised).
    """
    print(color.BOLD + 'Checking if valid maDMP' + color.END)
    try:
        jsonschema.validate(instance=dmp, schema=schema)
        print(color.GREEN + 'VALID' + color.END)
    except Exception as err:
        # Report the failure and signal it through the return value.
        print(color.RED + 'NOT VALID' + color.END)
        print('with the following exceptions:\n')
        print(err)
        return 0
    return 1
    

def extract_zip(file_path_local, path_local):
    """Unpack every member of a zip archive into a directory.

    Parameters
    ----------
    file_path_local : str
        Path of the zip archive to read.
    path_local : str
        Directory the archive members are extracted into.
    """
    with ZipFile(file_path_local, mode='r') as archive:
        archive.extractall(path=path_local)


def get_filename(path_local):
    """Return the name of the input JSON file found in ``path_local``.

    A directory entry qualifies when its extension is exactly ``.json``
    (case-insensitive) and it is not ``dmp_from_rocrates.json``, which is
    excluded by name. Matching on the real extension, rather than on a
    substring, keeps names such as ``x.json.zip`` or ``x.jsonld`` from being
    picked up.

    Parameters
    ----------
    path_local : str
        Directory to search (not recursive).

    Returns
    -------
    str
        The alphabetically first qualifying file name (name only, without
        the directory).

    Raises
    ------
    IndexError
        If the directory contains no qualifying file. The exception type is
        the one an empty result raised before; it now carries a message.
    """
    # ``os.listdir`` returns entries in arbitrary order; sort so the same
    # directory always yields the same file.
    candidates = sorted(
        name for name in os.listdir(path_local)
        if os.path.splitext(name)[1].lower() == '.json'
        and name != 'dmp_from_rocrates.json'
    )
    if not candidates:
        raise IndexError('no input .json file found in ' + repr(path_local))
    return candidates[0]

    
def save_url_to_disk(path, url):
    """Download ``url`` into its own sub-directory of ``path``.

    The sub-directory is named after the part of the downloaded file name
    before its first dot. If the file name ends in ``.zip`` the archive is
    extracted into that sub-directory as well.

    Parameters
    ----------
    path : str
        Base directory for downloads; created if it does not exist.
    url : str
        Location of a JSON file or of a zip archive containing one.

    Returns
    -------
    str
        Path of the JSON file selected by ``get_filename`` inside the
        sub-directory.
    """
    # A plain ``import urllib`` does not load the ``request`` submodule.
    import urllib.request

    filename = os.path.split(url)[-1]
    dirname = filename.split('.')[0]
    path_local = os.path.join(path, dirname)
    # Also creates ``path`` itself when missing and tolerates re-runs.
    os.makedirs(path_local, exist_ok=True)

    file_path_local = os.path.join(path_local, filename)
    urllib.request.urlretrieve(url, file_path_local)
    # Decide by extension: a substring test would also try to unzip a plain
    # file whose name merely contains "zip".
    if filename.lower().endswith('.zip'):
        extract_zip(file_path_local, path_local)

    json_filename = get_filename(path_local)
    return os.path.join(path_local, json_filename)




## Round trip: convert maDMP examples to RO-Crates and back to maDMP

In [3]:
dmp_examples = [
    'https://zenodo.org/record/3758653/files/dmp.zip',  # 10.5281/zenodo.3758653
    'https://zenodo.org/record/3754740/files/SwedishMotorInsurance_maDMP_Sibincic.zip',
    'https://zenodo.org/record/3742227/files/maDMP_COVID-19_GoldPrice_analysis.zip',
    'https://zenodo.org/record/3738548/files/dsue1-part3-madmp-01627775.json.zip',
    'https://zenodo.org/record/3749776/files/mDMP_01625723.zip',
    'https://raw.githubusercontent.com/RDA-DMP-Common/RDA-DMP-Common-Standard/master/examples/JSON/ex9-dmp-long.json',
    'https://raw.githubusercontent.com/RDA-DMP-Common/RDA-DMP-Common-Standard/master/examples/JSON/ex8-dmp-minimal-content.json',
    'https://raw.githubusercontent.com/RDA-DMP-Common/RDA-DMP-Common-Standard/master/examples/JSON/ex7-dataset-many.json',
    'https://raw.githubusercontent.com/RDA-DMP-Common/RDA-DMP-Common-Standard/master/examples/JSON/ex6-dataset-closed.json',
    'https://raw.githubusercontent.com/RDA-DMP-Common/RDA-DMP-Common-Standard/master/examples/JSON/ex5-dataset-planned-host.json',
]

path = 'examples'
url_schema = 'https://raw.githubusercontent.com/RDA-DMP-Common/RDA-DMP-Common-Standard/master/examples/JSON/JSON-schema/1.0/maDMP-schema-1.0.json'
maDMP_schema = read_json_from_url(url_schema)
for url in dmp_examples:
    print(url)
    file_path_local  = save_url_to_disk(path, url)
    print(file_path_local)
    dmp = read_json_local(file_path_local)
    valid = check_valid_dmp(dmp, maDMP_schema)
    
    if valid:
        print(color.BOLD + 'converting maDMP to rocrates' + color.END)
        %run src/madmp_to_rocrates.py --path $file_path_local
        print(color.BOLD + 'converting rocrates to maDMP' + color.END)
        dir_path_local = os.path.split(file_path_local)[0]
        %run src/rocrates_to_madmp.py --path $dir_path_local
        print('#'*80)
        print(color.BOLD + 'comparing original and recreated maDMPS' + color.END)
        print('#'*80)
        d2_path = os.path.join(dir_path_local, 'dmp_from_rocrates.json')
        %run src/compare_dictionaries.py --path_d1 $file_path_local --path_d2 $d2_path
        print('#'*80)
        print(color.BOLD + 'done' + color.END)
        print('#'*80 + '\n'*3)
        
    

https://zenodo.org/record/3758653/files/dmp.zip
examples/dmp/dmp.json
[1mChecking if valid maDMP[0m
[92mVALID[0m
[1mconverting maDMP to rocrates[0m
[1mChecking if valid maDMP[0m
[92mVALID[0m
processing dataset 1 of 2 datasets
processing dataset 2 of 2 datasets
[1mconverting rocrates to maDMP[0m
[1mChecking if valid maDMP[0m
[92mVALID[0m
################################################################################
[1mcomparing original and recreated maDMPS[0m
################################################################################
comparing d1 to d2:
dmp :
 -  title  :  Hypertree Decompositions for Query Optimization in Postgres
 +  title  :  DMP created from rocrates
dmp :
 -  description  :  Development and Benchmark of a new strategy for query execution. The data is collected for the purpose of benchmarking the new, optimized queries and measure the effectiveness of the technique against the default postgres
optimizer
 +  description  :  a RDA-DMP-Common-

################################################################################
[1mdone[0m
################################################################################



https://zenodo.org/record/3738548/files/dsue1-part3-madmp-01627775.json.zip
examples/dsue1-part3-madmp-01627775/dsue1-part3-madmp-01627775.json
[1mChecking if valid maDMP[0m
[92mVALID[0m
[1mconverting maDMP to rocrates[0m
[1mChecking if valid maDMP[0m
[92mVALID[0m
processing dataset 1 of 1 datasets
[1mconverting rocrates to maDMP[0m
[1mChecking if valid maDMP[0m
[92mVALID[0m
################################################################################
[1mcomparing original and recreated maDMPS[0m
################################################################################
comparing d1 to d2:
dmp->dmp_id :
 -  identifier  :  https://doi.org/10.5281/zenodo.3737787
 +  identifier  :  
dmp->dmp_id :
 -  type  :  doi
 +  type  :  other
dmp->dmp_id :
 -  title  :  Finding patterns in the inte

KeyError: 'distribution'

################################################################################
[1mcomparing original and recreated maDMPS[0m
################################################################################


FileNotFoundError: [Errno 2] No such file or directory: 'examples/ex8-dmp-minimal-content/dmp_from_rocrates.json'

################################################################################
[1mdone[0m
################################################################################



https://raw.githubusercontent.com/RDA-DMP-Common/RDA-DMP-Common-Standard/master/examples/JSON/ex7-dataset-many.json
examples/ex7-dataset-many/ex7-dataset-many.json
[1mChecking if valid maDMP[0m
[92mVALID[0m
[1mconverting maDMP to rocrates[0m
[1mChecking if valid maDMP[0m
[92mVALID[0m
processing dataset 1 of 2 datasets
processing dataset 2 of 2 datasets
[1mconverting rocrates to maDMP[0m
[1mChecking if valid maDMP[0m
[92mVALID[0m
################################################################################
[1mcomparing original and recreated maDMPS[0m
################################################################################
comparing d1 to d2:
dmp :
 -  title  :  DMP with two datasets
 +  title  :  DMP created from rocrates
dmp :
 -  description  :  DMP describing more than one datase

## Convert RO-Crate examples to maDMP

In [4]:
# Local RO-Crate example directories to convert to maDMP.
rocrate_examples = [
    'examples/simple-dataset-0.1.0'
]

# NOTE(review): the schema is fetched again here, but `maDMP_schema` is not
# referenced anywhere else in this cell - confirm whether it is still needed.
url_schema = 'https://raw.githubusercontent.com/RDA-DMP-Common/RDA-DMP-Common-Standard/master/examples/JSON/JSON-schema/1.0/maDMP-schema-1.0.json'
maDMP_schema = read_json_from_url(url_schema)
for file_path in rocrate_examples:
    print('#'*80)
    print(file_path)
    print(color.BOLD + 'converting rocrates to maDMP' + color.END)
    # `$file_path` is expanded by IPython from the loop variable above.
    %run src/rocrates_to_madmp.py --path $file_path
    print('#'*80 + '\n'*3)
    
    

################################################################################
examples/simple-dataset-0.1.0
[1mconverting rocrates to maDMP[0m
argh
[1mChecking if valid maDMP[0m
[92mVALID[0m
################################################################################



