In [1]:
import os  
import sys
import pathlib

# Make the repository's `src/` directory importable.
# Assumes the notebook's working directory sits one level below the repo root.
repo_path = pathlib.Path.cwd().parent
src_path = repo_path.joinpath('src')

# Prepend so the local checkout is found before other entries on sys.path.
sys.path.insert(0, str(src_path))

In [2]:
import pymongo

In [391]:
import fairscape_mds.mds.config as mds_config

# Load the server configuration from the local deployment env file
# (the path is relative to the notebook's working directory).
server_config = mds_config.get_fairscape_config('../deploy/local.env')

# NOTE(review): presumably prepares the MongoDB side of the deployment — confirm in fairscape_mds.mds.config
server_config.setup_mongo()

# NOTE(review): presumably prepares the MinIO side of the deployment — confirm in fairscape_mds.mds.config
server_config.setup_minio()

In [396]:
# Open a client through the server configuration and select the configured database.
mongoClient = server_config.CreateMongoClient()

mongoDB = mongoClient[server_config.mongo.db]

# Collection handles used later for direct inspection and cleanup;
# the collection names come from the server configuration.
mongoUserCollection = mongoDB[server_config.mongo.user_collection]
mongoIdentifierCollection = mongoDB[server_config.mongo.identifier_collection]

In [28]:
# Sanity check: query the user collection for an "@id" that should not exist.
result = mongoUserCollection.find_one({"@id": "not a user"})

In [30]:
# True in the recorded run: the lookup of the unknown "@id" returned None.
result is None

True

## API Requests

In [241]:
import requests
# Base URL of the API under test. The trailing "/" matters: the cells below build
# URLs by plain concatenation (e.g. fairscape_host + 'user').
fairscape_host = "http://localhost:8080/"

## Users

In [242]:
def print_response(response):
    """Print a response's status code and raw body, one per line.

    Args:
        response: any object exposing ``status_code`` and ``content``
            (the notebook passes ``requests`` responses).
    """
    print(response.status_code, response.content, sep="\n")

In [365]:
# Register a new user and show the API's reply (201 in the recorded run).
user_metadata = {
    "@id": "ark:59852/jdoe",
    "@type": "Person",
    "name": "John Doe",
    "email": "jdoe@example.org",
    "password": "examplepassword"
}

create_user_response = requests.post(f"{fairscape_host}user", json=user_metadata)
print_response(create_user_response)

201
b'{"created":{"@id":"ark:59852/jdoe","@type":"Person","name":"John Doe"}}'


In [366]:
# Fetch the user created above, addressed by its "@id".
get_user_response = requests.get(f'{fairscape_host}user/{user_metadata.get("@id")}')
print_response(get_user_response)

200
b'{"@id":"ark:59852/jdoe","@type":"Person","@context":{"@vocab":"https://schema.org/","evi":"https://w3id.org/EVI#"},"url":null,"organizations":[],"projects":[],"datasets":[],"downloads":[],"rocrates":[],"software":[],"computations":[],"evidencegraphs":[],"name":"John Doe","email":"jdoe@example.org","password":"examplepassword"}'


In [367]:
# Request a user that doesn't exist (404 in the recorded run).
get_user_response = requests.get(f"{fairscape_host}user/ark:99999/fakeuser")
print_response(get_user_response)

404
b'{"error":"No record found"}'


In [368]:
# Retrieve the list of registered users.
list_user_response = requests.get(f"{fairscape_host}user")
print_response(list_user_response)

200
b'{"users":[{"@id":"ark:59852/softwaretester","@type":"Person","name":"John Doe"},{"@id":"ark:59852/jdoe","@type":"Person","name":"John Doe"}]}'


In [369]:
# Remove the user created at the start of this section.
delete_user_response = requests.delete(f'{fairscape_host}user/{user_metadata.get("@id")}')
print_response(delete_user_response)

200
b'{"deleted":{"@id":"ark:59852/jdoe","@type":"Person","name":"John Doe","email":"jdoe@example.org","datasets":[],"software":[],"computatations":[],"rocrates":[],"evidencegraphs":[]}}'


## Dataset

In [370]:
# Register the user that owns the test resources created in the following
# sections (its "@id" is used as "owner" for datasets, software, computations
# and downloads).
test_user_metadata = {
    "@id": "ark:59852/softwaretester",
    "@type": "Person",
    "name": "John Doe",
    "email": "jdoe@example.org",
    "password": "examplepassword"
}

create_user_response = requests.post(f"{fairscape_host}user", json=test_user_metadata)

In [371]:
# Register a dataset owned by the test user and show the API's reply.
test_dataset_metadata = {
    "@id": "ark:59852/test-dataset",
    "@type": "evi:Dataset",
    "name": "test dataset",
    "description": "an example metadata set for a test dataset",
    "keywords": [ "test", "fair"],
    "owner": test_user_metadata['@id'],
    "author": "John Doe"
}

create_dataset_response = requests.post(f"{fairscape_host}dataset", json=test_dataset_metadata)
print_response(create_dataset_response)

201
b'{"created":{"@id":"ark:59852/test-dataset","@type":"evi:Dataset","name":"test dataset"}}'


In [294]:
# Retrieve the list of registered datasets.
list_dataset_response = requests.get(f"{fairscape_host}dataset")
print_response(list_dataset_response)

200
b'{"datasets":[{"@id":"ark:59852/test-dataset","@type":"evi:Dataset","name":"test dataset"}]}'


In [297]:
# Fetch the full metadata record of the test dataset.
get_dataset_response = requests.get(f'{fairscape_host}dataset/{test_dataset_metadata["@id"]}')
print_response(get_dataset_response)

200
b'{"@id":"ark:59852/test-dataset","@type":"evi:Dataset","@context":{"@vocab":"https://schema.org/","evi":"https://w3id.org/EVI#"},"url":null,"license":" https://creativecommons.org/licenses/by/4.0/","keywords":["test","fair"],"published":false,"distribution":[],"includedInDataCatalog":null,"sourceOrganization":null,"author":"John Doe","dateCreated":"2024-04-10T15:36:25.564000","dateModified":"2024-04-10T15:36:25.564000","usedBy":[],"generatedBy":null,"name":"test dataset","description":"an example metadata set for a test dataset","owner":"ark:59852/softwaretester"}'


In [283]:
# update dataset

In [None]:
# Remove the test dataset and show the API's reply.
delete_dataset_response = requests.delete(f'{fairscape_host}dataset/{test_dataset_metadata["@id"]}')
print_response(delete_dataset_response)

## Software

In [None]:
# Describe a software record owned by the test user.
test_software_metadata = {
    "@id": "ark:59852/test-software",
    "@type": "evi:Software",
    "owner": test_user_metadata.get('@id'),
    "name": "Test Software",
    "author": "Max Levinson",
    "keywords": ['evi'],
    "description": "an example software model"
}

# Register the software and show the API's reply.
create_software_response = requests.post(f"{fairscape_host}software", json=test_software_metadata)
print_response(create_software_response)

In [304]:
# Fetch the full metadata record of the test software.
get_software_response = requests.get(f'{fairscape_host}software/{test_software_metadata.get("@id")}')
print_response(get_software_response)

200
b'{"@id":"ark:59852/test-software","@type":"evi:Software","name":"Test Software","@context":{"@vocab":"https://schema.org/","evi":"https://w3id.org/EVI#"},"url":null,"description":"an example software model","license":" https://creativecommons.org/licenses/by/4.0/","keywords":["evi"],"published":false,"owner":"ark:59852/softwaretester","author":"Max Levinson","citation":null,"dateCreated":"2024-04-10T15:36:43.467000","distribution":[],"usedBy":[],"sourceOrganization":null,"includedInDataCatalog":null}'


In [301]:
# Retrieve the list of registered software.
list_software_response = requests.get(f"{fairscape_host}software")
print_response(list_software_response)

200
b'{"software":[{"@id":"ark:59852/test-software","@type":"evi:Software","name":"Test Software"}]}'


In [303]:
# Remove the test software.
# NOTE(review): the recorded run returned 500 Internal Server Error here, so the
# server-side delete path needs investigation.
delete_software_response = requests.delete(fairscape_host + 'software/' + test_software_metadata.get("@id"))
print_response(delete_software_response)

500
b'Internal Server Error'


## Computation

In [None]:
# setup_computation

In [310]:
# Fixtures for the computation test: an input dataset, an output dataset and the
# software, all owned by the test user.

# Dataset the computation will reference in "usedDataset".
used_by_dataset_metadata = {
    "@id": "ark:59852/comp-dataset-usedBy",
    "@type": "evi:Dataset",
    "name": "test dataset",
    "description": "an example metadata set for a test dataset",
    "keywords": [ "test", "fair"],
    "owner": test_user_metadata['@id'],
    "author": "John Doe"
}

comp_create_dataset_response = requests.post(
    fairscape_host + 'dataset',
    json = used_by_dataset_metadata
)

print_response(comp_create_dataset_response)

# Dataset the computation will reference in "generated".
generated_by_dataset_metadata = {
    "@id": "ark:59852/comp-dataset-generatedBy",
    "@type": "evi:Dataset",
    "name": "test dataset",
    "description": "an example metadata set for a test dataset",
    "keywords": [ "test", "fair"],
    "owner": test_user_metadata['@id'],
    "author": "John Doe"
}

# Note: this reuses (overwrites) comp_create_dataset_response from the first request.
comp_create_dataset_response = requests.post(
    fairscape_host + 'dataset',
    json = generated_by_dataset_metadata
)

print_response(comp_create_dataset_response)

# Software the computation will reference in "usedSoftware".
comp_test_software_metadata = {
    "@id": "ark:59852/comp-test-software",
    "@type": "evi:Software",
    "owner": test_user_metadata.get('@id'),
    "name": "Test Software",
    "author": "Max Levinson",
    "keywords": ['evi'],
    "description": "an example software model"
}

# Register the software (not a dataset) and show the API's reply.
create_software_response = requests.post(
    fairscape_host + 'software',
    json = comp_test_software_metadata
)

print_response(create_software_response)

201
b'{"created":{"@id":"ark:59852/comp-dataset-usedBy","@type":"evi:Dataset","name":"test dataset"}}'
201
b'{"created":{"@id":"ark:59852/comp-dataset-generatedBy","@type":"evi:Dataset","name":"test dataset"}}'
201
b'{"created":{"@id":"ark:59852/comp-test-software","@type":"evi:Software","name":"Test Software","description":"an example software model","author":"Max Levinson"}}'


In [305]:
# Register a stand-alone "generated" dataset owned by the test user.
test_generated_metadata = {
    "@id": "ark:59852/test-generated",
    "@type": "evi:Dataset",
    "name": "test generated dataset",
    "description": "an example generated dataset set",
    "keywords": [ "test", "fair"],
    "owner": test_user_metadata['@id'],
    "author": "John Doe"
}

create_generated_response = requests.post(f"{fairscape_host}dataset", json=test_generated_metadata)
print_response(create_generated_response)

201
b'{"created":{"@id":"ark:59852/test-generated","@type":"evi:Dataset","name":"test generated dataset"}}'


In [234]:
# Delete the generated dataset again and display the decoded JSON reply.
requests.delete(
      fairscape_host + f'dataset/{test_generated_metadata["@id"]}'
      ).json()

{'deleted': {'@id': 'ark:59852/test-generated',
  '@type': 'evi:Dataset',
  'name': 'test generated dataset'}}

In [311]:
# Metadata for the computation under test. It links the fixtures created above:
# one software ("usedSoftware"), one input dataset ("usedDataset") and one
# output dataset ("generated"). The later cells assert that the API maintains
# the inverse edges of these links.
test_computation_metadata = {
    "@id": "ark:59852/test-computation",
    "@type": "evi:Computation",
    "name": "Test Computation",
    "url": "https://github.com",
    "owner": test_user_metadata.get("@id"),
    "author": "Max Levinson",
    "keywords": ['test'],
    "description": "An example computation for testing",
    "command": "echo 'hello world'",
    "container": None,
    "usedSoftware": comp_test_software_metadata.get("@id"),  # a single id, not a list
    "usedDataset": [used_by_dataset_metadata.get("@id")],
    "generated": [generated_by_dataset_metadata.get("@id")]
}

In [312]:
# Register the computation and show the API's reply.
create_computation_response = requests.post(f"{fairscape_host}computation", json=test_computation_metadata)
print_response(create_computation_response)

201
b'{"created":{"@id":"ark:59852/test-computation","@type":"evi:Computation","name":"Test Computation"}}'


In [313]:
# Fetch the full metadata record of the test computation.
get_computation_response = requests.get(f'{fairscape_host}computation/{test_computation_metadata.get("@id")}')
print_response(get_computation_response)

200
b'{"@id":"ark:59852/test-computation","@type":"evi:Computation","name":"Test Computation","@context":{"@vocab":"https://schema.org/","evi":"https://w3id.org/EVI#"},"url":"https://github.com","description":"An example computation for testing","license":" https://creativecommons.org/licenses/by/4.0/","keywords":["test"],"published":true,"owner":"ark:59852/softwaretester","author":"Max Levinson","dateCreated":null,"dateFinished":null,"sourceOrganization":null,"includedInDataCatalog":null,"command":"echo \'hello world\'","usedSoftware":"ark:59852/comp-test-software","usedDataset":["ark:59852/comp-dataset-usedBy"],"generated":["ark:59852/comp-dataset-generatedBy"],"container":null}'


### Check that inverse provenance edges are maintained

In [None]:
# After creating the computation, every record it references should point back to it.

# software: the computation id must appear in the software's "usedBy"
get_software_response = requests.get(
    fairscape_host + 'software/' + comp_test_software_metadata['@id']
)

softwareJSON = get_software_response.json()
assert test_computation_metadata['@id'] in softwareJSON.get("usedBy")

# input dataset: the computation id must appear in the dataset's "usedBy"
get_dataset_response = requests.get(
    fairscape_host + 'dataset/' + used_by_dataset_metadata['@id']
)

datasetJSON = get_dataset_response.json()
assert test_computation_metadata['@id'] in datasetJSON.get("usedBy")

# output dataset: "generatedBy" must equal the computation id
get_dataset_response = requests.get(
    fairscape_host + 'dataset/' + generated_by_dataset_metadata['@id']
)
generatedByJSON = get_dataset_response.json()
assert test_computation_metadata['@id'] == generatedByJSON['generatedBy']

# owner: the computation id must appear in the user's "computations"
get_user_response = requests.get(
    fairscape_host + 'user/' + test_user_metadata['@id']
)

userJSON = get_user_response.json()
assert test_computation_metadata['@id'] in userJSON.get("computations")

In [321]:
# Retrieve the list of registered computations.
list_computation_response = requests.get(f"{fairscape_host}computation")
print_response(list_computation_response)

200
b'{"computations":[{"@id":"ark:59852/test-computation","@type":"evi:Computation","name":"Test Computation"}]}'


In [338]:
# Remove the test computation; the cells that follow verify the inverse edges were removed.
delete_computation_response = requests.delete(f'{fairscape_host}computation/{test_computation_metadata["@id"]}')
print_response(delete_computation_response)

200
b'{"deleted":{"@id":"ark:59852/test-computation","@type":"evi:Computation","name":"Test Computation","@context":{"@vocab":"https://schema.org/","evi":"https://w3id.org/EVI#"},"url":"https://github.com","description":"An example computation for testing","license":" https://creativecommons.org/licenses/by/4.0/","keywords":["test"],"published":false,"owner":"ark:59852/softwaretester","author":"Max Levinson","dateCreated":null,"dateFinished":null,"sourceOrganization":null,"includedInDataCatalog":null,"command":"echo \'hello world\'","usedSoftware":"ark:59852/comp-test-software","usedDataset":["ark:59852/comp-dataset-usedBy"],"generated":["ark:59852/comp-dataset-generatedBy"],"container":null}}'


In [339]:
# After deleting the computation, none of the records it referenced may still point to it.

# software: the computation id must be gone from "usedBy"
get_software_response = requests.get(
    fairscape_host + 'software/' + comp_test_software_metadata['@id']
)

softwareJSON = get_software_response.json()
assert test_computation_metadata['@id'] not in softwareJSON.get("usedBy")

# input dataset: the computation id must be gone from "usedBy"
get_dataset_response = requests.get(
    fairscape_host + 'dataset/' + used_by_dataset_metadata['@id']
)

datasetJSON = get_dataset_response.json()
assert test_computation_metadata['@id'] not in datasetJSON.get("usedBy")

# output dataset: "generatedBy" must no longer be the computation id
get_dataset_response = requests.get(
    fairscape_host + 'dataset/' + generated_by_dataset_metadata['@id']
)
generatedByJSON = get_dataset_response.json()
assert test_computation_metadata['@id'] != generatedByJSON['generatedBy']

# owner: the computation id must be gone from the user's "computations"
get_user_response = requests.get(
    fairscape_host + 'user/' + test_user_metadata['@id']
)

userJSON = get_user_response.json()
assert test_computation_metadata['@id'] not in userJSON.get("computations")

In [328]:
# Check that the owner no longer lists the deleted computation.
# The user ARK is hard-coded here; it equals test_user_metadata['@id'].
get_user_response = requests.get(
    fairscape_host + 'user/ark:59852/softwaretester'
)

userJSON = get_user_response.json()
assert test_computation_metadata['@id'] not in userJSON.get("computations")

In [329]:
# Check that the *software* record no longer lists the deleted computation in "usedBy".
# NOTE(review): this queries ark:59852/test-software, not the comp-test-software
# record the computation actually referenced — confirm which one is intended.
get_software_response = requests.get(
    fairscape_host + 'software/ark:59852/test-software'
)

softwareJSON = get_software_response.json()
assert test_computation_metadata['@id'] not in softwareJSON.get("usedBy")

In [331]:
# Check that the dataset consumed by the computation no longer lists it in
# "usedBy" once the computation has been deleted.
# The request previously sent the software ARK (ark:59852/test-software) to the
# dataset endpoint, so it never inspected a dataset record.
get_dataset_response = requests.get(
    fairscape_host + 'dataset/' + used_by_dataset_metadata['@id']
)
# Fail loudly on an error reply instead of asserting against an error payload.
get_dataset_response.raise_for_status()

datasetJSON = get_dataset_response.json()
# "usedBy" may be missing or null; treat that as an empty edge list.
assert test_computation_metadata['@id'] not in (datasetJSON.get("usedBy") or [])

In [None]:
# assert 

## DataDownload

In [342]:
from requests_toolbelt.multipart.encoder import MultipartEncoder

In [345]:
import json

In [435]:
# Metadata describing the file to upload; "encodesCreativeWork" attaches the
# download to the test dataset and "filename" names the local file that is sent.
download_metadata = {
    "@id": "ark:59852/test-data-download",
    "@type": "evi:DataDownload",
    "name": "test download",
    "description": "an example metadata set for a test dataset",
    "keywords": [ "test", "fair"],
    "owner": test_user_metadata['@id'],
    "author": "John Doe",
    "encodesCreativeWork": test_dataset_metadata['@id'],
    "encodingFormat": "txt",
    "filename": "fake_data.txt"
}

# Read the payload inside a context manager so the file handle is closed right
# away; the previous open(...) passed straight to the encoder was never closed.
with open(download_metadata['filename'], 'rb') as download_file:
    download_content = download_file.read()

mp_encoder = MultipartEncoder(
    fields={
        # JSON-encoded metadata part
        'download': json.dumps(download_metadata),
        # file part given as (filename, content, mime type), so the part's
        # Content-Disposition carries the filename as well as the part name
        'file': (download_metadata['filename'], download_content, 'text/plain'),
    }
)

# upload a software to minio object store
# dataset_upload = {'download': str(download_data), 'file': open("tests/test-data.csv", "rb")}

In [436]:

# POST the multipart body to the register endpoint. The encoder has to be sent
# via `data=` (not `files=`), and its content type header carries the boundary.
data_download_upload = requests.post(
    f"{fairscape_host}register",
    data=mp_encoder,
    headers={'Content-Type': mp_encoder.content_type},
)

data_download_upload.json()

{'created': {'@id': 'ark:59852/test-data-download',
  '@type': 'Download',
  'name': 'test download'}}

In [463]:
# Fetch the metadata record of the uploaded download.
data_download_get_metadata = requests.get(f"{fairscape_host}download/{download_metadata['@id']}")

In [464]:
# Display the decoded reply.
# NOTE(review): the recorded output is a quoted JSON *string*, not a dict, which
# suggests the endpoint double-encodes its body — confirm on the server side.
data_download_get_metadata.json()

'{"@id":"ark:59852/test-data-download","@type":"evi:DataDownload","name":"test download","@context":{"@vocab":"https://schema.org/","evi":"https://w3id.org/EVI#"},"url":null,"description":"an example metadata set for a test dataset","license":" https://creativecommons.org/licenses/by/4.0/","keywords":["test","fair"],"published":false,"encodingFormat":"txt","owner":"ark:59852/softwaretester","contentSize":"12","encodesCreativeWork":"ark:59852/test-dataset","sha256":null,"filename":"fake_data.txt","uploadDate":"2024-04-15T14:06:49Z","version":"0.1.0","sourceOrganization":null,"includedInDataCatalog":null,"minioPath":null,"contentURL":"0.0.0.0/datadownload/ark:59852/test-data-download/download","author":"John Doe"}'

In [460]:
# get file content
# Build the URL in this cell: `download_url` was otherwise only assigned by a
# later cell, so a fresh Restart & Run All raised NameError here.
download_url = fairscape_host + f"download/{download_metadata['@id']}/download"

data_download_get_content = requests.get(download_url)
data_download_get_content.content

b'{"@id":"ark:59852/test-data-download","error":"download ark:59852/test-data-download has no data stored in fairscape"}'

In [452]:
# URL serving the stored file content for the download, and the local path it is streamed to.
download_url = fairscape_host + f"download/{download_metadata['@id']}/download"
output_filename = "read_data.txt"

In [458]:
# streaming back file
def stream_file(request_url, local_filename):
    """Stream the body of a GET request into a local file.

    Args:
        request_url: URL to fetch.
        local_filename: path of the file to create or overwrite.

    Returns:
        ``local_filename``, unchanged.

    Raises:
        requests.HTTPError: on a 4xx/5xx reply; raised before the output
            file is opened.
    """
    with requests.get(request_url, stream=True) as response:
        response.raise_for_status()
        with open(local_filename, 'wb') as out_file:
            # copy the body in 8192-byte chunks instead of loading it at once
            out_file.writelines(response.iter_content(chunk_size=8192))
    return local_filename

In [459]:
# Stream the stored file to disk; stream_file raises HTTPError on an error reply
# (the recorded run got a 404 from the download endpoint).
stream_file(download_url, output_filename)

HTTPError: 404 Client Error: Not Found for url: http://localhost:8080/download/ark:59852/test-data-download/download

In [457]:
# Remove the download record through the API.
download_delete = requests.delete(f"{fairscape_host}download/{download_metadata['@id']}")

### Remove Content

In [420]:
import minio

In [392]:
# Create the MinIO client used below to clear the uploaded test object.
minio_client = server_config.CreateMinioClient()

In [432]:
# Manual cleanup that bypasses the API: delete the download's metadata document
# straight from the identifier collection ...
mongoIdentifierCollection.delete_one({"@id": download_metadata.get("@id")})

# ... and remove the uploaded object from the default bucket.
# NOTE(review): the "test-data-download/<filename>" key layout is assumed — confirm against the upload code.
removeObjResult = minio_client.remove_object(
    server_config.minio.default_bucket,
    f"test-data-download/{download_metadata.get('filename')}"
)

In [430]:
# Confirm the object is gone: stat the removed key and, when MinIO raises
# S3Error (NoSuchKey in the recorded run), show the error's attributes.
try:
    minio_client.stat_object(
        server_config.minio.default_bucket,
        f"test-data-download/{download_metadata.get('filename')}",
    )
except minio.S3Error as stat_error:
    print(vars(stat_error))

{'_code': 'NoSuchKey', '_message': 'Object does not exist', '_resource': '/default/test-data-download/fake_data.txt', '_request_id': '17C6792E9A930CB0', '_host_id': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8', '_response': <urllib3.response.HTTPResponse object at 0x1156696f0>, '_bucket_name': 'default', '_object_name': 'test-data-download/fake_data.txt'}


In [415]:
# True in the recorded run: remove_object returned None.
removeObjResult is None

True

In [416]:
# Display the value returned by remove_object (nothing is rendered when it is None).
removeObjResult

In [395]:
# Same display as the preceding cell; candidate for deletion.
removeObjResult

In [None]:
# TODO: unfinished cell. It held only the bare, undefined name `mong`, which
# raised NameError under Restart & Run All. Fill in the intended Mongo cleanup.

## ROCrate

In [None]:
# create rocrate

## Resolver

In [153]:
# Resolve the test user through the bare ARK route (host + identifier).
get_user_resolver = requests.get(fairscape_host + test_user_metadata.get("@id"))
print_response(get_user_resolver)

NameError: name 'test_user_metadata' is not defined

In [143]:
# Resolve the test software through the bare ARK route (404 in the recorded run).
get_software_resolver = requests.get(fairscape_host + test_software_metadata.get("@id"))
print_response(get_software_resolver)

404
b'{"detail":"Not Found"}'


In [152]:
# Resolve the test dataset through the bare ARK route.
# NOTE(review): the recorded run returned 500 ("Object of type datetime is not
# JSON serializable"), so the resolver fails to serialise the date fields.
get_dataset_resolver = requests.get(fairscape_host + test_dataset_metadata.get("@id"))
print_response(get_dataset_resolver)

500
b'{"error":"error returning ark metadata","message":"Object of type datetime is not JSON serializable","identifier":"ark:59852/test-dataset","metadata":"{\'@id\': \'ark:59852/test-dataset\', \'@type\': \'evi:Dataset\', \'name\': \'test dataset\', \'@context\': {\'@vocab\': \'https://schema.org/\', \'evi\': \'https://w3id.org/EVI#\'}, \'url\': None, \'description\': \'an example metadata set for a test dataset\', \'license\': \' https://creativecommons.org/licenses/by/4.0/\', \'keywords\': [\'test\', \'fair\'], \'owner\': \'ark:59852/softwaretester\', \'distribution\': [], \'includedInDataCatalog\': None, \'sourceOrganization\': None, \'author\': \'John Doe\', \'dateCreated\': datetime.datetime(2024, 4, 8, 16, 30, 55, 437000), \'dateModified\': datetime.datetime(2024, 4, 8, 16, 30, 55, 437000), \'usedBy\': []}"}'


In [None]:
# Resolve the test user through the bare ARK route.
# Fixes: `testing_user_metadata` was never defined (the notebook's variable is
# `test_user_metadata`), and `fairscape_host` already ends with "/", so the
# extra '/' produced a double slash. Normalise to exactly one separator.
get_user_resolver = requests.get(
    fairscape_host.rstrip('/') + '/' + test_user_metadata.get("@id")
)

print(get_user_resolver.status_code)
print(get_user_resolver.content)