In [36]:
import os
import sys
import json
sys.path.insert(0, os.path.abspath('/home/max/uva/mds_python'))

from mds.models import *
from mds.models.compact.user import UserCompactView
from mds.database.mongo import GetConfig
from mds.database.minio import GetMinioConfig

In [37]:
from mds.models.user import User
from mds.models.dataset import Dataset
from mds.models.download import Download
from mds.models.software import Software
from mds.models.computation import Computation

In [38]:
mongo_client = GetConfig()
mongo_database = mongo_client["test"]
mongo_collection = mongo_database["test_col"]

In [39]:
minio_client = GetMinioConfig()

In [40]:
test_user = User(**{
		"@id": "ark:99999/test-user1",
		"name": "Test User1",
		"type": "Person",
		"email": "testuser1@example.org",
		"password": "test1",
		"organizations": [],
		"projects": [],
		"datasets": [],
		"software": [],
		"computations": [],
		"evidencegraphs": []
		})

test_dataset = Dataset(**{
	"@id": "ark:99999/test-dataset",
	"@type": "evi:Dataset",
	"name": "test dataset",
	"owner": {
		"@id": "ark:99999/test-user1",
		"@type": "Person",
		"name": "Test User1",
		"email": "testuser1@example.org"
		}
    })

test_data_download = Download(**{
	"@id": "ark:99999/test-download",
	"name": "test-data.csv",
	"@type": "DataDownload",
	"encodingFormat": ".txt",
	"encodesCreativeWork": test_dataset.id,
    })

test_software = Software(**{
    "@id": "ark:99999/test-software",
    "name": "script1",
    "@type": "Software",
    "owner": {
		"@id": "ark:99999/test-user1",
		"@type": "Person",
		"name": "Test User1",
		"email": "testuser1@example.org"
		}
})

test_software_object = Download(**{
    "@id": "ark:99999/test-software-script",
    "name": "test-script.py",
    "@type": "DataDownload",
    "owner": {
		"@id": "ark:99999/test-user1",
		"@type": "Person",
		"name": "Test User1",
		"email": "testuser1@example.org"
		},
    "encodingFormat": ".py",
    "encodesCreativeWork": test_software.id
})

test_computation  =  Computation(**{
        "@id": "ark:99999/test-comp",
        "name":"test computation",
        "owner": {
        "@id": "ark:99999/test-user1",
        "@type": "Person",
        "name": "Test User1",
        "email": "testuser1@example.org"
        },
        "container": "fairscape-python-test",
        "command": "python3 /mnt/input/software/test-script.py",
        "usedSoftware":test_software.id ,
        "usedDataset": test_dataset.id
})

In [41]:
def create_all():
    create_user_status = test_user.create(mongo_collection)
    print(create_user_status)
    create_dataset = test_dataset.create(mongo_collection)
    print(create_dataset)
    
    with open("tests/test-data.csv", "rb") as file:
        create_data_download = test_data_download.register(file, mongo_collection, minio_client)
        print(create_data_download)
    
    create_software_metadata = test_software.create(mongo_collection)
    print(create_software_metadata)
    
    with open("tests/test-script.py", "rb") as file:
        print(test_software_object.register(file, mongo_collection, minio_client))

In [42]:
create_all()

Success: True	Message: 	StatusCode: 200
Success: True	Message: 	StatusCode: 201
Success: True	Message: 	StatusCode: 201
Success: True	Message: 	StatusCode: 201
Success: True	Message: 	StatusCode: 201


In [None]:
def clear_all():
    test_software_object.delete()
    test_software.delete()
    test_data_download.delete()
    test_dataset.delete()
    test_user.delete()
    
    

In [12]:
create_all()

Success: False	Message: document already exists	StatusCode: 400
Success: False	Message: dataset already exists	StatusCode: 400
Success: False	Message: dataDownload ark:99999/test-download already exists	StatusCode: 404
Success: False	Message: software already exists	StatusCode: 400
Success: False	Message: dataDownload ark:99999/test-software-script already exists	StatusCode: 404


## Testing Run Custom Container Code

In [57]:
import docker
import datetime
import pathlib

In [58]:
date_created = datetime.datetime.now()
script_id = test_computation.usedSoftware
dataset_id = test_computation.usedDataset

In [59]:
# find the locations of all the files
found_dataset = Dataset.construct(id=dataset_id)
read_dataset = found_dataset.read(mongo_collection)
print(read_dataset)

Success: True	Message: 	StatusCode: 200


In [60]:
dataset_files = []
dataset_files.append(found_dataset.distribution[0].contentUrl)

In [61]:
software = Software.construct(id=script_id)
print(software.read(mongo_collection))

Success: True	Message: 	StatusCode: 200


In [49]:
software.json()

'{"id": "ark:99999/test-software", "type": "evi:Software", "distribution": [{"id": "ark:99999/test-software-script", "type": "DataDownload", "name": "test-script.py", "contentUrl": "script1/test-script.py"}], "usedBy": [], "context": {"@vocab": "https://schema.org/", "evi": "https://w3id.org/EVI#"}, "@id": "ark:99999/test-software", "@type": "Software", "name": "script1", "owner": {"id": "ark:99999/test-user1", "type": "Person", "name": "Test User1", "email": "testuser1@example.org"}, "evi:usedBy": []}'

In [65]:
# create a temporary landing folder for the output
job_path = pathlib.Path(f"/tmp/test-computation")

input_directory  = job_path / "input"
software_directory = input_directory / "software"
data_directory = input_directory / "data"
output_directory = job_path / "output"

software_directory.mkdir(parents=True)
data_directory.mkdir(parents=True)
output_directory.mkdir(parents=True)

#pathlib.Path("input")

FileExistsError: [Errno 17] File exists: '/tmp/test-computation/input/software'

In [70]:
# load software into software folder
script_filename = software_directory / pathlib.Path(script_file).name

minio_client.fget_object(
"test", script_file, str(script_filename)
)

# check that file was downloaded correctly
print(list(software_directory.glob("*")))

for dataset in dataset_files:
    dataset_filename = data_directory / pathlib.Path(dataset).name
    minio_client.fget_object(
        "test", dataset, str(dataset_filename)
    )
    
print(list(data_directory.glob("*")))

[PosixPath('/tmp/test-computation/input/software/test-script.py')]
[PosixPath('/tmp/test-computation/input/data/test-data.csv')]


In [87]:
get_script = minio_client.fget_object(
"test", "not a file", str(script_filename)
)


S3Error: S3 operation failed; code: NoSuchKey, message: Object does not exist, resource: /test/not%20a%20file, request_id: 1700D52500F111CC, host_id: None, bucket_name: test, object_name: not a file

In [86]:
get_script.met

HTTPHeaderDict({'Accept-Ranges': 'bytes', 'Content-Length': '137', 'Content-Security-Policy': 'block-all-mixed-content', 'Content-Type': 'application/octet-stream', 'ETag': '"5a572e8848d1d66833331bcd9bcbb53a"', 'Last-Modified': 'Mon, 11 Jul 2022 16:47:04 GMT', 'Server': 'MinIO', 'Strict-Transport-Security': 'max-age=31536000; includeSubDomains', 'Vary': 'Origin, Accept-Encoding', 'X-Amz-Request-Id': '1700D51FDDA4DD5C', 'X-Content-Type-Options': 'nosniff', 'X-Xss-Protection': '1; mode=block', 'x-amz-meta-identifier': 'ark:99999/test-software-script', 'x-amz-meta-name': 'test-script.py', 'Date': 'Mon, 11 Jul 2022 17:03:15 GMT'})

### Creating Docker Image with Dependancies

In [71]:
# computation id for 
docker_client = docker.from_env()

In [157]:
with open("tests/test-dockerfile", "w") as file:
    file.writelines([
        "FROM python:3\n",
        "RUN /usr/local/bin/python -m pip install --upgrade pip\n",
        "RUN pip install pandas\n"])

In [158]:
# build an image for
with open("tests/test-dockerfile", "rwb") as dockerfile:
    dockerfile.writelines([
        "FROM python:3\n",
        "RUN /usr/local/bin/python -m pip install --upgrade pip\n",
        "RUN pip install pandas\n"])
    dockerfile.seek(0)
    docker_file = docker_client.images.build(
        fileobj=dockerfile,
        tag = "fairscape-python-test"
    )

### Execute Custom Container by mounting the temporary directory


In [73]:
test_computation.command

'python3 /mnt/input/software/test-script.py'

In [74]:
job_path

PosixPath('/tmp/test-computation')

In [78]:
container = docker_client.containers.create(
    image = test_computation.container,
    command = test_computation.command,
    auto_remove = False,
    volumes={
        str(job_path): {'bind': '/mnt/', 'mode': 'rw'},
    }
)

In [79]:
# update computation metadata
container.id



'f64e8c53184246221ef67b6d5a0e69b225165c74547be8d8248ef261bad5b05c'

In [80]:
# run the container 

try:
    container.start()
    
except docker.errors.ContainerError as container_error:
    print(container_error)

In [83]:
container.status

'created'

In [167]:
list(output_directory.glob("*"))

[PosixPath('/tmp/test-computation/output/test-output.csv')]

In [86]:
# reset files

# remove all files
for dataset in data_directory.glob("*"):
    dataset.unlink()
    
for software in software_directory.glob("*"):
    software.unlink()
    
for output in output_directory.glob("*"):
    output.unlink()
    
# remove all folders
data_directory.rmdir()
software_directory.rmdir()
output_directory.rmdir()

# remove parent folder
input_directory.rmdir()

In [89]:
# clear all objects


In [None]:
# execute custom container

In [None]:
test_computation.run_custom_container()

In [None]:
register_computation(test_computation.containerId)