In [35]:
import requests
import json
from requests_toolbelt.multipart.encoder import MultipartEncoder

# Base URL of the API under test. Endpoint paths in the cells below are
# appended to it by plain string concatenation, so the trailing slash matters.
root_url = "http://localhost:8000/"

# Create a user

In [36]:
# Register the test user whose identity is reused as the owner of the records
# created in the following cells.
# NOTE(review): this payload uses the key "type" while the other payloads in
# this notebook use "@type" — confirm the API accepts both.
user1_data = {
    "@id": "ark:99999/test-user1",
    "name": "Test User1",
    "type": "Person",
    "email": "testuser1@example.org",
    "password": "test1",
}
# A freshly created user holds nothing yet: every collection starts out empty.
user1_data.update(
    (collection, [])
    for collection in (
        "organizations",
        "projects",
        "datasets",
        "software",
        "computations",
        "evidencegraphs",
    )
)

create_user1 = requests.post(
    f"{root_url}user",
    data=json.dumps(user1_data),
)
create_user1.json()

{'created': {'@id': 'ark:99999/test-user1',
  '@type': 'Person',
  'name': 'Test User1'}}

# Get the token from http://localhost:8000/docs#/webauth/login_login_post

In [37]:
# Bearer token for the authenticated calls below. Obtain it from the login
# form (submit form data with email: testuser1@example.org, pw: test1).
#
# The token is a credential and the previously pasted JWT carried an `exp`
# claim one hour after `iat`, so it is supplied at run time instead of being
# stored in the notebook: export FAIRSCAPE_TOKEN before starting the kernel,
# or paste the token when prompted.
import os
from getpass import getpass

token = (os.environ.get("FAIRSCAPE_TOKEN") or getpass("Bearer token: ")).strip()

# Create an organization

In [38]:
# Describe the organization; its owner is a reference to the test user built
# from the identifying fields of user1_data.
organization_data = {
    "@id": "ark:99999/test-org",
    "@type": "Organization",
    "name": "test organization",
    "owner": {
        **{field: user1_data[field] for field in ("@id", "name", "email")},
        "@type": "Person",
    },
}

# The token is sent as a bearer credential in the Authorization header.
headers = {"Authorization": f"Bearer {token}"}
organization_create = requests.post(
    f"{root_url}organization/",
    headers=headers,
    data=json.dumps(organization_data),
)
organization_create.json()

{'created': {'@id': 'ark:99999/test-org', '@type': 'Organization'}}

# Create a project

In [39]:
# Describe the project: owned by the test user and attached to the
# organization through "memberOf".
project_data = {
    "@id": "ark:99999/test-org/test-proj",
    "@type": "Project",
    "name": "test project",
    "owner": {
        **{field: user1_data[field] for field in ("@id", "name", "email")},
        "@type": "Person",
    },
    # Compact reference to the parent organization.
    "memberOf": {
        field: organization_data[field] for field in ("@id", "@type", "name")
    },
}

headers = {"Authorization": f"Bearer {token}"}
project_create = requests.post(
    f"{root_url}project/",
    headers=headers,
    data=json.dumps(project_data),
)
project_create.json()

{'created': {'@id': 'ark:99999/test-org/test-proj', '@type': 'Project'}}

# Create a software record

In [40]:
# Describe the software record, owned by the test user.
software_data = {
    "@id": "ark:99999/test-org/test-proj/test-software",
    "@type": "evi:Software",
    "name": "test software",
    "owner": {
        **{field: user1_data[field] for field in ("@id", "name", "email")},
        "@type": "Person",
    },
}

# NOTE(review): unlike the organization and project requests, this call sends
# no Authorization header — confirm whether the endpoint is meant to be open.
software_create = requests.post(
    f"{root_url}software/",
    data=json.dumps(software_data),
)
software_create.json()

{'created': {'@id': 'ark:99999/test-org/test-proj/test-software',
  '@type': 'evi:Software'}}

# Register metadata and upload content

In [41]:
# Metadata for the downloadable script file; "encodesCreativeWork" points back
# at the software record this file belongs to.
software_metadata = {
    "@id": "ark:99999/test-org/test-proj/test-software/sum_script.py",
    "@type": "DataDownload",
    "name": "test script",
    "encodingFormat": ".py",
    "encodesCreativeWork": {
        field: software_data[field] for field in ("@id", "@type", "name")
    },
}

In [42]:
# Upload the script as a multipart form with two parts: "download" carries the
# JSON metadata and "file" carries the script's bytes.
# The file is opened in a `with` block so the handle is closed once the
# request has completed (it was previously left open).
with open('/home/sadnan/compute-test/input-script/sum_script.py', 'rb') as script_file:
    mp_encoder = MultipartEncoder(
        fields={
            'download': json.dumps(software_metadata),
            # (filename, file object, mime type) for the file part
            'file': ('test-software', script_file, 'text/plain'),
        }
    )
    # The encoder reads from the file object while the body is being sent, so
    # the POST must happen before the file is closed.
    software_upload = requests.post(
        root_url + "register",
        data=mp_encoder,  # The MultipartEncoder is posted as data, don't use files=...!
        # The MultipartEncoder provides the content-type header with the boundary:
        headers={'Content-Type': mp_encoder.content_type},
    )

software_upload.json()

{'created': {'@id': 'ark:99999/test-org/test-proj/test-software/sum_script.py',
  '@type': 'Download',
  'name': 'test script'}}

In [48]:
import os

path = "/home/sadnan/compute-test/data/Non-PreVent-hctsa"

# Print each directory entry next to its position in the listing.
for position, entry in enumerate(os.listdir(path)):
    print(f"{position} {entry}")
# Unused alternative kept for reference:
#files = {f'file_{i}': open(f'{path}/{name}','rb') for i, name in enumerate(os.listdir(path))}

0 UVA_1119_HR.csv
1 UVA_1050_HR3.csv
2 UVA_1117_HR.csv
3 UVA_1400_HR.csv
4 UVA_1251_HR3.csv
5 UVA_1400_HR3.csv
6 UVA_1120_HR.csv
7 UVA_1050_HR.csv
8 UVA_1251_HR.csv
9 UVA_1120_HR3.csv
10 UVA_1119_HR3.csv
11 UVA_1117_HR3.csv


In [52]:
from os import walk

mypath = "/home/sadnan/vscodeprojects/HCTSA_mds_python/Non-PreVent-hctsa"

# Flatten the tree into one list: for every directory visited, its
# sub-directory names come first, followed by its file names.
f = [
    entry
    for _, subdirs, files in walk(mypath)
    for entry in (*subdirs, *files)
]
print(f)

['UVA_1050', 'UVA_1117', 'UVA_1251', 'UVA_1119', 'UVA_1400', 'UVA_1120', 'UVA_1050_HR3.csv', 'UVA_1050_HR.csv', 'UVA_1117_HR.csv', 'UVA_1117_HR3.csv', 'UVA_1251_HR3.csv', 'UVA_1251_HR.csv', 'UVA_1119_HR.csv', 'UVA_1119_HR3.csv', 'UVA_1400_HR.csv', 'UVA_1400_HR3.csv', 'UVA_1120_HR.csv', 'UVA_1120_HR3.csv']


In [58]:
from pathlib import Path, PurePath

path = Path("/home/sadnan/vscodeprojects/HCTSA_mds_python/Non-PreVent-hctsa")

# For every regular file below `path`, show its parent directory, the parent's
# own name, and the file name, followed by the file's resolved absolute path.
for p in path.rglob("*"):
    if not p.is_file():
        continue  # directories are only traversed, never printed
    parent_name = PurePath(p.parent).name
    print(f"{p.parent}  -  {parent_name}  -  {p.name}")
    print(p.resolve())

/home/sadnan/vscodeprojects/HCTSA_mds_python/Non-PreVent-hctsa/UVA_1050  -  UVA_1050  -  UVA_1050_HR3.csv
/home/sadnan/vscodeprojects/HCTSA_mds_python/Non-PreVent-hctsa/UVA_1050/UVA_1050_HR3.csv
/home/sadnan/vscodeprojects/HCTSA_mds_python/Non-PreVent-hctsa/UVA_1050  -  UVA_1050  -  UVA_1050_HR.csv
/home/sadnan/vscodeprojects/HCTSA_mds_python/Non-PreVent-hctsa/UVA_1050/UVA_1050_HR.csv
/home/sadnan/vscodeprojects/HCTSA_mds_python/Non-PreVent-hctsa/UVA_1117  -  UVA_1117  -  UVA_1117_HR.csv
/home/sadnan/vscodeprojects/HCTSA_mds_python/Non-PreVent-hctsa/UVA_1117/UVA_1117_HR.csv
/home/sadnan/vscodeprojects/HCTSA_mds_python/Non-PreVent-hctsa/UVA_1117  -  UVA_1117  -  UVA_1117_HR3.csv
/home/sadnan/vscodeprojects/HCTSA_mds_python/Non-PreVent-hctsa/UVA_1117/UVA_1117_HR3.csv
/home/sadnan/vscodeprojects/HCTSA_mds_python/Non-PreVent-hctsa/UVA_1251  -  UVA_1251  -  UVA_1251_HR3.csv
/home/sadnan/vscodeprojects/HCTSA_mds_python/Non-PreVent-hctsa/UVA_1251/UVA_1251_HR3.csv
/home/sadnan/vscodeprojects/H

In [59]:
from pathlib import Path, PurePath
import uuid

path_to_data = "/home/sadnan/compute-test/data/Non-PreVent-hctsa"

path = Path(path_to_data)

# For every regular file below path_to_data:
#   1. create a Dataset record named after the file's parent directory, with a
#      fresh UUID in its identifier;
#   2. register a DataDownload for the file and upload its bytes.
# Each file is opened in a `with` block so its handle is closed as soon as the
# upload finishes (previously one handle was left open per file).
for p in path.rglob("*"):
    if not p.is_file():
        continue  # directories are only traversed

    print(p.parent, ' - ', PurePath(p.parent).name, ' - ', p.name)
    unique_id = str(uuid.uuid4())
    full_path = p.resolve()
    dataset_data = {
        "@id": "ark:99999/test-org/test-proj/test-data" + f'-{unique_id}',
        "@type": "Dataset",
        "name": PurePath(p.parent).name,
        "owner": {
            "@id": user1_data['@id'],
            "name": user1_data['name'],
            "email": user1_data['email'],
            "@type": "Person"
        }
    }

    # NOTE(review): no Authorization header is sent here, unlike the
    # organization and project requests — confirm the endpoint is meant to be open.
    dataset_create = requests.post(root_url + "dataset/", data=json.dumps(dataset_data))
    dataset_create.json()

    dataset_metadata = {
        # NOTE(review): the file name is appended directly after the UUID with
        # no "/" separator, unlike the software download id — confirm intended.
        "@id": dataset_data['@id'] + p.name,
        "@type": "DataDownload",
        "name": p.name,
        "encodingFormat": Path(p.name).suffix,
        "encodesCreativeWork": {
            "@id": dataset_data['@id'],
            "@type": dataset_data['@type'],
            "name": dataset_data['name']
        }
    }

    with open(full_path, 'rb') as data_file:
        mp_encoder = MultipartEncoder(
            fields={
                'download': json.dumps(dataset_metadata),
                # (filename, file object, mime type) for the file part
                'file': ('test-data', data_file, 'text/plain'),
            }
        )

        # The encoder reads from the file object while the body is being
        # sent, so the POST must happen before the file is closed.
        data_download_upload = requests.post(
            root_url + "register",
            data=mp_encoder,
            # The MultipartEncoder is posted as data, don't use files=...!
            # The MultipartEncoder provides the content-type header with the boundary:
            headers={'Content-Type': mp_encoder.content_type},
        )

    data_download_upload.json()



/home/sadnan/compute-test/data/Non-PreVent-hctsa/UVA_1050  -  UVA_1050  -  UVA_1050_HR3.csv
/home/sadnan/compute-test/data/Non-PreVent-hctsa/UVA_1050  -  UVA_1050  -  UVA_1050_HR.csv
/home/sadnan/compute-test/data/Non-PreVent-hctsa/UVA_1117  -  UVA_1117  -  UVA_1117_HR.csv
/home/sadnan/compute-test/data/Non-PreVent-hctsa/UVA_1117  -  UVA_1117  -  UVA_1117_HR3.csv
/home/sadnan/compute-test/data/Non-PreVent-hctsa/UVA_1251  -  UVA_1251  -  UVA_1251_HR3.csv
/home/sadnan/compute-test/data/Non-PreVent-hctsa/UVA_1251  -  UVA_1251  -  UVA_1251_HR.csv
/home/sadnan/compute-test/data/Non-PreVent-hctsa/UVA_1119  -  UVA_1119  -  UVA_1119_HR.csv
/home/sadnan/compute-test/data/Non-PreVent-hctsa/UVA_1119  -  UVA_1119  -  UVA_1119_HR3.csv
/home/sadnan/compute-test/data/Non-PreVent-hctsa/UVA_1400  -  UVA_1400  -  UVA_1400_HR.csv
/home/sadnan/compute-test/data/Non-PreVent-hctsa/UVA_1400  -  UVA_1400  -  UVA_1400_HR3.csv
/home/sadnan/compute-test/data/Non-PreVent-hctsa/UVA_1120  -  UVA_1120  -  UVA_1120_H