# Create RO-Crate for multiple commands

Before running this notebook, run all the commands listed in the README from inside the `./session` directory.

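Create a script crate that records each command as provenance. See the RO-Crate specification on workflows and scripts (https://www.researchobject.org/ro-crate/specification/1.2/workflows.html) and the related ro-crate-py issue (https://github.com/ResearchObject/ro-crate-py/issues/148).
See also the specification section on software used to create files: https://www.researchobject.org/ro-crate/specification/1.2/provenance.html#software-used-to-create-files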

In [121]:
from pathlib import Path

from rocrate.rocrate import ROCrate

In [122]:
# URL of the protein-quest release; passed as the `instrument` of every
# action recorded in this notebook.
instrument: str = "https://pypi.org/project/protein-quest/0.3.0/"

In [123]:
# Directory in which the protein-quest commands were run.
root = Path("./session")
# Accession list written by the first command (`search uniprot`).
accs = root.joinpath("uniprot_accs.txt")

In [124]:
# Start a new crate (no source is passed); the cells below add files,
# directories and actions to it.
crate: ROCrate = ROCrate()

In [125]:
# Register the accession list as a data file of the crate.
uniprot_accs = crate.add_file(
    accs,
    properties=dict(
        name=accs.name,
        description="UniProt accession numbers",
        encodingFormat="text/plain",
    ),
)
# Record the command that produced the file. No `object` is given for this
# step; only the resulting file is linked. The call is kept as the last
# expression of the cell so the notebook still displays the created action.
crate.add_action(
    result=[uniprot_accs],
    instrument=instrument,
    properties=dict(
        name="protein-quest search uniprot",
        description="protein-quest search uniprot --taxon-id 9606 --reviewed --subcellular-location-uniprot nucleus --subcellular-location-go GO:0005634 --molecular-function-go GO:0003677 --limit 100 uniprot_accs.txt",
    ),
)

<#f3357f37-566c-4ff9-98be-c9f7bf5dae60 CreateAction>

In [126]:
# CSV written by `protein-quest search pdbe`.
pdbs_csv = root.joinpath("pdbe.csv")
pdbs = crate.add_file(
    pdbs_csv,
    properties=dict(
        name=pdbs_csv.name,
        description="PDBe PDB IDs",
        encodingFormat="text/csv",
    ),
)
# Link the accession list (input) to the CSV (output) through the command.
# Kept as the last expression so the cell still displays the created action.
crate.add_action(
    object=[uniprot_accs],
    result=[pdbs],
    instrument=instrument,
    properties=dict(
        name="protein-quest search pdbe",
        description="protein-quest search pdbe uniprot_accs.txt pdbe.csv",
    ),
)

<#c9264b3d-1244-42c0-9975-19ec0d89e4d8 CreateAction>

In [127]:
# Command recorded by this cell:
#   protein-quest retrieve pdbe pdbe.csv downloads-pdbe/
pdb_dls = root.joinpath("downloads-pdbe")
pdb_dls_dir = crate.add_directory(
    pdb_dls,
    properties=dict(
        name=pdb_dls.name,
        description="Downloaded PDBe gzipped mmcif files",
        encodingFormat="application/gzip",
    ),
)
# The CSV of PDB IDs is the input; the download directory is the output.
# Kept as the last expression so the cell still displays the created action.
crate.add_action(
    object=[pdbs],
    result=[pdb_dls_dir],
    instrument=instrument,
    properties=dict(
        name="protein-quest retrieve pdbe",
        description="protein-quest retrieve pdbe pdbe.csv downloads-pdbe/",
    ),
)

<#4add11e1-74b7-4f71-9b8e-19ef067e4380 CreateAction>

In [128]:
# Command recorded by this cell:
#   protein-quest filter chain pdbe.csv  ./downloads-pdbe ./filtered-chains
fc_p = root.joinpath("filtered-chains")
fc_d = crate.add_directory(
    fc_p,
    properties=dict(
        name=fc_p.name,
        description="Filtered structures by chain",
        encodingFormat="application/gzip",
    ),
)
# Both the CSV and the download directory are inputs of the filter step.
# Kept as the last expression so the cell still displays the created action.
crate.add_action(
    object=[pdbs, pdb_dls_dir],
    result=[fc_d],
    instrument=instrument,
    properties=dict(
        name="protein-quest filter chain",
        description="protein-quest filter chain pdbe.csv  ./downloads-pdbe ./filtered-chains",
    ),
)

<#26dde304-7809-4f5d-836d-efdda8f0efdd CreateAction>

In [None]:
# TODO: record an `endTime` for each action added above.
# Write the crate into the directory that contains the accession list
# (`accs.parent`), i.e. alongside the files and directories registered above.
# NOTE(review): presumably this also emits the crate metadata file there — confirm.
crate.write(accs.parent)