### Removing data from Terra.bio

#### Library loading and function definition

In [1]:
# Enable the IPython autoreload extension; mode 2 reloads imported modules
# before each cell is executed, so edits to local packages are picked up live.
%load_ext autoreload
%autoreload 2

In [2]:
### Load necessary libraries ###
from genepy import terra
import dalmatian as dm
import pandas as pd

In [3]:
# removing things from old failed workflows
def removeFromFailedWorkflows(
    workspaceid: str,
    maxtime: str="2020-06-10",
    force_remove: list=[],
    dryrun: bool=True
) -> None:
    """
    Deletes the files of failed or aborted workflows in a Terra workspace.

    Walks over every submission of the workspace and calls `terra.deleteJob`
    for each workflow whose status is "Failed" or "Aborted". Submissions listed
    in `force_remove` have all of their workflows deleted, whatever their status.

    Can be very long: one extra request is made per candidate submission.

    parameters
    ----------
    workspaceid: str
        str the workspace name

    maxtime: str
        str date format (eg. 2020-06-10). Only submissions submitted strictly
        after this date are processed; older submissions are left untouched,
        even when listed in `force_remove`.

    force_remove: list
        list[str of submissionId] removes from these workflows even if not failed

    dryrun: bool
        bool forwarded to `terra.deleteJob`; whether or not to execute or just
        print commands

    Returns
    -------
    None
    """
    # The default list is never mutated here; `or ()` also tolerates an explicit None.
    forced = force_remove or ()
    # Loop-invariant: parse the cutoff once instead of once per submission.
    cutoff = pd.to_datetime(maxtime)
    removable = ("Failed", "Aborted")

    wm = dm.WorkspaceManager(workspaceid)
    for val in wm.list_submissions():
        submission_id = val["submissionId"]
        is_forced = submission_id in forced

        # Look specifically at the Failed/Aborted counters rather than at whichever
        # status happens to come first in the mapping; an empty or missing mapping
        # simply means there is nothing to clean for this submission.
        counts = val.get("workflowStatuses") or {}
        has_removable = any(counts.get(status, 0) > 0 for status in removable)
        if not (is_forced or has_removable):
            continue

        # Drop the timezone so the value can be compared with the naive cutoff.
        submitted = pd.to_datetime(val["submissionDate"]).tz_localize(None)
        if not (submitted > cutoff):
            continue

        for w in wm.get_submission(submission_id)["workflows"]:
            if not (is_forced or w["status"] in removable):
                continue
            print(w["status"], submission_id)
            try:
                # A workflow without a "workflowId" raises KeyError here and is
                # reported by the handler below (original note: it was not even run).
                terra.deleteJob(
                    workspaceid,
                    "submissions/" + submission_id,
                    w["workflowId"],
                    dryrun=dryrun,
                )
            except Exception as e:
                # Nothing stored for this workflow: not an error worth reporting.
                # Substring match so that wrapping/quoting of the message does not matter.
                if "One or more URLs matched no objects" in str(e):
                    continue
                # Best effort: report the problem and keep going with the next workflow.
                print(e)

#### Workspace cleaning

In [4]:
# Terra workspace to clean up; this string is what gets passed to
# dm.WorkspaceManager and to removeFromFailedWorkflows in the cells below.
# Looks like "<namespace>/<workspace name>" — TODO confirm the expected format.
TERRA_WS = 'whitelabgx/scRNAseq'

In [5]:
# Collect the id of every submission recorded in the workspace
submission_ids_all = [
    entry['submissionId']
    for entry in dm.WorkspaceManager(TERRA_WS).list_submissions()
]
print(len(submission_ids_all))

# Submissions that must be preserved
submission_ids_keep = [
    'c9f1ae21-46d0-4349-a049-0b9987dea21b',
    '4e3d5584-ac31-456a-b046-4c8554108b7d',
    '896f092c-853d-4481-8a9c-63aaafe4f0e4',
]
print(len(submission_ids_keep))

# Every submission that is not explicitly kept is selected for removal
# (same order as returned by the workspace listing)
submission_ids_remove = list(
    filter(lambda sid: sid not in submission_ids_keep, submission_ids_all)
)
print(len(submission_ids_remove))
submission_ids_remove

18
3
15


['07e6d89b-3db8-436e-bc50-fef6b40cdd04',
 '0edaa628-d5d3-4dda-a90b-3d70d4905e7a',
 '14b2f1b8-bf97-4b54-8532-777ffde081a3',
 '389ad7c2-08e1-474e-81af-a7a4071d9db1',
 '3c5c277d-7eac-4bd3-9048-e8828cfd3298',
 '5cc00e94-52c6-48fd-944c-95b436f54326',
 '61221564-5447-4d63-bf1b-88554f3ac12a',
 '743031a3-2a24-4f4c-b310-65220830572a',
 '77d5d348-4681-4060-80c9-b4bda1d3325f',
 '8ae81f38-3e77-4db7-84b2-8d9efb1371ea',
 '96ae8c73-2b91-4a30-b188-2a25a341a459',
 'b0bd6787-8365-4e1c-beba-f341c3d84955',
 'ca3a5a8d-3a78-4a10-8ea6-4f4f8eb26c5d',
 'e716946f-7ace-4411-8937-41df3f461bda',
 'ebf17150-a6b7-4c73-8f6a-f61a7cecf9ac']

In [7]:
# Remove the jobs of the selected submissions from the workspace.
# Only submissions submitted after 2023-04-19 are processed (see maxtime).
# Dry run (dryrun defaults to True, so the commands are only printed) - run this first to check:
#removeFromFailedWorkflows(TERRA_WS, maxtime="2023-04-19", force_remove=submission_ids_remove)
# Real run: dryrun=False actually executes the deletions.
removeFromFailedWorkflows(TERRA_WS, maxtime="2023-04-19", force_remove=submission_ids_remove, dryrun=False)

Succeeded 07e6d89b-3db8-436e-bc50-fef6b40cdd04
Succeeded 0edaa628-d5d3-4dda-a90b-3d70d4905e7a
Succeeded 14b2f1b8-bf97-4b54-8532-777ffde081a3
Failed 389ad7c2-08e1-474e-81af-a7a4071d9db1
Succeeded 3c5c277d-7eac-4bd3-9048-e8828cfd3298
Failed 5cc00e94-52c6-48fd-944c-95b436f54326
Failed 61221564-5447-4d63-bf1b-88554f3ac12a
Failed 743031a3-2a24-4f4c-b310-65220830572a
Failed 77d5d348-4681-4060-80c9-b4bda1d3325f
Failed 8ae81f38-3e77-4db7-84b2-8d9efb1371ea
Succeeded 96ae8c73-2b91-4a30-b188-2a25a341a459
Failed b0bd6787-8365-4e1c-beba-f341c3d84955
Succeeded ca3a5a8d-3a78-4a10-8ea6-4f4f8eb26c5d
Succeeded e716946f-7ace-4411-8937-41df3f461bda
Failed ebf17150-a6b7-4c73-8f6a-f61a7cecf9ac
