# Workspace Scratch Notebook
## Author: Jonn Smith
## Date: 2023/12/18
Play around with workspace assets (tables / wdls / etc.). 

The goal is to show how to manipulate workspace assets towards the goal of automation.
***

In [2]:
import os
import datetime
import gzip
import io

import pandas as pd
import firecloud.api as fapi
import numpy as np

from google.cloud import bigquery
from google.cloud import storage
from google.api_core.exceptions import NotFound

In [3]:
namespace = os.environ['WORKSPACE_NAMESPACE']
workspace = os.environ['WORKSPACE_NAME']
default_bucket = os.environ['WORKSPACE_BUCKET']

print(f"Namespace: {namespace}")
print(f"Workspace: {workspace}")
print(f"Default Bucket: {default_bucket}")

Namespace: broad-firecloud-dsde-methods
Workspace: sr-malaria
Default Bucket: gs://fc-b06e896e-cc1d-4deb-b638-f7b87c3e5dbd


In [4]:
def load_table(namespace, workspace, table_name, store_membership=False):
    ent_old = fapi.get_entities(namespace, workspace, table_name).json()
    tbl_old = None

    membership = None
    if len(ent_old) > 0:
        tbl_old = pd.DataFrame(list(map(lambda e: e['attributes'], ent_old)))
        tbl_old[f"entity:{table_name}_id"] = list(map(lambda f: f['name'], ent_old))

        if store_membership:
            membership = list(map(lambda g: set(map(lambda h: h['entityName'], g['items'])), tbl_old['samples']))
            del tbl_old['samples']

        c = list(tbl_old.columns)
        c.remove(f"entity:{table_name}_id")
        c = [f"entity:{table_name}_id"] + c
        tbl_old = tbl_old[c]
        tbl_old = tbl_old.astype(str)

    return tbl_old, membership

# Remove any `nan` values in a given dataframe.
# `nan` values are caused by a parsing issue and are artifacts.
def fix_nans(df, quiet=True):
    if not quiet: print("Replacing all `nan` values with empty strings: ")
    for c in df.columns.values:
        nan_types = ("nan", float('nan'))
        has_nan = False
        num_denaned = 0
        for n in nan_types:
            if (sum(df[c] == n) > 0):
                num_denaned += sum(df[c] == n)
                df.loc[df[c] == n, c] = ""
                has_nan = True
        if has_nan and not quiet:
            print(f"\t{c}: {num_denaned}")

    if not quiet: print("Replacing numpy nan values...")
    if not quiet: print("Done.")
    return df.replace(np.nan, "")

In [None]:
# dropna()

In [5]:
dir(fapi)

['AuthorizedSession',
 'DefaultCredentialsError',
 'FISS_USER_AGENT',
 'FireCloudServerError',
 'Iterable',
 'RefreshError',
 'Request',
 '__SESSION',
 '__USER_ID',
 '__builtins__',
 '__cached__',
 '__delete',
 '__doc__',
 '__file__',
 '__get',
 '__loader__',
 '__name__',
 '__package__',
 '__patch',
 '__post',
 '__put',
 '__spec__',
 '__version__',
 '_attr_erlcreate',
 '_attr_ladd',
 '_attr_lrem',
 '_attr_rem',
 '_attr_set',
 '_attr_vlcreate',
 '_check_response_code',
 '_fiss_agent_header',
 '_set_session',
 'abort_submission',
 'add_user_to_group',
 'clone_workspace',
 'copy_config_from_repo',
 'copy_config_to_repo',
 'copy_entities',
 'create_group',
 'create_submission',
 'create_workspace',
 'create_workspace_config',
 'delete_entities',
 'delete_entities_of_type',
 'delete_entity_type',
 'delete_group',
 'delete_pair',
 'delete_pair_set',
 'delete_participant',
 'delete_participant_set',
 'delete_repository_config',
 'delete_repository_method',
 'delete_sample',
 'delete_sample_se

In [19]:
help(fapi.list_repository_methods)

Help on function list_repository_methods in module firecloud.api:

list_repository_methods(namespace=None, name=None, snapshotId=None)
    List method(s) in the methods repository.
    
    Args:
        namespace (str): Method Repository namespace
        name (str): method name
        snapshotId (int): method snapshot ID
    
    Swagger:
        https://api.firecloud.org/#!/Method_Repository/listMethodRepositoryMethods



In [20]:
print(fapi.list_repository_methods(namespace).json())

[{'name': 'PrepareGvcfs', 'createDate': '2017-01-30T23:20:21Z', 'url': 'http://agora.dsde-prod.broadinstitute.org/api/v1/methods/broad-firecloud-dsde-methods/PrepareGvcfs/22', 'synopsis': '', 'entityType': 'Workflow', 'snapshotId': 22, 'namespace': 'broad-firecloud-dsde-methods'}, {'name': 'GenotypeToMetrics', 'createDate': '2017-02-10T19:42:28Z', 'url': 'http://agora.dsde-prod.broadinstitute.org/api/v1/methods/broad-firecloud-dsde-methods/GenotypeToMetrics/15', 'synopsis': '', 'entityType': 'Workflow', 'snapshotId': 15, 'namespace': 'broad-firecloud-dsde-methods'}, {'name': 'CombineGvcf', 'createDate': '2017-03-09T21:41:36Z', 'url': 'http://agora.dsde-prod.broadinstitute.org/api/v1/methods/broad-firecloud-dsde-methods/CombineGvcf/17', 'synopsis': '', 'entityType': 'Workflow', 'snapshotId': 17, 'namespace': 'broad-firecloud-dsde-methods'}, {'name': 'GenotypeToMetrics', 'createDate': '2017-09-02T07:01:14Z', 'url': 'http://agora.dsde-prod.broadinstitute.org/api/v1/methods/broad-firecloud

In [6]:
entity_types = fapi.list_entity_types(namespace, workspace).json()
print(entity_types)

{'sample_set_set': {'attributeNames': ['annotated_joint_vcf', 'annotated_joint_vcf_tbi', 'drug_res_report', 'expanded_drug_res_marker_table', 'genomicsDB', 'joint_mt', 'joint_recalibrated_vcf', 'joint_recalibrated_vcf_tbi', 'joint_zarr', 'raw_joint_vcf', 'raw_joint_vcf_tbi', 'sample_sets', 'snpEff_genes', 'snpEff_summary'], 'count': 17, 'idName': 'sample_set_set_id'}, 'z_external_pipeline_validation_set': {'attributeNames': ['z_external_pipeline_validations'], 'count': 2, 'idName': 'z_external_pipeline_validation_set_id'}, 'high_quality_assembly': {'attributeNames': ['DEL_truth_vcf', 'DEL_truth_vcf_tbi', 'fai', 'fasta', 'High_Conf_Core_Intervals', 'High_Conf_Intervals', 'INS_truth_vcf', 'INS_truth_vcf_tbi', 'Low_Conf_Intervals', 'MNP_truth_vcf', 'MNP_truth_vcf_tbi', 'Niare_Truth_VCF', 'Niare_Truth_VCF_Index', 'reads_illumina', 'reads_illumina_index', 'reads_pac_bio', 'reads_pac_bio_index', 'SHORT_VARIANTS_vcf', 'SHORT_VARIANTS_vcf_tbi', 'SNP_truth_schaffner_set_vcf', 'SNP_truth_schaffn

In [8]:
print(help(fapi.list_billing_projects))
print(help(fapi.list_entity_types))
print(help(fapi.list_repository_configs))
print(help(fapi.list_repository_methods))
print(help(fapi.list_submissions))
print(help(fapi.list_workspace_configs))
print(help(fapi.list_workspaces))
print(help(fapi.lock_workspace))

Help on function list_billing_projects in module firecloud.api:

list_billing_projects()
    Get activation information for the logged-in user.
    
    Swagger:
        https://api.firecloud.org/#!/Profile/billing

None
Help on function list_entity_types in module firecloud.api:

list_entity_types(namespace, workspace)
    List the entity types present in a workspace.
    
    Args:
        namespace (str): project to which workspace belongs
        workspace (str): Workspace name
    
    Swagger:
        https://api.firecloud.org/#!/Entities/getEntityTypes

None
Help on function list_repository_configs in module firecloud.api:

list_repository_configs(namespace=None, name=None, snapshotId=None)
    List configurations in the methods repository.
    
    Args:
        namespace (str): Method Repository namespace
        name (str): config name
        snapshotId (int): config snapshot ID
    
    Swagger:
        https://api.firecloud.org/#!/Method_Repository/listMethodRepositoryConf

In [11]:
print(fapi.list_billing_projects().json())
print("="*80)
print(fapi.list_entity_types(namespace, workspace).json())
print("="*80)
print(fapi.list_repository_configs(namespace, workspace).json())
print("="*80)
# print(fapi.list_repository_methods(namespace, workspace).json())
# print(fapi.list_submissions(namespace, workspace).json())
print("="*80)
print(fapi.list_workspace_configs(namespace, workspace).json())
print("="*80)
# print(fapi.list_workspaces(namespace, workspace).json())

[{'creationStatus': 'Ready', 'projectName': 'CIGASS-testing', 'role': 'Owner'}, {'creationStatus': 'Ready', 'projectName': 'broad-firecloud-dsde-methods', 'role': 'User'}, {'creationStatus': 'Ready', 'projectName': 'broad-jukebox', 'role': 'User'}, {'creationStatus': 'Ready', 'projectName': 'jbx-wfl', 'role': 'User'}, {'creationStatus': 'Ready', 'projectName': 'jillmadden', 'role': 'User'}]
{'sample_set_set': {'attributeNames': ['annotated_joint_vcf', 'annotated_joint_vcf_tbi', 'drug_res_report', 'expanded_drug_res_marker_table', 'genomicsDB', 'joint_mt', 'joint_recalibrated_vcf', 'joint_recalibrated_vcf_tbi', 'joint_zarr', 'raw_joint_vcf', 'raw_joint_vcf_tbi', 'sample_sets', 'snpEff_genes', 'snpEff_summary'], 'count': 17, 'idName': 'sample_set_set_id'}, 'z_external_pipeline_validation_set': {'attributeNames': ['z_external_pipeline_validations'], 'count': 2, 'idName': 'z_external_pipeline_validation_set_id'}, 'high_quality_assembly': {'attributeNames': ['DEL_truth_vcf', 'DEL_truth_vcf_

[]
[{'deleteIntermediateOutputFiles': False, 'methodConfigurationDeleted': True, 'methodConfigurationName': 'BenchmarkVCFs_j6HS3F-if4c', 'methodConfigurationNamespace': 'broad-firecloud-dsde-methods', 'status': 'Done', 'submissionDate': '2023-07-12T11:06:43.706Z', 'submissionEntity': {'entityType': 'validation_set', 'entityName': 'HQ_Assemblies_With_Schaffner_VCFs'}, 'submissionId': '00b8348e-ec54-4452-adb2-6590b32916e1', 'submissionRoot': 'gs://fc-b06e896e-cc1d-4deb-b638-f7b87c3e5dbd/submissions/00b8348e-ec54-4452-adb2-6590b32916e1', 'submitter': 'jonn@broadinstitute.org', 'useCallCache': True, 'userComment': '', 'workflowStatuses': {'Succeeded': 5}}, {'deleteIntermediateOutputFiles': False, 'methodConfigurationDeleted': True, 'methodConfigurationName': 'SRJointCallGVCFsWithGenomicsDB_uarRbtb2sI4', 'methodConfigurationNamespace': 'broad-firecloud-dsde-methods', 'status': 'Done', 'submissionDate': '2023-09-18T20:33:14.522Z', 'submissionEntity': {'entityType': 'sample_set_set', 'entityN