In [None]:
# Imports
import google.auth
import google.auth.transport.requests
import requests
import pandas as pd
import json 
import re
import xml.etree.ElementTree as et
import xmltodict
import datetime

# Display options: show complete DataFrames in the notebook output rather than truncated previews
pd.set_option('display.max_rows', None)  # None removes the cap on displayed rows
pd.set_option('display.max_columns', None)  # None removes the cap on displayed columns
pd.set_option("display.max_colwidth", None)  # None disables truncation of long cell values
pd.set_option('display.width', 1000)
pd.set_option('display.colheader_justify', 'center')
pd.set_option('display.precision', 3)

# AnVIL Resource Access Control

## Bulk Manage User Access to Workspaces (and associated Auth Domains)
List the users, the role they should have, and the workspaces those user-role combinations should apply to. The roles specified will have the following effects:
* __READER__ will add the user as a reader on the workspace and a member on any associated authorization domains
* __WRITER__ will add the user as a writer on the workspace and a member on any associated authorization domains
* __OWNER__ will add the user as an owner on the workspace and a member on any associated authorization domains
* __NO ACCESS__ will remove the user from both the workspace and any associated authorization domains

In [None]:
#############################################
## Functions
#############################################

def manage_user_ws_access(user_role_list, workspace_list):
    """Apply workspace (and auth domain) access changes for a set of users.

    For every user/role pair, each workspace in ``workspace_list`` (looked up
    in the ``anvil-datastorage`` namespace) is processed as follows:

    * READER / WRITER / OWNER: the user is added as a member of every
      authorization domain on the workspace, and is added to the workspace
      ACL unless an authorization domain group already holds a sufficient
      access level on that workspace.
    * NO ACCESS: the user is removed from every authorization domain on the
      workspace and the workspace ACL entry is set to NO ACCESS.

    A per-user results table (workspace, status, errors) is displayed.

    Args:
        user_role_list: Iterable of ``[user_email, role]`` pairs.
        workspace_list: Iterable of workspace names.

    Returns:
        None. Results are printed/displayed.
    """
    api_root = "https://api.firecloud.org/api"
    valid_roles = ("READER", "WRITER", "OWNER", "NO ACCESS")
    # Workspace access levels that already satisfy each requested role
    satisfying_levels = {
        "READER": ("READER", "WRITER", "OWNER"),
        "WRITER": ("WRITER", "OWNER"),
        "OWNER": ("OWNER",),
    }

    # Loop through and process users
    for user_role in user_role_list:
        user = user_role[0]
        role = user_role[1]
        print(f"Processing ACL updates for user: {user}")
        results = []

        # Loop through and process workspaces
        for workspace in workspace_list:

            # Initialize
            print(f"\tProcessing ACL updates for {workspace}.")
            error_list = []

            # Validate the role up front rather than sending an invalid access level to the API
            if role not in valid_roles:
                error_list.append("Unknown role specified. Role must be READER, WRITER, OWNER or NO ACCESS.")
                results.append([workspace, "Failure", "; ".join(error_list)])
                continue

            # Establish credentials
            creds, _ = google.auth.default()
            auth_req = google.auth.transport.requests.Request()
            creds.refresh(auth_req)
            headers = {"Authorization": f"Bearer {creds.token}"}
            workspace_url = f"{api_root}/workspaces/anvil-datastorage/{workspace}"

            # Pull auth domains from workspace attributes
            try:
                ws_attributes = requests.get(
                    url=f"{workspace_url}?fields=workspace.attributes,workspace.authorizationDomain,workspace.googleProject,workspace.bucketName",
                    headers=headers
                ).json()
                ad_list = [ad["membersGroupName"] for ad in ws_attributes["workspace"]["authorizationDomain"]]
            except (KeyError, TypeError, ValueError):
                # Missing keys / non-JSON body: the workspace is absent or not readable
                error_list.append("Error accessing workspace.")
                ad_list = []

            # For each auth domain, add/remove the user as necessary
            if role == "NO ACCESS":
                for auth_domain in ad_list:
                    # Use the loop variable directly so each auth domain is targeted
                    # (previously only the last auth domain was ever removed)
                    response = requests.delete(
                        url=f"{api_root}/groups/{auth_domain}/member/{user}",
                        headers=headers
                    )
                    if response.status_code != 204:
                        error_list.append(f"Error removing from auth domain ({auth_domain})")
            else:
                for auth_domain in ad_list:
                    response = requests.put(
                        url=f"{api_root}/groups/{auth_domain}/member/{user}",
                        headers=headers
                    )
                    if response.status_code != 204:
                        error_list.append(f"Error adding to auth domain ({auth_domain})")

            # Pull existing workspace ACLs
            response = requests.get(url=f"{workspace_url}/acl", headers=headers)
            if response.status_code != 200:
                error_list.append("Error retrieving workspace ACL")
                ws_acl = {"acl": {}}
            else:
                ws_acl = response.json()

            # Determine if auth domain membership covers the necessary workspace access
            sufficient = satisfying_levels.get(role, ())
            ad_covers_ws_acl = any(
                auth_domain in key and val["accessLevel"] in sufficient
                for auth_domain in ad_list
                for key, val in ws_acl["acl"].items()
            )

            # Add/remove user from workspace as necessary
            if role == "NO ACCESS" or not ad_covers_ws_acl:
                payload = [{
                    "email": user,
                    "accessLevel": role,
                    "canShare": False,
                    "canCompute": False
                }]
                response = requests.patch(
                    url=f"{workspace_url}/acl",
                    headers=headers,
                    json=payload
                )
                if response.status_code != 200:
                    error_list.append("Error updating workspace ACL")

            # Record status
            status = "Success" if not error_list else "Failure"
            error_str = "; ".join(error_list)
            results.append([workspace, status, error_str])

        # Display results
        print(f"\nResults for user: {user}")
        results_df = pd.DataFrame(results, columns = ["workspace", "update_status", "errors"])
        display(results_df)

#############################################
## Input Parameters
#############################################

# Specify the users to manage access for and the role they should have:
# (each entry is a [user_email, role] pair; the same role is applied on every workspace listed below)
user_role_list = [
    #["user_email", "role - READER, WRITER, OWNER, NO ACCESS"]
     ["ncalvane@broadinstitute.org", "NO ACCESS"]
]

# Specify the workspaces those user-roles should apply to:
# (workspace names only; they are looked up under the "anvil-datastorage" namespace)
workspace_list = [
'ANVIL_NIA_CARD_LR_WGS_NABEC_GRU',
]

#############################################
## Execution
#############################################

manage_user_ws_access(user_role_list, workspace_list)
    


## Bulk Manage User Access to Terra Groups
List the users, target Terra groups, and roles the users should have on those target Terra groups. The roles specified will have the following effects:
* __ADMIN__ will add the user as an admin on the group
* __MEMBER__ will add the user as a member on the group
* __NO ACCESS__ will remove the user from the group

In [None]:
#############################################
## Functions
#############################################

def manage_user_group_role(user_group_role_list):
    """Set each user's role on a Terra group and display a results table.

    Target roles:

    * ADMIN: the user is added as an admin of the group.
    * MEMBER: the user is added as a member; an existing admin role is
      removed first.
    * NO ACCESS: the user is removed from every role (admin and/or member)
      they currently hold. If they hold none, nothing is sent and the entry
      is reported as a success. If the current membership could not be
      retrieved, no removal is attempted and the entry is reported as a
      failure.

    Any other role value is reported as a failure without calling the API.

    Args:
        user_group_role_list: Iterable of ``[user_email, group_name, role]``
            entries.

    Returns:
        None. Results are printed/displayed.
    """
    results = []
    # Loop through and process user group roles
    for user_group_role in user_group_role_list:

        # Initialize
        user = user_group_role[0]
        group = user_group_role[1]
        role = user_group_role[2]
        print(f"Processing user group role updates for: {user} - {group} - {role}")
        error_list = []

        # Validate specified role
        if role not in ["MEMBER", "ADMIN", "NO ACCESS"]:
            error_list.append("Unknown role specified. Role must be MEMBER, ADMIN or NO ACCESS.")
        else:

            # Establish credentials
            creds, _ = google.auth.default()
            auth_req = google.auth.transport.requests.Request()
            creds.refresh(auth_req)
            headers = {"Authorization": f"Bearer {creds.token}"}
            group_url = f"https://api.firecloud.org/api/groups/{group}"

            # Get existing group membership; admin and member are tracked separately
            # because a user can appear in both lists
            response = requests.get(url=group_url, headers=headers)
            membership_known = response.status_code == 200
            is_admin = False
            is_member = False
            if membership_known:
                response_json = response.json()
                is_member = user in response_json["membersEmails"]
                is_admin = user in response_json["adminsEmails"]
            else:
                error_list.append("Error retrieving existing group membership.")

            # Process user group role updates
            if role == "ADMIN":
                response = requests.put(url=f"{group_url}/admin/{user}", headers=headers)
                if response.status_code != 204:
                    error_list.append("Error updating user group role.")
            elif role == "MEMBER":
                # If necessary, remove user's admin role from group
                if is_admin:
                    response = requests.delete(url=f"{group_url}/admin/{user}", headers=headers)
                    if response.status_code != 204:
                        error_list.append("Error updating user group role.")
                # Add user as a member to group
                response = requests.put(url=f"{group_url}/member/{user}", headers=headers)
                if response.status_code != 204:
                    error_list.append("Error updating user group role.")
            elif role == "NO ACCESS":
                if not membership_known:
                    # Without the current roles there is no valid role segment for the DELETE URL
                    error_list.append("Error updating user group role.")
                else:
                    # Remove only the roles actually held; holding none is already the target state
                    for held_role, is_held in (("admin", is_admin), ("member", is_member)):
                        if not is_held:
                            continue
                        response = requests.delete(url=f"{group_url}/{held_role}/{user}", headers=headers)
                        if response.status_code != 204:
                            error_list.append("Error updating user group role.")

        # Record status
        status = "Success" if not error_list else "Failure"
        error_str = "; ".join(error_list)
        results.append([user, group, role, status, error_str])

    # Display results
    print("\nResults:")
    results_df = pd.DataFrame(results, columns = ["user", "target_group", "target_role", "update_status", "errors"])
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify the user, target group, and the role the user should have on the group:
# (any role other than ADMIN, MEMBER or NO ACCESS is reported as a failure without calling the API)
user_group_role_list = [
    #["user_email", "terra_group_name", "role - ADMIN, MEMBER, NO ACCESS"]
['AnVIL_Devs@firecloud.org', 'AUTH_AnVIL_ccdg_asc_ndd_daly_talkowski_aleksic_asd_exome', 'MEMBER'],
['AnVIL_Devs@firecloud.org', 'AUTH_AnVIL_ccdg_asc_ndd_daly_talkowski_barbosa_asd_exome', 'MEMBER'],
]

#############################################
## Execution
#############################################

manage_user_group_role(user_group_role_list)


## Lookup Workspace Auth Domain Groups
This is intended to be a quick way to look up the authorization domain Terra group(s) for a workspace or set of workspaces in a non-case-sensitive manner without having to log into each workspace in the Terra UI. 

In [None]:
#############################################
## Functions
#############################################

def lookup_workspace_auth_domain(workspace_list):
    """Display the authorization domain group(s) for each named workspace.

    The full list of workspaces visible to the caller is fetched once, then
    each requested name is matched against it case-insensitively. Names that
    are not found are reported with a "Workspace Not Found" error.

    Args:
        workspace_list: Iterable of workspace names to look up.

    Returns:
        None. Results are printed/displayed.
    """
    # Establish credentials
    credentials, _ = google.auth.default()
    credentials.refresh(google.auth.transport.requests.Request())

    # Collect workspaces the user has access to
    listing = requests.get(
        url="https://api.firecloud.org/api/workspaces?fields=workspace.authorizationDomain,workspace.namespace,workspace.name",
        headers={"Authorization": f"Bearer {credentials.token}"}
    )
    if listing.status_code != 200:
        print("Results:")
        print("Error retrieving workspaces for user. Exited function.")
        return

    # Map lower-cased workspace name -> list of auth domain entries
    domains_by_name = {
        entry["workspace"]["name"].lower(): entry["workspace"]["authorizationDomain"]
        for entry in listing.json()
    }

    # Look up each requested workspace
    rows = []
    for workspace in workspace_list:
        lookup_key = workspace.lower()
        if lookup_key not in domains_by_name:
            rows.append([workspace, "", "Errors", "Workspace Not Found"])
            continue
        joined_domains = ", ".join(
            [domain["membersGroupName"] for domain in domains_by_name[lookup_key]]
        )
        rows.append([workspace, joined_domains, "Success", ""])

    # Print results
    print("Results:")
    results_df = pd.DataFrame(rows, columns=["workspace", "auth_domains", "retrieval_status", "errors"])
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify the workspaces to look up (names are matched case-insensitively):
workspace_list = [
'AnVIL_ccdg_asc_ndd_daly_talkowski_aleksic_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_barbosa_asd_exome',
]

#############################################
## Execution
#############################################

lookup_workspace_auth_domain(workspace_list)


## Workspace Access Reports
Optionally run two reports related to AnVIL workspace access:
* The __Workspace Access Report__ pulls all of the AnVIL workspaces you have access to, and records any auth domains, any roles the auth domains have on the workspace, the roles the anvil-admins and AnVIL_Devs groups have within the auth domain, the list of dbgap groups that are part of the auth domain, and whether or not the workspace is public. 
* The __dbGaP Terra Group Report__ takes a list of user-specified dbGaP Terra Groups and returns whether that group exists as a user in Terra, and if so, which auth domains it is currently in. Note that if ONLY the dbGaP Terra Group Report is run, then the report will be unable to determine which auth domains the existing dbGaP Terra Groups are in.

In [None]:
#############################################
## Functions
#############################################

def gen_workspace_report():
    """Build and display the Workspace Access Report.

    Lists every workspace visible to the caller in the ``anvil-datastorage``
    namespace and, for each workspace / authorization domain pair, records:
    the auth domain's access level on the workspace, the roles held by
    ``anvil-admins@firecloud.org`` and ``AnVIL_Devs@firecloud.org`` in the
    auth domain, any group in the auth domain whose email contains
    ``dbgap_anvil``, and whether the workspace is public.

    Returns:
        list: One row per workspace / auth domain pair, in the order
        ``[workspace, auth_domain, ad_workspace_role, anvil_admins_role,
        anvil_devs_role, dbgap_group_list, public, status, errors]``.
        An empty list is returned if the workspace listing cannot be
        retrieved.
    """
    print("Generating Workspace Access Report:")
    # Establish credentials
    creds, _ = google.auth.default()
    auth_req = google.auth.transport.requests.Request()
    creds.refresh(auth_req)
    headers = {"Authorization": f"Bearer {creds.token}"}

    # Collect list of AnVIL workspaces and their auth domains
    response = requests.get(
        url="https://api.firecloud.org/api/workspaces?fields=workspace.authorizationDomain,public,workspace.namespace,workspace.name",
        headers=headers
    )
    if response.status_code != 200:
        # An error payload is not a list of workspaces; stop instead of failing mid-loop
        print("Error retrieving workspaces for user. Exited function.")
        return []
    anvil_ws_list = []
    for workspace in response.json():
        ws = workspace["workspace"]
        if ws["namespace"] != "anvil-datastorage":
            continue
        if ws["authorizationDomain"]:
            for ad in ws["authorizationDomain"]:
                anvil_ws_list.append([ws["name"], ad["membersGroupName"], workspace["public"]])
        else:
            anvil_ws_list.append([ws["name"], None, workspace["public"]])

    # Loop through AnVIL workspaces and collect additional report information
    results = []
    total = len(anvil_ws_list)
    for idx, workspace in enumerate(anvil_ws_list):

        # Initialize
        name = workspace[0]
        ad = workspace[1]
        ad_ws_role = "NO ACCESS"
        anv_admins_role = "No Role"
        anv_devs_role = "No Role"
        dbgap_group_list = []
        public = workspace[2]
        error_list = []
        ad_str = f" - {ad}" if ad else ""
        print(f"Processing {name}{ad_str}: {idx + 1} of {total}")

        if ad:
            # Pull AD group membership
            response = requests.get(
                url=f"https://api.firecloud.org/api/groups/{ad}",
                headers=headers
            )
            if response.status_code != 200:
                error_list.append("Error retrieving auth domain membership")
                anv_admins_role = "Unknown"
                anv_devs_role = "Unknown"
            else:
                group_acl = response.json()
                # Determine roles/presence of anvil-admins, anvil_devs, and dbgap groups
                for admin in group_acl["adminsEmails"]:
                    if admin == "anvil-admins@firecloud.org":
                        anv_admins_role = "Admin"
                    elif admin == "AnVIL_Devs@firecloud.org":
                        anv_devs_role = "Admin"
                    elif "dbgap_anvil" in admin.lower():
                        dbgap_group_list.append(admin)
                for member in group_acl["membersEmails"]:
                    # An Admin role found above takes precedence over Member
                    if member == "anvil-admins@firecloud.org" and anv_admins_role != "Admin":
                        anv_admins_role = "Member"
                    elif member == "AnVIL_Devs@firecloud.org" and anv_devs_role != "Admin":
                        anv_devs_role = "Member"
                    elif "dbgap_anvil" in member.lower():
                        dbgap_group_list.append(member)

            # Pull workspace ACLs and determine AD role
            response = requests.get(
                url=f"https://api.firecloud.org/api/workspaces/anvil-datastorage/{name}/acl",
                headers=headers
            )
            if response.status_code != 200:
                error_list.append("Error retrieving workspace ACL")
                ad_ws_role = "Unknown"
            else:
                ws_acl = response.json()
                for key, val in ws_acl["acl"].items():
                    if ad in key:
                        ad_ws_role = val["accessLevel"]
                        break

        # Record results
        status = "Success" if not error_list else "Errors"
        error_str = "; ".join(error_list)
        results.append([name, ad, ad_ws_role, anv_admins_role, anv_devs_role, dbgap_group_list, public, status, error_str])

    # Display results
    print("\nWorkspace Access Report Results: ")
    results_df = pd.DataFrame(results, columns = ["workspace", "auth_domain", "ad_workspace_role", "anvil_adms_ad_role", "anvil_devs_ad_role", "dbgap_groups_in_ad", "workspace_public", "status", "errors"])
    display(results_df)
    print("\n")
    return results

def gen_dbgap_report(results):
    """Display the dbGaP Terra Group Report.

    For every group named in the module-level ``dbgap_terra_group_list``,
    checks whether the group exists and, if it does, lists the auth domains
    from the workspace access report rows in which it appears.

    Args:
        results: Rows produced by the workspace access report (index 1 is
            the auth domain, index 5 the list of dbGaP groups found in it).
            When empty, existing groups are reported with
            "Unknown - WS Access Report Not Run".

    Returns:
        None. Results are printed/displayed.
    """
    print("Generating dbGaP Terra Group Report:")
    # Establish credentials
    credentials, _ = google.auth.default()
    credentials.refresh(google.auth.transport.requests.Request())
    auth_header = {"Authorization": f"Bearer {credentials.token}"}

    # Loop through user-supplied dbgap Terra groups
    report_rows = []
    for grp in dbgap_terra_group_list:

        # Confirm group exists
        lookup = requests.get(
            url=f"https://sam.dsde-prod.broadinstitute.org/api/groups/v1/{grp}",
            headers=auth_header
        )
        group_exists = lookup.status_code == 200

        # Determine which ADs the group is in, based on the workspace access rows
        if not group_exists:
            auth_domains = []
        elif not results:
            auth_domains = ["Unknown - WS Access Report Not Run"]
        else:
            auth_domains = [
                row[1]
                for row in results
                if row[5]
                for reported_group in row[5]
                if grp.lower() in reported_group.lower()
            ]

        # Record results
        report_rows.append([grp, group_exists, auth_domains])

    print("\ndbGaP Terra Group Report Results: ")
    dbgap_results_df = pd.DataFrame(report_rows, columns=["group", "group_exists", "group_in_auth_domains"])
    display(dbgap_results_df)

#############################################
## Input Parameters
#############################################

# Specify which reports should be run:
run_ws_access_rpt = False
run_dbgap_terra_group_rpt = True

# Specify the groups to include in the dbGaP Terra Group Report:
# (read as a module-level variable by gen_dbgap_report)
dbgap_terra_group_list = [
'dbgap_anvil_phs001642_c1_ccdg_ibd_gru',
'dbgap_anvil_phs001642_c2_ccdg_ibd_hmb',
'dbgap_anvil_phs001642_c3_ccdg_ibd_ds-ibd',
'dbgap_anvil_phs001642_c4_ccdg_ibd_ds-gid',
]

#############################################
## Execution
#############################################

# When the workspace access report is skipped, an empty list is passed on and existing
# groups are reported with "Unknown - WS Access Report Not Run" as their auth domain
if run_ws_access_rpt:
    results = gen_workspace_report()
else:
    results = []
if run_dbgap_terra_group_rpt:
    gen_dbgap_report(results)    


## dbGaP Reports
Run a report that will fetch and parse the dbGaP XML into a more user-friendly view, for the attributes that are currently of interest to our team (the PHS ID, Study Name, Consent Codes, presence of AnVIL as a trusted partner, and dbGaP Status). This report can be run on either a specified set of PHS IDs, or on all PHS IDs present in dbGaP.

In [None]:
#############################################
## Functions
#############################################

def format_phs_id(input_str):
    """Normalize a PHS identifier to the form ``phsNNNNNN``.

    The first run of digits in ``input_str`` (optionally preceded by "phs",
    case-insensitive) is taken as the study number, stripped of leading
    zeros and left-padded with zeros to six digits.

    Args:
        input_str: PHS identifier in any format, e.g. "002324", "phs002502"
            or "phs003249.v1.p1".

    Returns:
        str: The normalized identifier, or "" when ``input_str`` contains no
        digits or is not a string.
    """
    try:
        match = re.search(r"(phs)?0*([0-9]+)", input_str, re.IGNORECASE)
    except TypeError:
        # Non-string input (e.g. None) cannot be searched
        return ""
    if match is None:
        return ""
    return "phs" + match.group(2).zfill(6)

def parse_dbgap_xml(study_dict, user_input, phs_id, limit_to_latest_version):
    """Extract the attributes of interest from a parsed dbGaP study document.

    Args:
        study_dict: dbGaP XML parsed into nested dicts; studies are read from
            ``study_dict["dbgapss"]["Study"]`` (a single dict or a list).
            It is not modified.
        user_input: The identifier as originally supplied; echoed in output.
        phs_id: The normalized PHS identifier; echoed in output.
        limit_to_latest_version: When true, only the first study entry is
            reported.

    Returns:
        list: Rows of ``[user_input, phs_id, accession, study_name,
        consent_codes, anvil_trusted_partner, dbgap_status,
        retrieval_status, errors]``. When no study can be read from
        ``study_dict`` a single "Failure" row is returned.

    Raises:
        KeyError: If a study entry lacks its StudyInfo/Status fields.
    """
    parsed_results = []
    # Locate the study entries; anything other than the expected nesting is a failed retrieval
    try:
        studies = study_dict["dbgapss"]["Study"]
    except (KeyError, TypeError):
        parsed_results.append([user_input, phs_id, "", "", "", "", "", "Failure", "Error retrieving valid dbGaP XML."])
        return parsed_results
    # A single study arrives as a dict; wrap it locally instead of rewriting the caller's data
    if not isinstance(studies, list):
        studies = [studies]

    for study in studies:

        # Pull base information
        accession = study["StudyInfo"]["@accession"]
        name = study["StudyInfo"]["StudyNameEntrez"]
        status = study["Status"]["@title"]

        # Pull consent code information, if available (optional, so absence is not an error)
        consent_codes = ""
        try:
            consent_groups = study["Policy"]["ConsentGroup"]
            if not isinstance(consent_groups, list):
                consent_groups = [consent_groups]
            consent_codes = ", ".join(group["@name"] for group in consent_groups)
        except (KeyError, TypeError):
            pass

        # Pull trusted partner information, if available (optional, so absence is not an error)
        anvil_trusted_partner = False
        try:
            partners = study["Policy"]["TrustedPartners"]["TrustedPartner"]
            if not isinstance(partners, list):
                partners = [partners]
            anvil_trusted_partner = any(tp["@trp_db_name"] == "AnVIL" for tp in partners)
        except (KeyError, TypeError):
            anvil_trusted_partner = False

        # Aggregate results and break loop if only latest version is of interest
        parsed_results.append([user_input, phs_id, accession, name, consent_codes, anvil_trusted_partner, status, "Success", ""])
        if limit_to_latest_version:
            break
    return parsed_results
        
def _fetch_dbgap_study_dict(url, max_attempts=3, timeout=60):
    """Fetch a dbGaP study XML document and return it parsed into a dict.

    Returns None when every request attempt fails or the response body is
    not well-formed XML, so callers never see a stale or undefined response.
    """
    from xml.parsers.expat import ExpatError  # raised by xmltodict.parse on malformed XML

    for _ in range(max_attempts):
        try:
            response = requests.get(url, timeout=timeout)
        except requests.exceptions.RequestException:
            continue
        try:
            return xmltodict.parse(response.content)
        except ExpatError:
            return None
    return None


def _dbgap_rows_for_study(user_input, phs_id, dbgap_url):
    """Retrieve and parse one study, returning its report rows (a Failure row if unavailable)."""
    # An unparseable identifier cannot be looked up; parse_dbgap_xml turns None into a Failure row.
    # The full digit string is sent so over-long identifiers are not truncated to a different study.
    study_dict = _fetch_dbgap_study_dict(dbgap_url + phs_id[3:]) if phs_id else None
    return parse_dbgap_xml(study_dict, user_input, phs_id, limit_to_latest_version)


def gen_dbgap_report(phs_list):
    """Fetch dbGaP study information and display it as a table.

    Reads the module-level ``limit_to_latest_version`` flag to decide whether
    only the first (latest) version of each study is reported.

    Args:
        phs_list: PHS identifiers in any format accepted by
            ``format_phs_id``. When empty, every PHS number from 1 upward is
            queried until 100 consecutive lookups yield no valid study
            (failed requests count towards that limit); studies that fail
            are omitted from the output in this mode.

    Returns:
        None. Results are printed/displayed.
    """
    results = []
    dbgap_url = "https://dbgap.ncbi.nlm.nih.gov/ss/dbgapssws.cgi?request=Study&phs="
    if phs_list:
        for phs in phs_list:
            # Retrieve dbGaP XML if exists, and parse out information of interest
            phs_id = format_phs_id(phs)
            print(f"Processing input '{phs}' ({phs_id})")
            results.extend(_dbgap_rows_for_study(phs, phs_id, dbgap_url))
    else:
        error_count = 0
        print("Processing all existing dbGaP studies. Note that this can take upwards of 45 minutes to run. To collect information on specific studies, list the studies of interest in the phs_list input parameter.")
        print(f"Start time: {datetime.datetime.now()}")
        # Loop through possible phs IDs, only stopping when 100 sequential IDs yield no results
        for phs_number in range(1, 1000000):
            if error_count > 99:
                break

            # Retrieve dbGaP XML if exists, and parse out information of interest
            phs = str(phs_number).zfill(6)
            output = _dbgap_rows_for_study(phs, format_phs_id(phs), dbgap_url)

            # If no valid results return, increment error count, otherwise, reset to zero
            if len(output) == 1 and output[0][7] == "Failure":
                error_count += 1
            else:
                error_count = 0
                results.extend(output)
        print(f"End time: {datetime.datetime.now()}")

    # Display results
    print("\ndbGaP Report Results: ")
    results_df = pd.DataFrame(results, columns = ["user_input", "phs_id", "accession", "study_name", "consent_codes", "anvil_trusted_partner", "dbgap_status", "retrieval_status", "errors"])
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify whether only the latest version of the study should be included in the report:
# (read as a module-level variable by gen_dbgap_report)
limit_to_latest_version = True

# Specify the list of PHS numbers (in any format) to generate the report for, or leave empty to generate for all:
# (each entry is normalized by format_phs_id, which keeps the first run of digits it finds)
phs_list = [
    "002324",
    "phs002502",
    "phs003249.v1.p1"
]

#############################################
## Execution
#############################################

gen_dbgap_report(phs_list)


# AnVIL Workspace Tag Control

## Bulk View Workspace Tags
Can be used to view the workspace tags currently recorded on the specified workspaces.

In [None]:
#############################################
## Functions
#############################################

def retrieve_ws_tags(workspace_list):
    """Display the tags currently recorded on each listed workspace.

    Workspaces are looked up in the ``anvil-datastorage`` namespace. Tags are
    shown as a comma-separated list of single-quoted values, e.g.
    ``'tag_a', 'tag_b'``.

    Args:
        workspace_list: Iterable of workspace names.

    Returns:
        None. Results are printed/displayed.
    """
    # Loop through and process workspaces
    results = []
    for workspace in workspace_list:

        # Initialize
        print(f"Retrieving workspace tags for {workspace}.")
        error_list = []
        tag_str = ""

        # Establish credentials
        creds, _ = google.auth.default()
        auth_req = google.auth.transport.requests.Request()
        creds.refresh(auth_req)

        # Retrieve workspace tags
        response = requests.get(
            url=f"https://api.firecloud.org/api/workspaces/anvil-datastorage/{workspace}/tags",
            headers={"Authorization": f"Bearer {creds.token}"}
        )
        if response.status_code != 200:
            error_list.append("Error retrieving workspace tags.")
        else:
            try:
                # join avoids the dangling trailing comma left by repeated concatenation
                tag_str = ", ".join(f"'{tag}'" for tag in response.json())
            except (ValueError, TypeError):
                # Body was not JSON, or not an iterable of tags
                error_list.append("Error formatting workspace tags.")

        # Record status
        status = "Success" if not error_list else "Failure"
        error_str = "; ".join(error_list)
        results.append([workspace, tag_str, status, error_str])

    # Display results
    print("\nResults:")
    results_df = pd.DataFrame(results, columns = ["workspace", "tags", "retrieval_status", "errors"])
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify the list of workspaces to view the workspace tags for:
# (workspace names only; they are looked up under the "anvil-datastorage" namespace)
workspace_list = [
'AnVIL_ccdg_asc_ndd_daly_talkowski_aleksic_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_barbosa_asd_exome',
]

#############################################
## Execution
#############################################

retrieve_ws_tags(workspace_list)


## Bulk Update Workspace Tags
Can be used to update the workspace tags recorded on the specified workspaces. This includes the ability to both add and remove workspace tags in bulk. 

In [None]:
#############################################
## Functions
#############################################

def update_ws_tags(workspace_list, tags_to_remove_list, tags_to_remove_regex_list, tags_to_add_list):
    """Bulk-edit the tags on each listed workspace and display a summary table.

    For every workspace the steps run in this order:
      1. delete the tags named in ``tags_to_remove_list``;
      2. fetch the current tags and delete every one matched (``re.search``)
         by any pattern in ``tags_to_remove_regex_list``;
      3. add the tags in ``tags_to_add_list``.
    A step is skipped when its input list is empty. Any response whose status
    code is not 200 is recorded as an error for that workspace, and processing
    continues with the next step.

    Args:
        workspace_list: Workspace names (looked up under "anvil-datastorage").
        tags_to_remove_list: Exact tags to delete.
        tags_to_remove_regex_list: Regex patterns selecting existing tags to delete.
        tags_to_add_list: Tags to add.

    Returns:
        None. A progress line is printed per workspace and a results table
        (workspace, update_status, errors) is displayed at the end.
    """
    outcome_rows = []
    for workspace in workspace_list:

        print(f"Processing workspace tag updates for {workspace}.")
        problems = []

        # Credentials are obtained and refreshed anew for every workspace
        creds, _ = google.auth.default()
        creds.refresh(google.auth.transport.requests.Request())

        # All three steps talk to the same endpoint with the same auth header
        tags_url = f"https://api.firecloud.org/api/workspaces/anvil-datastorage/{workspace}/tags"
        auth_headers = {"Authorization": f"Bearer {creds.token}"}

        # Step 1: exact-match removals
        if tags_to_remove_list:
            list_delete_resp = requests.delete(url=tags_url, headers=auth_headers, json=tags_to_remove_list)
            if list_delete_resp.status_code != 200:
                problems.append("Error removing workspace tags by list.")

        # Step 2: pattern-based removals
        if tags_to_remove_regex_list:
            current_resp = requests.get(url=tags_url, headers=auth_headers)
            if current_resp.status_code == 200:
                current_tags = current_resp.json()
            else:
                problems.append("Error retrieving workspace tags.")
                current_tags = []

            # Keep each existing tag that at least one pattern matches
            matched_tags = [
                tag for tag in current_tags
                if any(re.search(pattern, tag) for pattern in tags_to_remove_regex_list)
            ]

            if matched_tags:
                regex_delete_resp = requests.delete(url=tags_url, headers=auth_headers, json=matched_tags)
                if regex_delete_resp.status_code != 200:
                    problems.append("Error removing workspace tags by regex.")

        # Step 3: additions
        if tags_to_add_list:
            add_resp = requests.patch(url=tags_url, headers=auth_headers, json=tags_to_add_list)
            if add_resp.status_code != 200:
                problems.append("Error adding workspace tags by list.")

        # One summary row per workspace
        outcome_rows.append([workspace, "Failure" if problems else "Success", "; ".join(problems)])

    # Display results
    print("\nResults:")
    display(pd.DataFrame(outcome_rows, columns=["workspace", "update_status", "errors"]))

#############################################
## Input Parameters
#############################################

# Specify the list of workspaces to apply the below changes to:
# (workspace names only; update_ws_tags looks each one up under the
# "anvil-datastorage" namespace)
workspace_list = [
'AnVIL_ccdg_asc_ndd_daly_talkowski_aleksic_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_barbosa_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_passos-bueno_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_chung_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_gargus_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_goethe_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_hertz-picciotto_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_hultman_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_kolevzon_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_menashe_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_minshew_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_ac-boston_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_AGRE_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_control_NIMH_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_TASC_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_herman_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_parellada_asd_exome',
# NOTE(review): the next name ends with a trailing underscore, unlike every
# other entry -- confirm it matches the actual workspace name.
'AnVIL_ccdg_asc_ndd_daly_talkowski_pericak-vance_asd_exome_',
'AnVIL_ccdg_asc_ndd_daly_talkowski_persico_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_renieri_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_weiss_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_brusco_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_domenici_asd_exome',
'AnVIL_ccdg_asc_ndd_daly_talkowski_hertz-picciotto_asd_wgs',
'AnVIL_ccdg_asc_ndd_daly_talkowski_kolevzon_asd_wgs',
'AnVIL_ccdg_asc_ndd_daly_talkowski_pericak-vance_asd_wgs',
]

# Specify the exact tags that should be removed from the workspace (if any):
# (an empty list skips the exact-match removal step)
tags_to_remove_list = []

# Specify the list of regex expressions that should be used to identify tags to remove (if any):
# (each pattern is applied with re.search to every existing tag; matching is
# case-sensitive, hence both the "dbGap" and "dbGaP" spellings)
tags_to_remove_regex_list = ["^dbGap:", "^dbGaP:"]

# Specify the tags that should be added to the workspace (if any):
# (additions are applied after both removal steps, so a tag that matches a
# removal pattern above is still present afterwards if it is listed here)
tags_to_add_list = ["dbGaP: phs002502", "Study Accession: phs002502.v1.p1", "Consortium: CCDG"]

#############################################
## Execution
#############################################

# Prints a progress line per workspace, then displays a results table with the
# columns: workspace, update_status, errors.
update_ws_tags(workspace_list, tags_to_remove_list, tags_to_remove_regex_list, tags_to_add_list)


# AnVIL Workspace Attribute Control

## Look Up Workspace Attribute Names
This is a quick way to translate the common/display names of properties that appear in workspaces into the underlying attribute property names needed to update those attributes. For example, "Study Design" in a workspace is the display name for the "library:studyDesign" property. Each search term is matched against the display names as a case-insensitive substring, ignoring spaces.

In [None]:
#############################################
## Functions
#############################################

def look_up_ws_attr(search_term_list):
    """Display the schema properties whose display title contains each search term.

    Downloads the library attribute-definitions JSON and, for every search
    term, lists each property whose "title" contains the term. The comparison
    ignores case and spaces. Properties without a string "title" are skipped.

    Args:
        search_term_list: Display-name fragments (strings) to search for.

    Returns:
        None. A table with the columns search_term, property and
        property_title is displayed.

    Raises:
        requests.HTTPError: If the schema download returns an error status.
    """
    attr_schema_url = "https://raw.githubusercontent.com/broadinstitute/firecloud-orchestration/develop/src/main/resources/library/attribute-definitions.json"
    response = requests.get(attr_schema_url)
    # Fail with a clear HTTP error instead of an obscure JSON/KeyError further down
    response.raise_for_status()
    attr_schema = json.loads(response.text)

    # Normalise every usable title once, rather than once per search term.
    # Entries lacking a string "title" are skipped explicitly (previously a
    # bare "except: pass" hid them along with any other error).
    titled_properties = []
    for key, val in attr_schema["properties"].items():
        title = val.get("title") if isinstance(val, dict) else None
        if isinstance(title, str):
            titled_properties.append((key, title, title.replace(" ", "").lower()))

    results = []
    for term in search_term_list:
        lookup_str = term.replace(" ", "").lower()
        results.extend(
            [term, key, title]
            for key, title, normalized_title in titled_properties
            if lookup_str in normalized_title
        )

    print("Results:")
    results_df = pd.DataFrame(results, columns=["search_term", "property", "property_title"])
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify a list of attribute names to look up:
# (each term is compared against property titles ignoring case and spaces;
# a property is listed when the term occurs anywhere in its title)
search_term_list = ["Consent"]

#############################################
## Execution
#############################################

# Displays a table with the columns: search_term, property, property_title.
look_up_ws_attr(search_term_list)


## Update Workspace Attributes
List the workspace attributes that should be updated, their updated values, and the update behavior. The behavior specified will have the following effects:
* __UPDATE__ will only update the workspace attributes specified by the user, and leave all other existing values as is
* __REPLACE__ will remove all existing workspace attributes and replace them with the workspace attributes specified by the user

In [None]:
#############################################
## Functions
#############################################

def get_attribute(ws_attribute):
    """Look up a workspace attribute in the library attribute schema.

    The attribute-definitions JSON is downloaded on every call and its
    "properties" mapping is checked for ``ws_attribute``.

    Args:
        ws_attribute: Attribute property name, e.g. "library:dataUseRestriction".

    Returns:
        ``(True, <type>)`` where ``<type>`` is the "type" entry of the matching
        property definition, or ``(False, "")`` when the schema has no
        (non-empty) definition for the attribute.
    """
    schema_location = "https://raw.githubusercontent.com/broadinstitute/firecloud-orchestration/develop/src/main/resources/library/attribute-definitions.json"
    schema_properties = json.loads(requests.get(schema_location).text)["properties"]

    definition = schema_properties.get(ws_attribute)
    if not definition:
        # Unknown attribute (or an empty definition)
        return False, ""
    return True, definition["type"]

def _convert_ws_attr_value(attr_entry, attr_type, attr_value):
    """Coerce a user-supplied value to the schema type of a workspace attribute.

    Returns:
        ``(True, converted_value)`` on success, or ``(False, error_message)``
        when the value cannot be converted or the schema type is not one of
        string, array, integer or boolean.
    """
    if attr_type == "string":
        return True, str(attr_value)
    if attr_type == "array":
        # A list/tuple is stored element by element; wrapping it whole would
        # submit a single element containing the list's repr.
        if isinstance(attr_value, (list, tuple)):
            return True, [str(item) for item in attr_value]
        return True, [str(attr_value)]
    if attr_type == "integer":
        try:
            return True, int(attr_value)
        except (TypeError, ValueError, OverflowError):
            return False, f"Error converting value for attribute '{attr_entry}' to integer"
    if attr_type == "boolean":
        normalized = str(attr_value).lower()
        if normalized == "true":
            return True, True
        if normalized == "false":
            return True, False
        return False, f"Error converting value for attribute '{attr_entry}' to boolean"
    # Report types this function cannot handle instead of silently dropping
    # the requested update.
    return False, f"Attribute '{attr_entry}' has unsupported type '{attr_type}'"


def update_ws_attr(attr_updates, update_behavior):
    """Apply library attribute updates to workspaces and display a summary table.

    For each workspace, a base attribute document is chosen according to
    ``update_behavior``, the requested attributes are validated against the
    attribute schema (via ``get_attribute``) and converted to their schema
    type, and the resulting document is submitted with a PUT. Nothing is
    submitted for a workspace once any error has been recorded for it.

    Args:
        attr_updates: Mapping of the form ``{"workspace": {"attribute": value}}``.
            For array-typed attributes, ``value`` may be a single value or a
            list/tuple of values.
        update_behavior: "UPDATE" to start from the workspace's existing
            attributes, or "REPLACE" to start from an empty document.

    Returns:
        None. Progress and the attribute JSON are printed per workspace and a
        results table (workspace, update_status, errors) is displayed.
    """
    results = []
    # Each get_attribute call downloads the schema, so remember the answer per
    # attribute name for the duration of this run.
    attr_lookup_cache = {}

    # Loop through and process workspaces
    for workspace, requested_attrs in attr_updates.items():

        # Initialize
        print(f"Processing workspace attribute updates for {workspace}.")
        error_list = []

        # Establish credentials
        creds, project = google.auth.default()
        auth_req = google.auth.transport.requests.Request()
        creds.refresh(auth_req)

        # Set base attribute json according to specified update behavior
        if update_behavior == "UPDATE":
            # Fetch existing attributes for workspace
            response = requests.get(
                url=f"https://api.firecloud.org/api/library/anvil-datastorage/{workspace}/metadata",
                headers={"Authorization": f"Bearer {creds.token}"}
            )
            if response.status_code != 200:
                error_list.append("Error retrieving workspace attributes")
            else:
                attr_json = response.json()
                print("Original attribute JSON:")
                print(attr_json)
        elif update_behavior == "REPLACE":
            attr_json = {}
        else:
            error_list.append(f"Unknown update behavior '{update_behavior}' specified (must be UPDATE or REPLACE)")

        # Update base attribute json with user input
        if not error_list:
            for attr_entry, attr_value in requested_attrs.items():
                if attr_entry not in attr_lookup_cache:
                    attr_lookup_cache[attr_entry] = get_attribute(attr_entry)
                valid, attr_type = attr_lookup_cache[attr_entry]
                if not valid:
                    error_list.append(f"Attribute '{attr_entry}' is not recognized")
                    continue
                converted, payload = _convert_ws_attr_value(attr_entry, attr_type, attr_value)
                if converted:
                    attr_json[attr_entry] = payload
                else:
                    error_list.append(payload)
            if update_behavior == "UPDATE":
                print("\nUpdated attribute JSON:")
            else:
                print("Replacement attribute JSON:")
            print(attr_json)
            print("\n")

        # Submit updated workspace attributes to workspace (only when error-free)
        if not error_list:
            response = requests.put(
                url=f"https://api.firecloud.org/api/library/anvil-datastorage/{workspace}/metadata?validate=false",
                headers={"Authorization": f"Bearer {creds.token}"},
                json=attr_json
            )
            if response.status_code != 200:
                error_list.append("Error updating workspace attributes")

        # Record status
        status = "Success" if not error_list else "Errors"
        error_str = "; ".join(error_list)
        results.append([workspace, status, error_str])

    # Display results
    print("Results:")
    results_df = pd.DataFrame(results, columns=["workspace", "update_status", "errors"])
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify the behavior of the update (UPDATE or REPLACE), based on the definitions above:
# (any other value is reported as an error for every workspace and nothing is submitted)
update_behavior = "UPDATE"

# Specify the workspace attribute updates, using the form {"workspace": {"attribute": "value"}}:
# (attribute names must be schema property names such as "library:...";
# names not found in the schema are reported as "not recognized")
attr_updates = {
    "anvil_ccdg_broad_ai_ibd_daly_moayyedi_imagine_gsa": {
        "library:dataUseRestriction": "GRU"
    },
    "anvil_ccdg_broad_ai_ibd_daly_lewis_sparc_gsa": {
        "library:dataUseRestriction": "GRU"
    },
}

#############################################
## Execution
#############################################

# Prints the attribute JSON per workspace, then displays a results table with
# the columns: workspace, update_status, errors.
update_ws_attr(attr_updates, update_behavior)
