In [None]:
# Version History
# 05/16/2023 9:48am - Nate Calvanese - First version created and released
# 06/1/2023 11:02am - Nate Calvanese - Updated Bulk Manage User Access to Workspaces to allow the user to remove themselves from workspaces
# 06/12/2023 12:18pm - Rachel Kutner - Add File Management section with gsutils script to sync files between folders
# 06/21/2023 11:30am - Nate Calvanese - Fixed a bug in the workspace access reports and added more dbGaP groups
# 07/13/2023 2:08pm - Nate Calvanese - Added a sub-section for locking workspaces
# 07/17/2023 4:35pm - Nate Calvanese - Updated other_workspace_tags column to all_workspace_tags in workspace access report 
# 12/20/2023 11:44am - Nate Calvanese - Added section 1.7 to help bulk update workspace permissions for release
# 3/11/2024 12:52pm - Data Ops - Updated workspace creation to work for Azure workspaces as well
# 4/16/2024 9:59am - Nate Calvanese - Added section 2.2 for bulk workspace deletion
# 4/17/2024 8:38pm - Nate Calvanese - Added Azure support to section 1.1
# 4/19/2024 9:45pm - Nate Calvanese - Added section 1.8 to remove workspace permissions prior to deletion
# 4/23/2024 8:23pm - Nate Calvanese - Added section 2.3 to unpublish workspaces prior to deletion (plus some bug fixes)
# 5/3/2024 11:18am - Nate Calvanese - Updated workspace access reports in 1.4 to include Azure workspaces
# 6/11/2024 1:27pm - Nate Calvanese - Updated workspace creation to include data access controls (effectively "auth domains") for Azure workspaces
# 6/14/2024 1:26pm - Nate Calvanese - Added section 1.9 to allow addition of "auth domains" to existing Azure workspaces
# 9/6/2024 10:47am - Nate Calvanese - Updated section 1.5 to include DAC information from dbGaP
# 9/9/2024 10:20pm - Nate Calvanese - Added section 1.10 to create snapshot access report to mirror workspace report in 1.4
# 10/15/2024 8:56am - Nate Calvanese - Moved content of section 1.10 into 1.4 and expanded data access reports produced in 1.4
# 10/17/2024 3:32pm - Nate Calvanese - Significant changes to add support for snapshots (see ANVIL-716 in Jira for details)
# 10/29/2024 8:34pm - Nate Calvanese - Updated section 1.6 (Release permissions) to turn on requester pays for GCP workspaces programmatically
# 10/29/2024 10:30pm - Nate Calvanese - Addition of a new section 1.3 (bumping all other 1.x numbers down) to bulk create and assign groups
# 10/30/2024 9:46am - Nate Calvanese - Updated section 1.4 to include an All DUOS Registrations Report
# 1/6/2025 12:43pm - Nate Calvanese - Updated section 1.4 to pull additional information for snapshots
# 1/15/2025 9:46am - Nate Calvanese - Updated section 1.4 to pull requester pays information for workspaces
# 2/13/2025 9:07am - Nate Calvanese - Updated section 2.1 to return the workspace bucket as part of the output
# 2/19/2025 8:39am - Nate Calvanese - Updated section 1.1 to support access for TDR datasets
# 2/20/2025 1:15pm - Nate Calvanese - Updated sections 1.1 and 1.2 to support a buffer time between requests
# 2/23/2025 9:42pm - Nate Calvanese - Updated DUOS and snapshot reports in section 1.4 to pull additional information, updated section 5.1 to allow consent_name to be updated
# 2/26/2025 2:38pm - Nate Calvanese - Tweaked elevated permissions flagging to ignore "public-workspace-creators@firecloud.org"
# 2/27/2025 11:17am - Nate Calvanese - Updated section 1.4 to explicitly capture anvil-admins role on workspaces and snapshots
# 2/28/2025 12:11pm - Nate Calvanese - Updated section 1.1 to not inadvertently remove anvil-admins from its roles; updated section 5.1 to include a dataset_ticket property
# 3/3/2025 12:20pm - Nate Calvanese - Fixed bug in section 5.1's output logging


In [None]:
# Install necessary modules (one time per cloud environment)
#!pip install --upgrade xmltodict jira

In [None]:
# Imports
import google.auth
import google.auth.transport.requests
import requests
import pandas as pd
import json 
import re
import xmltodict
import datetime
from jira import JIRA
from time import sleep

# Display options: each (option name, value) pair below is applied to pandas in order
for _display_option, _display_value in [
    ('display.max_rows', None),
    ('display.max_columns', None),
    ("display.max_colwidth", None),
    ('display.width', 1000),
    ('display.colheader_justify', 'center'),
    ('display.precision', 3),
]:
    pd.set_option(_display_option, _display_value)

__NOTE:__ 
See the [anvil_ingest_tools Config Builder sheet](https://docs.google.com/spreadsheets/d/1s32qKSuPZHuwpdQZNfcg-k7yFv87GWhyhrZ98AT5ecQ/edit?gid=0#gid=0) for help in structuring configurations for the below tools.

# AnVIL Resource Access Control

## Bulk Manage User Access to AnVIL Resources (and associated Auth Domains)
List the users, the role they should have, and the resources (type and identifier) those user-role combinations should apply to. The roles specified will have the following effects:

For workspaces:
* __READER__ will add the user as a reader on the workspace and a member on any associated authorization domains
* __WRITER__ will add the user as a writer on the workspace and a member on any associated authorization domains
* __OWNER__ will add the user as an owner on the workspace and a member on any associated authorization domains
* __NO ACCESS__ will remove the user from both the workspace and any associated authorization domains

For snapshots:
* __READER__ will add the user as a reader on the snapshot and a member on any associated authorization domains
* __STEWARD__ will add the user as a steward on the snapshot and a member on any associated authorization domains
* __NO ACCESS__ will remove the user from both the snapshot and any associated authorization domains

For datasets:
* __READER__ will add the user as a snapshot creator on the dataset and a member on any associated authorization domains
* __CUSTODIAN__ will add the user as a custodian on the dataset and a member on any associated authorization domains
* __STEWARD__ will add the user as a steward on the dataset and a member on any associated authorization domains
* __NO ACCESS__ will remove the user from both the dataset and any associated authorization domains

In all cases, when "__NO ACCESS__" is utilized for user "anvil-admins@firecloud.org", this user will __NOT__ be removed from its OWNER/STEWARD role on the various cloud resources, nor will it be removed from the associated authorization domains, as this user needs to be in place to manage resources for AnVIL.

In [None]:
#############################################
## Functions
#############################################

# Users that keep their auth domain memberships and OWNER/STEWARD roles even when
# "NO ACCESS" is requested for them.
_ACCESS_EXCEPTION_USERS = ["anvil-admins@firecloud.org"]


def _add_user_to_auth_domains(creds, user, ad_list, error_list):
    """Add `user` as a member of every group in `ad_list`; failures are appended to `error_list`."""
    for auth_domain in ad_list:
        response = requests.put(
            url=f"https://api.firecloud.org/api/groups/{auth_domain}/member/{user}",
            headers={"Authorization": f"Bearer {creds.token}"}
        )
        if response.status_code != 204:
            error_list.append(f"Error adding to auth domain ({auth_domain})")


def _remove_user_from_auth_domains(creds, user, ad_list, error_list):
    """Remove `user` (admin and/or member role) from every group in `ad_list`, unless the user is in the exception list."""
    if user in _ACCESS_EXCEPTION_USERS:
        print(f"User {user} in exception list. NOT removing from auth domain.")
        return
    for auth_domain in ad_list:
        try:
            ad_membership = requests.get(
                url=f"https://api.firecloud.org/api/groups/{auth_domain}",
                headers={"Authorization": f"Bearer {creds.token}"}
            ).json()
            # A user may hold both roles on the group, so collect and remove each one
            role_list = []
            if user in ad_membership["adminsEmails"]:
                role_list.append("admin")
            if user in ad_membership["membersEmails"]:
                role_list.append("member")
            for ad_role in role_list:
                response = requests.delete(
                    url=f"https://api.firecloud.org/api/groups/{auth_domain}/{ad_role}/{user}",
                    headers={"Authorization": f"Bearer {creds.token}"}
                )
                if response.status_code != 204:
                    error_list.append(f"Error removing from auth domain ({auth_domain})")
        except Exception:
            error_list.append(f"Error removing from auth domain ({auth_domain})")


def _manage_workspace_access(creds, billing_object, resource, user, role, error_list):
    """Apply `role` for `user` on workspace `billing_object`/`resource` and its auth domains."""
    auth_header = {"Authorization": f"Bearer {creds.token}"}

    # Pull auth domains and workspace resource ID from workspace attributes
    ad_list = []
    resource_id = ""
    try:
        ws_attributes = requests.get(
            url=f"https://api.firecloud.org/api/workspaces/{billing_object}/{resource}?fields=workspace.attributes,workspace.authorizationDomain,workspace.googleProject,workspace.bucketName,workspace.workspaceId",
            headers=auth_header
        ).json()
        resource_id = ws_attributes["workspace"]["workspaceId"]
        for ad in ws_attributes["workspace"]["authorizationDomain"]:
            ad_list.append(ad["membersGroupName"])
    except Exception:
        error_list.append("Error accessing workspace.")

    # For each auth domain, add/remove the user as necessary
    if role in ["READER", "WRITER", "OWNER"]:
        _add_user_to_auth_domains(creds, user, ad_list, error_list)
    elif role == "NO ACCESS":
        _remove_user_from_auth_domains(creds, user, ad_list, error_list)

    # Pull existing workspace ACLs
    response = requests.get(
        url=f"https://api.firecloud.org/api/workspaces/{billing_object}/{resource}/acl",
        headers=auth_header
    )
    if response.status_code != 200:
        error_list.append("Error retrieving workspace ACL")
        ws_acl = {"acl": {}}
    else:
        ws_acl = response.json()

    if role != "NO ACCESS":
        # Determine if auth domain membership covers the necessary workspace access:
        # an ACL entry for an auth domain group at the requested level or higher is enough.
        covering_levels = {
            "READER": ["READER", "WRITER", "OWNER"],
            "WRITER": ["WRITER", "OWNER"],
            "OWNER": ["OWNER"],
        }[role]
        # NOTE(review): this is a substring match of the auth domain name against the ACL
        # key, so a group whose name contains the auth domain name also matches - confirm
        # this is acceptable.
        ad_covers_ws_acl = any(
            auth_domain in key and val["accessLevel"] in covering_levels
            for auth_domain in ad_list
            for key, val in ws_acl["acl"].items()
        )
        if ad_covers_ws_acl:
            return

        # If auth domain membership doesn't cover needed workspace access, add user role to workspace
        if user in _ACCESS_EXCEPTION_USERS and role not in ["OWNER"]:
            print(f"User {user} in exception list. NOT reducing permissions.")
            return
        payload = [{
            "email": user,
            "accessLevel": role,
            "canShare": False,
            "canCompute": False
        }]
        response = requests.patch(
            url=f"https://api.firecloud.org/api/workspaces/{billing_object}/{resource}/acl",
            headers=auth_header,
            json=payload
        )
        if response.status_code != 200:
            error_list.append("Error updating workspace ACL")
        return

    # Remove users from workspace where role is "NO ACCESS"
    acl_entry = ws_acl["acl"].get(user)
    if not acl_entry:
        # User is not directly on the workspace, nothing to remove
        return
    if user in _ACCESS_EXCEPTION_USERS and acl_entry.get("accessLevel") in ["OWNER"]:
        # Exception users keep their OWNER role, mirroring the STEWARD protection on snapshots/datasets
        print(f"User {user} in exception list. NOT removing from workspace.")
        return

    # Determine if the user is yourself, and remove yourself if so
    self_info = requests.get(
        url=f"https://sam.dsde-prod.broadinstitute.org/register/user/v2/self/info",
        headers=auth_header
    ).json()
    if self_info["userEmail"] == user:
        response = requests.delete(
            url=f"https://sam.dsde-prod.broadinstitute.org/api/resources/v2/workspace/{resource_id}/leave",
            headers=auth_header
        )
        if response.status_code != 204:
            error_list.append("Error updating workspace ACL")
    # If user is not yourself, remove from workspace
    else:
        payload = [{
            "email": user,
            "accessLevel": role,
            "canShare": False,
            "canCompute": False
        }]
        response = requests.patch(
            url=f"https://api.firecloud.org/api/workspaces/{billing_object}/{resource}/acl",
            headers=auth_header,
            json=payload
        )
        if response.status_code != 200:
            error_list.append("Error updating workspace ACL")


def _manage_snapshot_access(creds, resource, user, role, error_list):
    """Apply `role` for `user` on snapshot `resource` and its auth domains."""
    auth_header = {"Authorization": f"Bearer {creds.token}"}

    # Pull snapshot auth domains and policies
    ad_list = []
    ss_steward_list = []
    ss_reader_list = []
    try:
        snapshot_policies = requests.get(
            url=f"https://data.terra.bio/api/repository/v1/snapshots/{resource}/policies",
            headers=auth_header
        ).json()
        # Treat a missing/null auth domain list as "no auth domains" so policies are still read
        ad_list = snapshot_policies.get("authDomain") or []
        for policy in snapshot_policies["policies"]:
            if policy["name"] == "steward":
                ss_steward_list.extend(policy["members"])
            elif policy["name"] == "reader":
                ss_reader_list.extend(policy["members"])
    except Exception:
        error_list.append("Error accessing snapshot policies.")

    # For each auth domain, add/remove the user as necessary
    if role in ["READER", "STEWARD"]:
        _add_user_to_auth_domains(creds, user, ad_list, error_list)
    elif role == "NO ACCESS":
        _remove_user_from_auth_domains(creds, user, ad_list, error_list)

    if role != "NO ACCESS":
        # Determine if auth domain membership covers the necessary snapshot access:
        # reader access is covered by a reader or steward entry, steward only by a steward entry.
        if role == "READER":
            covering_members = ss_reader_list + ss_steward_list
        else:
            covering_members = ss_steward_list
        ad_covers_ss_acl = any(
            auth_domain in member
            for auth_domain in ad_list
            for member in covering_members
        )

        # If auth domain membership doesn't cover needed snapshot access, add user role to snapshot
        if not ad_covers_ss_acl:
            policy = role.lower()
            response = requests.post(
                url=f"https://data.terra.bio/api/repository/v1/snapshots/{resource}/policies/{policy}/members",
                headers=auth_header,
                json={"email": user}
            )
            if response.status_code != 200:
                error_list.append("Error updating snapshot policies")
        return

    # Remove users from snapshot where role is "NO ACCESS". Every policy is checked
    # independently so a user holding several policies is removed from all of them.
    for policy_name, members in [("reader", ss_reader_list), ("steward", ss_steward_list)]:
        if user not in members:
            continue
        if policy_name == "steward" and user in _ACCESS_EXCEPTION_USERS:
            print(f"User {user} in exception list. NOT removing from steward policy.")
            continue
        response = requests.delete(
            url=f"https://data.terra.bio/api/repository/v1/snapshots/{resource}/policies/{policy_name}/members/{user}",
            headers=auth_header
        )
        if response.status_code != 200:
            error_list.append("Error updating snapshot policies")


def _manage_dataset_access(creds, resource, user, role, error_list):
    """Apply `role` for `user` on dataset `resource` (READER maps to the snapshot_creator policy)."""
    auth_header = {"Authorization": f"Bearer {creds.token}"}

    # Get existing dataset policies
    ds_custodian_list = []
    ds_steward_list = []
    ds_reader_list = []
    try:
        dataset_policies = requests.get(
            url=f"https://data.terra.bio/api/repository/v1/datasets/{resource}/policies",
            headers=auth_header
        ).json()
        for policy in dataset_policies["policies"]:
            if policy["name"] == "custodian":
                ds_custodian_list.extend(policy["members"])
            elif policy["name"] == "steward":
                ds_steward_list.extend(policy["members"])
            elif policy["name"] == "snapshot_creator":
                ds_reader_list.extend(policy["members"])
    except Exception:
        error_list.append("Error accessing dataset policies.")

    # Add user role to dataset
    if role != "NO ACCESS":
        policy = role.lower()
        policy = "snapshot_creator" if policy == "reader" else policy
        response = requests.post(
            url=f"https://data.terra.bio/api/repository/v1/datasets/{resource}/policies/{policy}/members",
            headers=auth_header,
            json={"email": user}
        )
        if response.status_code != 200:
            error_list.append("Error updating dataset policies")
        return

    # Remove users from dataset where role is "NO ACCESS". Every policy is checked
    # independently so a user holding several policies is removed from all of them.
    removal_candidates = [
        ("snapshot_creator", ds_reader_list),
        ("custodian", ds_custodian_list),
        ("steward", ds_steward_list),
    ]
    for policy_name, members in removal_candidates:
        if user not in members:
            continue
        if policy_name == "steward" and user in _ACCESS_EXCEPTION_USERS:
            print(f"User {user} in exception list. NOT removing from steward policy.")
            continue
        response = requests.delete(
            url=f"https://data.terra.bio/api/repository/v1/datasets/{resource}/policies/{policy_name}/members/{user}",
            headers=auth_header
        )
        if response.status_code != 200:
            error_list.append("Error updating dataset policies")


def manage_user_access(user_resource_role_list, request_buffer):
    """
    Bulk add or remove user access on workspaces, snapshots, and datasets.

    Args:
        user_resource_role_list: list of dicts, each with the keys "resource",
            "resource_type" (workspace/snapshot/dataset), "cloud_provider" (gcp/azure),
            "user", and "role". Allowed roles depend on the resource type:
            workspace - READER, WRITER, OWNER, NO ACCESS; snapshot - READER, STEWARD,
            NO ACCESS; dataset - READER, STEWARD, CUSTODIAN, NO ACCESS.
        request_buffer: seconds to sleep between processed entries (falsy or <= 0 for none).

    Returns:
        None. A DataFrame with one row per entry (status plus any errors) is displayed.

    Entries that fail validation are reported as failures and skipped. Errors raised while
    processing an entry are recorded on that entry so the remaining entries still run.
    Users in the exception list keep their auth domain memberships and OWNER/STEWARD roles
    when "NO ACCESS" is requested.
    """
    # Set validation values
    acceptable_resource_types = ["workspace", "snapshot", "dataset"]
    acceptable_cloud_types = ["gcp", "azure"]
    acceptable_roles = {
        "workspace": ["READER", "WRITER", "OWNER", "NO ACCESS"],
        "snapshot": ["READER", "STEWARD", "NO ACCESS"],
        "dataset": ["READER", "STEWARD", "CUSTODIAN", "NO ACCESS"],
    }
    # Billing object for each (cloud provider, resource type) combination
    billing_objects = {
        ("gcp", "workspace"): "anvil-datastorage",
        ("gcp", "snapshot"): "e0e03e48-5b96-45ec-baa4-8cc1ebf74c61",
        ("gcp", "dataset"): "e0e03e48-5b96-45ec-baa4-8cc1ebf74c61",
        ("azure", "workspace"): "AnVILDataStorage_Azure",
        ("azure", "snapshot"): "9ee23bed-b46c-4561-9103-d2a723113f7f",
        ("azure", "dataset"): "9ee23bed-b46c-4561-9103-d2a723113f7f",
    }

    # Loop through and process user access
    results = []
    total_entry_count = len(user_resource_role_list)
    for idx, user_resource_role in enumerate(user_resource_role_list):

        # Set and validate variables
        entry_number = idx + 1
        resource = user_resource_role["resource"]
        resource_type = user_resource_role["resource_type"]
        cloud_provider = user_resource_role["cloud_provider"]
        user = user_resource_role["user"]
        role = user_resource_role["role"]
        error_list = []
        print(f"Processing user access updates for {resource_type} {resource} - {user} ({role}). Entry {entry_number} of {total_entry_count}.")

        validation_error = None
        if resource_type not in acceptable_resource_types:
            resource_types = ", ".join(acceptable_resource_types)
            validation_error = f"Resource type must be one of {resource_types}. Skipping user access update."
        elif role not in acceptable_roles[resource_type]:
            allowed_roles = ", ".join(acceptable_roles[resource_type])
            validation_error = f"Role for {resource_type} access update must be one of {allowed_roles}. Skipping user access update."
        elif cloud_provider not in acceptable_cloud_types:
            cloud_types = ", ".join(acceptable_cloud_types)
            validation_error = f"Cloud provider must be one of {cloud_types}. Skipping user access update."
        if validation_error:
            results.append([resource, resource_type, cloud_provider, user, role, "Failure", validation_error])
            continue
        billing_object = billing_objects[(cloud_provider, resource_type)]

        # Establish credentials (refreshed per entry so long runs don't use a stale token)
        creds, _ = google.auth.default()
        auth_req = google.auth.transport.requests.Request()
        creds.refresh(auth_req)

        # Apply the update; an unexpected failure is recorded on this entry rather than
        # aborting the batch and losing the results collected so far
        try:
            if resource_type == "workspace":
                _manage_workspace_access(creds, billing_object, resource, user, role, error_list)
            elif resource_type == "snapshot":
                _manage_snapshot_access(creds, resource, user, role, error_list)
            elif resource_type == "dataset":
                _manage_dataset_access(creds, resource, user, role, error_list)
        except Exception as err:
            error_list.append(f"Unexpected error processing {resource_type} access: {err}")

        # Record status
        status = "Success" if not error_list else "Failure"
        error_str = "; ".join(error_list)
        results.append([resource, resource_type, cloud_provider, user, role, status, error_str])

        # If request buffer time specified, sleep that amount of time
        if request_buffer and request_buffer > 0 and entry_number < total_entry_count:
            print(f"Sleeping for {str(request_buffer)} seconds...")
            sleep(request_buffer)

    # Display results
    print(f"\nResults: ")
    results_df = pd.DataFrame(results, columns = ["resource", "resource_type", "cloud_provider", "user", "role", "update_status", "errors"])
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify the user-resource-role changes that should be made.
# Each entry is a dict with the keys "resource", "resource_type", "cloud_provider", "user", and "role";
# manage_user_access validates resource_type, role, and cloud_provider before making any changes.
user_resource_role_list = [
#     {"resource": "workspace_name, snapshot_id, dataset_id", "resource_type": "workspace/snapshot/dataset", "cloud_provider": "gcp/azure", "user": "ncalvane@broadinstitute.org", "role": "READER/WRITER/OWNER/CUSTODIAN/STEWARD/NO ACCESS"},
    {'resource': 'cba804c9-0bdd-4219-a53e-98c8db6334a0', 'resource_type': 'dataset', 'cloud_provider': 'gcp', 'user': 'anvil-admins@firecloud.org', 'role': 'NO ACCESS'},
    {'resource': '5c6a1c4f-ccd3-48a8-ac00-e18e5ecaa0bb', 'resource_type': 'dataset', 'cloud_provider': 'gcp', 'user': 'anvil-admins@firecloud.org', 'role': 'NO ACCESS'},
]

# Specify the time buffer that should be added between requests in seconds (if any).
# A value of 0 means no pause between entries.
request_buffer = 0

#############################################
## Execution
#############################################

# Applies the access changes listed above and displays a per-entry results table
manage_user_access(user_resource_role_list, request_buffer)


## Bulk Manage User Access to Terra Groups
List the users, target Terra groups, and roles the users should have on those target Terra groups. The roles specified will have the following effects:
* __ADMIN__ will add the user as an admin on the group
* __MEMBER__ will add the user as a member on the group
* __NO ACCESS__ will remove the user from the group

In [None]:
#############################################
## Functions
#############################################

def manage_user_group_role(user_group_role_list, request_buffer):
    """
    Bulk set users' roles on Terra groups.

    Args:
        user_group_role_list: list of [user, group, role] entries, where role is one of
            "ADMIN", "MEMBER", or "NO ACCESS".
        request_buffer: seconds to sleep between entries (falsy or <= 0 for none).

    Returns:
        None. A DataFrame with one row per entry (status plus any errors) is displayed.

    Role effects:
        ADMIN     - adds the user as an admin on the group.
        MEMBER    - removes an existing admin role (if any), then adds the user as a member.
        NO ACCESS - removes every role (admin and/or member) the user currently holds;
                    a user who is not in the group is a successful no-op.
    """
    # Loop through and process user group roles
    results = []
    total_entry_count = len(user_group_role_list)
    for idx, user_group_role in enumerate(user_group_role_list):

        # Initialize
        entry_number = idx + 1
        user = user_group_role[0]
        group = user_group_role[1]
        role = user_group_role[2]
        print(f"Processing user group role updates for: {user} - {group} - {role}. Entry {entry_number} of {total_entry_count}.")
        error_list = []

        # Validate specified role
        if role not in ["MEMBER", "ADMIN", "NO ACCESS"]:
            error_list.append("Unknown role specified. Role must be MEMBER, ADMIN or NO ACCESS.")
        else:

            # Establish credentials
            creds, _ = google.auth.default()
            auth_req = google.auth.transport.requests.Request()
            creds.refresh(auth_req)
            auth_header = {"Authorization": f"Bearer {creds.token}"}

            # An unexpected failure is recorded on this entry rather than aborting the batch
            try:
                # Get existing group membership. A user can be both an admin and a member,
                # so track every role currently held rather than a single one.
                response = requests.get(
                    url=f"https://api.firecloud.org/api/groups/{group}",
                    headers=auth_header
                )
                membership_known = response.status_code == 200
                curr_roles = []
                if not membership_known:
                    error_list.append("Error retrieving existing group membership.")
                else:
                    response_json = response.json()
                    if user in response_json["adminsEmails"]:
                        curr_roles.append("admin")
                    if user in response_json["membersEmails"]:
                        curr_roles.append("member")

                # Process user group role updates
                if role == "ADMIN":
                    response = requests.put(
                        url=f"https://api.firecloud.org/api/groups/{group}/{role.lower()}/{user}",
                        headers=auth_header
                    )
                    if response.status_code != 204:
                        error_list.append("Error updating user group role.")
                elif role == "MEMBER":
                    # If necessary, remove user's admin role from group
                    if "admin" in curr_roles:
                        response = requests.delete(
                            url=f"https://api.firecloud.org/api/groups/{group}/admin/{user}",
                            headers=auth_header
                        )
                        if response.status_code != 204:
                            error_list.append("Error updating user group role.")
                    # Add user as a member to group
                    response = requests.put(
                        url=f"https://api.firecloud.org/api/groups/{group}/{role.lower()}/{user}",
                        headers=auth_header
                    )
                    if response.status_code != 204:
                        error_list.append("Error updating user group role.")
                elif role == "NO ACCESS":
                    # Only remove roles the user is known to hold; when membership could not
                    # be retrieved the error above is already recorded and there is nothing
                    # reliable to delete.
                    for curr_role in curr_roles:
                        response = requests.delete(
                            url=f"https://api.firecloud.org/api/groups/{group}/{curr_role}/{user}",
                            headers=auth_header
                        )
                        if response.status_code != 204:
                            error_list.append("Error updating user group role.")
            except Exception as err:
                error_list.append(f"Unexpected error updating user group role: {err}")

        # Record status
        status = "Success" if not error_list else "Failure"
        error_str = "; ".join(error_list)
        results.append([user, group, role, status, error_str])

        # If request buffer time specified, sleep that amount of time
        if request_buffer and request_buffer > 0 and entry_number < total_entry_count:
            print(f"Sleeping for {str(request_buffer)} seconds...")
            sleep(request_buffer)

    # Display results
    print("\nResults:")
    results_df = pd.DataFrame(results, columns = ["user", "target_group", "target_role", "update_status", "errors"])
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify the user, target group, and the role the user should have on the group.
# Each entry is a list of [user, group name, role]; manage_user_group_role accepts the roles ADMIN, MEMBER, and NO ACCESS.
user_group_role_list = [
    #["user", "terra_group_name", "role - ADMIN, MEMBER, NO ACCESS"]
    ['tdr-ingest-sa@datarepo-00aaa29b.iam.gserviceaccount.com', 'anvil_tdr_ingest', 'NO ACCESS'],
    ['tdr-ingest-sa@datarepo-0138cae6.iam.gserviceaccount.com', 'anvil_tdr_ingest', 'NO ACCESS'],
]

# Specify the time buffer that should be added between requests in seconds (if any).
# A value of 0 means no pause between entries.
request_buffer = 0

#############################################
## Execution
#############################################

# Applies the group role changes listed above and displays a per-entry results table
manage_user_group_role(user_group_role_list, request_buffer)


## Bulk Create and Assign Terra Groups
List the new Terra groups to create, and optionally the users that should be admins of each group, the users that should be members of each group, and the other existing Terra groups each new Terra group should be added to as a member. This functionality is limited to new groups only. To update the membership of existing Terra groups, use the "Bulk Manage User Access to Terra Groups" section of this notebook. 

In [None]:
#############################################
## Functions
#############################################

def bulk_create_user_group(user_group_list):
    """Create new Terra groups and populate their initial membership.

    For each entry the group is created first. Only when creation succeeds are the
    members, admins, and parent-group memberships applied, in that order.

    Args:
        user_group_list: List of dicts, one per group to create. Each dict must contain
            "new_group_name". The keys "group_owners" (users added as admins),
            "group_members" (users added as members), and "groups_to_add_to" (existing
            groups the new group is added to as a member) are optional; a missing or
            empty value means there is nothing to add for that category.

    Returns:
        None. A results table with one row per group is displayed. The status is
        "Success" (group created, no errors), "Warning" (group created but one or more
        membership updates failed), or "Failure" (group not created).

    Raises:
        KeyError: If an entry has no "new_group_name".
    """
    results = []
    # Loop through and process user group list
    for user_group in user_group_list:

        # Initialize (membership lists are optional, so default them to empty)
        group = user_group["new_group_name"]
        group_admins_list = user_group.get("group_owners") or []
        group_members_list = user_group.get("group_members") or []
        group_to_add_to_list = user_group.get("groups_to_add_to") or []
        print(f"Processing new user group: {group}")
        error_list = []

        # Establish credentials (refreshed per group)
        creds, _ = google.auth.default()
        auth_req = google.auth.transport.requests.Request()
        creds.refresh(auth_req)

        # Attempt to create the group
        response = requests.post(
            url=f"https://api.firecloud.org/api/groups/{group}",
            headers={"Authorization": f"Bearer {creds.token}"}
        )
        if response.status_code == 409:
            error_list.append("Group already exists.")
        elif response.status_code != 201:
            error_list.append(f"Error creating group: {response.text}")

        # If the group could not be created, record the failure and move on
        if error_list:
            results.append([group, "Failure", "; ".join(error_list)])
            continue

        # Add members
        member_err_list = [
            user for user in group_members_list
            if not _put_terra_group_role(creds.token, group, "member", user)
        ]
        if member_err_list:
            member_err_str = ", ".join(member_err_list)
            error_list.append(f"Error adding members to group: {member_err_str}")

        # Add admins
        admin_err_list = [
            user for user in group_admins_list
            if not _put_terra_group_role(creds.token, group, "admin", user)
        ]
        if admin_err_list:
            admin_err_str = ", ".join(admin_err_list)
            error_list.append(f"Error adding admins to group: {admin_err_str}")

        # Add group as member of other groups (groups are referenced by their firecloud.org email)
        group_email = group + "@firecloud.org"
        group_add_err_list = [
            group_to_add_to for group_to_add_to in group_to_add_to_list
            if not _put_terra_group_role(creds.token, group_to_add_to, "member", group_email)
        ]
        if group_add_err_list:
            group_add_err_str = ", ".join(group_add_err_list)
            error_list.append(f"Error adding new group to groups: {group_add_err_str}")

        # Record status: the group exists at this point, so membership problems are warnings
        status = "Success" if not error_list else "Warning"
        results.append([group, status, "; ".join(error_list)])

    # Display results
    print("\nResults:")
    results_df = pd.DataFrame(results, columns = ["new_group_name", "status", "errors"])
    display(results_df)


def _put_terra_group_role(token, group, role, email):
    """PUT `email` onto `group` with `role` ("member" or "admin"); return True on HTTP 204."""
    response = requests.put(
        url=f"https://api.firecloud.org/api/groups/{group}/{role}/{email}",
        headers={"Authorization": f"Bearer {token}"}
    )
    return response.status_code == 204

#############################################
## Input Parameters
#############################################

# Specify the new Terra groups to create. Each entry is a dict holding the new group's name,
# the users to add as admins ("group_owners"), the users to add as members ("group_members"),
# and the existing Terra groups the new group should be added to as a member ("groups_to_add_to").
# Use an empty list for any category that does not apply.
user_group_list = [
#     {"new_group_name": "group_name", "group_owners": ["<user1>", "<user2>"], "group_members": ["<user1>", "<user2>"], "groups_to_add_to": ["<group1>", "<group2>"]},
    {'new_group_name': 'test_group_1', 'group_owners': [], 'group_members': ['ncalvane@broadinstitute.org','kbalacon@broadinstitute.org'], 'groups_to_add_to': []},
    {'new_group_name': 'test_group_2', 'group_owners': [], 'group_members': [], 'groups_to_add_to': ['test_group_1']},
]

#############################################
## Execution
#############################################

# Create each group listed above and display a per-group status table
bulk_create_user_group(user_group_list)


## AnVIL Resource Access Reports
Optionally run five reports related to AnVIL cloud resource access:
* The __All Workspaces Report__ pulls all of the AnVIL workspaces specified (or all the workspaces you have access to in the AnVIL billing projects if the workspace_list parameter is left empty), and records any auth domains, any roles the auth domains have on the workspace, the roles the anvil-admins and AnVIL_Devs groups have within the auth domain, the list of dbgap groups that are part of the auth domain, the users in the auth domain, the users and their roles on the workspace, whether or not the workspace is public, and whether or not the workspace is locked. 
* The __All Snapshots Report__ pulls all of the AnVIL snapshots specified (or all the snapshots you have access to in the AnVIL billing profiles if the snapshot_list parameter is left empty), and records any duos identifiers, any auth domains, any roles the auth domains have on the snapshots, the roles the anvil-admins and AnVIL_Devs groups have within the auth domain, the list of dbgap groups that are part of the auth domain, the users in the auth domain, the users and their roles on the snapshot, whether or not the snapshot is public, and whether or not the snapshot is locked.
* The __All dbGaP Telemetry Groups Report__ takes a list of user-specified dbGaP Telemetry Groups and returns whether each group exists as a group in Terra, and if so, which auth domains it is currently in. Note that if ONLY the All dbGaP Telemetry Groups Report is run (without the All Workspaces Report or the All Snapshots Report), then the report will be unable to determine which auth domains the existing dbGaP Telemetry Groups are in.
* The __All DUOS Registrations Report__ returns information on all AnVIL dataset registrations in the DUOS production environment.
* The __All Dataset Tickets Report__ returns information on all ANVIL Jira tickets with an issue type of "Dataset", for use in connecting the workspace and snapshot information together at a per-dataset level. 

In [None]:
#############################################
## Functions
#############################################

def gen_workspace_report(workspace_list):
    """Generate and display the All Workspaces Report.

    Lists the workspaces visible to the caller, keeps the requested ones, and for each
    workspace/auth-domain pair collects auth domain membership, the workspace ACL,
    requester-pays status, tags, and (for Azure workspaces) the storage container name.
    Per-workspace lookup failures are recorded in that row's "errors" column rather
    than aborting the report.

    Args:
        workspace_list: Workspace names to include. When empty, every visible workspace
            in the "anvil-datastorage" or "AnVILDataStorage_Azure" billing projects is
            included.

    Returns:
        A list of [dbgap_group_email, auth_domain] pairs, one for each dbGaP group found
        in an auth domain of a reported workspace.
    """
    print("Generating All Workspaces Report:")
    # Establish credentials (the default project returned with them is not used)
    creds, _ = google.auth.default()
    auth_req = google.auth.transport.requests.Request()
    creds.refresh(auth_req)

    # Collect list of AnVIL workspaces to process and their auth domains
    anvil_ws_list = []
    workspaces = requests.get(
        url=f"https://api.firecloud.org/api/workspaces?fields=workspace.authorizationDomain,public,workspace.namespace,workspace.name,workspace.bucketName,workspace.googleProject,workspace.isLocked,workspace.workspaceId",
        headers={"Authorization": f"Bearer {creds.token}"}
    ).json()
    for workspace in workspaces:
        ws = workspace["workspace"]
        if workspace_list:
            include_ws = ws["name"] in workspace_list
        else:
            include_ws = ws["namespace"] in ["anvil-datastorage", "AnVILDataStorage_Azure"]
        if not include_ws:
            continue
        # One row per auth domain, or a single row with no auth domain
        if ws["authorizationDomain"]:
            ad_names = [ad_entry["membersGroupName"] for ad_entry in ws["authorizationDomain"]]
        else:
            ad_names = [None]
        for ad_name in ad_names:
            anvil_ws_list.append([ws["name"], ws["namespace"], ws["googleProject"], ws["bucketName"], ad_name, workspace["public"], ws["isLocked"], ws["workspaceId"]])

    # Loop through AnVIL workspaces and collect additional report information
    results = []
    for idx, workspace in enumerate(anvil_ws_list):

        # Initialize
        workspace_id = workspace[7]
        name = workspace[0]
        namespace = workspace[1]
        project = workspace[2]
        bucket = workspace[3]
        ad = workspace[4]
        ad_ws_role = "NO ACCESS"
        anv_admins_role = "No Role"
        anv_devs_role = "No Role"
        anv_admins_ws_role = "No Role"
        duos_group_list = []
        dbgap_group_list = []
        other_ad_admin_list = []
        other_ad_member_list = []
        public = workspace[5]
        locked = workspace[6]
        ws_owner_list = []
        ws_writer_list = []
        ws_reader_list = []
        error_list = []
        ad_str = f" - {ad}" if ad else ""
        print(f"Processing {name}{ad_str}: {str(idx+1)} of {str(len(anvil_ws_list))}")

        # If AD on workspace, pull AD group membership
        if ad:
            response = requests.get(
                url=f"https://api.firecloud.org/api/groups/{ad}",
                headers={"Authorization": f"Bearer {creds.token}"}
            )
            if response.status_code != 200:
                error_list.append("Error retrieving auth domain membership")
                anv_admins_role = "Unknown"
                anv_devs_role = "Unknown"
            else:
                group_acl = response.json()
                # Determine roles/presence of anvil-admins, anvil_devs, and dbgap groups
                for admin in group_acl["adminsEmails"]:
                    if admin == "anvil-admins@firecloud.org":
                        anv_admins_role = "Admin"
                    elif admin == "AnVIL_Devs@firecloud.org":
                        anv_devs_role = "Admin"
                    elif "dbgap_anvil" in admin.lower() or "dbgap-authorized" in admin.lower():
                        dbgap_group_list.append(admin)
                    elif "duos-" in admin.lower():
                        duos_group_list.append(admin)
                    else:
                        other_ad_admin_list.append(admin)
                for member in group_acl["membersEmails"]:
                    # An Admin role found above takes precedence over Member
                    if member == "anvil-admins@firecloud.org" and anv_admins_role != "Admin":
                        anv_admins_role = "Member"
                    elif member == "AnVIL_Devs@firecloud.org" and anv_devs_role != "Admin":
                        anv_devs_role = "Member"
                    elif "dbgap_anvil" in member.lower() or "dbgap-authorized" in member.lower():
                        dbgap_group_list.append(member)
                    elif "duos-" in member.lower():
                        duos_group_list.append(member)
                    else:
                        other_ad_member_list.append(member)
        else:
            ad_ws_role = "N/A"
            anv_admins_role = "N/A"
            anv_devs_role = "N/A"
        duos_group_str = ", ".join(duos_group_list)
        dbgap_group_str = ", ".join(dbgap_group_list)
        ad_elevated_permissions = False
        other_ad_str = ""
        if other_ad_admin_list:
            other_ad_str += "ADMIN - " + ", ".join(other_ad_admin_list) + "; "
            ad_elevated_permissions = True
        if other_ad_member_list:
            other_ad_str += "MEMBER - " + ", ".join(other_ad_member_list) + "; "

        # Pull workspace ACLs
        elevated_permissions = False
        ws_member_str = ""
        response = requests.get(
            url=f"https://api.firecloud.org/api/workspaces/{namespace}/{name}/acl",
            headers={"Authorization": f"Bearer {creds.token}"}
        )
        if response.status_code != 200:
            error_list.append("Error retrieving workspace ACL")
            ad_ws_role = "Unknown"
            anv_admins_ws_role = "Unknown"
        else:
            ws_acl = response.json()
            for key, val in ws_acl["acl"].items():
                if ad and ad in key:
                    ad_ws_role = val["accessLevel"]
                else:
                    if val["accessLevel"] == "OWNER":
                        ws_owner_list.append(key)
                    elif val["accessLevel"] == "WRITER":
                        ws_writer_list.append(key)
                    elif val["accessLevel"] == "READER":
                        ws_reader_list.append(key)
            if ws_owner_list:
                ws_member_str += "OWNER - " + ", ".join(ws_owner_list) + "; "
                # Any owner other than the two expected groups counts as elevated permissions
                for ws_owner in ws_owner_list:
                    if ws_owner not in ["anvil-admins@firecloud.org", "public-workspace-creators@firecloud.org"]:
                        elevated_permissions = True
                        break
            if ws_writer_list:
                ws_member_str += "WRITER - " + ", ".join(ws_writer_list) + "; "
                elevated_permissions = True
            if ws_reader_list:
                ws_member_str += "READER - " + ", ".join(ws_reader_list) + "; "
            if "anvil-admins@firecloud.org" in ws_owner_list:
                anv_admins_ws_role = "OWNER"
            elif "anvil-admins@firecloud.org" in ws_writer_list:
                anv_admins_ws_role = "WRITER"
            elif "anvil-admins@firecloud.org" in ws_reader_list:
                anv_admins_ws_role = "READER"

        # Pull requester pays status
        requester_pays = False
        response = requests.get(
            url=f"https://api.firecloud.org/api/workspaces/{namespace}/{name}?fields=bucketOptions",
            headers={"Authorization": f"Bearer {creds.token}"}
        )
        if response.status_code != 200:
            error_list.append("Error retrieving requester pays status")
        else:
            ws_details = response.json()
            if ws_details.get("bucketOptions"):
                requester_pays = ws_details["bucketOptions"]["requesterPays"]

        # Pull workspace tags
        released = False
        consortium = ""
        phs_id = ""
        consent = ""
        other_tags = ""
        response = requests.get(
            url=f"https://api.firecloud.org/api/workspaces/{namespace}/{name}/tags",
            headers={"Authorization": f"Bearer {creds.token}"}
        )
        if response.status_code != 200:
            error_list.append("Error retrieving workspace tags.")
            released = None
        else:
            try:
                for item in response.json():
                    if item == "AnVIL Status: Released":
                        released = True
                    elif "consortium:" in item.lower():
                        consortium = item.split(":")[1].strip()
                    elif "dbgap:" in item.lower():
                        phs_id = item.split(":")[1].strip()
                    elif "consent_code:" in item.lower():
                        consent = item.split(":")[1].strip()
                    other_tags += f"'{item}', "
            except Exception:
                # Best-effort parsing: record the problem on the row, but do not swallow
                # KeyboardInterrupt/SystemExit the way a bare except would
                error_list.append("Error parsing workspace tags.")

        # If Azure workspace, pull additional details
        if namespace == "AnVILDataStorage_Azure":
            platform = "azure"
            project = "https://lzb34bb58bfb122730765416.blob.core.windows.net/"
            response = requests.get(
                url=f"https://workspace.dsde-prod.broadinstitute.org/api/workspaces/v1/{workspace_id}/resources?offset=0&limit=10&resource=AZURE_STORAGE_CONTAINER",
                headers={"Authorization": f"Bearer {creds.token}"}
            )
            if response.status_code != 200:
                # Record the failure on this row instead of aborting the whole report
                error_list.append("Error retrieving Azure storage container details")
            else:
                for resource_entry in response.json().get("resources", []):
                    container_name = resource_entry["resourceAttributes"]["azureStorageContainer"]["storageContainerName"]
                    # The first container whose name starts with "sc-" is reported as the bucket
                    if container_name[0:3] == "sc-":
                        bucket = container_name
                        break
        else:
            platform = "gcp"

        # Record results
        status = "Success" if not error_list else "Errors"
        error_str = "; ".join(error_list)
        results.append([name, workspace_id, namespace, platform, project, bucket, ad, ad_ws_role, anv_admins_role, anv_devs_role, duos_group_str, dbgap_group_str, other_ad_str, ad_elevated_permissions, public, locked, ws_member_str, anv_admins_ws_role, elevated_permissions, phs_id, consent, consortium, other_tags, requester_pays, status, error_str])

    # Display results
    print("\nAll Workspaces Report Results: ")
    results_df = pd.DataFrame(results, columns = ["workspace", "workspace_id", "billing_project", "cloud_platform", "cloud_project", "bucket", "auth_domain", "ad_workspace_role", "anvil_adms_ad_role", "anvil_devs_ad_role", "duos_groups_in_ad", "dbgap_groups_in_ad", "other_ad_members", "ad_elevated_permissions_flag", "workspace_public", "workspaced_locked", "workspace_members", "anvil_adms_ws_role", "elevated_permissions_flag", "phs_id", "consent_name", "consortium", "all_workspace_tags", "requester_pays", "status", "errors"])
    results_df_sorted = results_df.sort_values(by=["workspace"], ignore_index=True)
    display(results_df_sorted)

    # Output: map each dbGaP group (column 11) to the auth domain (column 6) it was found in.
    # The groups were joined with ", ", so strip each piece to avoid leading spaces in names.
    output = []
    for entry in results:
        for grp in entry[11].split(","):
            grp = grp.strip()
            if grp:
                output.append([grp, entry[6]])
    return output

def gen_snapshot_report(snapshot_id_list):
    """Generate and display the All Snapshots Report.

    Lists the snapshots visible to the caller, keeps the requested ones, and for each
    collects the source dataset's policies, the snapshot's policies and auth domains,
    its public status, and the membership of each auth domain. One row is produced per
    snapshot/auth-domain pair, or a single row when the snapshot has no auth domain.

    Args:
        snapshot_id_list: Snapshot IDs to include. When empty, every visible snapshot
            whose billing profile is in the hard-coded billing profile list below is
            included.

    Returns:
        A list of [dbgap_group_email, auth_domain] pairs, one for each dbGaP group found
        in an auth domain of a reported snapshot.
    """
    print("Generating All Snapshots Report:")
    # Establish credentials (the default project returned with them is not used)
    creds, _ = google.auth.default()
    auth_req = google.auth.transport.requests.Request()
    creds.refresh(auth_req)

    # Collect list of AnVIL snapshots to process and their auth domains
    billing_profile_list = ["e0e03e48-5b96-45ec-baa4-8cc1ebf74c61", "9ee23bed-b46c-4561-9103-d2a723113f7f"]
    snapshot_list = []
    snapshots = requests.get(
        url=f"https://data.terra.bio/api/repository/v1/snapshots?offset=0&limit=5000",
        headers={"Authorization": f"Bearer {creds.token}"}
    ).json()
    for snapshot in snapshots["items"]:
        if snapshot_id_list:
            include_ss = snapshot["id"] in snapshot_id_list
        else:
            include_ss = snapshot["profileId"] in billing_profile_list
        if include_ss:
            snapshot_list.append([snapshot["id"], snapshot["name"], snapshot["profileId"], snapshot["cloudPlatform"], snapshot["resourceLocks"]["exclusive"], snapshot["duosId"], snapshot["phsId"], snapshot["consentCode"], snapshot["dataProject"]])

    # Loop through AnVIL snapshots and collect additional report information
    results = []
    for idx, snapshot in enumerate(snapshot_list):

        # Initialize (AD-specific fields are set per auth domain further down)
        snapshot_id = snapshot[0]
        name = snapshot[1]
        billing_profile = snapshot[2]
        cloud_platform = snapshot[3]
        cloud_project = snapshot[8]
        duos_id = snapshot[5]
        phs_id = snapshot[6]
        consent = snapshot[7]
        snapshot_locked = True if snapshot[4] else False
        snapshot_public = False
        ad = None
        anv_admins_ds_role = "No Role"
        anv_admins_ss_role = "No Role"
        ss_full_steward_list = []
        ss_steward_list = []
        ss_full_reader_list = []
        ss_reader_list = []
        snapshot_dataset_sa = ""
        ds_custodian_list = []
        ds_steward_list = []
        ds_snapshotter_list = []
        error_list = []
        print(f"Processing {name}: {str(idx+1)} of {str(len(snapshot_list))}")

        # Pull snapshot dataset details and policies
        snapshot_details = requests.get(
            url=f"https://data.terra.bio/api/repository/v1/snapshots/{snapshot_id}",
            headers={"Authorization": f"Bearer {creds.token}"}
        ).json()
        snapshot_dataset_id = snapshot_details["source"][0]["dataset"]["id"]
        snapshot_dataset_project = snapshot_details["source"][0]["dataset"]["dataProject"]
        if snapshot_dataset_project and cloud_platform == "gcp":
            snapshot_dataset_sa = f"tdr-ingest-sa@{snapshot_dataset_project}.iam.gserviceaccount.com"
        dataset_policies = requests.get(
            url=f"https://data.terra.bio/api/repository/v1/datasets/{snapshot_dataset_id}/policies",
            headers={"Authorization": f"Bearer {creds.token}"}
        ).json()

        for policy in dataset_policies["policies"]:
            if policy["name"] == "custodian":
                ds_custodian_list = policy["members"]
            elif policy["name"] == "steward":
                ds_steward_list = policy["members"]
            elif policy["name"] == "snapshot_creator":
                ds_snapshotter_list = policy["members"]
        ds_member_str = ""
        if ds_steward_list:
            ds_member_str += "STEWARD - " + ", ".join(ds_steward_list) + "; "
        if ds_custodian_list:
            ds_member_str += "CUSTODIAN - " + ", ".join(ds_custodian_list) + "; "
        if ds_snapshotter_list:
            ds_member_str += "SNAPSHOT_CREATOR - " + ", ".join(ds_snapshotter_list) + "; "
        if "anvil-admins@firecloud.org" in ds_steward_list:
            anv_admins_ds_role = "STEWARD"
        elif "anvil-admins@firecloud.org" in ds_custodian_list:
            anv_admins_ds_role = "CUSTODIAN"
        elif "anvil-admins@firecloud.org" in ds_snapshotter_list:
            anv_admins_ds_role = "SNAPSHOT_CREATOR"

        # Pull snapshot auth domains (and other policies)
        elevated_permissions = False
        snapshot_policies = requests.get(
            url=f"https://data.terra.bio/api/repository/v1/snapshots/{snapshot_id}/policies",
            headers={"Authorization": f"Bearer {creds.token}"}
        ).json()
        ad_list = snapshot_policies["authDomain"]
        ad_full_email_list = [ad_name + "@firecloud.org" for ad_name in ad_list]
        for policy in snapshot_policies["policies"]:
            if policy["name"] == "steward":
                for member in policy["members"]:
                    ss_full_steward_list.append(member)
                    # The filtered list leaves out DUOS entries, "policy-" entries, and the auth domains
                    if "DUOS" not in member and "policy-" not in member and member not in ad_full_email_list:
                        ss_steward_list.append(member)
            elif policy["name"] == "reader":
                for member in policy["members"]:
                    ss_full_reader_list.append(member)
                    if "DUOS" not in member and "policy-" not in member and member not in ad_full_email_list:
                        ss_reader_list.append(member)
        ss_member_str = ""
        if ss_steward_list:
            ss_member_str += "STEWARD - " + ", ".join(ss_steward_list) + "; "
            # Any steward other than anvil-admins counts as elevated permissions
            for ss_steward in ss_steward_list:
                if ss_steward not in ["anvil-admins@firecloud.org"]:
                    elevated_permissions = True
                    break
        if ss_reader_list:
            ss_member_str += "READER - " + ", ".join(ss_reader_list) + "; "
        if "anvil-admins@firecloud.org" in ss_steward_list:
            anv_admins_ss_role = "STEWARD"
        elif "anvil-admins@firecloud.org" in ss_reader_list:
            anv_admins_ss_role = "READER"

        # Pull snapshot public status
        public_response = requests.get(
            url=f"https://sam.dsde-prod.broadinstitute.org/api/resources/v2/datasnapshot/{snapshot_id}/policies/reader/public",
            headers={"Authorization": f"Bearer {creds.token}"},
        )
        if public_response.text == "true":
            snapshot_public = True

        # Process each AD on snapshot
        if ad_list:
            for ad in ad_list:
                # Initialize AD specific variables
                ad_ss_role = "NO ACCESS"
                anv_admins_role = "No Role"
                anv_devs_role = "No Role"
                dbgap_group_list = []
                duos_group_list = []
                other_ad_admin_list = []
                other_ad_member_list = []
                ad_error_list = error_list.copy()
                ad_email = ad + "@firecloud.org"
                if ad_email in ss_full_steward_list:
                    ad_ss_role = "STEWARD"
                elif ad_email in ss_full_reader_list:
                    ad_ss_role = "READER"
                # Pull AD group members
                response = requests.get(
                    url=f"https://api.firecloud.org/api/groups/{ad}",
                    headers={"Authorization": f"Bearer {creds.token}"}
                )
                if response.status_code != 200:
                    ad_error_list.append("Error retrieving auth domain membership")
                    anv_admins_role = "Unknown"
                    anv_devs_role = "Unknown"
                else:
                    group_acl = response.json()
                    # Determine roles/presence of anvil-admins, anvil_devs, and dbgap groups
                    for admin in group_acl["adminsEmails"]:
                        if admin == "anvil-admins@firecloud.org":
                            anv_admins_role = "Admin"
                        elif admin == "AnVIL_Devs@firecloud.org":
                            anv_devs_role = "Admin"
                        elif "dbgap_anvil" in admin.lower() or "dbgap-authorized" in admin.lower():
                            dbgap_group_list.append(admin)
                        elif "duos-" in admin.lower():
                            duos_group_list.append(admin)
                        else:
                            other_ad_admin_list.append(admin)
                    for member in group_acl["membersEmails"]:
                        # An Admin role found above takes precedence over Member
                        if member == "anvil-admins@firecloud.org" and anv_admins_role != "Admin":
                            anv_admins_role = "Member"
                        elif member == "AnVIL_Devs@firecloud.org" and anv_devs_role != "Admin":
                            anv_devs_role = "Member"
                        elif "dbgap_anvil" in member.lower() or "dbgap-authorized" in member.lower():
                            dbgap_group_list.append(member)
                        elif "duos-" in member.lower():
                            duos_group_list.append(member)
                        else:
                            other_ad_member_list.append(member)
                dbgap_group_str = ", ".join(dbgap_group_list)
                duos_group_str = ", ".join(duos_group_list)
                ad_elevated_permissions = False
                other_ad_str = ""
                if other_ad_admin_list:
                    other_ad_str += "ADMIN - " + ", ".join(other_ad_admin_list) + "; "
                    ad_elevated_permissions = True
                if other_ad_member_list:
                    other_ad_str += "MEMBER - " + ", ".join(other_ad_member_list) + "; "
                # Record AD Results
                status = "Success" if not ad_error_list else "Errors"
                error_str = "; ".join(ad_error_list)
                results.append([name, snapshot_id, billing_profile, cloud_platform, cloud_project, duos_id, ad, ad_ss_role, anv_admins_role, anv_devs_role, duos_group_str, dbgap_group_str, other_ad_str, ad_elevated_permissions, snapshot_public, snapshot_locked, ss_member_str, anv_admins_ss_role, elevated_permissions, phs_id, consent, snapshot_dataset_id, snapshot_dataset_sa, ds_member_str, anv_admins_ds_role, status, error_str])
        else:
            # Record non-AD results (ad is still None here)
            ad_ss_role = "N/A"
            anv_admins_role = "N/A"
            anv_devs_role = "N/A"
            status = "Success" if not error_list else "Errors"
            error_str = "; ".join(error_list)
            results.append([name, snapshot_id, billing_profile, cloud_platform, cloud_project, duos_id, ad, ad_ss_role, anv_admins_role, anv_devs_role, "", "", "", False, snapshot_public, snapshot_locked, ss_member_str, anv_admins_ss_role, elevated_permissions, phs_id, consent, snapshot_dataset_id, snapshot_dataset_sa, ds_member_str, anv_admins_ds_role, status, error_str])

    # Display results
    print("\nAll Snapshots Report Results: ")
    results_df = pd.DataFrame(results, columns = ["snapshot", "snapshot_id", "billing_profile", "cloud_platform", "cloud_project", "duos_id", "auth_domain", "ad_snapshot_role", "anvil_adms_ad_role", "anvil_devs_ad_role", "duos_groups_in_ad", "dbgap_groups_in_ad", "other_ad_members", "ad_elevated_permissions_flag", "snapshot_public", "snapshot_locked", "snapshot_members", "anvil_adms_ss_role", "elevated_permissions_flag", "phs_id", "consent_name", "dataset_id", "dataset_service_account", "dataset_members", "anvil_adms_ds_role", "status", "errors"])
    results_df_sorted = results_df.sort_values(by=["snapshot"], ignore_index=True)
    display(results_df_sorted)

    # Output: map each dbGaP group (column 11) to the auth domain (column 6) it was found in.
    # The groups were joined with ", ", so strip each piece to avoid leading spaces in names.
    output = []
    for entry in results:
        for grp in entry[11].split(","):
            grp = grp.strip()
            if grp:
                output.append([grp, entry[6]])
    return output

def gen_dbgap_report(dbgap_telemetry_group_list, tele_ad_map):
    """Generate and display the All dbGaP Telemetry Groups Report.

    The user-supplied groups are augmented with any group named in `tele_ad_map` that is
    not already listed (case-insensitive). Each group is then looked up through the Sam
    groups endpoint; for groups that exist, the auth domains paired with that group in
    `tele_ad_map` are reported.

    Args:
        dbgap_telemetry_group_list: Group names to check. The caller's list is not
            modified.
        tele_ad_map: List of [group_email, auth_domain] pairs. When empty, the auth
            domains of existing groups are reported as unknown.

    Returns:
        None. A table with one row per group is displayed.
    """
    print("Generating All dbGaP Telemetry Groups Report:")
    # Establish credentials (the default project returned with them is not used)
    creds, _ = google.auth.default()
    auth_req = google.auth.transport.requests.Request()
    creds.refresh(auth_req)

    def _group_key(group_ref):
        # Comparison key: surrounding whitespace and the email domain removed, case-folded
        return group_ref.strip().replace("@firecloud.org", "").lower()

    # Augment user-supplied dbgap telemetry groups with those present in the tele_ad_map.
    # Work on a copy so repeated runs do not keep growing the caller's input list.
    groups_to_check = list(dbgap_telemetry_group_list)
    seen_groups = {existing_grp.lower() for existing_grp in groups_to_check}
    for entry in tele_ad_map:
        grp_name = entry[0].strip().replace("@firecloud.org", "")
        if grp_name.lower() not in seen_groups:
            groups_to_check.append(grp_name)
            seen_groups.add(grp_name.lower())

    # Loop through dbgap telemetry groups
    dbgap_results = []
    for grp in groups_to_check:

        group_in_ads_list = []

        # Confirm group exists
        response = requests.get(
            url=f"https://sam.dsde-prod.broadinstitute.org/api/groups/v1/{grp}",
            headers={"Authorization": f"Bearer {creds.token}"}
        )
        group_exists = response.status_code == 200

        # Determine which ADs the group is in from the workspace/snapshot report results
        if group_exists:
            if tele_ad_map:
                # Match on the full group name so that e.g. "x_c1" does not pick up "x_c10"
                ad_set = {
                    map_entry[1] for map_entry in tele_ad_map
                    if _group_key(map_entry[0]) == _group_key(grp)
                }
                # Sorted for a stable display order; key=str tolerates non-string entries
                group_in_ads_list = sorted(ad_set, key=str)
            else:
                group_in_ads_list.append("Unknown - WS and SS Reports Not Run")

        # Record results
        dbgap_results.append([grp, group_exists, group_in_ads_list])

    print("\nAll dbGaP Telemetry Groups Report Results: ")
    dbgap_results_df = pd.DataFrame(dbgap_results, columns = ["group", "group_exists", "group_in_auth_domains"])
    display(dbgap_results_df)
    
def gen_duos_report(duos_token):
    """Display a report of the DUOS datasets whose study description contains "Platform: AnVIL".

    Every dataset returned by the DUOS v3 dataset listing is looked up individually through
    the v2 dataset endpoint; datasets without a study, or whose study description lacks the
    "Platform: AnVIL" marker, are skipped.

    Args:
        duos_token: Bearer token sent in the Authorization header of every DUOS request.

    Returns:
        None. The results are rendered with display() as a DataFrame sorted by
        "Study ID" and then "Dataset ID".
    """
    print("Generating All DUOS Registrations Report:")
    # Pull a list of existing AnVIL studies and datasets from DUOS
    url = "https://consent.dsde-prod.broadinstitute.org"
    auth_headers = {"Authorization": f"Bearer {duos_token}"}
    results = []
    datasets = requests.get(
        url=f"{url}/api/dataset/v3",
        headers=auth_headers
    ).json()
    datasets_to_process = len(datasets)
    for datasets_processed, dataset_entry in enumerate(datasets, start=1):
        print(f"Processing dataset {datasets_processed} of {datasets_to_process}...")
        dataset_id = dataset_entry["dataset_id"]
        dataset_details = requests.get(
            url=f"{url}/api/dataset/v2/{dataset_id}",
            headers=auth_headers
        ).json()

        # Only datasets attached to an AnVIL study are of interest
        study = dataset_details.get("study")
        if not study:
            continue
        description = study.get("description")
        if not description or "Platform: AnVIL" not in description:
            continue

        # Study-level attributes
        study_id = study["studyId"]
        study_name = study["name"]
        study_phs = ""
        for prop_entry in study.get("properties") or []:
            if prop_entry.get("key") == "dbGaPPhsID":
                study_phs = prop_entry.get("value", "")
                break

        # Dataset-level attributes
        dataset_name = dataset_details["name"]
        dataset_identifier = dataset_details["datasetIdentifier"]
        dac_id = dataset_details.get("dacId") or ""

        # A dataset with no data use recorded is reported with the default (empty/False) values
        # rather than aborting the whole report.
        data_use = dataset_details.get("dataUse") or {}
        du_gru = data_use.get("generalUse") or False
        du_hmb = data_use.get("hmbResearch") or False
        du_disease = data_use.get("diseaseRestrictions") or []
        du_poa = data_use.get("populationOriginsAncestry") or False
        du_ethics = data_use.get("ethicsApprovalRequired") or False
        du_collab = data_use.get("collaboratorRequired") or False
        du_geog = data_use.get("geographicalRestrictions") or ""
        du_genetic = data_use.get("geneticStudiesOnly") or False
        du_pub = data_use.get("publicationResults") or False
        du_nmds = data_use.get("methodsResearch") or False
        du_npu = data_use.get("nonProfitUse") or False
        du_other = data_use.get("other") or ""

        # Dataset properties: the snapshot ID is derived from the "URL" property when it points
        # at a TDR snapshot page.
        access_management = ""
        snapshot_id = ""
        for prop_entry in dataset_details.get("properties") or []:
            prop_name = prop_entry.get("propertyName")
            if prop_name == "URL":
                snapshot_url = prop_entry.get("propertyValue")
                if snapshot_url and "https://data.terra.bio/snapshots/" in snapshot_url:
                    snapshot_id = snapshot_url.replace("https://data.terra.bio/snapshots/", "")
            elif prop_name == "Access Management":
                access_management = prop_entry.get("propertyValue")

        results.append([study_id, study_name, study_phs, dataset_id, dataset_identifier, dataset_name, dac_id, access_management, du_gru, du_hmb, du_disease, du_poa, du_ethics, du_collab, du_geog, du_genetic, du_pub, du_nmds, du_npu, du_other, snapshot_id])

    # Display results (sorted, so that datasets of the same study are listed together)
    df_results = pd.DataFrame(results, columns = ["Study ID", "Study Name", "Study PHS", "Dataset ID", "Dataset Identifier", "Dataset Name", "DAC ID", "Access", "GRU", "HMB", "DS", "POA", "IRB", "COL", "GS", "GSO", "PUB", "NMDS", "NPU", "OTHER", "Snapshot ID"])
    df_results_sorted = df_results.sort_values(by=["Study ID", "Dataset ID"], ascending=[True, True], ignore_index=True)
    print("\nDUOS AnVIL Datasets: ")
    display(df_results_sorted)
    
def gen_jira_datasets_report(user, api_key):
    """Display a report of every Dataset issue in the ANVIL Jira project.

    Issues are fetched in pages of 100, ordered by summary. Issues whose status reads
    "Wont Do" are listed after all other issues in the displayed table.

    Args:
        user: Jira user name used for basic auth.
        api_key: Jira API key used for basic auth.

    Returns:
        None. The results are rendered with display() as a DataFrame.
    """
    print("Generating All Dataset Tickets Report:")

    def _selected_value(field):
        # Single-select custom fields expose their text through `.value`; unset ones are falsy.
        return field.value if field else None

    # Establish Jira instance and collect results page by page
    jira = JIRA("https://broadworkbench.atlassian.net", basic_auth=(user, api_key))
    active_rows = []
    inactive_rows = []
    offset = 0
    while True:
        page = jira.search_issues("project = ANVIL and issuetype = Dataset order by summary", maxResults=100, startAt=offset)
        if len(page) == 0:
            break
        for issue in page:
            fields = issue.fields
            status = fields.status
            clouds = fields.customfield_10602
            clouds_str = ", ".join([str(cloud.value) for cloud in clouds]) if clouds else ""
            row = [
                issue.key,
                fields.summary,
                status,
                _selected_value(fields.customfield_10578),  # access_management
                fields.customfield_10585,                   # duos_identifier
                _selected_value(fields.customfield_10597),  # released_in_duos
                _selected_value(fields.customfield_10593),  # released_in_tdr
                _selected_value(fields.customfield_10592),  # released_in_explorer
                fields.customfield_10584,                   # data_access_control_group
                fields.customfield_10579,                   # dbgap_accession
                fields.customfield_10615,                   # dbgap_consent_group
                fields.customfield_10576,                   # dbgap_consent_code
                fields.customfield_10601,                   # dbgap_dac
                fields.customfield_10598,                   # dbgap_terra_group
                fields.customfield_10582,                   # staging_workspace
                fields.customfield_10586,                   # tdr_gcp_dataset
                fields.customfield_10587,                   # tdr_gcp_snapshot
                fields.customfield_10588,                   # tdr_az_dataset
                fields.customfield_10589,                   # tdr_az_snapshot
                fields.customfield_10599,                   # size_tb
                fields.customfield_10614,                   # mapping_spec
                clouds_str,                                 # data_on_clouds
                fields.customfield_10617,                   # staging_buckets
            ]
            if str(status) == "Wont Do":
                inactive_rows.append(row)
            else:
                active_rows.append(row)
        offset += len(page)

    # Display results: active tickets first, followed by the "Wont Do" ones
    print("\nANVIL Jira Datasets: ")
    all_rows = active_rows + inactive_rows
    all_datasets_df = pd.DataFrame(all_rows, columns = ["key", "summary", "status", "access_management", "duos_identifier", "released_in_duos", "released_in_tdr", "released_in_explorer", "data_access_control_group", "dbgap_accession", "dbgap_consent_group", "dbgap_consent_code", "dbgap_dac", "dbgap_terra_group", "staging_workspace", "tdr_gcp_dataset", "tdr_gcp_snapshot", "tdr_az_dataset", "tdr_az_snapshot", "size_tb", "mapping_spec", "data_on_clouds", "staging_buckets"])
    display(all_datasets_df)

#############################################
## Input Parameters
#############################################

# Specify whether the All Workspaces Report should be run. Its output is also fed into the
# telemetry-group-to-auth-domain map used by the All dbGaP Telemetry Groups Report below:
run_ws_access_rpt = True

# Specify the workspaces that should be included in the All Workspaces Report, or leave empty to pull all workspaces you have
# access to in the anvil-datastorage or AnVILDataStorage_Azure billing projects.
workspace_list = [
]

# Specify whether the All Snapshots Report should be run. Its output is also fed into the
# telemetry-group-to-auth-domain map used by the All dbGaP Telemetry Groups Report below:
run_ss_access_rpt = True

# Specify the snapshots that should be included in the All Snapshots Report, or leave empty to pull all snapshots you have
# access to in the e0e03e48-5b96-45ec-baa4-8cc1ebf74c61 or 9ee23bed-b46c-4561-9103-d2a723113f7f billing profiles.
snapshot_id_list = [
]

# Specify whether the All dbGaP Telemetry Groups Report should be run:
run_dbgap_telemetry_group_rpt = True

# Specify the groups to include in the All dbGaP Telemetry Groups Report, or leave empty to only pull the groups
# that are currently in use on a workspace and/or snapshot (depending on which of the above reports you run):
dbgap_telemetry_group_list = [
    'dbGaP_AnVIL_phs000298_c1_Autism_HighSeq_DS-ASD',
    'dbGaP_AnVIL_phs000298_c2_Autism_HighSeq_GRU',
    'dbGaP_AnVIL_phs000298_c3_Autism_HighSeq_DS-AOND-MDS',
    'dbGaP_AnVIL_phs000298_c4_Autism_HighSeq_HMB-MDS',
    'dbGaP_AnVIL_phs000711_c1_MendelianGenomics_Ba_GRU',
    'dbGaP_AnVIL_phs000711_c2_MendelianGenomics_Ba_HMB-NPU',
    'dbGaP_AnVIL_phs000711_c3_MendelianGenomics_Ba_HMB-IRB-NPU',
    'dbGaP_AnVIL_phs000693_c1_MendelianGenomics_UW_GRU',
    'dbGaP_AnVIL_phs000693_c2_MendelianGenomics_UW_DS-EP',
    'dbGaP_AnVIL_phs000693_c3_MendelianGenomics_UW_HMB-GSO',
    'dbGaP_AnVIL_phs000693_c4_MendelianGenomics_UW_DS-SL_AN-NPU',
    'dbGaP_AnVIL_phs000693_c5_MendelianGenomics_UW_DS-SASC',
    'dbGaP_AnVIL_phs000693_c6_MendelianGenomics_UW_HMB-NPU',
    'dbGaP_AnVIL_phs000693_c7_MendelianGenomics_UW_DS-SKDYS',
    'dbGaP_AnVIL_phs000693_c8_MendelianGenomics_UW_DS-BAV-IRB-PU',
    'dbGaP_AnVIL_phs000693_c9_MendelianGenomics_UW_HMB',
    'dbGaP_AnVIL_phs000693_c10_MendelianGenomics_UW_DS-BAB',
    'dbGaP_AnVIL_phs000693_c11_MendelianGenomics_UW_DS-SKDYS-RD',
    'dbGaP_AnVIL_phs000693_c12_MendelianGenomics_UW_DS-HSS',
    'dbGaP_AnVIL_phs000693_c13_MendelianGenomics_UW_DS-NDRD',
    'dbGaP_AnVIL_phs000693_c14_MendelianGenomics_UW_DS-PRO',
    'dbGaP_AnVIL_phs000693_c15_MendelianGenomics_UW_DS-GIDIS',
    'dbGaP_AnVIL_phs000693_c16_MendelianGenomics_UW_DS-RCTD',
    'dbGaP_AnVIL_phs000693_c17_MendelianGenomics_UW_DS-HECD',
    'dbGaP_AnVIL_phs000693_c18_MendelianGenomics_UW_DS-LPARS-IRB',
    'dbGaP_AnVIL_phs000693_c19_MendelianGenomics_UW_DS-IPF',
    'dbGaP_AnVIL_phs000693_c20_MendelianGenomics_UW_DS-LIVD',
    'dbGaP_AnVIL_phs000693_c21_MendelianGenomics_UW_DS-HFA',
    'dbGaP_AnVIL_phs000693_c22_MendelianGenomics_UW_DS-IF',
    'dbGaP_AnVIL_phs000693_c23_MendelianGenomics_UW_DS-TC-IRB',
    'dbGaP_AnVIL_phs000693_c24_MendelianGenomics_UW_DS-PLGD-NPU',
    'dbGaP_AnVIL_phs000693_c25_MendelianGenomics_UW_DS-LD',
    'dbGaP_AnVIL_phs000693_c26_MendelianGenomics_UW_DS-TRIC',
    'dbGaP_AnVIL_phs000693_c27_MendelianGenomics_UW_DS-NBIA',
    'dbGaP_AnVIL_phs000693_c28_MendelianGenomics_UW_HMB-IRB',
    'dbGaP_AnVIL_phs000693_c29_MendelianGenomics_UW_DS-CHD-IRB',
    'dbGaP_AnVIL_phs000693_c30_MendelianGenomics_UW_GRU-IRB',
    'dbGaP_AnVIL_phs000693_c31_MendelianGenomics_UW_DS-CHC',
    'dbGaP_AnVIL_phs000693_c32_MendelianGenomics_UW_DS-SHFM',
    'dbGaP_AnVIL_phs000693_c33_MendelianGenomics_UW_DS-CERD',
    'dbGaP_AnVIL_phs000693_c34_MendelianGenomics_UW_DS-BDIS',
    'dbGaP_AnVIL_phs000693_c35_MendelianGenomics_UW_DS-CHDEF',
    'dbGaP_AnVIL_phs000693_c36_MendelianGenomics_UW_DS-LYD',
    'dbGaP_AnVIL_phs000744_c1_MendelianGenomics_Ya_GRU',
    'dbGaP_AnVIL_phs001487_c1_CCDG_TAICHI_DS-MULTIPLE_D',
    'dbGaP_AnVIL_phs001272_c1_BroadCMG_GRU',
    'dbGaP_AnVIL_phs001272_c2_BroadCMG_DS-KRD-RD',
    'dbGaP_AnVIL_phs001272_c3_BroadCMG_HMB-MDS',
    'dbGaP_AnVIL_phs001272_c4_BroadCMG_DS-NIC-EMP-LE',
    'dbGaP_AnVIL_phs001259_c1_VIRGO_AMI_DS-CARD-MDS-G',
    'dbGaP_AnVIL_phs001740_c2_CCDG_SAGE_DS-ASD-RD-IRB',
    'dbGaP_AnVIL_phs001227_c1_CAD_WU_DS-ATHSCL-IRB',
    'dbGaP_AnVIL_phs001227_c2_CAD_WU_GRU-IRB',
    'dbGaP_AnVIL_phs001880_c1_CCDG_Emory_GRU-NPU',
    'dbGaP_AnVIL_phs001871_c1_CCDG_CCC_DS-CAD-IRB',
    'dbGaP_AnVIL_phs001741_c2_CCDG_TASC_DS-ASD-IRB',
    'dbGaP_AnVIL_phs001894_c2_CCDG_HMCA_DS-EAC-PUB-GS',
    'dbGaP_AnVIL_phs002004_c1_CCDG_CAG_DS-AUT',
    'dbGaP_AnVIL_phs001676_c2_CCDG_Autism_SSC_DS-AONDD-IRB',
    'dbGaP_AnVIL_phs001766_c1_CCDG_AGRE_DS-ASD-IRB',
    'dbGaP_AnVIL_phs002042_c1_CCDG_ACEII_GRU',
    'dbGaP_AnVIL_phs002042_c2_CCDG_ACEII_DS-ASD',
    'dbGaP_AnVIL_phs002043_c1_CCDG_NeurogeneticsFe_GRU',
    'dbGaP_AnVIL_phs002043_c2_CCDG_NeurogeneticsFe_DS-AASD',
    'dbGaP_AnVIL_phs002044_c1_CCDG_CATS_DS-ASD-IRB',
    'dbGaP_AnVIL_phs001222_c1_CCDG_T1DGC_DS-DRC-IRB-NP',
    'dbGaP_AnVIL_phs001642_c1_CCDG_IBD_GRU',
    'dbGaP_AnVIL_phs001642_c2_CCDG_IBD_HMB',
    'dbGaP_AnVIL_phs001642_c3_CCDG_IBD_DS-IBD',
    'dbGaP_AnVIL_phs001642_c4_CCDG_IBD_DS-GID',
    'dbGaP_AnVIL_phs001913_c1_CCDG_eMERGE_Northwes_GRU-IRB',
    'dbGaP_AnVIL_phs000235_c1_NCI_CGCI_CMR',
    'dbGaP_AnVIL_phs000235_c2_NCI_CGCI_PC',
    'dbGaP_AnVIL_phs000235_c1_CGCI_DS-CA-MDS',
    'dbGaP_AnVIL_phs000235_c2_CGCI_PCR',
    'dbGaP_AnVIL_phs000235_c1_CGCI_V4_CRGM',
    'dbGaP_AnVIL_phs000235_c2_CGCI_V4_PCR',
    'dbGaP_AnVIL_phs000235_c1_TP_CGCI_DS-CA-MDS',
    'dbGaP_AnVIL_phs000235_c2_TP_CGCI_PCR',
    'dbGaP_AnVIL_phs000235_c3_TP_CGCI_GRU',
    'dbGaP_AnVIL_phs000235_c4_TP_CGCI_GRU-IRB',
    'dbGaP_AnVIL_phs000971_c1_ClinSeq_GRU',
    'dbGaP_AnVIL_phs001011_c1_eMERGE_CCHMC_IIIA_GRU-IRB-NPU',
    'dbGaP_AnVIL_phs001395_c1_TOPMed_WGS_HCHS_SOL_HMB-NPU',
    'dbGaP_AnVIL_phs001395_c2_TOPMed_WGS_HCHS_SOL_HMB',
    'dbGaP_AnVIL_phs001489_c1_CCDG_Epi25_DS-EPSBAID-MD',
    'dbGaP_AnVIL_phs001489_c2_CCDG_Epi25_DS-EPSBA-MDS-',
    'dbGaP_AnVIL_phs001489_c3_CCDG_Epi25_DS-EPSBACID-M',
    'dbGaP_AnVIL_phs001489_c4_CCDG_Epi25_DS-EPCOM-MDS-',
    'dbGaP_AnVIL_phs001489_c5_CCDG_Epi25_DS-EPSBACID-N',
    'dbGaP_AnVIL_phs001489_c6_CCDG_Epi25_DS-EPI-MULTI-',
    'dbGaP_AnVIL_phs001489_c7_CCDG_Epi25_DS-EPASM-MDS',
    'dbGaP_AnVIL_phs001489_c8_CCDG_Epi25_HMB-NPU-MDS',
    'dbGaP_AnVIL_phs001489_c9_CCDG_Epi25_DS-EPASM-MDS-',
    'dbGaP_AnVIL_phs001489_c10_CCDG_Epi25_DS-EP',
    'dbGaP_AnVIL_phs001489_c11_CCDG_Epi25_HMB-MDS',
    'dbGaP_AnVIL_phs001489_c12_CCDG_Epi25_GRU-IRB',
    'dbGaP_AnVIL_phs001489_c13_CCDG_Epi25_GRU',
    'dbGaP_AnVIL_phs001489_c14_CCDG_Epi25_DS-CARNEU-MDS',
    'dbGaP_AnVIL_phs001489_c15_CCDG_Epi25_DS-SEIZD',
    'dbGaP_AnVIL_phs001489_c16_CCDG_Epi25_DS-EP-MDS',
    'dbGaP_AnVIL_phs001489_c17_CCDG_Epi25_DS-EP-NPU',
    'dbGaP_AnVIL_phs001489_c18_CCDG_Epi25_EPILEPSY_MULT',
    'dbGaP_AnVIL_phs001489_c19_CCDG_Epi25_HMB',
    'dbGaP_AnVIL_phs001489_c20_CCDG_Epi25_DS-EPI-ADULT-',
    'dbGaP_AnVIL_phs001489_c21_CCDG_Epi25_GRU-NPU',
    'dbGaP_AnVIL_phs001489_c22_CCDG_Epi25_DS-EAED-MDS',
    'dbGaP_AnVIL_phs001489_c23_CCDG_Epi25_DS-NEUROLOGY-',
    'dbGaP_AnVIL_phs001489_c24_CCDG_Epi25_DS-EARET-MDS',
    'dbGaP_AnVIL_phs001489_c25_CCDG_Epi25_DS-NPD-IRB-NP',
    'dbGaP_AnVIL_phs001489_c26_CCDG_Epi25_DS-NEUROLOGY-',
    'dbGaP_AnVIL_phs001489_c27_CCDG_Epi25_HMB-IRB-MDS',
    'dbGaP_AnVIL_phs001592_c1_CCDG_ATVB_DS-CVD',
    'dbGaP_AnVIL_phs001901_c1_CCDG_CoronaryPronePe_DS-CVD-MDS',
    'dbGaP_AnVIL_phs002018_c1_CCDG_PartnersBiobank_HMB-MDS',
    'dbGaP_AnVIL_phs002032_c1_NeuronalPhenotypes_GRU',
    'dbGaP_AnVIL_phs002032_c2_NeuronalPhenotypes_DS-SMA-MDS',
    'dbGaP_AnVIL_phs002041_c1_Schizophrenia_Bipola_GRU',
    'dbGaP_AnVIL_phs002041_c2_Schizophrenia_Bipola_DS-MLHLTH-MDS',
    'dbGaP_AnVIL_phs002041_c3_Schizophrenia_Bipola_DS-SZRD-MDS',
    'dbGaP_AnVIL_phs002205_c1_CCDG_IBD_GMbC_Exomes_GRU',
    'dbGaP_AnVIL_phs002205_c2_CCDG_IBD_GMbC_Exomes_GRU-NPU',
    'dbGaP_AnVIL_phs002206_c1_GenomicAnswersforKid_DS-PEDD-IRB',
    'dbGaP_AnVIL_phs002242_c1_CCDG_SWISS_AF_DS-CCSD-NPU-M',
    'dbGaP_AnVIL_phs002243_c1_CCDG_PEGASUS_TIMI_54_HMB-MDS',
    'dbGaP_AnVIL_phs002324_c1_CSER_DiagnosisPediat_HMB',
    'dbGaP_AnVIL_phs002324_c2_CSER_DiagnosisPediat_BD-GC',
    'dbGaP_AnVIL_phs001398_c1_CCDG_BRAVE_GRU',
    'dbGaP_AnVIL_phs002509_c1_CCDG_GASD_GRU-NPU',
    'dbGaP_AnVIL_phs002511_c1_CCDG_SPARK_SFARI_GRU-IRB-PUB',
    'dbGaP_AnVIL_phs001746_c1_GTEx_VMRs_GRU',
    'dbGaP_AnVIL_phs002245_c1_COVID19_Susceptibili_GRU',
    'dbGaP_AnVIL_phs002282_c1_CCDG_GeneDiet_CostaR_DS-CVDRF',
    'dbGaP_AnVIL_phs002325_c1_CCDG_CardiovascularB_DS-HBVD',
    'dbGaP_AnVIL_phs002512_c1_CCDG_SimonsSearchlig_DS-AUT-IRB-RD',
    'dbGaP_AnVIL_phs002307_c1_CSER_SouthSeq_Newbor_GRU',
    'dbGaP_AnVIL_phs000920_c2_TOPMed_WGS_GALAII_DS-LD-IRB-COL',
    'dbGaP_AnVIL_phs000925_c1_PAGE_IPM_BioMe_GRU',
    'dbGaP_AnVIL_phs001033_c1_PAGE_GlobalRP_GRU',
    'dbGaP_AnVIL_phs001211_c1_TOPMed_WGS_ARIC_HMB-IRB',
    'dbGaP_AnVIL_phs001211_c2_TOPMed_WGS_ARIC_DS-CVD-IRB',
    'dbGaP_AnVIL_phs001798_c1_AASP_HGV',
    'dbGaP_AnVIL_phs002502_c1_CCDG_ASD_GRU',
    'dbGaP_AnVIL_phs002502_c2_CCDG_ASD_DS-ASD',
    'dbGaP_AnVIL_phs002502_c10_CCDG_ASD_DS-ASD-RD',
    'dbGaP_AnVIL_phs002502_c11_CCDG_ASD_DS-RARED',
    'dbGaP_AnVIL_phs002502_c12_CCDG_ASD_DS-NDEVRD',
    'dbGaP_AnVIL_phs002502_c3_CCDG_ASD_HMB-MDS',
    'dbGaP_AnVIL_phs002502_c4_CCDG_ASD_GRU-NPU',
    'dbGaP_AnVIL_phs002502_c5_CCDG_ASD_HMB-NPU',
    'dbGaP_AnVIL_phs002502_c6_CCDG_ASD_DS-ASD-NPU',
    'dbGaP_AnVIL_phs002502_c7_CCDG_ASD_DS-ASD-NPU-MD',
    'dbGaP_AnVIL_phs002502_c8_CCDG_ASD_DS-ASD-MDS',
    'dbGaP_AnVIL_phs002502_c9_CCDG_ASD_DS-MHNR-NPU-M',
    'dbGaP_AnVIL_phs000220_c1_PAGE_MEC_CRM',
    'dbGaP_AnVIL_phs000220_c2_PAGE_MEC_GRU',
    'dbGaP_AnVIL_phs002726_c1_CCDG_CardiologyBioba_HMB-MDS',
    'dbGaP_AnVIL_phs002774_c1_CCDG_ENGAGE_AF_TIMI4_DS-CCSD-MDS',
    'dbGaP_AnVIL_phs002774_c2_CCDG_ENGAGE_AF_TIMI4_DS-BBFOD-MDS',
    'dbGaP_AnVIL_phs002235_c1_GMbC_Microbiome_GRU-NPU',
    'dbGaP_AnVIL_phs001300_c1_NABEC_LR_WGS_GRU',
    'dbGaP_AnVIL_phs002110_c1_CSER_NCGENES2_GRU',
    'dbGaP_AnVIL_phs002378_c1_CSER_KidsCanSeq_GRU',
    'dbGaP_AnVIL_phs002111_c1_CSER_CHARM_GRU',
    'dbGaP_AnVIL_phs000971_c1_CSER_ClinSeq_GRU',
    'dbGaP_AnVIL_phs003184_c1_ALS_Compute_Collecti_GRU',
    'dbGaP_AnVIL_phs001585_c1_ALS_FTD_GRU',
    'dbGaP_AnVIL_phs002337_c1_CSER_NYCKIDSEQ_HMB',
    'dbGaP_AnVIL_phs002337_c2_CSER_NYCKIDSEQ_GRU',
    'dbGaP_AnVIL_phs003200_c1_MAS_ISO_seq_DS-MSC-MDS',
    'dbGaP_AnVIL_phs002502_c13_CCDG_ASD_DS-MLHLTH-IRB',
    'dbGaP_AnVIL_phs002502_c14_CCDG_ASD_DS-MBND-MDS',
    'dbGaP_AnVIL_phs001642_c5_CCDG_IBD_HMB-IRB-MDS',
    'dbGaP_AnVIL_phs001642_c6_CCDG_IBD_DS-IBD-MDS',
    'dbGaP_AnVIL_phs001642_c7_CCDG_IBD_HMB-MDS',
    'dbGaP_AnVIL_phs001642_c8_CCDG_IBD_DS-DSDI-MDS',
    'dbGaP_AnVIL_phs001642_c9_CCDG_IBD_DS-GR-IRB-MDS',
    'dbGaP_AnVIL_phs002242_c1_CCDG_SWISS_AF_DS-DCCA-NPU-M',
    'dbGaP_AnVIL_phs002324_c3_CSER_P3EGS_GRU',
    'dbGaP_AnVIL_phs003047_c1_GREGoR_HMB',
    'dbGaP_AnVIL_phs003047_c2_GREGoR_GRU',
    'dbGaP_AnVIL_phs000744_c2_MendelianGenomics_Ya_HMB',
    'dbGaP_AnVIL_phs000744_c3_MendelianGenomics_Ya_DS-RARED',
    'dbGaP_AnVIL_phs000744_c4_MendelianGenomics_Ya_DS-GSD-GSO',
    'dbGaP_AnVIL_phs000744_c5_MendelianGenomics_Ya_DS-MC',
    'dbGaP_AnVIL_phs000744_c6_MendelianGenomics_Ya_HMB-GSO',
    'dbGaP_AnVIL_phs000744_c7_MendelianGenomics_Ya_DS-MCDMS',
    'dbGaP_AnVIL_phs000744_c8_MendelianGenomics_Ya_DS-GD',
    'dbGaP_AnVIL_phs000744_c9_MendelianGenomics_Ya_HMB-IRB',
    'dbGaP_AnVIL_phs000744_c10_MendelianGenomics_Ya_DS-THAL-IRB',
    'dbGaP_AnVIL_phs000744_c11_MendelianGenomics_Ya_DS-RD',
    'dbGaP_AnVIL_phs000744_c12_MendelianGenomics_Ya_DS-BPEAKD',
    'dbGaP_AnVIL_phs002041_c4_Schizophrenia_Bipola_DS-MBND-MDS',
    'dbGaP_AnVIL_phs002041_c5_Schizophrenia_Bipola_DS-SZ-MDS',
    'dbGaP_AnVIL_phs002041_c6_Schizophrenia_Bipola_HMB-MDS',
    'dbGaP_AnVIL_phs001963_c1_ALS_FTD_DementiaSeq_GRU',
    'dbGaP_AnVIL_phs001272_c5_BroadCMG_DS-BFD-MDS',
    'dbGaP_AnVIL_phs001272_c6_BroadCMG_DS-NEUROLOGY-',
    'dbGaP_AnVIL_phs001272_c7_BroadCMG_DS-CVD-MDS',
    'dbGaP_AnVIL_phs001272_c8_BroadCMG_GRU-IRB',
    'dbGaP_AnVIL_phs002032_c3_NeuronalPhenotypes_DS-MBND-MDS',
    'dbGaP_AnVIL_phs001489_c28_CCDG_Epi25_HMB-GSO',
    'dbGaP_AnVIL_phs001489_c29_CCDG_Epi25_DS-NSD-NPU-MD',
    'dbGaP_AnVIL_phs001489_c30_CCDG_Epi25_DS-NSD-ADULTS',
    'dbGaP_AnVIL_phs001489_c31_CCDG_Epi25_DS-NEURO-EP-M',
    'dbGaP_AnVIL_phs001489_c32_CCDG_Epi25_DS-MBND-NPU-M',
    'dbGaP_AnVIL_phs001489_c33_CCDG_Epi25_DS-EAED-IRB-N',
    'dbGaP_AnVIL_phs003444_c1_DepMap_HMB-MDS',
    'dbGaP_AnVIL_phs003537_c1_HudsonAlpha_GRU',
    'dbGaP_AnVIL_phs002041_c1_Schizophrenia_Bipola_DS-MLHLTH-MDS',
    'dbGaP_AnVIL_phs002041_c2_Schizophrenia_Bipola_DS-SZRD-MDS',
    'dbGaP_AnVIL_phs002041_c3_Schizophrenia_Bipola_GRU',
    'dbGaP_AnVIL_phs003184_c1_ALS_Compute_Collecti_HMB',
]

# Specify whether the All DUOS Registrations Report should be run:
run_duos_registrations_rpt = True

# Specify the token that should be used for accessing DUOS production for the All DUOS Registrations Report
# (you can use gcloud auth print-access-token on the command line to get this).
# NOTE(review): clear this value again before saving or sharing the notebook so the token is not persisted.
duos_user_token = ""

# Specify whether the All Dataset Tickets Report should be run:
run_dataset_tickets_rpt = True

# Specify the user and api_key to use for accessing Jira for the All Dataset Tickets Report.
# NOTE(review): `user` defaults to a specific person's address, so change it to your own Jira user,
# and clear api_key again before saving or sharing the notebook.
user = "ncalvane@broadinstitute.org"
api_key = ""


#############################################
## Execution
#############################################

# Collects the [group, auth domain] pairs returned by the workspace and snapshot reports;
# it stays empty when neither of those reports is run.
tele_ad_map = []
if run_ws_access_rpt:
    ws_output = gen_workspace_report(workspace_list)
    tele_ad_map.extend(ws_output)
if run_ss_access_rpt:
    ss_output = gen_snapshot_report(snapshot_id_list)
    tele_ad_map.extend(ss_output)
# The telemetry group report runs after the two access reports because it consumes tele_ad_map.
if run_dbgap_telemetry_group_rpt:
    gen_dbgap_report(dbgap_telemetry_group_list, tele_ad_map)  
if run_duos_registrations_rpt:
    gen_duos_report(duos_user_token)
if run_dataset_tickets_rpt:
    gen_jira_datasets_report(user, api_key)


## dbGaP Reports
Run a report that will fetch and parse the dbGaP XML into a more user-friendly view, for the attributes that are currently of interest to our team (the PHS ID, Study Name, Consent Codes, presence of AnVIL as a trusted partner, and dbGaP Status). This report can be run on either a specified set of PHS IDs, or on all PHS IDs present in dbGaP.

In [None]:
#############################################
## Functions
#############################################

def format_phs_id(input_str):
    """Normalize a user-supplied PHS identifier to the canonical "phsNNNNNN" form.

    A number that directly follows a "phs" prefix (case-insensitive) takes precedence; when no
    such prefix is present, the first run of digits in the input is used. Leading zeros are
    dropped and the number is then left-padded with zeros to six digits.

    Args:
        input_str: The identifier in any format, e.g. "phs000298.v4.p3", "PHS298", "298" or
            the integer 298. Non-string values are converted with str().

    Returns:
        The normalized identifier (e.g. "phs000298"), or "" when the input is None or
        contains no digits.
    """
    if input_str is None:
        return ""
    text = str(input_str)
    # Prefer an explicitly prefixed number so that earlier digits (e.g. "v2 phs001234") are
    # not mistaken for the study number; fall back to the first digit run otherwise.
    match = re.search(r"phs0*([0-9]+)", text, re.IGNORECASE) or re.search(r"0*([0-9]+)", text)
    if not match:
        return ""
    return "phs" + match.group(1).zfill(6)

# Column layout shared by every row parse_dbgap_xml emits and by the final report DataFrame.
_DBGAP_REPORT_COLUMNS = ["user_input", "phs_id", "accession", "study_name", "consent_idx", "consent_code", "dac_id", "dac_name", "anvil_trusted_partner", "dbgap_status", "retrieval_status", "errors"]
_DBGAP_RETRIEVAL_STATUS_IDX = _DBGAP_REPORT_COLUMNS.index("retrieval_status")
_DBGAP_STUDY_URL = "https://dbgap.ncbi.nlm.nih.gov/ss/dbgapssws.cgi?request=Study&phs="


def _dbgap_failure_row(user_input, phs_id, message):
    """Build a report row (one value per report column) flagged as a retrieval failure."""
    return [user_input, phs_id, "", "", "", "", "", "", "", "", "Failure", message]


def _dbgap_child(node, key):
    """Return node[key] when node is a dict that has the key, otherwise None."""
    return node.get(key) if isinstance(node, dict) else None


def _dbgap_as_list(value):
    """Wrap a single parsed XML element in a list; None becomes an empty list."""
    if value is None:
        return []
    return value if isinstance(value, list) else [value]


def _fetch_dbgap_study_xml(phs_id, max_attempts=3):
    """Download and parse the dbGaP study XML for phs_id; returns None when that is not possible."""
    from xml.parsers.expat import ExpatError  # error type raised for malformed XML

    if not phs_id:
        # Nothing sensible to request for an input that held no PHS number.
        return None
    for _ in range(max_attempts):
        try:
            response = requests.get(_DBGAP_STUDY_URL + phs_id[3:])
        except requests.exceptions.RequestException:
            continue
        try:
            return xmltodict.parse(response.content)
        except ExpatError:
            return None
    return None


def parse_dbgap_xml(study_dict, user_input, phs_id, limit_to_latest_version):
    """Flatten a parsed dbGaP study document into report rows.

    Args:
        study_dict: The dbGaP XML converted to nested dicts (as produced by xmltodict), or
            None/any other value when no valid document could be retrieved.
        user_input: The identifier exactly as the user supplied it (reported as-is).
        phs_id: The normalized PHS identifier (reported as-is).
        limit_to_latest_version: When truthy, only the first Study element of the document
            is reported (the first element is presumably the latest version - TODO confirm).

    Returns:
        A list of rows, each holding one value per report column: user_input, phs_id,
        accession, study_name, consent_idx, consent_code, dac_id, dac_name,
        anvil_trusted_partner, dbgap_status, retrieval_status, errors. A study yields one row
        per consent group (consent_idx starting at 1), or a single row with consent_idx 0 when
        it has none. When the document holds no Study element, a single row whose
        retrieval_status is "Failure" is returned. The input dict is not modified.
    """
    try:
        studies = study_dict["dbgapss"]["Study"]
    except (KeyError, TypeError):
        return [_dbgap_failure_row(user_input, phs_id, "Error retrieving valid dbGaP XML.")]

    parsed_results = []
    for study in _dbgap_as_list(studies) or [None]:

        # Pull base information
        try:
            accession = study["StudyInfo"]["@accession"]
            name = study["StudyInfo"]["StudyNameEntrez"]
            status = study["Status"]["@title"]
        except (KeyError, TypeError):
            parsed_results.append(_dbgap_failure_row(user_input, phs_id, "Error parsing dbGaP study record."))
            if limit_to_latest_version:
                break
            continue

        policy = _dbgap_child(study, "Policy")

        # Pull trusted partner information, if available
        trusted_partners = _dbgap_as_list(_dbgap_child(_dbgap_child(policy, "TrustedPartners"), "TrustedPartner"))
        anvil_trusted_partner = any(_dbgap_child(tp, "@trp_db_name") == "AnVIL" for tp in trusted_partners)

        # Pull consent code and DAC information, if available. A consent group that lacks DAC
        # attributes is still reported, with None in the DAC columns.
        consent_groups = [cg for cg in _dbgap_as_list(_dbgap_child(policy, "ConsentGroup")) if isinstance(cg, dict)]
        if consent_groups:
            for idx, consent in enumerate(consent_groups, start=1):
                parsed_results.append([user_input, phs_id, accession, name, idx, consent.get("@name"), consent.get("@dac_uid"), consent.get("@dac_name"), anvil_trusted_partner, status, "Success", ""])
        else:
            parsed_results.append([user_input, phs_id, accession, name, 0, None, None, None, anvil_trusted_partner, status, "Success", ""])

        # Break loop if only latest version is of interest
        if limit_to_latest_version:
            break
    return parsed_results


def gen_dbgap_report(phs_list):
    """Fetch dbGaP study metadata and display it as a report.

    NOTE: whether every version of a study is reported is controlled by the notebook-level
    variable limit_to_latest_version, which must be defined before this function is called.

    Args:
        phs_list: PHS identifiers in any format accepted by format_phs_id. When empty, every
            PHS number starting from 1 is probed, stopping once 100 sequential numbers have
            yielded no valid study.

    Returns:
        None. The results are rendered with display() as a DataFrame.
    """
    results = []
    if phs_list:
        for phs in phs_list:
            # Retrieve dbGaP XML if exists, and parse out information of interest
            phs_id = format_phs_id(phs)
            print(f"Processing input '{phs}' ({phs_id})")
            study_dict = _fetch_dbgap_study_xml(phs_id)
            results.extend(parse_dbgap_xml(study_dict, phs, phs_id, limit_to_latest_version))
    else:
        max_sequential_misses = 100
        sequential_misses = 0
        print("Processing all existing dbGaP studies. Note that this can take upwards of 45 minutes to run. To collect information on specific studies, list the studies of interest in the phs_list input parameter.")
        print(f"Start time: {datetime.datetime.now()}")
        # Loop through possible phs IDs, only stopping when 100 sequential IDs yield no results
        for phs_number in range(1, 1000000):
            if sequential_misses >= max_sequential_misses:
                break

            # Retrieve dbGaP XML if exists, and parse out information of interest
            phs = str(phs_number).zfill(6)
            phs_id = format_phs_id(phs)
            study_dict = _fetch_dbgap_study_xml(phs_id)
            output = parse_dbgap_xml(study_dict, phs, phs_id, limit_to_latest_version)

            # If no valid results return, increment the miss count, otherwise, reset to zero
            if len(output) == 1 and output[0][_DBGAP_RETRIEVAL_STATUS_IDX] == "Failure":
                sequential_misses += 1
            else:
                sequential_misses = 0
                results.extend(output)
        print(f"End time: {datetime.datetime.now()}")

    # Display results
    print("\ndbGaP Report Results: ")
    results_df = pd.DataFrame(results, columns = _DBGAP_REPORT_COLUMNS)
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify whether only the latest version of the study should be included in the report.
# gen_dbgap_report reads this variable as a global rather than receiving it as an argument:
limit_to_latest_version = True

# Specify the list of PHS numbers (in any format) to generate the report for, or leave empty to generate for all.
# Generating for all studies can take upwards of 45 minutes:
phs_list = [
]

#############################################
## Execution
#############################################

gen_dbgap_report(phs_list)


## Bulk Manage AnVIL Resource Locks
List the workspaces and/or snapshots of interest and whether this script should attempt to "LOCK" or "UNLOCK" them.

In [None]:
#############################################
## Functions
#############################################

def update_workspace_lock_status(action, workspace_list):
    """Lock or unlock each listed workspace and display a per-workspace outcome table.

    Args:
        action: Either "LOCK" or "UNLOCK"; any other value produces a single failure row and
            no workspace is touched.
        workspace_list: Dicts carrying a "cloud_provider" ('gcp' or 'azure') and a
            "workspace_name". The provider selects the billing project the workspace is
            looked up in (anvil-datastorage for gcp, AnVILDataStorage_Azure for azure).

    Returns:
        None. The results are rendered with display() as a DataFrame with the columns
        "workspace", "action", "status" and "errors".
    """
    outcome_rows = []
    # Validate action
    print(f"Validating provided action: {action}")
    if action in ["LOCK", "UNLOCK"]:
        # The API path segment is the lower-cased action name
        endpoint_verb = action.lower()
        supported_clouds = ['gcp', 'azure']
        for ws_entry in workspace_list:

            # Initialize
            provider = ws_entry["cloud_provider"]
            ws_name = ws_entry["workspace_name"]
            print(f"Updating workspace lock status for {ws_name}.")
            if provider not in supported_clouds:
                supported_str = ', '.join(supported_clouds)
                outcome_rows.append([ws_name, action, "Failure", f"Cloud provider must be one of {supported_str}. Cloud provider provided was {provider}. Aborting."])
                continue
            if provider == 'gcp':
                billing_project = 'anvil-datastorage'
            else:
                billing_project = 'AnVILDataStorage_Azure'

            # Establish credentials (refreshed for every workspace)
            creds, project = google.auth.default()
            auth_req = google.auth.transport.requests.Request()
            creds.refresh(auth_req)

            # Change workspace lock status and record the outcome
            response = requests.put(
                url=f"https://api.firecloud.org/api/workspaces/{billing_project}/{ws_name}/{endpoint_verb}",
                headers={"Authorization": f"Bearer {creds.token}"}
            )
            if response.status_code in [200, 204]:
                outcome_rows.append([ws_name, action, "Success", ""])
            else:
                outcome_rows.append([ws_name, action, "Failure", "Error updating workspace lock status."])
    else:
        outcome_rows.append(["ALL", action, "Failure", "Invalid action specified. Must be LOCK or UNLOCK."])

    # Display results
    print("\nResults:")
    results_df = pd.DataFrame(outcome_rows, columns = ["workspace", "action", "status", "errors"])
    display(results_df)
    
def update_snapshot_lock_status(action, snapshot_id_list):
    """Lock or unlock a batch of snapshots and display a per-snapshot results table.

    For UNLOCK, the snapshot is fetched first to read its exclusive lock name; if
    no exclusive lock is present the snapshot is reported as a success with an
    explanatory note and no unlock request is sent.

    Args:
        action: "LOCK" or "UNLOCK" (case-sensitive). Any other value produces a
            single "ALL" failure row and no requests are sent.
        snapshot_id_list: List of snapshot ID strings.

    Returns:
        None. A DataFrame with columns snapshot, action, status, and errors is
        rendered with display().
    """
    snapshots_url = "https://data.terra.bio/api/repository/v1/snapshots"
    results = []
    # Validate action
    print(f"Validating provided action: {action}")
    if action not in ["LOCK", "UNLOCK"]:
        results.append(["ALL", action, "Failure", "Invalid action specified. Must be LOCK or UNLOCK."])
    else:
        # Loop through and process snapshots
        act = action.lower()
        for snapshot_id in snapshot_id_list:

            # Initialize
            print(f"Updating snapshot lock status for snapshot: {snapshot_id}.")
            error_str = ""
            note = ""
            # Prefix used if an exception is raised; switched while the lock is being looked up
            failure_prefix = "Error updating snapshot lock status"

            try:
                # Establish credentials
                creds, _ = google.auth.default()
                creds.refresh(google.auth.transport.requests.Request())
                headers = {"Authorization": f"Bearer {creds.token}"}

                if act == "lock":
                    # Lock snapshot
                    response = requests.put(url=f"{snapshots_url}/{snapshot_id}/lock", headers=headers)
                    # An error response can still carry a JSON body, so the status code
                    # (not successful JSON parsing) decides success
                    if not 200 <= response.status_code < 300:
                        error_str = f"{failure_prefix} ({response.status_code}: {response.text})"
                else:
                    # Fetch exclusive lock from snapshot (if there is one)
                    failure_prefix = "Error retrieving lock on snapshot"
                    response = requests.get(url=f"{snapshots_url}/{snapshot_id}", headers=headers)
                    if not 200 <= response.status_code < 300:
                        error_str = f"{failure_prefix} ({response.status_code}: {response.text})"
                    else:
                        lock_name = response.json()["resourceLocks"].get("exclusive")
                        if lock_name:
                            # Unlock snapshot (if locked)
                            failure_prefix = "Error updating snapshot lock status"
                            request_body = {"lockName": lock_name, "forceUnlock": False}
                            response = requests.put(
                                url=f"{snapshots_url}/{snapshot_id}/unlock",
                                headers=headers,
                                json=request_body
                            )
                            if not 200 <= response.status_code < 300:
                                error_str = f"{failure_prefix} ({response.status_code}: {response.text})"
                        else:
                            note = "No existing lock found on snapshot."
            except Exception as e:
                error_str = f"{failure_prefix}: {str(e)}"

            # Record status
            if error_str:
                print(error_str)
                results.append([snapshot_id, action, "Failure", error_str])
            else:
                results.append([snapshot_id, action, "Success", note])

    # Display results
    print("\nResults:")
    results_df = pd.DataFrame(results, columns = ["snapshot", "action", "status", "errors"])
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify the action to apply to the workspaces and/or snapshots (LOCK/UNLOCK):
action = "LOCK"

# Specify the list of workspaces to apply the action to:
workspace_list = [
#    {"workspace_name": "workspace name", "cloud_provider": "Cloud provider - gcp/azure"    }
    {"workspace_name": "AnVIL_TEST_WORKSPACE_AZ", "cloud_provider": "azure"},
    {"workspace_name": "AnVIL_TEST_WORKSPACE_GCP", "cloud_provider": "gcp"}
]

# Specify the list of snapshots to apply the action to:
snapshot_id_list = [
    "snapshot_id",
]

#############################################
## Execution
#############################################

# Each resource type is processed only when its input list is non-empty
if workspace_list:
    print("Processing provided action for specified workspaces...")
    update_workspace_lock_status(action, workspace_list)
if snapshot_id_list:
    print("Processing provided action for specified snapshots...")
    update_snapshot_lock_status(action, snapshot_id_list)


## Update AnVIL Resource Permissions for Release
Prior to the formal release of a workspace or snapshot, permissions need to be updated so that no user retains elevated permissions unnecessarily. This script will do the following:
* Confirm that anvil-admins is the only owner/steward on the workspace or snapshot. 
* Confirm that any auth domain group on the workspace or snapshot is a reader on that resource.
* Confirm all other users on the workspace are readers without share or compute permissions. 
* Confirm that anvil-admins is the only admin in the auth domain group. 
* Confirm that anvil_devs is a member in the auth domain group. 
* Confirm that the cloud bucket is set to requester pays (for workspaces only).

In [None]:
#############################################
## Functions
#############################################

def update_workspace_permissions_for_release(workspace_list):
    """Tighten workspace and auth domain permissions ahead of release and display the results.

    For each workspace this:
      * sets anvil-admins as OWNER (with share and compute),
      * removes the calling user from the workspace if they appear in its ACL,
      * reduces every other ACL entry (except anvil-admins and
        public-workspace-creators) to READER without share or compute,
      * adds each authorization domain group as READER if it is not already in the ACL,
      * makes anvil-admins the only admin of each authorization domain group
        (other admins are re-added as members) and adds anvil_devs as a member,
      * enables requester pays for GCP workspaces.

    Args:
        workspace_list: List of dicts, each with a "workspace_name" key and a
            "cloud_provider" key ("gcp" or "azure").

    Returns:
        None. A DataFrame with columns workspace, update_status, and errors is
        rendered with display().
    """
    # Set validation values
    acceptable_cloud_types = ['gcp', 'azure']
    anvil_admins = "anvil-admins@firecloud.org"
    anvil_devs = "anvil_devs@firecloud.org"

    def _patch_acl(acl_url, headers, email, access_level, can_share, can_compute, error_list):
        """Apply a single ACL entry to the workspace, recording any failure in error_list."""
        response = requests.patch(
            url=acl_url,
            headers=headers,
            json=[{
                "email": email,
                "accessLevel": access_level,
                "canShare": can_share,
                "canCompute": can_compute
            }]
        )
        if response.status_code != 200:
            error_list.append(f"Error updating workspace ACL ({response.text})")

    # Loop through and process workspaces
    results = []
    for workspace_entry in workspace_list:

        # Initialize
        workspace = workspace_entry['workspace_name']
        print(f"Processing updates for {workspace}.")
        cloud_provider = workspace_entry['cloud_provider']
        if cloud_provider not in acceptable_cloud_types:
            cloud_types = ', '.join(acceptable_cloud_types)
            results.append(
                [workspace, "Failure", f"Cloud provider must be one of {cloud_types}. Cloud provider provided was {cloud_provider}. Aborting permission update."]
            )
            continue
        billing_project = 'anvil-datastorage' if cloud_provider == 'gcp' else 'AnVILDataStorage_Azure'
        error_list = []

        # Establish credentials
        creds, _ = google.auth.default()
        creds.refresh(google.auth.transport.requests.Request())
        headers = {"Authorization": f"Bearer {creds.token}"}
        workspace_url = f"https://api.firecloud.org/api/workspaces/{billing_project}/{workspace}"
        acl_url = f"{workspace_url}/acl"

        # Pull auth domains and workspace resource ID from workspace attributes.
        # The request and JSON parsing sit inside the try so an unreadable response
        # is recorded as an error instead of aborting the whole batch.
        ad_list = []
        resource_id = ""
        try:
            ws_attributes = requests.get(
                url=f"{workspace_url}?fields=workspace.attributes,workspace.authorizationDomain,workspace.googleProject,workspace.bucketName,workspace.workspaceId",
                headers=headers
            ).json()
            resource_id = ws_attributes["workspace"]["workspaceId"]
            for ad in ws_attributes["workspace"]["authorizationDomain"]:
                ad_list.append(ad["membersGroupName"])
        except Exception as e:
            error_list.append(f"Error accessing workspace ({str(e)})")

        # Ensure anvil-admins is an owner on the workspace
        _patch_acl(acl_url, headers, anvil_admins, "OWNER", True, True, error_list)

        # Pull existing workspace policies
        response = requests.get(url=acl_url, headers=headers)
        if response.status_code != 200:
            error_list.append(f"Error retrieving workspace ACL ({response.text})")
            ws_acl = {"acl": {}}
        else:
            ws_acl = response.json()

        # Look up the calling user's email once per workspace (only when there are
        # ACL entries to compare against) rather than once per ACL entry
        self_email = None
        if ws_acl["acl"]:
            try:
                self_email = requests.get(
                    url=f"https://sam.dsde-prod.broadinstitute.org/register/user/v2/self/info",
                    headers=headers
                ).json().get("userEmail")
            except Exception as e:
                error_list.append(f"Error retrieving calling user's information ({str(e)})")

        # Loop through workspace ACLs and reduce permissions where necessary
        user_processed_list = []
        for key, val in ws_acl["acl"].items():
            user_processed_list.append(key)
            # Determine if the user is yourself, and remove yourself if so
            if self_email is not None and key == self_email:
                response = requests.delete(
                    url=f"https://sam.dsde-prod.broadinstitute.org/api/resources/v2/workspace/{resource_id}/leave",
                    headers=headers
                )
                if response.status_code != 204:
                    error_list.append("Error updating workspace ACL")
            # Otherwise, reduce permissions for non anvil-admins users
            elif key not in [anvil_admins, "public-workspace-creators@firecloud.org"]:
                if val["accessLevel"] != "READER" or val["canCompute"] or val["canShare"]:
                    _patch_acl(acl_url, headers, key, "READER", False, False, error_list)

        # Add permissions for any listed auth domains not already processed
        for ad in ad_list:
            user = ad + "@firecloud.org"
            if user not in user_processed_list:
                _patch_acl(acl_url, headers, user, "READER", False, False, error_list)

        # Ensure anvil-admins and anvil_devs have appropriate roles in the auth domains
        for auth_domain in ad_list:
            group_url = f"https://api.firecloud.org/api/groups/{auth_domain}"
            try:
                ad_membership = requests.get(url=group_url, headers=headers).json()
                admin_emails = ad_membership["adminsEmails"]
                member_emails = ad_membership["membersEmails"]
                # Add anvil-admins as an admin if not already one
                if anvil_admins not in admin_emails:
                    response = requests.put(url=f"{group_url}/admin/{anvil_admins}", headers=headers)
                    if response.status_code != 204:
                        error_list.append(f"Error adding anvil-admins to auth domain {auth_domain} ({response.text})")
                # Reduce permissions for any other admins (remove from admin and re-add as member)
                for user in admin_emails:
                    if user != anvil_admins:
                        response = requests.delete(url=f"{group_url}/admin/{user}", headers=headers)
                        if response.status_code != 204:
                            error_list.append(f"Error removing user {user} as admin from auth domain ({auth_domain})")
                        response = requests.put(url=f"{group_url}/member/{user}", headers=headers)
                        if response.status_code != 204:
                            error_list.append(f"Error add user {user} as member on auth domain {auth_domain} ({response.text})")
                # Add anvil_devs as a member if not already one
                if anvil_devs not in admin_emails and anvil_devs not in member_emails:
                    response = requests.put(url=f"{group_url}/member/{anvil_devs}", headers=headers)
                    if response.status_code != 204:
                        error_list.append(f"Error adding anvil_devs to auth domain {auth_domain} ({response.text})")
            except Exception as e:
                # Report the exception itself; the most recent response may belong to an unrelated call
                error_list.append(f"Error processing auth domain {auth_domain} ({str(e)})")

        # Set GCP workspaces to requester pays
        if cloud_provider == "gcp":
            payload = [{
                "settingType": "GcpBucketRequesterPays",
                "config": {
                  "enabled": True
                }
            }]
            response = requests.put(
                url=f"https://api.firecloud.org/api/workspaces/v2/{billing_project}/{workspace}/settings",
                headers=headers,
                json=payload
            )
            if response.status_code != 200:
                error_list.append(f"Error setting workspace to requester pays ({response.text})")

        # Record status
        status = "Success" if not error_list else "Failure"
        error_str = "; ".join(error_list)
        results.append([workspace, status, error_str])

    # Display results
    print(f"\nResults:")
    results_df = pd.DataFrame(results, columns = ["workspace", "update_status", "errors"])
    display(results_df)

def update_snapshot_permissions_for_release(snapshot_id_list):
    """Tighten snapshot and auth domain permissions ahead of release and display the results.

    For each snapshot this:
      * adds anvil-admins to the steward policy,
      * if anvil-admins is then listed as a steward, removes every other steward
        and re-adds them to the reader policy when not already a reader,
      * adds each auth domain group to the reader policy,
      * makes anvil-admins the only admin of each auth domain group (other
        admins are re-added as members) and adds anvil_devs as a member.

    Args:
        snapshot_id_list: List of snapshot ID strings.

    Returns:
        None. A DataFrame with columns snapshot_id, update_status, and errors is
        rendered with display().
    """
    anvil_admins = "anvil-admins@firecloud.org"
    anvil_devs = "anvil_devs@firecloud.org"

    # Loop through and process snapshots
    results = []
    for snapshot_id in snapshot_id_list:

        # Initialize
        print(f"Processing updates for {snapshot_id}.")
        error_list = []

        # Establish credentials
        creds, _ = google.auth.default()
        creds.refresh(google.auth.transport.requests.Request())
        headers = {"Authorization": f"Bearer {creds.token}"}
        policies_url = f"https://data.terra.bio/api/repository/v1/snapshots/{snapshot_id}/policies"

        # Ensure anvil-admins is a steward on the snapshot
        response = requests.post(
            url=f"{policies_url}/steward/members",
            headers=headers,
            json={"email": anvil_admins}
        )
        if response.status_code != 200:
            error_list.append(f"Error updating snapshot policies ({response.text})")

        # Pull snapshot auth domains and policies. The request and JSON parsing sit
        # inside the try so an unreadable response is recorded as an error instead
        # of aborting the whole batch.
        ad_list = []
        ss_steward_list = []
        ss_reader_list = []
        try:
            snapshot_policies = requests.get(url=policies_url, headers=headers).json()
            ad_list = snapshot_policies["authDomain"] or []
            for policy in snapshot_policies["policies"]:
                if policy["name"] == "steward":
                    ss_steward_list.extend(policy["members"])
                elif policy["name"] == "reader":
                    ss_reader_list.extend(policy["members"])
        except Exception as e:
            error_list.append(f"Error accessing snapshot policies ({str(e)})")

        # Loop through snapshot policies and reduce permissions where necessary.
        # Other stewards are only removed once anvil-admins is confirmed as a steward.
        if anvil_admins in ss_steward_list:
            for ss_steward in ss_steward_list:
                if ss_steward != anvil_admins:
                    response = requests.delete(
                        url=f"{policies_url}/steward/members/{ss_steward}",
                        headers=headers
                    )
                    if response.status_code != 200:
                        error_list.append(f"Error updating snapshot policies ({response.text})")
                    if ss_steward not in ss_reader_list:
                        response = requests.post(
                            url=f"{policies_url}/reader/members",
                            headers=headers,
                            json={"email": ss_steward}
                        )
                        if response.status_code != 200:
                            error_list.append(f"Error updating snapshot policies ({response.text})")

        # Add/update permissions for any listed auth domains
        for ad in ad_list:
            response = requests.post(
                url=f"{policies_url}/reader/members",
                headers=headers,
                json={"email": ad + "@firecloud.org"}
            )
            if response.status_code != 200:
                error_list.append(f"Error updating snapshot policies ({response.text})")

        # Ensure anvil-admins and anvil_devs have appropriate roles in the auth domains
        for auth_domain in ad_list:
            group_url = f"https://api.firecloud.org/api/groups/{auth_domain}"
            try:
                ad_membership = requests.get(url=group_url, headers=headers).json()
                admin_emails = ad_membership["adminsEmails"]
                member_emails = ad_membership["membersEmails"]
                # Add anvil-admins as an admin if not already one
                if anvil_admins not in admin_emails:
                    response = requests.put(url=f"{group_url}/admin/{anvil_admins}", headers=headers)
                    if response.status_code != 204:
                        error_list.append(f"Error adding anvil-admins to auth domain {auth_domain} ({response.text})")
                # Reduce permissions for any other admins (remove from admin and re-add as member)
                for user in admin_emails:
                    if user != anvil_admins:
                        response = requests.delete(url=f"{group_url}/admin/{user}", headers=headers)
                        if response.status_code != 204:
                            error_list.append(f"Error removing user {user} as admin from auth domain ({auth_domain})")
                        response = requests.put(url=f"{group_url}/member/{user}", headers=headers)
                        if response.status_code != 204:
                            error_list.append(f"Error add user {user} as member on auth domain {auth_domain} ({response.text})")
                # Add anvil_devs as a member if not already one
                if anvil_devs not in admin_emails and anvil_devs not in member_emails:
                    response = requests.put(url=f"{group_url}/member/{anvil_devs}", headers=headers)
                    if response.status_code != 204:
                        error_list.append(f"Error adding anvil_devs to auth domain {auth_domain} ({response.text})")
            except Exception as e:
                # Report the exception itself; the most recent response may belong to an unrelated call
                error_list.append(f"Error processing auth domain {auth_domain} ({str(e)})")

        # Record status
        status = "Success" if not error_list else "Failure"
        error_str = "; ".join(error_list)
        results.append([snapshot_id, status, error_str])

    # Display results
    print(f"\nResults:")
    results_df = pd.DataFrame(results, columns = ["snapshot_id", "update_status", "errors"])
    display(results_df)
    
#############################################
## Input Parameters
#############################################

# Workspaces whose permissions should be updated ahead of release.
# Entry template: {"workspace_name": "workspace name", "cloud_provider": "Cloud provider - gcp/azure"}
workspace_list = [
    {
        "workspace_name": "temp_test",
        "cloud_provider": "gcp",
    },
]

# Snapshots whose permissions should be updated ahead of release.
snapshot_id_list = ["snapshot_id"]

#############################################
## Execution
#############################################

# Workspaces are processed first, then snapshots; an empty list skips that resource type.
if workspace_list:
    print("Updating workspace permissions...")
    update_workspace_permissions_for_release(workspace_list)

if snapshot_id_list:
    print("Updating snapshot permissions...")
    update_snapshot_permissions_for_release(snapshot_id_list)


## Update AnVIL Resource Permissions for Deletion
Prior to the deletion of a workspace or snapshot, there may be a desire to perform a "scream test" where user access is removed from the workspace or snapshot for a period of time before deletion to see if any issues arise. This script supports this by:
* Ensuring anvil-admins is an owner on the workspace or a steward on the snapshot.
* Removing all users outside of the exemption list from the workspace or snapshot.

In [None]:
#############################################
## Functions
#############################################

def update_workspace_permissions_for_deletion(workspace_list, user_exemption_list):
    """Remove user access from workspaces ahead of deletion and display the results.

    For each workspace this sets anvil-admins as OWNER and then, for every entry
    in the ACL as it stood before that change, either removes the calling user
    from the workspace (if the entry is the caller) or sets the entry to
    "NO ACCESS". Users in user_exemption_list, and anvil-admins, are left alone.

    Args:
        workspace_list: List of dicts, each with a "workspace_name" key and a
            "cloud_provider" key ("gcp" or "azure").
        user_exemption_list: Emails whose permissions must not be altered. The
            list is not modified by this function.

    Returns:
        None. A DataFrame with columns workspace, update_status, and errors is
        rendered with display().
    """
    # Set validation values
    acceptable_cloud_types = ['gcp', 'azure']
    anvil_admins = "anvil-admins@firecloud.org"

    # Build the exemption set once, on a copy. Appending to the caller's list (once
    # per workspace) would grow it and leak anvil-admins into later uses of the list.
    exempt_users = set(user_exemption_list or [])
    exempt_users.add(anvil_admins)

    # Loop through and process workspaces
    results = []
    for workspace_entry in workspace_list:

        # Set variables
        workspace = workspace_entry['workspace_name']
        cloud_provider = workspace_entry['cloud_provider']
        if cloud_provider not in acceptable_cloud_types:
            cloud_types = ', '.join(acceptable_cloud_types)
            results.append(
                [workspace, "Failure", f"Cloud provider must be one of {cloud_types}. Cloud provider provided was {cloud_provider}. Aborting workspace deletion."]
            )
            continue
        billing_project = 'anvil-datastorage' if cloud_provider == 'gcp' else 'AnVILDataStorage_Azure'

        # Initialize
        print(f"Processing updates for {workspace}.")
        error_list = []

        # Establish credentials
        creds, _ = google.auth.default()
        creds.refresh(google.auth.transport.requests.Request())
        headers = {"Authorization": f"Bearer {creds.token}"}
        workspace_url = f"https://api.firecloud.org/api/workspaces/{billing_project}/{workspace}"
        acl_url = f"{workspace_url}/acl"

        # Pull workspace details. The request and JSON parsing sit inside the try so
        # an unreadable response is recorded as an error instead of aborting the batch.
        resource_id = ""
        try:
            ws_attributes = requests.get(
                url=f"{workspace_url}?fields=workspace.attributes,workspace.authorizationDomain,workspace.googleProject,workspace.bucketName,workspace.workspaceId",
                headers=headers
            ).json()
            resource_id = ws_attributes["workspace"]["workspaceId"]
        except Exception as e:
            error_list.append(f"Error accessing workspace ({str(e)})")

        # Pull existing workspace ACLs
        response = requests.get(url=acl_url, headers=headers)
        if response.status_code != 200:
            error_list.append(f"Error retrieving workspace ACL ({response.text})")
            ws_acl = {"acl": {}}
        else:
            ws_acl = response.json()

        # Add anvil-admins as an owner, regardless of current status
        response = requests.patch(
            url=acl_url,
            headers=headers,
            json=[{
                "email": anvil_admins,
                "accessLevel": "OWNER",
                "canShare": True,
                "canCompute": True
            }]
        )
        if response.status_code != 200:
            error_list.append(f"Error updating workspace ACL ({response.text})")

        # Look up the calling user's email once per workspace, and only when there
        # is at least one non-exempt ACL entry to compare it against
        users_to_remove = [email for email in ws_acl["acl"] if email not in exempt_users]
        self_email = None
        if users_to_remove:
            try:
                self_email = requests.get(
                    url=f"https://sam.dsde-prod.broadinstitute.org/register/user/v2/self/info",
                    headers=headers
                ).json().get("userEmail")
            except Exception as e:
                error_list.append(f"Error retrieving calling user's information ({str(e)})")

        # Loop through workspace ACLs and adjust permissions where required
        for key in users_to_remove:
            # Determine if the user is yourself, and remove yourself if so
            if self_email is not None and key == self_email:
                response = requests.delete(
                    url=f"https://sam.dsde-prod.broadinstitute.org/api/resources/v2/workspace/{resource_id}/leave",
                    headers=headers
                )
                if response.status_code != 204:
                    error_list.append("Error updating workspace ACL")
            else:
                response = requests.patch(
                    url=acl_url,
                    headers=headers,
                    json=[{
                        "email": key,
                        "accessLevel": "NO ACCESS",
                        "canShare": False,
                        "canCompute": False
                    }]
                )
                if response.status_code != 200:
                    error_list.append(f"Error updating workspace ACL ({response.text})")

        # Record status
        status = "Success" if not error_list else "Failure"
        error_str = "; ".join(error_list)
        results.append([workspace, status, error_str])

    # Display results
    print(f"\nResults:")
    results_df = pd.DataFrame(results, columns = ["workspace", "update_status", "errors"])
    display(results_df)
    
def update_snapshot_permissions_for_deletion(snapshot_id_list, user_exemption_list):
    """Remove user access from snapshots ahead of deletion and display the results.

    For each snapshot this adds anvil-admins to the steward policy and then, only
    if anvil-admins is listed as a steward, removes every non-exempt member from
    the steward and reader policies. Users in user_exemption_list, and
    anvil-admins, are left alone.

    Args:
        snapshot_id_list: List of snapshot ID strings.
        user_exemption_list: Emails whose permissions must not be altered. The
            list is not modified by this function.

    Returns:
        None. A DataFrame with columns snapshot_id, update_status, and errors is
        rendered with display().
    """
    anvil_admins = "anvil-admins@firecloud.org"
    # anvil-admins is always exempt, for readers as well as stewards, whether or
    # not the caller's list already contains it
    exempt_users = set(user_exemption_list or [])
    exempt_users.add(anvil_admins)

    # Loop through and process snapshots
    results = []
    for snapshot_id in snapshot_id_list:

        # Initialize
        print(f"Processing updates for {snapshot_id}.")
        error_list = []

        # Establish credentials
        creds, _ = google.auth.default()
        creds.refresh(google.auth.transport.requests.Request())
        headers = {"Authorization": f"Bearer {creds.token}"}
        policies_url = f"https://data.terra.bio/api/repository/v1/snapshots/{snapshot_id}/policies"

        # Ensure anvil-admins is a steward on the snapshot
        response = requests.post(
            url=f"{policies_url}/steward/members",
            headers=headers,
            json={"email": anvil_admins}
        )
        if response.status_code != 200:
            error_list.append(f"Error updating snapshot policies ({response.text})")

        # Pull snapshot policies. The request and JSON parsing sit inside the try so
        # an unreadable response is recorded as an error instead of aborting the batch.
        ss_steward_list = []
        ss_reader_list = []
        try:
            snapshot_policies = requests.get(url=policies_url, headers=headers).json()
            for policy in snapshot_policies["policies"]:
                if policy["name"] == "steward":
                    ss_steward_list.extend(policy["members"])
                elif policy["name"] == "reader":
                    ss_reader_list.extend(policy["members"])
        except Exception as e:
            error_list.append(f"Error accessing snapshot policies ({str(e)})")

        # Loop through snapshot policies and reduce permissions where necessary.
        # Nothing is removed unless anvil-admins is confirmed as a steward.
        if anvil_admins in ss_steward_list:
            removals = [("steward", member) for member in ss_steward_list if member not in exempt_users]
            removals += [("reader", member) for member in ss_reader_list if member not in exempt_users]
            for policy_name, member in removals:
                response = requests.delete(
                    url=f"{policies_url}/{policy_name}/members/{member}",
                    headers=headers
                )
                if response.status_code != 200:
                    error_list.append(f"Error updating snapshot policies ({response.text})")

        # Record status
        status = "Success" if not error_list else "Failure"
        error_str = "; ".join(error_list)
        results.append([snapshot_id, status, error_str])

    # Display results
    print(f"\nResults:")
    results_df = pd.DataFrame(results, columns = ["snapshot_id", "update_status", "errors"])
    display(results_df)

#############################################
## Input Parameters
#############################################

# Users whose permissions should be left untouched (anvil-admins is included by default).
user_exemption_list = []

# Workspaces to remove permissions from.
# Entry template: {"workspace_name": "workspace name", "cloud_provider": "Cloud provider - gcp/azure"}
workspace_list = [
    {
        "workspace_name": "temp_test",
        "cloud_provider": "gcp",
    },
]

# Snapshots to remove permissions from.
snapshot_id_list = ["snapshot_id"]

#############################################
## Execution
#############################################

# Workspaces are processed first, then snapshots; an empty list skips that resource type.
if workspace_list:
    print("Updating workspace permissions...")
    update_workspace_permissions_for_deletion(workspace_list, user_exemption_list)

if snapshot_id_list:
    print("Updating snapshot permissions...")
    update_snapshot_permissions_for_deletion(snapshot_id_list, user_exemption_list)


## Add Authorization Domain Group to Existing Workspace
**AZURE ONLY** 
Adds a data access control group to an existing workspace. This is useful in cases where a data access control group wasn't added at the time of workspace creation and needs to be added, or additional data access control groups need to be added to a workspace (for whatever reason).

In [None]:
#############################################
## Functions
#############################################

def add_data_access_control_group(workspace_list):
    """Add a data access control ("auth domain") group to existing Azure workspaces.

    For each entry the workspace is looked up in its billing project, the
    requested group is validated, and a "group-constraint" policy naming the
    group is added to the workspace. One result row is recorded per workspace
    and the full table is displayed once all entries have been processed.

    Args:
        workspace_list: List of dicts with the keys "workspace_name",
            "auth_domain" (name of the group to add) and "cloud_provider"
            (only "azure" is accepted).

    Returns:
        None. The results are displayed as a DataFrame.
    """
    # Loop through and process workspaces
    results = []
    acceptable_cloud_types = ['azure']

    for workspace in workspace_list:
        # Initialize
        name = workspace['workspace_name']
        ad_name = workspace['auth_domain']
        cloud_provider = workspace['cloud_provider']

        # Get billing project based on cloud provider
        print(f"Processing workspace {name}...")
        if cloud_provider not in acceptable_cloud_types:
            cloud_types = ', '.join(acceptable_cloud_types)
            err_str = f"Cloud provider must be one of {cloud_types}. Cloud provider provided was {cloud_provider}. Skipping workspace."
            print(err_str)
            results.append([name, "N/A", ad_name, "Failure", err_str, None])
            continue
        billing_project = 'anvil-datastorage' if cloud_provider == 'gcp' else 'AnVILDataStorage_Azure'

        # Establish credentials
        creds, project = google.auth.default()
        auth_req = google.auth.transport.requests.Request()
        creds.refresh(auth_req)

        # Get workspace ID for the workspace
        response = requests.get(
            url=f"https://api.firecloud.org/api/workspaces/{billing_project}/{name}",
            headers={"Authorization": f"Bearer {creds.token}"}
        )
        if response.status_code != 200:
            results.append([name, billing_project, ad_name, "Failure", "Workspace not found (or user does not have access). Skipping workspace.", None])
            continue
        # Only parse the body once the lookup is known to have succeeded: error
        # responses are not guaranteed to be JSON, and a parsing failure here
        # must not abort the remaining workspaces.
        try:
            workspace_id = response.json()["workspace"]["workspaceId"]
        except (ValueError, KeyError, TypeError):
            results.append([name, billing_project, ad_name, "Failure", "Error reading workspace details. Skipping workspace.", None])
            continue

        # Validate the specified auth domain group
        if ad_name:
            response = requests.get(
                url=f"https://api.firecloud.org/api/groups/{ad_name}",
                headers={"Authorization": f"Bearer {creds.token}"}
            )
            if response.status_code == 404:
                results.append([name, billing_project, ad_name, "Failure", "Specified auth domain group does not exist. Skipping workspace.", None])
                continue
            elif response.status_code not in (200, 403):
                # A 403 is deliberately let through together with a 200; any other status is an error.
                results.append([name, billing_project, ad_name, "Failure", "Error validating specified auth domain group. Please try again. Skipping workspace.", None])
                continue
        else:
            results.append([name, billing_project, ad_name, "Failure", "No auth domain group specified. Skipping workspace.", None])
            continue

        # Build and submit data access control group policy request
        add_policy_payload = {
            "addAttributes": {
                "inputs": [{
                    "namespace": "terra",
                    "name": "group-constraint",
                    "additionalData": [{"key": "group", "value": ad_name}]
                }]
            },
            "updateMode": "FAIL_ON_CONFLICT"
        }
        response = requests.patch(
            url=f"https://workspace.dsde-prod.broadinstitute.org/api/workspaces/v1/{workspace_id}/policies",
            headers={"Authorization": f"Bearer {creds.token}"},
            json=add_policy_payload
        )
        if response.status_code != 200:
            error_message = response.text
            results.append([name, billing_project, ad_name, "Failure", f"Error updating workspace: {error_message}.", None])
            continue

        # Record status (every failure path above has already recorded its own row)
        results.append([name, billing_project, ad_name, "Success", "", ""])

    # Display results
    print("\nResults:")
    results_df = pd.DataFrame(results, columns = ["workspace_name", "billing_project", "auth_domain_name", "status", "errors", "warnings"])
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify the name of the workspace, the name of the auth domain group to add to the workspace,
# and the cloud the workspace lives on. Notes:
#   * "auth_domain" is required here: an entry without one is reported as a failure and skipped.
#   * "cloud_provider" must be "azure"; any other value is reported as a failure and skipped.
workspace_list = [
#    {
#        "workspace_name": "workspace name",
#        "auth_domain": "auth domain group name. For Azure this is just a Terra group that gets added",
#        "cloud_provider": "Cloud provider - azure"
#    }
    {
        "workspace_name": "AnVIL_TEST_WORKSPACE",
        "auth_domain": "AUTH_AnVIL_TEST_WORKSPACE",
        "cloud_provider": "azure"
    },
]

#############################################
## Execution
#############################################

# Processes every entry above and displays one result row per workspace.
add_data_access_control_group(workspace_list)


# AnVIL Resource Creation and Deletion Control

## Workspace and Auth Domain Group Creation
List the desired workspaces to create, the authorization domain group they should have (if any), the role the authorization domain group should have on the workspace once created, and the cloud the workspace should be created on.

In [None]:
#############################################
## Functions
#############################################

def setup_auth_domain(name):
    """Create the auth domain group `name` and attach the standard AnVIL members.

    The group is created first; a 409 from that call is logged and processing
    continues, while any other non-201 status raises. anvil-admins is then
    added as an admin and AnVIL_Devs as a member; a failure of either call is
    logged rather than raised.

    Args:
        name: Name of the group to create.

    Returns:
        List of log messages describing the problems encountered (empty when
        every call returned the expected status).

    Raises:
        Exception: If group creation returns a status other than 201 or 409.
    """
    # Establish credentials once and reuse the resulting header for every call
    credentials, _ = google.auth.default()
    credentials.refresh(google.auth.transport.requests.Request())
    auth_header = {"Authorization": f"Bearer {credentials.token}"}
    group_url = f"https://api.firecloud.org/api/groups/{name}"
    messages = []

    # Attempt to create the group
    creation = requests.post(url=group_url, headers=auth_header)
    if creation.status_code == 409:
        messages.append("Error creating auth domain group (group already exists)")
    elif creation.status_code != 201:
        raise Exception("Error creating auth domain group.")

    # Attempt to add anvil-admins as an admin and AnVIL_Devs as a member, in that order
    memberships = (
        ("admin/anvil-admins@firecloud.org", "Error adding anvil-admins as an admin on the auth domain group"),
        ("member/AnVIL_Devs@firecloud.org", "Error adding AnVIL_Devs as a member on the auth domain group"),
    )
    for role_path, failure_message in memberships:
        outcome = requests.put(url=f"{group_url}/{role_path}", headers=auth_header)
        if outcome.status_code != 204:
            messages.append(failure_message)

    # Return log
    return messages

def setup_workspaces(workspace_list):
    """Create workspaces (and their auth domain groups) and set their initial ACLs.

    For each entry: validate the cloud provider, make sure the workspace name
    is free, create the auth domain group when one is requested, create the
    workspace, then grant anvil-admins OWNER access and (for READER, WRITER or
    OWNER roles) grant the auth domain group its role. Problems that occur
    after the workspace has been created are reported as warnings rather than
    failures. One result row is recorded per workspace and the full table is
    displayed once all entries have been processed.

    Args:
        workspace_list: List of dicts with the keys "workspace_name",
            "auth_domain" (group name, or a falsy value for none), "role"
            (role to give the auth domain group) and "cloud_provider"
            ("gcp" or "azure").

    Returns:
        None. The results are displayed as a DataFrame.
    """
    # Loop through and process workspaces
    results = []
    acceptable_cloud_types = ['gcp', 'azure']
    result_columns = ["workspace_name", "billing_project", "auth_domain_name", "auth_domain_role", "bucket", "status", "errors", "warnings"]

    def record_failure(name, billing_project, ad_name, ad_role, message):
        # Every row must line up with result_columns, including the (empty)
        # bucket cell; rows of differing lengths misalign or break the table.
        results.append([name, billing_project, ad_name, ad_role, "", "Failure", message, None])

    for workspace in workspace_list:
        # Initialize
        name = workspace['workspace_name']
        ad_name = workspace['auth_domain']
        ad_role = workspace['role']
        cloud_provider = workspace['cloud_provider']
        warning_list = []

        # Get billing project based on cloud provider
        print(f"Processing workspace {name}...")
        if cloud_provider not in acceptable_cloud_types:
            cloud_types = ', '.join(acceptable_cloud_types)
            err_str = f"Cloud provider must be one of {cloud_types}. Cloud provider provided was {cloud_provider}. Skipping workspace."
            logging.error(err_str)
            record_failure(name, "N/A", ad_name, ad_role, err_str)
            continue
        billing_project = 'anvil-datastorage' if cloud_provider == 'gcp' else 'AnVILDataStorage_Azure'

        # Establish credentials
        creds, project = google.auth.default()
        auth_req = google.auth.transport.requests.Request()
        creds.refresh(auth_req)
        auth_header = {"Authorization": f"Bearer {creds.token}"}
        workspace_url = f"https://api.firecloud.org/api/workspaces/{billing_project}/{name}"

        # Determine whether the workspace name is taken
        response = requests.get(url=workspace_url, headers=auth_header)
        if response.status_code == 200:
            record_failure(name, billing_project, ad_name, ad_role, "Specified workspace name already exists. Aborting workspace creation.")
            continue

        # Set up auth domain (where specified)
        if ad_name:
            try:
                warning_list.extend(setup_auth_domain(ad_name))
            except Exception:
                record_failure(name, billing_project, ad_name, ad_role, "Error creating the auth domain for the workspace. Aborting workspace creation.")
                continue

        # Build workspace creation payload
        create_workspace_payload = {
            "namespace": billing_project,
            "name": name,
            "attributes": {}
        }
        if ad_name:
            create_workspace_payload["authorizationDomain"] = [{"membersGroupName": ad_name}]
            if cloud_provider == "azure":
                # Azure workspaces additionally get the group as a "group-constraint" policy
                create_workspace_payload["policies"] = [
                    {
                        "namespace": "terra",
                        "name": "group-constraint",
                        "additionalData": [{"group": ad_name}]
                    }
                ]
        else:
            create_workspace_payload["authorizationDomain"] = []

        # Submit create workspace request
        response = requests.post(
            url="https://api.firecloud.org/api/workspaces",
            headers=auth_header,
            json=create_workspace_payload
        )
        if response.status_code == 409:
            record_failure(name, billing_project, ad_name, ad_role, "Specified workspace name already exists. Aborting workspace creation.")
            continue
        elif response.status_code != 201:
            record_failure(name, billing_project, ad_name, ad_role, "Error creating workspace. Aborting workspace creation.")
            continue

        # The workspace now exists, so a missing or unreadable bucket name is
        # only worth a warning, not a failure.
        try:
            bucket = response.json().get("bucketName", "")
        except (ValueError, AttributeError):
            bucket = ""
            warning_list.append("Error reading bucket name from workspace creation response.")

        # Update workspace ACL for anvil-admins, then for the auth domain (where applicable)
        acl_updates = [(
            {
                "email": "anvil-admins@firecloud.org",
                "accessLevel": "OWNER",
                "canShare": True,
                "canCompute": True
            },
            "Error adding anvil-admins as owner on the workspace."
        )]
        if ad_name and ad_role in ["READER", "WRITER", "OWNER"]:
            acl_updates.append((
                {
                    "email": ad_name + "@firecloud.org",
                    "accessLevel": ad_role,
                    "canShare": False,
                    "canCompute": False
                },
                "Error updating auth domain group's role on the workspace."
            ))
        for acl_entry, failure_warning in acl_updates:
            response = requests.patch(
                url=f"{workspace_url}/acl",
                headers=auth_header,
                json=[acl_entry]
            )
            if response.status_code != 200:
                warning_list.append(failure_warning)

        # Record status
        warning_str = "; ".join(warning_list)
        results.append([name, billing_project, ad_name, ad_role, bucket, "Success", "", warning_str])

    # Display results
    print("\nResults:")
    results_df = pd.DataFrame(results, columns = result_columns)
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify the name of the workspace, the name of the auth domain group to use for the workspace (if any),
# the role the auth domain group should have on the workspace, and the cloud the workspace lives on.
# Notes:
#   * "cloud_provider" must be "gcp" or "azure"; any other value is reported as a failure and skipped.
#   * The auth domain group is only added to the workspace ACL when "role" is READER, WRITER, or OWNER.
workspace_list = [
#    {"workspace_name": "workspace name", "cloud_provider": "gcp/azure", "auth_domain": "auth domain (or None)", "role": "Auth Domain group role - READER, WRITER, OWNER, NO ACCESS"},
    {"workspace_name": "AnVIL_NIAID_CSP_GRU_WGS_v3", "cloud_provider": "gcp", "auth_domain": "AUTH_AnVIL_NIAID_CSP_GRU_WGS_v3", "role": "READER"},
]

#############################################
## Execution
#############################################

# Processes every entry above and displays one result row per workspace.
setup_workspaces(workspace_list)


## Workspace Deletion
List the desired workspaces to delete, as well as which cloud they exist on. 

In [None]:
#############################################
## Functions
#############################################

def delete_workspaces(workspace_list):
    """Delete the listed workspaces and display a per-workspace result table.

    A workspace counts as deleted when the delete request returns 202. A 404
    or 403 is reported together with the message returned by the service; any
    other status is reported as an unspecified error.

    Args:
        workspace_list: List of dicts with the keys "workspace_name" and
            "cloud_provider" ("gcp" or "azure").

    Returns:
        None. The results are displayed as a DataFrame.
    """
    def error_message(response):
        # Error bodies are not guaranteed to be JSON objects with a "message"
        # field; fall back to the raw text so one odd response cannot abort
        # the batch and discard the results gathered so far.
        try:
            return json.loads(response.text)["message"]
        except (ValueError, KeyError, TypeError):
            return response.text

    # Loop through and process workspaces
    results = []
    acceptable_cloud_types = ['gcp', 'azure']

    for workspace in workspace_list:
        # Initialize
        name = workspace['workspace_name']
        cloud_provider = workspace['cloud_provider']

        # Get billing project based on cloud provider
        print(f"Processing workspace {name}...")
        if cloud_provider not in acceptable_cloud_types:
            cloud_types = ', '.join(acceptable_cloud_types)
            results.append(
                [name, "Failure", f"Cloud provider must be one of {cloud_types}. Cloud provider provided was {cloud_provider}. Aborting workspace deletion."]
            )
            continue
        billing_project = 'anvil-datastorage' if cloud_provider == 'gcp' else 'AnVILDataStorage_Azure'

        # Establish credentials
        creds, project = google.auth.default()
        auth_req = google.auth.transport.requests.Request()
        creds.refresh(auth_req)

        # Delete workspace
        response = requests.delete(
            url=f"https://api.firecloud.org/api/workspaces/{billing_project}/{name}",
            headers={"Authorization": f"Bearer {creds.token}"}
        )
        if response.status_code == 404:
            msg = error_message(response)
            results.append([name, "Failure", f"Specified workspace does not exist. Aborting workspace deletion. Message: {msg}"])
        elif response.status_code == 403:
            msg = error_message(response)
            results.append([name, "Failure", f"User does not have permission to delete workspace. Aborting workspace deletion. Message: {msg}"])
        elif response.status_code != 202:
            results.append([name, "Failure", "Error deleting workspace (unspecified). Aborting workspace deletion."])
        else:
            results.append([name, "Success", ""])

    # Display results
    print("\nResults:")
    results_df = pd.DataFrame(results, columns = ["workspace_name", "status", "message"])
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify the name of the workspace and the cloud the workspace lives on.
# "cloud_provider" must be "gcp" or "azure"; any other value is reported as a failure and skipped.
workspace_list = [
#    {"workspace_name": "workspace name", "cloud_provider": "Cloud provider - gcp/azure"    }
    {"workspace_name": "AnVIL_TEST_WORKSPACE_AZ", "cloud_provider": "azure"},
    {"workspace_name": "AnVIL_TEST_WORKSPACE_GCP", "cloud_provider": "gcp"}
]

#############################################
## Execution
#############################################

# Issues a delete request for every entry above; double-check the list before running this cell.
delete_workspaces(workspace_list)


## Unpublish Workspace from Firecloud Library
In some cases, a workspace may be published in the Firecloud Library, which prevents it from being deleted. This can be identified by a "You cannot delete this workspace: You must be a curator and either be an owner or have catalog with read+." message during deletion. In those cases, the workspace needs to be unpublished from the Firecloud Library before it can be deleted.

In [None]:
#############################################
## Functions
#############################################

def unpublish_workspaces_from_library(workspace_list):
    """Unpublish the listed workspaces from the Firecloud Library.

    A workspace counts as unpublished when the request returns 200 or 204. A
    404 or 403 is reported together with the message returned by the service;
    any other status is reported as an unspecified error. A per-workspace
    result table is displayed once all entries have been processed.

    Args:
        workspace_list: List of dicts with the keys "workspace_name" and
            "cloud_provider" ("gcp" or "azure").

    Returns:
        None. The results are displayed as a DataFrame.
    """
    def error_message(response):
        # Error bodies are not guaranteed to be JSON objects with a "message"
        # field; fall back to the raw text so one odd response cannot abort
        # the batch and discard the results gathered so far.
        try:
            return json.loads(response.text)["message"]
        except (ValueError, KeyError, TypeError):
            return response.text

    # Loop through and process workspaces
    results = []
    acceptable_cloud_types = ['gcp', 'azure']

    for workspace in workspace_list:
        # Initialize
        name = workspace['workspace_name']
        cloud_provider = workspace['cloud_provider']

        # Get billing project based on cloud provider
        print(f"Processing workspace {name}...")
        if cloud_provider not in acceptable_cloud_types:
            cloud_types = ', '.join(acceptable_cloud_types)
            results.append(
                [name, "Failure", f"Cloud provider must be one of {cloud_types}. Cloud provider provided was {cloud_provider}. Aborting workspace unpublishing."]
            )
            continue
        billing_project = 'anvil-datastorage' if cloud_provider == 'gcp' else 'AnVILDataStorage_Azure'

        # Establish credentials
        creds, project = google.auth.default()
        auth_req = google.auth.transport.requests.Request()
        creds.refresh(auth_req)

        # Unpublish workspace
        response = requests.delete(
            url=f"https://api.firecloud.org/api/library/{billing_project}/{name}/published",
            headers={"Authorization": f"Bearer {creds.token}"}
        )
        if response.status_code == 404:
            msg = error_message(response)
            results.append([name, "Failure", f"Specified workspace does not exist. Aborting workspace unpublishing. Message: {msg}"])
        elif response.status_code == 403:
            msg = error_message(response)
            results.append([name, "Failure", f"User does not have permission to unpublish workspace. Aborting workspace unpublishing. Message: {msg}"])
        elif response.status_code not in [200, 204]:
            results.append([name, "Failure", "Error unpublishing workspace (unspecified). Aborting workspace unpublishing."])
        else:
            results.append([name, "Success", ""])

    # Display results
    print("\nResults:")
    results_df = pd.DataFrame(results, columns = ["workspace_name", "status", "message"])
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify the name of the workspace and the cloud the workspace lives on.
# "cloud_provider" must be "gcp" or "azure"; any other value is reported as a failure and skipped.
workspace_list = [
#    {"workspace_name": "workspace name", "cloud_provider": "Cloud provider - gcp/azure"    }
    {"workspace_name": "AnVIL_TEST_WORKSPACE_AZ", "cloud_provider": "azure"},
    {"workspace_name": "AnVIL_TEST_WORKSPACE_GCP", "cloud_provider": "gcp"}
]

#############################################
## Execution
#############################################

# Issues an unpublish request for every entry above and displays one result row per workspace.
unpublish_workspaces_from_library(workspace_list)


# AnVIL Workspace Tag Control

## Bulk View Workspace Tags
Can be used to view the workspace tags currently recorded on the specified workspaces.

In [None]:
#############################################
## Functions
#############################################

def retrieve_ws_tags(workspace_list):
    """Display the workspace tags recorded on each listed workspace.

    Tags are fetched from the anvil-datastorage billing project and shown as
    a single comma-separated string of quoted tags (e.g. 'tag_a', 'tag_b').

    Args:
        workspace_list: List of workspace names.

    Returns:
        None. The results are displayed as a DataFrame.
    """
    # Loop through and process workspaces
    results = []
    for workspace in workspace_list:

        # Initialize
        print(f"Retrieving workspace tags for {workspace}.")
        error_list = []
        tag_str = ""

        # Establish credentials
        creds, project = google.auth.default()
        auth_req = google.auth.transport.requests.Request()
        creds.refresh(auth_req)

        # Retrieve workspace tags
        response = requests.get(
            url=f"https://api.firecloud.org/api/workspaces/anvil-datastorage/{workspace}/tags",
            headers={"Authorization": f"Bearer {creds.token}"}
        )
        if response.status_code != 200:
            error_list.append("Error retrieving workspace tags.")
        else:
            try:
                # join() puts separators only between tags, so no trailing comma is left behind
                tag_str = ", ".join(f"'{item}'" for item in response.json())
            except (ValueError, TypeError):
                # Body was not valid JSON, or did not decode to something iterable
                error_list.append("Error formatting workspace tags.")

        # Record status
        status = "Success" if not error_list else "Failure"
        error_str = "; ".join(error_list)
        results.append([workspace, tag_str, status, error_str])

    # Display results
    print("\nResults:")
    results_df = pd.DataFrame(results, columns = ["workspace", "tags", "retrieval_status", "errors"])
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify the list of workspaces (by name) to view the workspace tags for.
# Workspaces are looked up in the anvil-datastorage billing project.
workspace_list = [
    "WORKSPACE_1",
    "WORKSPACE_2"
]

#############################################
## Execution
#############################################

# Read-only: fetches and displays the tags for every workspace listed above.
retrieve_ws_tags(workspace_list)


## Bulk Update Workspace Tags
Can be used to update the workspace tags recorded on the specified workspaces. This includes the ability to both add and remove workspace tags in bulk. 

In [None]:
#############################################
## Functions
#############################################

def update_ws_tags(workspace_list, tags_to_remove_list, tags_to_remove_regex_list, tags_to_add_list):
    """Remove and/or add workspace tags in bulk on anvil-datastorage workspaces.

    For each workspace the steps run in a fixed order, each only when its
    input list is non-empty: remove the explicitly listed tags, remove every
    existing tag matched (via re.search) by any of the given regexes, then add
    the listed tags. Failures are collected per workspace and the result
    table is displayed once all workspaces have been processed.

    Args:
        workspace_list: List of workspace names.
        tags_to_remove_list: Exact tags to remove.
        tags_to_remove_regex_list: Regex patterns identifying tags to remove.
        tags_to_add_list: Tags to add.

    Returns:
        None. The results are displayed as a DataFrame.
    """
    outcome_rows = []
    for workspace in workspace_list:
        print(f"Processing workspace tag updates for {workspace}.")
        problems = []

        # Fresh credentials per workspace; every request below shares the same header and URL
        credentials, _ = google.auth.default()
        credentials.refresh(google.auth.transport.requests.Request())
        auth_header = {"Authorization": f"Bearer {credentials.token}"}
        tags_url = f"https://api.firecloud.org/api/workspaces/anvil-datastorage/{workspace}/tags"

        # Step 1: explicit removals
        if tags_to_remove_list:
            removal = requests.delete(url=tags_url, headers=auth_header, json=tags_to_remove_list)
            if removal.status_code != 200:
                problems.append("Error removing workspace tags by list.")

        # Step 2: regex-driven removals, based on the tags currently on the workspace
        if tags_to_remove_regex_list:
            lookup = requests.get(url=tags_url, headers=auth_header)
            if lookup.status_code == 200:
                current_tags = lookup.json()
            else:
                problems.append("Error retrieving workspace tags.")
                current_tags = []

            # A tag is selected once, as soon as any pattern matches it
            matched_tags = [
                tag for tag in current_tags
                if any(re.search(pattern, tag) for pattern in tags_to_remove_regex_list)
            ]
            if matched_tags:
                regex_removal = requests.delete(url=tags_url, headers=auth_header, json=matched_tags)
                if regex_removal.status_code != 200:
                    problems.append("Error removing workspace tags by regex.")

        # Step 3: additions
        if tags_to_add_list:
            addition = requests.patch(url=tags_url, headers=auth_header, json=tags_to_add_list)
            if addition.status_code != 200:
                problems.append("Error adding workspace tags by list.")

        outcome = "Failure" if problems else "Success"
        outcome_rows.append([workspace, outcome, "; ".join(problems)])

    # Display results
    print("\nResults:")
    results_df = pd.DataFrame(outcome_rows, columns = ["workspace", "update_status", "errors"])
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify the list of workspaces (by name) to apply the below changes to.
# Workspaces are looked up in the anvil-datastorage billing project.
workspace_list = [
    "WORKSPACE_1",
    "WORKSPACE_2"
]

# Specify the exact tags that should be removed from the workspace (if any):
tags_to_remove_list = []

# Specify the list of regex expressions that should be used to identify tags to remove (if any).
# Patterns are applied with re.search, so they match anywhere in a tag unless anchored with ^ and $.
tags_to_remove_regex_list = []

# Specify the tags that should be added to the workspace (if any):
tags_to_add_list = []

#############################################
## Execution
#############################################

# Per workspace, removals by list run first, then removals by regex, then additions.
# An empty list skips the corresponding step.
update_ws_tags(workspace_list, tags_to_remove_list, tags_to_remove_regex_list, tags_to_add_list)


# AnVIL Workspace Attribute Control

## Look Up Workspace Attribute Names
This is intended to be a quick way to translate from the common/display names for properties that appear in workspaces to the underlying attribute property names needed to update the attributes in any way. For example, "Study Design" in a workspace is the display name for the "library:studyDesign" property.

In [None]:
#############################################
## Functions
#############################################

def look_up_ws_attr(search_term_list):
    """Look up workspace attribute property names by their display titles.

    The attribute definitions are downloaded from the firecloud-orchestration
    repository. A property matches a search term when the term, with spaces
    removed and lower-cased, is contained in the property's title normalized
    the same way. Every (term, property, title) match is displayed.

    Args:
        search_term_list: List of display names (or fragments of them) to look up.

    Returns:
        None. The results are displayed as a DataFrame.
    """
    attr_schema_url = "https://raw.githubusercontent.com/broadinstitute/firecloud-orchestration/develop/src/main/resources/library/attribute-definitions.json"
    response = requests.get(attr_schema_url)
    attr_schema = json.loads(response.text)

    results = []
    for term in search_term_list:
        lookup_str = term.replace(" ", "").lower()
        for key, val in attr_schema["properties"].items():
            # Entries without a usable title cannot be matched; skip them
            # explicitly instead of hiding every possible error behind a bare except.
            title = val.get("title") if isinstance(val, dict) else None
            if not isinstance(title, str):
                continue
            if lookup_str in title.replace(" ", "").lower():
                results.append([term, key, title])

    print("Results:")
    results_df = pd.DataFrame(results, columns = ["search_term", "property", "property_title"])
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify a list of attribute display names to look up.
# Matching ignores case and spaces, and a term matches any title that contains it.
search_term_list = ["Cohort Description"]

#############################################
## Execution
#############################################

# Read-only: displays the matching property names and titles for each search term.
look_up_ws_attr(search_term_list)      


## Update Workspace Attributes
List the workspace attributes that should be updated, their updated values, and the update behavior. The behavior specified will have the following effects:
* __VIEW__ will NOT alter the workspace attributes in any way, but rather can be used to display the existing attribute values 
* __UPDATE__ will only update the workspace attributes specified by the user, and leave all other existing values as is
* __REPLACE__ will remove all existing workspace attributes and replace them with the workspace attributes specified by the user

In [None]:
#############################################
## Functions
#############################################

def get_attribute(ws_attribute):
    """Check a workspace attribute name against the published attribute definitions.

    The definitions are downloaded from the firecloud-orchestration repository
    on every call.

    Args:
        ws_attribute: Attribute property name to look up.

    Returns:
        Tuple (found, type): (True, <the property's "type" value>) when the
        property has a non-empty definition, otherwise (False, "").
    """
    schema_response = requests.get(
        "https://raw.githubusercontent.com/broadinstitute/firecloud-orchestration/develop/src/main/resources/library/attribute-definitions.json"
    )
    known_properties = json.loads(schema_response.text)["properties"]

    # A missing (or empty) definition means the attribute is not recognized
    definition = known_properties.get(ws_attribute)
    if not definition:
        return False, ""
    return True, definition["type"]

def update_ws_attr(attr_updates, update_behavior):
    """View, update, or replace library attributes on anvil-datastorage workspaces.

    Behavior per workspace:
      * VIEW    - fetch and print the existing attributes; nothing is changed.
      * UPDATE  - fetch the existing attributes, overlay the supplied values,
                  and submit the merged set.
      * REPLACE - submit only the supplied values.
    Supplied values are converted according to the attribute's schema type
    (string, array, integer, boolean). Nothing is submitted for a workspace
    if any error was recorded for it. The result table is displayed once all
    workspaces have been processed.

    Args:
        attr_updates: Dict of the form {"workspace": {"attribute": value}}.
            For array attributes the value may be a single item or a
            list/tuple of items.
        update_behavior: "VIEW", "UPDATE", or "REPLACE".

    Returns:
        None. The results are displayed as a DataFrame.
    """
    # Loop through and process workspaces
    results = []
    for workspace in attr_updates.keys():

        # Initialize
        print(f"Processing workspace attribute updates for {workspace}.")
        error_list = []
        attr_json = {}

        # Establish credentials
        creds, project = google.auth.default()
        auth_req = google.auth.transport.requests.Request()
        creds.refresh(auth_req)

        # Set base attribute json according to specified update behavior
        if update_behavior in ["VIEW", "UPDATE"]:
            # Fetch existing attributes for workspace
            response = requests.get(
                url=f"https://api.firecloud.org/api/library/anvil-datastorage/{workspace}/metadata",
                headers={"Authorization": f"Bearer {creds.token}"}
            )
            if response.status_code != 200:
                error_list.append("Error retrieving workspace attributes")
            else:
                attr_json = response.json()
                print("Original attribute JSON:")
                print(attr_json)
        elif update_behavior == "REPLACE":
            attr_json = {}
        else:
            error_list.append(f"Unknown update behavior '{update_behavior}' specified (must be VIEW, UPDATE, or REPLACE)")

        # Update base attribute json with user input
        if not error_list and update_behavior != "VIEW":
            for attr_entry, attr_value in attr_updates[workspace].items():
                valid, attr_type = get_attribute(attr_entry)
                if not valid:
                    error_list.append(f"Attribute '{attr_entry}' is not recognized")
                elif attr_type == "string":
                    attr_json[attr_entry] = str(attr_value)
                elif attr_type == "array":
                    # Accept either a single item or a list of items; stringifying
                    # a whole list would produce one garbled element.
                    if isinstance(attr_value, (list, tuple)):
                        attr_json[attr_entry] = [str(item) for item in attr_value]
                    else:
                        attr_json[attr_entry] = [str(attr_value)]
                elif attr_type == "integer":
                    try:
                        attr_json[attr_entry] = int(attr_value)
                    except (TypeError, ValueError, OverflowError):
                        error_list.append(f"Error converting value for attribute '{attr_entry}' to integer")
                elif attr_type == "boolean":
                    normalized_value = str(attr_value).lower()
                    if normalized_value in ("true", "false"):
                        attr_json[attr_entry] = normalized_value == "true"
                    else:
                        error_list.append(f"Error converting value for attribute '{attr_entry}' to boolean")
                else:
                    # Do not silently drop the update and still report success
                    error_list.append(f"Attribute '{attr_entry}' has unsupported type '{attr_type}'")
            if update_behavior == "UPDATE":
                print("\nUpdated attribute JSON:")
            else:
                print("Replacement attribute JSON:")
            print(attr_json)
            print("\n")

        # Submit updated workspace attributes to workspace
        if not error_list and update_behavior != "VIEW":
            response = requests.put(
                url=f"https://api.firecloud.org/api/library/anvil-datastorage/{workspace}/metadata?validate=false",
                headers={"Authorization": f"Bearer {creds.token}"},
                json=attr_json
            )
            if response.status_code != 200:
                error_list.append("Error updating workspace attributes")

        # Record status
        status = "Success" if not error_list else "Errors"
        error_str = "; ".join(error_list)
        results.append([workspace, status, error_str])

    # Display results
    print("Results:")
    results_df = pd.DataFrame(results, columns = ["workspace", "update_status", "errors"])
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify the behavior of the update (VIEW, UPDATE, or REPLACE), based on the definitions above.
# VIEW only prints the existing attributes; UPDATE and REPLACE write to the workspace.
update_behavior = "VIEW"

# Specify the workspace attribute updates, using the form {"workspace": {"attribute": "value"}}.
# Workspaces are looked up in the anvil-datastorage billing project, and each attribute must be a
# property name from the attribute definitions (e.g. "library:studyDesign"), not its display name.
attr_updates = {
    "workspace": {"attribute_1": "value", "attribute_2": "value"}
}

#############################################
## Execution
#############################################

# Processes every workspace key in attr_updates and displays one result row per workspace.
update_ws_attr(attr_updates, update_behavior)


# Snapshot Property Control

## Bulk Update Snapshot Properties
List the snapshots that should be updated and the updated property values that should be applied to them. The properties that can be updated are as follows:
* **phs_id** updates the "phsId" property on the dataset the snapshot is sourced from, if you have permissions to do so
* **consent_code** updates the "consentCode" property on the snapshot
* **consent_name** updates the "properties" property on the dataset the snapshot is sourced from to include the consent name, if you have permissions to do so
* **duos_id** attaches a DUOS ID to the snapshot, which populates the "duosFirecloudGroup" property on the snapshot
* **dataset_ticket** updates the "properties" property on the dataset the snapshot is sourced from to include the dataset ticket, if you have permissions to do so

In [None]:
#############################################
## Functions
#############################################

def update_snapshot_properties(snapshot_update_list, ignore_empty):
    """Bulk update snapshot (and source dataset) properties and display a results table.

    For each requested snapshot, the current values are retrieved from the data repository API
    at data.terra.bio, and then up to three updates are issued:
      * a PATCH on the source dataset for phs_id, consent_name, and dataset_ticket
      * a PATCH on the snapshot for consent_code
      * a link/unlink DUOS dataset call on the snapshot for duos_id

    Args:
        snapshot_update_list: List of dicts, one per snapshot. Each dict must contain
            "snapshot_id" and may contain "phs_id", "consent_code", "consent_name", "duos_id",
            and "dataset_ticket". Missing keys, None, and empty values are treated as blank.
        ignore_empty: When True, blank properties are skipped and reported as "Ignored". When
            False, blank properties overwrite the existing values with a blank value.

    Returns:
        None. A dataframe with one row per snapshot/property pair (columns: snapshot_id,
        property, value, update_status, errors) is displayed. A failure for one snapshot is
        recorded in that dataframe and does not stop processing of the remaining snapshots.
    """
    # Blank values only overwrite existing values when ignore_empty is explicitly False
    overwrite_empty = ignore_empty == False

    # Loop through and process update requests
    results = []
    for snapshot_update_entry in snapshot_update_list:
        snapshot_id = snapshot_update_entry["snapshot_id"]
        # Normalize missing keys and None values to blank strings
        phs_id = snapshot_update_entry.get("phs_id") or ""
        consent_code = snapshot_update_entry.get("consent_code") or ""
        consent_name = snapshot_update_entry.get("consent_name") or ""
        duos_id = snapshot_update_entry.get("duos_id") or ""
        dataset_ticket = snapshot_update_entry.get("dataset_ticket") or ""
        dataset_id = ""
        dataset_props = {}
        current_phs_id = ""
        current_consent_code = ""
        current_consent_name = ""
        current_duos_id = ""
        current_dataset_ticket = ""
        print(f"Processing update request for snapshot {snapshot_id}: {str(snapshot_update_entry)}")

        # Establish credentials (refreshed for every snapshot so the token is current)
        creds, _ = google.auth.default()
        auth_req = google.auth.transport.requests.Request()
        creds.refresh(auth_req)
        headers = {"Authorization": f"Bearer {creds.token}"}

        # Retrieve snapshot (skipped when there is nothing to update)
        error_list = []
        if phs_id or consent_name or dataset_ticket or consent_code or duos_id or overwrite_empty:
            try:
                snapshot_http_response = requests.get(
                    url=f"https://data.terra.bio/api/repository/v1/snapshots/{snapshot_id}",
                    headers=headers
                )
                if snapshot_http_response.status_code != 200:
                    raise RuntimeError(snapshot_http_response.text)
                snapshot_response = snapshot_http_response.json()
                source = snapshot_response["source"][0]
                dataset_id = source["dataset"]["id"]
                current_phs_id = source["dataset"]["phsId"]
                current_consent_code = snapshot_response["consentCode"]
                # Copy the existing dataset properties so they are preserved when patched below
                dataset_props = dict(source["datasetProperties"] or {})
                current_consent_name = dataset_props.get("consent_name")
                current_dataset_ticket = dataset_props.get("dataset_ticket")
                duos_group = snapshot_response["duosFirecloudGroup"]
                current_duos_id = duos_group.get("duosId") if duos_group != None else ""
            except Exception as e:
                error_list.append(f"Error retrieving snapshot ({str(e)})")

        # If the snapshot could not be retrieved, no update can be attempted: fail every property
        if error_list:
            error_str = "; ".join(error_list)
            for prop_name, prop_value in (
                ("phs_id", phs_id),
                ("consent_code", consent_code),
                ("consent_name", consent_name),
                ("duos_id", duos_id),
                ("dataset_ticket", dataset_ticket),
            ):
                results.append([snapshot_id, prop_name, prop_value, "Failure", error_str])
            continue

        # Process phs_id, consent_name, and dataset_ticket update (all live on the source dataset)
        error_list = []
        payload = {}
        if overwrite_empty or (phs_id and phs_id != current_phs_id):
            payload["phsId"] = phs_id
        if overwrite_empty or (consent_name and consent_name != current_consent_name):
            dataset_props["consent_name"] = consent_name
            payload["properties"] = dataset_props
        if overwrite_empty or (dataset_ticket and dataset_ticket != current_dataset_ticket):
            dataset_props["dataset_ticket"] = dataset_ticket
            payload["properties"] = dataset_props

        # Update dataset (only when at least one value actually changes)
        if payload:
            if not dataset_id:
                error_list.append("Error updating dataset (source dataset ID not found)")
            else:
                response = requests.patch(
                    url=f"https://data.terra.bio/api/repository/v1/datasets/{dataset_id}",
                    headers=headers,
                    json=payload
                )
                if response.status_code != 200:
                    error_list.append(f"Error updating dataset ({response.text})")

        # Record status (the three properties share one request, so they share its outcome)
        status = "Success" if not error_list else "Failure"
        error_str = "; ".join(error_list)
        for prop_name, prop_value in (
            ("phs_id", phs_id),
            ("consent_name", consent_name),
            ("dataset_ticket", dataset_ticket),
        ):
            if prop_value or overwrite_empty:
                results.append([snapshot_id, prop_name, prop_value, status, error_str])
            else:
                results.append([snapshot_id, prop_name, prop_value, "Ignored", ""])

        # Process consent_code update
        error_list = []
        if consent_code or overwrite_empty:

            # Update snapshot
            if consent_code != current_consent_code:
                response = requests.patch(
                    url=f"https://data.terra.bio/api/repository/v1/snapshots/{snapshot_id}",
                    headers=headers,
                    json={"consentCode": consent_code}
                )
                if response.status_code != 200:
                    error_list.append(f"Error updating snapshot ({response.text})")

            # Record status
            status = "Success" if not error_list else "Failure"
            error_str = "; ".join(error_list)
            results.append([snapshot_id, "consent_code", consent_code, status, error_str])
        else:
            results.append([snapshot_id, "consent_code", consent_code, "Ignored", ""])

        # Process duos_id update
        error_list = []
        if duos_id or overwrite_empty:

            # Update snapshot: a blank duos_id (only reachable when overwriting) unlinks the DUOS dataset
            if not duos_id:
                response = requests.delete(
                    url=f"https://data.terra.bio/api/repository/v1/snapshots/{snapshot_id}/unlinkDuosDataset",
                    headers=headers
                )
                if response.status_code != 200:
                    error_list.append(f"Error updating snapshot ({response.text})")
            elif duos_id != current_duos_id:
                response = requests.put(
                    url=f"https://data.terra.bio/api/repository/v1/snapshots/{snapshot_id}/linkDuosDataset/{duos_id}",
                    headers=headers
                )
                if response.status_code != 200:
                    error_list.append(f"Error updating snapshot ({response.text})")

            # Record status
            status = "Success" if not error_list else "Failure"
            error_str = "; ".join(error_list)
            results.append([snapshot_id, "duos_id", duos_id, status, error_str])
        else:
            results.append([snapshot_id, "duos_id", duos_id, "Ignored", ""])

    # Display results
    print("\nResults:")
    results_df = pd.DataFrame(results, columns = ["snapshot_id", "property", "value", "update_status", "errors"])
    display(results_df)

#############################################
## Input Parameters
#############################################

# Specify the list of snapshots to update and the updated property values to apply.
# Each entry is a dict with the keys "snapshot_id", "phs_id", "consent_code", "consent_name", "duos_id",
# and "dataset_ticket". Use an empty string for any property that has no value to apply.
# Example entries (commented out):
snapshot_update_list = [
#     {"snapshot_id": "snapshot_id", "phs_id": "phs000123", "consent_code": "c1", "consent_name": "DUOS-123456", "duos_id": "", "dataset_ticket": "ANVIL-123"}
#     {'snapshot_id': '1c1804a9-990d-4238-b7e4-b6d27691794a', 'phs_id': 'phs001211', 'consent_code': 'c1', 'consent_name': 'HMB-IRB-NPU-MDS', 'duos_id': '', 'dataset_ticket': 'ANVIL-69'},
    {'snapshot_id': 'a4512cc0-f2b7-453a-82e2-1a6bf6a2c2eb', 'phs_id': '', 'consent_code': '', 'consent_name': '', 'duos_id': '', 'dataset_ticket': 'ANVIL-767'},
]

# Specify whether empty properties should be ignored when performing the update. Setting this to False will instead
# overwrite the existing property values with a blank value (and unlink any DUOS ID when duos_id is blank).
ignore_empty = True

#############################################
## Execution
#############################################

# Apply the updates listed above and display one result row per snapshot/property pair
update_snapshot_properties(snapshot_update_list, ignore_empty)


# AnVIL File Management

## Sync between two Google buckets

Syncs files/folders between two GCS directories. 
- Input: List of tuples, where the first value is the gsURI of the source directory and the second value is the gsURI of the destination directory: ('source', 'destination')
- This will not remove any files from either directory.
- This will only sync files that differ between the two directories.

Note: This may take some time, depending on the number of files and the number of directories to sync between. 

In [None]:
import subprocess
import argparse

########## INPUTS ##########
# List of (source, destination) gsURI tuples; each source directory is synced into its destination
directories = [
    ('gs://fc-secure-3cfbf1b0-bde8-454b-804f-b648b222ea2b/gru/20230517/', 'gs://fc-secure-5a44bb4a-54c7-4a0a-856d-aeb1d5fd3056/')
]
############################
# Run one rsync per directory pair, keeping track of any pair whose rsync exits with an error
failed_syncs = []
for source_directory, destination_directory in directories:
    print(f"Running rsync for source directory: {source_directory} to destination directory: {destination_directory}")
    # -m parallelizes the transfer and -r recurses into sub-directories. The -d flag is deliberately
    # not passed, so no files are removed from the destination.
    return_code = subprocess.call(
        ["gsutil", "-m", "rsync", "-r",
            source_directory, destination_directory]
    )
    # subprocess.call returns the exit code rather than raising, so failures must be checked explicitly
    if return_code != 0:
        print(f"ERROR: rsync exited with code {return_code} for source directory: {source_directory} to destination directory: {destination_directory}")
        failed_syncs.append((source_directory, destination_directory))

# Summarize failures so they are not lost in the (potentially long) gsutil output
if failed_syncs:
    print(f"{len(failed_syncs)} of {len(directories)} directory syncs failed:")
    for source_directory, destination_directory in failed_syncs:
        print(f"\t{source_directory} -> {destination_directory}")
