In [35]:
"""
Update tags from the given spreadsheet to add a structure like:

"tag:tags": {
          "itemsType": "AttributeValue",
          "items": [
            "dbGaP:phs001584",
            "consortium:eMERGE",
            "consent_code:HMB-MDS"
          ]
        },

Tag Types:
dbGaP: None | phs001234
consortium: string
consent_code: string
"""
## Imports
import datetime
import firecloud.api as FAPI
import json
import os
import pandas as pd
import pprint
import re
import requests



# Show which identity the FireCloud client is authenticated as.
print(FAPI.whoami())

# Pretty-printer kept around for ad-hoc inspection of nested structures.
pp = pprint.PrettyPrinter(indent=4)

# Details of the workspace hosting this notebook, read from the environment.
# A missing variable raises KeyError.
BILLING_PROJECT_ID, WORKSPACE, WORKSPACE_BUCKET = (
    os.environ[env_key]
    for env_key in ('WORKSPACE_NAMESPACE', 'WORKSPACE_NAME', 'WORKSPACE_BUCKET')
)

# Root of the FireCloud REST API and the namespace that holds the catalog
# workspaces (both namespace constants carry the same value).
API_ROOT = "https://api.firecloud.org/api"
CATALOG_WORKSPACE_NAMESPACE = ANVIL_DATA_STORAGE = "anvil-datastorage"

def getDefaultAccessToken():
    result = !gcloud auth application-default print-access-token
    return result[0]


ACCESS_TOKEN = getDefaultAccessToken()

print(BILLING_PROJECT_ID)
print(WORKSPACE)

print("Copy files:")
# print(WORKSPACE_BUCKET)
!gsutil cp $WORKSPACE_BUCKET/notebooks/AnVILCatalogWorkspacesPublicMetadata-2022-03-10.csv  .
print("List all files:")
!ls 

## Read in tags list
metadata = pd.read_csv('AnVILCatalogWorkspacesPublicMetadata-2022-03-10.csv')
metadata = metadata[["name","consortium", "phsId"]]
metadata.set_index("name", inplace=True)


def getTagsURL(namespace,workspace):
    """Build the URL of the tags endpoint for one workspace.

    Args:
        namespace: Namespace (billing project) that owns the workspace.
        workspace: Workspace name.

    Returns:
        ``<API_ROOT>/workspaces/<namespace>/<workspace>/tags`` as a string.
    """
    url_parts = [API_ROOT, "workspaces", namespace, workspace, "tags"]
    return "/".join(url_parts)

def getAPIHeaders():
    """Return the HTTP headers sent with every API request.

    The headers ask for a JSON response and carry the module-level
    ACCESS_TOKEN as a bearer token.
    """
    bearer = 'Bearer ' + ACCESS_TOKEN
    return {'accept': 'application/json', 'Authorization': bearer}

# Return the list of tags for a given workspace
def getWorkspaceTags(namespace,workspace):
    """Fetch the tags currently set on a workspace.

    Args:
        namespace: Namespace (billing project) that owns the workspace.
        workspace: Workspace name.

    Returns:
        The decoded JSON body of the GET response (expected to be the list
        of tag strings).

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    url = getTagsURL(namespace,workspace)
    headers = getAPIHeaders()
    r = requests.get(url, headers = headers)
    # An error response has a JSON body too. Returning it would let the caller
    # mistake it for the tag list and write it back over the real tags.
    r.raise_for_status()
    return r.json()

# Replaces the set of tags on a workspace with the given list of tags
def setWorkspaceTags(namespace,workspace,tags):
    """Overwrite a workspace's tags with ``tags``.

    Args:
        namespace: Namespace (billing project) that owns the workspace.
        workspace: Workspace name.
        tags: The complete list of tags to PUT to the tags endpoint.

    Returns:
        The ``requests`` response of the PUT call, so callers can check
        whether the update succeeded. The status code is also printed.

    Raises:
        TypeError: If ``tags`` is not a list (checked before any request is
            made). TypeError is an Exception, so existing handlers still work.
    """
    if not isinstance(tags,list):
        raise TypeError('Tags must be a list')

    url = getTagsURL(namespace,workspace)
    headers = getAPIHeaders()
    resp = requests.put(url, json=tags, headers=headers)
    print(resp.status_code)
    return resp

def backupWorkspace(w):
   
    # Create the filename
    name = w['workspace']['name']
    timestampStr = datetime.datetime.now().isoformat()
    fileName = name+'_'+timestampStr+'.json'
    
    # Save the to the local cloud environment
    with open(fileName, 'w') as fp:
        json.dump(w, fp,  indent=4)
        
    # Copy the file to the workspace bucket
    !gsutil cp ./$fileName $WORKSPACE_BUCKET/backups/

    
def modifyNextTags(tags, tag):
    """Return a new tag list in which ``tag`` replaces tags of the same type.

    The type of ``tag`` is taken from its prefix ("Consortium" or "dbGaP").
    Every existing tag that starts with that prefix, compared
    case-insensitively, is dropped and ``tag`` is appended. Tags that only
    mention the word elsewhere in their text are kept.

    Args:
        tags: Iterable of existing tag strings; it is not modified.
        tag: The tag to add, e.g. ``"Consortium: CCDG"`` or ``"dbGaP: phs001584"``.

    Returns:
        A new list: the surviving tags in their original order, then ``tag``.

    Raises:
        ValueError: If ``tag`` starts with neither known prefix.
    """
    if tag.startswith("Consortium"):
        key = "Consortium"
    elif tag.startswith("dbGaP"):
        key = "dbGaP"
    else:
        raise ValueError("Unknown tag.." + tag)

    # Compare on the prefix only: a substring match would also delete
    # unrelated tags that happen to contain the key somewhere in their text.
    prefix = key.lower()
    nextTags = [el for el in tags if not el.lower().startswith(prefix)]

    # Add back the current one
    nextTags.append(tag)
    return nextTags

## Filter out workspaces without write access
workspaces = FAPI.list_workspaces().json()

# Sort workspace names by access level: read-only ones go in `reader`,
# ones the caller can modify (WRITER or OWNER) go in `writer`.
reader, writer = set(), set()
for w in workspaces:
    name = w['workspace']['name']
    if w['accessLevel'] in ("WRITER", "OWNER"):
        writer.add(name)
    elif w['accessLevel'] == "READER":
        reader.add(name)
        



pet-117272931645288568532@terra-e36fcccd.iam.gserviceaccount.com
clever-canary-anvil-dev
Clever_Canary_AnVIL_Dev
Copy files:
Copying gs://fc-cb5be780-171f-49d4-9116-b77fd2237d0b/notebooks/AnVILCatalogWorkspacesPublicMetadata-2022-03-10.csv...
/ [1 files][ 87.6 KiB/ 87.6 KiB]                                                
Operation completed over 1 objects/87.6 KiB.                                     
List all files:
 1000G-high-coverage-2019_2022-03-10T07:30:15.619581.json
'Add Workspace Writer.ipynb'
 AnVILCatalogWorkspacesInput-2022-03-07.csv
 AnVILCatalogWorkspacesInput-2022-03-08.csv
 AnVILCatalogWorkspacesInput-2022-03-10.csv
 AnVILCatalogWorkspacesPublicMetadata-2022-02-25.csv
 AnVILCatalogWorkspacesPublicMetadata-2022-03-08.csv
 AnVILCatalogWorkspacesPublicMetadata-2022-03-09.csv
 AnVILCatalogWorkspacesPublicMetadata-2022-03-10.csv
 AnVIL_ccdg_asc_ndd_daly_talkowski_ac-boston_asd_exome_2022-03-10T07:09:16.963605.json
 AnVIL_ccdg_asc_ndd_daly_talkowski_AGRE_asd_exome_2022-03-10

In [36]:
## Update workspace tags
# Number of matching workspaces to skip before updating starts.
# NOTE(review): presumably these were handled by an earlier run; set to 0 to
# process everything.
SKIP_FIRST = 410

length = 0
for w in workspaces:
    name = w['workspace']['name']
    # Only touch workspaces we can write to and that appear in the spreadsheet.
    if name in writer and name in metadata.index:
        length = length+1
        if length <= SKIP_FIRST:
            continue

        # Get consortium and dbGaP id from the tags sheet
        consortium = metadata.at[name,"consortium"]
        dbGaP = metadata.at[name,"phsId"]
        # An empty phsId cell is read as NaN; the tag format documents "None"
        # for workspaces without a dbGaP id.
        if pd.isna(dbGaP):
            dbGaP = "None"
        tags = ["Consortium: " + consortium, "dbGaP: " + dbGaP]

        print("##########################")
        backupWorkspace(w)
        currentTags = getWorkspaceTags(ANVIL_DATA_STORAGE,name)
        # The PUT below replaces ALL tags, so never build the new list from
        # anything that is not a real tag list (e.g. an error payload).
        if not isinstance(currentTags, list):
            print("Skipping " + name + ": unexpected tags response")
            print(currentTags)
            continue

        nextTags = currentTags.copy()
        for tag in tags:
            nextTags = modifyNextTags(nextTags,tag)

        print(name)
        # Update the workspace's tags
        setWorkspaceTags(ANVIL_DATA_STORAGE,name, nextTags)

##########################
Copying file://./AnVIL_PAGE_SoL_HMB_WGS_2022-03-10T08:16:07.966827.json [Content-Type=application/json]...
/ [1 files][  3.4 KiB/  3.4 KiB]                                                
Operation completed over 1 objects/3.4 KiB.                                      
AnVIL_PAGE_SoL_HMB_WGS
200
##########################
Copying file://./AnVIL_CMG_Broad_Orphan_Manton_WGS_2022-03-10T08:16:17.337221.json [Content-Type=application/json]...
/ [1 files][  4.6 KiB/  4.6 KiB]                                                
Operation completed over 1 objects/4.6 KiB.                                      
AnVIL_CMG_Broad_Orphan_Manton_WGS
200
##########################
Copying file://./AnVIL_CMG_Broad_Brain_Sherr_WES_2022-03-10T08:16:26.221108.json [Content-Type=application/json]...
/ [1 files][  3.4 KiB/  3.4 KiB]                                                
Operation completed over 1 objects/3.4 KiB.                                      
AnVIL_CMG_Broad_Brain_Sh

/ [1 files][  4.3 KiB/  4.3 KiB]                                                
Operation completed over 1 objects/4.3 KiB.                                      
AnVIL_ccdg_asc_ndd_daly_talkowski_minshew_asd_exome
200
##########################
Copying file://./AnVIL_CMG_Broad_Blood_Fleming_WES_2022-03-10T08:18:37.207680.json [Content-Type=application/json]...
/ [1 files][  3.3 KiB/  3.3 KiB]                                                
Operation completed over 1 objects/3.3 KiB.                                      
AnVIL_CMG_Broad_Blood_Fleming_WES
200
##########################
Copying file://./AnVIL_CCDG_WashU_CVD_EOCAD_Duke_WGS_2022-03-10T08:18:42.369565.json [Content-Type=application/json]...
/ [1 files][  4.8 KiB/  4.8 KiB]                                                
Operation completed over 1 objects/4.8 KiB.                                      
AnVIL_CCDG_WashU_CVD_EOCAD_Duke_WGS
200
##########################
Copying file://./anvil_ccdg_asc_ndd_daly_talkowski_cdcseed

200
##########################
Copying file://./AnVIL_CMG_Broad_Orphan_Lerner-Ellis_WES_2022-03-10T08:20:28.319474.json [Content-Type=application/json]...
/ [1 files][  3.4 KiB/  3.4 KiB]                                                
Operation completed over 1 objects/3.4 KiB.                                      
AnVIL_CMG_Broad_Orphan_Lerner-Ellis_WES
200
##########################
Copying file://./AnVIL_CCDG_TOPMed_WashU_CVD_Afib_Penn_WGS_2022-03-10T08:20:33.333354.json [Content-Type=application/json]...
/ [1 files][  3.2 KiB/  3.2 KiB]                                                
Operation completed over 1 objects/3.2 KiB.                                      
AnVIL_CCDG_TOPMed_WashU_CVD_Afib_Penn_WGS
200
##########################
Copying file://./AnVIL_CCDG_NYGC_NP_Autism_AGRE_WGS_2022-03-10T08:20:38.352465.json [Content-Type=application/json]...
/ [1 files][  4.9 KiB/  4.9 KiB]                                                
Operation completed over 1 objects/4.9 KiB.      

/ [1 files][  3.7 KiB/  3.7 KiB]                                                
Operation completed over 1 objects/3.7 KiB.                                      
AnVIL_CCDG_Broad_NP_Epilepsy_CZEMTH_GRU_GSA-MD
200
##########################
Copying file://./AnVIL_CCDG_Broad_NP_Epilepsy_USAHEP_GRU_WES_2022-03-10T08:22:26.933964.json [Content-Type=application/json]...
/ [1 files][  4.4 KiB/  4.4 KiB]                                                
Operation completed over 1 objects/4.4 KiB.                                      
AnVIL_CCDG_Broad_NP_Epilepsy_USAHEP_GRU_WES
200
##########################
Copying file://./AnVIL_CCDG_Broad_MI_UnivUtah_DS_CVD_WES_2022-03-10T08:22:31.910608.json [Content-Type=application/json]...
/ [1 files][  6.4 KiB/  6.4 KiB]                                                
Operation completed over 1 objects/6.4 KiB.                                      
AnVIL_CCDG_Broad_MI_UnivUtah_DS_CVD_WES
200
##########################
Copying file://./ANVIL_CMG_BROAD_BR