In [14]:
"""
Update tags from the given spreadsheet to add a structure like:

tag:tags": {
          "itemsType": "AttributeValue",
          "items": [
            "dbGaP:phs001584",
            "consortium:eMERGE",
            "consent_code:HMB-MDS"
          ]
        },

Tag Types:
dbGaP: None | phs001234
consortium: string
consent_code: string
"""
## Imports
import datetime
import firecloud.api as FAPI
import json
import os
import pandas as pd
import pprint
import re
import requests



print(FAPI.whoami())
pp = pprint.PrettyPrinter(indent=4)

BILLING_PROJECT_ID = os.environ['WORKSPACE_NAMESPACE']
WORKSPACE = os.environ['WORKSPACE_NAME']
WORKSPACE_BUCKET = os.environ['WORKSPACE_BUCKET']

CATALOG_WORKSPACE_NAMESPACE = "anvil-datastorage"
API_ROOT = "https://api.firecloud.org/api"
ANVIL_DATA_STORAGE = "anvil-datastorage"

def getDefaultAccessToken():
    result = !gcloud auth application-default print-access-token
    return result[0]


ACCESS_TOKEN = getDefaultAccessToken()

print(BILLING_PROJECT_ID)
print(WORKSPACE)

print("Copy files:")
# print(WORKSPACE_BUCKET)
!gsutil cp $WORKSPACE_BUCKET/notebooks/AnVILCatalogWorkspacesPublicMetadata-2022-02-25.csv  .
print("List all files:")
!ls 

## Read in tags list
workspaceTags = pd.read_csv('AnVILCatalogWorkspacesPublicMetadata-2022-02-25.csv')
workspaceTags = workspaceTags[["name","sheet:consortium", "tagsSheet:tag:tags:dbGaP"]]
workspaceTags.set_index("name", inplace=True)




def getTagsURL(namespace,workspace):
    return  API_ROOT+"/workspaces/"+ namespace + "/" + workspace + "/tags"

def getAPIHeaders():
    headers = {
        'accept': 'application/json',
        'Authorization' : 'Bearer '+ACCESS_TOKEN
    }
    return headers

# Return the list of tags for a given workspace    
def getTags(namespace,workspace):
    url = getTagsURL(namespace,workspace)
    headers = getAPIHeaders()
    r = requests.get(url, headers = headers)
    return r.json()

# Replaces the set the tags on a workspace to the given list of tags
def setTags(namespace,workspace,tags):
    if not isinstance(tags,list):
        raise Exception('Tags must be a list')
        
    if len(tags) > 15:
        raise Exception('Limit to setting only 10 tags for now.')
        
    url = getTagsURL(namespace,workspace)
    headers = getAPIHeaders()
    # uncomment me to run resp = requests.put(url, json=tags, headers=headers)
#     print(resp.status_code)

def backupWorkspace(w):
   
    # Create the filename
    name = w['workspace']['name']
    timestampStr = datetime.datetime.now().isoformat()
    fileName = name+'_'+timestampStr+'.json'
    
    # Save the to the local cloud environment
    with open(fileName, 'w') as fp:
        json.dump(w, fp,  indent=4)
        
    # Copy the file to the workspace bucket
    !gsutil cp ./$fileName $WORKSPACE_BUCKET/backups/

        



# for w in workspaces:
#     workspaceNames.add(w['workspace']['name'])
    


def replaceTag(tags, tag):
    if tag.startswith("Consortium"):
        key = "Consortium"
    elif tag.startswith("dbGaP"):
        key = "dbGap"
    else:
        raise Exception("Unknonw tag.." + tag)
    
    # Remove all tags of type key
    nextTags = list(filter(lambda el: not re.search(key, el, re.IGNORECASE),tags))
    
    # Add back the current one
    nextTags.append(tag)
    return nextTags

## Filter out workspaces without write access
workspaces = FAPI.list_workspaces().json(


reader = set()
writer = set()
for w in workspaces:
    name = w['workspace']['name']
    if w['accessLevel'] == "READER":
        reader.add(name)
    if w['accessLevel'] == "WRITER" or w['accessLevel'] == "OWNER":
        writer.add(name)


length = 0        
for name in workspaceTags.index.values:
    if name in writer:
        length = length+1
            if length > 1:
                break        
        # Get consortium from tags sheet
        consortium = workspaceTags.at[name,"sheet:consortium"]
        dbGaP = workspaceTags.at[name,"tagsSheet:tag:tags:dbGaP"]
        tags = []
        if consortium != "Unspecified":
            tags.append("Consortium: " + consortium)
        if dbGaP != "Unspecified":
            # Registration Pending case 
            if re.search("registration", dbGaP, re.IGNORECASE):
                tags.append("dbGaP: " + "Registration Pending")
            # Id Exists
            elif not re.search("open", dbGaP, re.IGNORECASE):
                tags.append("dbGaP: " + dbGaP)
        if len(tags):
#             print("##########################")
            currentTags = getTags(ANVIL_DATA_STORAGE,name)
            nextTags = currentTags.copy()
            for tag in tags:
                nextTags = replaceTag(nextTags,tag)
#             print(currentTags)
#             print(nextTags)
            print(name, nextTags)
#             setTags(ANVIL_DATA_STORAGE, name,nextTags)
                
                    
        
        
     
# tags = getTags(ANVIL_DATA_STORAGE,"AnVIL_CSER_NCGENES2_GRU")
# print(tags)

# ccdg_tags = ['Access WS', 
#              'Consortium: CCDG',
#              'dbGaP: Registration Pending',
#              'DUL:GSR:notAllowed',
#              'Exome',
#              'GRCh37/hg19',
#              'Nov 4, 2021',
#              'PDO-21942',
#              'RP-2315',
#              'Standard Germline Exome v6 Plus GSA v1 Array']

# cser_tags = [
#     "Consortium: CSER"
# ]

# setTags(ANVIL_DATA_STORAGE, "AnVIL_CSER_NCGENES2_GRU",cser_tags)

# print(getDefaultAccessToken())



SyntaxError: invalid syntax (467552736.py, line 138)

In [2]:
# dir(FAPI)
#help(FAPI)