In [1]:
## Utility to copy notebooks from one workspace to another, in an ad-hoc manner

# Imports
from firecloud import api as fapi
import json
import os
import pandas as pd
import csv
from io import StringIO
from google.cloud import storage
import re
import hashlib

# Functions
def get_notebooks_list(source_bucket, user_project, nbs_to_include=None, nbs_to_exclude=None):
    """List the notebook file names stored directly under ``notebooks/`` in a bucket.

    Args:
        source_bucket: Name of the GCS bucket to inspect (without ``gs://``).
        user_project: Project passed to the bucket handle as ``user_project``.
        nbs_to_include: Optional iterable of notebook file names. When non-empty,
            only these names are returned; when empty or ``None``, every notebook
            is a candidate.
        nbs_to_exclude: Optional iterable of notebook file names that are always
            dropped from the result, even if they also appear in ``nbs_to_include``.

    Returns:
        A list of notebook file names (no ``notebooks/`` prefix), in the order the
        objects were listed, without duplicates caused by repeated filter entries.
    """
    include = set(nbs_to_include or [])
    exclude = set(nbs_to_exclude or [])

    # The trailing slash keeps sibling prefixes such as "notebooks_old/" out of the listing.
    prefix = 'notebooks/'

    storage_client = storage.Client()
    storage_bucket = storage_client.bucket(source_bucket, user_project=user_project)

    notebook_names = []
    for blob in storage_client.list_blobs(storage_bucket, prefix=prefix):
        relative_name = blob.name[len(prefix):]
        # Skip the "notebooks/" placeholder object and anything nested in a
        # sub-folder: neither is a notebook file that can be copied by name.
        if not relative_name or '/' in relative_name:
            continue
        if include and relative_name not in include:
            continue
        # Filtering before appending avoids removing a name that was never added.
        if relative_name in exclude:
            continue
        notebook_names.append(relative_name)
    return notebook_names

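# Example usage (the second argument is the user/billing project):
#print(get_notebooks_list('fc-9cd4583e-7855-4b5e-ae88-d8971cfd5b46', '<user-project>'))
#print(get_notebooks_list('fc-9cd4583e-7855-4b5e-ae88-d8971cfd5b46', '<user-project>', ['file_transforms.ipynb']))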

def copy_notebooks(source_bucket, target_bucket, notebook_list):
    """Copy the named notebooks from one bucket's ``notebooks/`` folder to another's.

    Each notebook is copied with its own ``gsutil cp`` invocation. A failed copy
    does not stop the remaining copies; the failure and gsutil's error output are
    printed instead.

    Args:
        source_bucket: Name of the bucket to copy from (without ``gs://``).
        target_bucket: Name of the bucket to copy to (without ``gs://``).
        notebook_list: Iterable of notebook file names located under ``notebooks/``.
    """
    # Local import keeps this definition self-contained within the notebook cell.
    import subprocess

    source_path = 'gs://' + source_bucket + '/notebooks'
    target_path = 'gs://' + target_bucket + '/notebooks'
    for item in notebook_list:
        # An argument list (no shell) keeps names containing spaces or shell
        # metacharacters intact as a single gsutil argument.
        command = ['gsutil', 'cp', source_path + '/' + item, target_path + '/']
        try:
            result = subprocess.run(command, capture_output=True, text=True)
        except OSError as exc:
            # gsutil itself could not be launched, so no later copy can succeed either.
            print('Unable to run gsutil: {}'.format(exc))
            return
        if result.returncode != 0:
            print('Failed to copy notebook {!r}:\n{}'.format(item, result.stderr))

def copy_resources_directory(source_bucket, target_bucket):
    source_path = 'gs://' + source_bucket + '/ingest_pipeline/resources'
    target_path = 'gs://' + target_bucket + '/ingest_pipeline'
    !gsutil -m cp -r $source_path $target_path/

def copy_mapping_directory(source_bucket, target_bucket):
    source_path = 'gs://' + source_bucket + '/ingest_pipeline/mapping'
    target_path = 'gs://' + target_bucket + '/ingest_pipeline'
    !gsutil -m cp -r $source_path $target_path/
    
# Run ad-hoc copy
def run_copy(src_bucket, user_project, notebooks_to_include, notebooks_to_exclude, copy_resources, copy_mapping, target_bucket=None):
    """Copy notebooks, and optionally the ingest-pipeline folders, between buckets.

    Args:
        src_bucket: Name of the bucket to copy from (without ``gs://``).
        user_project: Project forwarded to ``get_notebooks_list``.
        notebooks_to_include: Notebook file names to copy; empty means all.
        notebooks_to_exclude: Notebook file names to skip.
        copy_resources: When truthy, also run ``copy_resources_directory``.
        copy_mapping: When truthy, also run ``copy_mapping_directory``.
        target_bucket: Name of the bucket to copy to. When omitted, the
            notebook-level variable ``tar_bucket`` is used, which is what this
            function relied on before the parameter existed.

    Raises:
        NameError: If ``target_bucket`` is omitted and ``tar_bucket`` is not defined.
    """
    if target_bucket is None:
        try:
            # Legacy behavior: fall back to the global set in the parameters cell.
            target_bucket = tar_bucket
        except NameError:
            raise NameError(
                "No target bucket given: pass target_bucket=... or define tar_bucket before calling run_copy"
            ) from None

    nb_copy_list = get_notebooks_list(src_bucket, user_project, notebooks_to_include, notebooks_to_exclude)
    copy_notebooks(src_bucket, target_bucket, nb_copy_list)
    if copy_resources:
        copy_resources_directory(src_bucket, target_bucket)
    if copy_mapping:
        copy_mapping_directory(src_bucket, target_bucket)


In [None]:
# Parameters for this run
user_project = "terra-92e58ed4"  # project passed to the bucket handle when listing notebooks
src_bucket = "fc-2a9eefc3-0302-427f-9ac3-82f078741c03"  # bucket to copy from
tar_bucket = "fc-bc3dad8b-b3f9-43c7-b100-e8ed59b27f43"  # bucket to copy to; run_copy reads this notebook-level name
notebooks_to_include = []  # leave empty to copy all notebooks
notebooks_to_exclude = []  # notebook file names to skip
copy_resources = True  # also copy ingest_pipeline/resources
copy_mapping = True  # also copy ingest_pipeline/mapping

# Kick off the copy with the parameters above
run_copy(
    src_bucket=src_bucket,
    user_project=user_project,
    notebooks_to_include=notebooks_to_include,
    notebooks_to_exclude=notebooks_to_exclude,
    copy_resources=copy_resources,
    copy_mapping=copy_mapping,
)