In [None]:
### PLEASE COPY NOTEBOOKS TO YOUR FOLDERS TO PREVENT COMMIT CONFLICTS
from dcicutils import ff_utils
from functions.notebook_functions import *
import json

# Authentication: look up the named key pair in keypairs.json
my_env = 'data'
my_key = get_key('koray_data')
schema_name = get_schema_names(my_key)
print('WORKING ON', my_key['server'], '\n')


def _print_store_summary(collected):
    """Print the keys of `collected`, then how many items sit under all keys.

    `collected` is whatever get_query_or_linked returned; it is only required
    to offer .keys(), iteration over its keys, and per-key sequences of items
    that each carry a 'uuid'.
    """
    print(collected.keys())
    uuids = []
    for group in collected:
        for entry in collected[group]:
            uuids.append(entry['uuid'])
    print(len(uuids))


##### COLLECT ITEMS TO Release #####
# Items can be collected in two ways:
#
#   1) from one or more starting items, following their links
#        starting_items = ['46db06ad-b399-4cf4-9acc-07b3e25ef132']
#        add_items = get_query_or_linked(my_key, linked=starting_items)
#
#   2) from a search query
#        my_query = '/search/?q=GOLD&type=Item&limit=all'
#        add_items = get_query_or_linked(my_key, query=my_query)
#
# If you want, the result can be dumped to separate json files
# (will work as test insert):
#        dump_to_json(add_items, destination folder)

# Collection by search query
my_query = '/search/?biosample.biosource.individual.organism.name=mouse&biosample.biosource_summary=ES-E14&experiment_type=in%20situ%20Hi-C&type=ExperimentHiC'
store = get_query_or_linked(my_key, query=my_query, linked_frame='raw')
_print_store_summary(store)
print()

# Collection by starting item; this result replaces the query result in `store`
find_linked = ['48732435-5a16-4d86-a0f6-ace18dc62b6c']
store = get_query_or_linked(my_key, linked=find_linked, linked_frame='raw')
_print_store_summary(store)

In [None]:
#### This part should only run once!

# Target environment that the collected items are copied into
transfer_env = 'fourfront-mastertest'
transfer_key = ff_utils.get_authentication_with_server({}, ff_env=transfer_env)

# What to do with an item that is already present on the target:
#   False - leave it alone: it is skipped here and left out of second_round_items
#   True  - do not re-post it, but keep it in second_round_items so that the
#           Round II patch overwrites its metadata (will include user/award etc)
overwrite_existing = False

# reverse lookup dictionary for schema names (each value of schema_name -> its key)
rev_schema_name = {name: key for key, name in schema_name.items()}

# types present in the store, visited in the order defined by ORDER
my_types = [i for i in ORDER if i in store.keys()]

# per type: the source items that the later rounds should act on
second_round_items = {}

# Round I - only put the required - skip if exists already
for a_type in my_types:
    print(a_type)
    obj_type = rev_schema_name[a_type]
    # find required field
    schema_info = ff_utils.get_metadata('/profiles/{}.json'.format(a_type), key=transfer_key)
    req_fields = schema_info['required']
    ids = schema_info['identifyingProperties']
    # the first POST carries only the required and identifying properties
    first_fields = set(req_fields) | set(ids)
    # uuids found on the target that must be left untouched
    remove_existing_items = set()
    counter = 0
    print(len(store[a_type]), 'items exist on source')
    for an_item in store[a_type]:
        counter += 1

        # does the item exist
        # TODO check with all identifiers
        try:
            existing = ff_utils.get_metadata(an_item['uuid'], key=transfer_key)
        except Exception:
            # a failed lookup is taken to mean the item is not on the target;
            # KeyboardInterrupt / SystemExit are deliberately not caught
            existing = None

        if existing:
            if not overwrite_existing:
                # skip the items that exists
                remove_existing_items.add(an_item['uuid'])
                print("{} {} can not post item".format(obj_type, an_item['uuid']))
            # with overwrite_existing the item stays queued for the patch round,
            # but it is never re-posted over an item that is already there
            continue

        post_first = {key: value for (key, value) in an_item.items() if key in first_fields}
        ff_utils.post_metadata(post_first, obj_type, key=transfer_key)

    second_round_items[a_type] = [i for i in store[a_type] if i['uuid'] not in remove_existing_items]
    print(len(second_round_items[a_type]), 'items transfered to target')
    print()

    

In [None]:
# Round II - patch the rest of the metadata
# Each item queued in second_round_items is patched on the target with the
# full item as it was collected from the source, addressed by its uuid.
# Types with nothing queued simply contribute no iterations.
for a_type in my_types:
    for an_item in second_round_items[a_type]:
        ff_utils.patch_metadata(an_item, obj_id=an_item['uuid'], key=transfer_key)

In [None]:
# Round III - move attachments
# For every queued item that has an 'attachment', copy the S3 object named by
# its blob_id from the source blob bucket to the target blob bucket (same key).
import boto3
s3 = boto3.resource('s3')

# source_addresses: bucket names reported by the source server's /health page
source_health = ff_utils.get_metadata('/health', key=my_key)
source_raw = source_health['file_upload_bucket']
source_pf = source_health['processed_file_bucket']
source_att = source_health['blob_bucket']

# target_addresses: bucket names reported by the target server's /health page
target_health = ff_utils.get_metadata('/health', key=transfer_key)
target_raw = target_health['file_upload_bucket']
target_pf = target_health['processed_file_bucket']
target_att = target_health['blob_bucket']

for a_type in my_types:
    for an_item in second_round_items[a_type]:
        if 'attachment' not in an_item:
            continue
        at_key = an_item['attachment']['blob_id']
        copy_source = {'Bucket': source_att, 'Key': at_key}
        try:
            s3.meta.client.copy(copy_source, target_att, at_key)
        except Exception as e:
            # best effort: report and move on, but show the underlying error so
            # that a permission or target-bucket problem is not mistaken for a
            # missing source object (Ctrl-C still interrupts the loop)
            print('Can not find attachment on source', an_item['uuid'], e)
            continue
        print('attachment copied')


In [None]:
import boto3
s3 = boto3.resource('s3')

# source_addresses: bucket names reported by the source server's /health page
source_health = ff_utils.get_metadata('/health', key=my_key)
source_raw = source_health['file_upload_bucket']
source_pf = source_health['processed_file_bucket']
source_att = source_health['blob_bucket']

# target_addresses: bucket names reported by the target server's /health page
target_health = ff_utils.get_metadata('/health', key=transfer_key)
target_raw = target_health['file_upload_bucket']
target_pf = target_health['processed_file_bucket']
target_att = target_health['blob_bucket']

# Round IV - move files
# For each queued file item, copy its main file and any extra files from the
# matching source bucket to the matching target bucket under the same key.
for a_type in my_types:
    # choose the bucket pair by file type; any other type is not handled here
    if a_type in ('file_processed',):
        source_file_bucket = source_pf
        target_file_bucket = target_pf
    elif a_type in ('file_reference', 'file_fastq', 'file_microscopy', 'file_fasta', 'file_calibration'):
        source_file_bucket = source_raw
        target_file_bucket = target_raw
    else:
        continue

    for an_item in second_round_items[a_type]:
        # the upload keys are read from a fresh GET of the item on the source
        file_resp = ff_utils.get_metadata(an_item['uuid'], key=my_key)
        # accumulate all keys from a file object to be uploaded:
        # extra file keys first, then the main file key
        files_to_upload = [extra['upload_key'] for extra in (file_resp.get('extra_files') or [])]
        files_to_upload.append(file_resp['upload_key'])

        for file_key in files_to_upload:
            copy_source = {'Bucket': source_file_bucket, 'Key': file_key}
            try:
                s3.meta.client.copy(copy_source, target_file_bucket, file_key)
            except Exception as e:
                # best effort: report and move on, but show the underlying error
                # so that a permission or target-bucket problem is not mistaken
                # for a missing source object (Ctrl-C still interrupts the loop)
                print('Can not find file on source', file_key, e)
                continue
            print('file copied')
            
