In [9]:
### PLEASE COPY NOTEBOOKS TO YOUR FOLDERS TO PREVENT COMMIT CONFLICTS

### This notebook copies items from the given environment to your local build.
### It expects a 'local' key in keypairs.json; if you do not have one, handle your keys accordingly.
from dcicutils import ff_utils
from functions.notebook_functions import *
import json

# get key from keypairs.json
my_env = 'data'
my_key = get_key('koray_data')
schema_name = get_schema_names(my_key)
print('WORKING ON', my_key['server'], '\n')

# Starting item(s); expand_es_metadata returns `store` (item type -> list of
# items, as the loop below relies on) and `uuids`.
find_linked = ['11f207be-ebc4-4622-8b42-02e7841d17db']
store, uuids = ff_utils.expand_es_metadata(find_linked, key = my_key, add_pc_wfr = True, ignore_field=['references', 'attachments'])

# Per-type item counts, followed by the overall total
for key in store:
    print(key, len(store[key]))
print(sum(len(store[key]) for key in store))

# replace workflows from inserts
# Only attempted when workflows were collected, so a store without any
# workflow no longer fails with a KeyError.
if 'workflow' in store:
    # `with` guarantees the inserts file is closed after it is parsed
    with open('/Users/koray/Github/fourfront/src/encoded/tests/data/inserts/workflow.json') as insert_file:
        insert_f = json.load(insert_file)
    # set gives constant-time membership checks in the filter below
    wf_uuids = {i['uuid'] for i in store['workflow']}
    # keep only the insert entries whose uuid was collected from the source
    updated_wf = [i for i in insert_f if i['uuid'] in wf_uuids]
    store['workflow'] = updated_wf

WORKING ON https://data.4dnucleome.org 

organism 1
award 8
target 4
biosample_cell_culture 2
workflow_run_awsem 44
workflow 6
static_section 1
lab 9
file_processed 23
file_reference 3
experiment_seq 10
file_format 7
vendor 4
experiment_set_replicate 1
individual_human 1
biosample 2
file_fastq 20
biosource 1
protocol 2
antibody 4
quality_metric_fastqc 16
ontology 2
quality_metric_chipseq 11
ontology_term 10
software 12
user 17
221


In [10]:
#### This part should only run once!

transfer_env = 'local'
transfer_key = get_key('local')

# if the item exists in the target, should round II overwrite it
# (this will include user/award etc)
overwrite_existing = True

# lookup dictionary used to pick the collection to post to
# NOTE(review): schema_name[key] is `name`, so this maps every schema name to
# itself rather than back to `key` - confirm that is the intended behaviour.
rev_schema_name = {name: schema_name[key] for key, name in schema_name.items()}

# process types in the order given by ORDER, restricted to collected types
my_types = [i for i in ORDER if i in store.keys()]

# item type -> items that round II will patch
second_round_items = {}

# Round I - only post the required/identifying fields - skip if exists already
for a_type in my_types:
    print(a_type)
    obj_type = rev_schema_name[a_type]
    # find required and identifying fields from the target's schema;
    # a schema without one of these keys is treated as having none
    schema_info = ff_utils.get_metadata('/profiles/{}.json'.format(a_type), key=transfer_key)
    req_fields = schema_info.get('required', [])
    ids = schema_info.get('identifyingProperties', [])
    first_fields = set(req_fields) | set(ids)
    # uuids that must not be patched in round II (set: cheap membership test)
    remove_existing_items = set()

    print(len(store[a_type]), 'items exist on source')
    posted = 0
    skip_exist = 0
    for an_item in store[a_type]:
        try:
            # TODO check with all identifiers
            existing = ff_utils.get_metadata(an_item['uuid'], key=transfer_key)
        except Exception:
            # a failed lookup is treated as "not on the target yet";
            # KeyboardInterrupt/SystemExit are deliberately not swallowed
            existing = None
        # skip the items that exist; if overwrite is not allowed, also take
        # them out of the patch list
        if existing:
            skip_exist += 1
            if not overwrite_existing:
                remove_existing_items.add(an_item['uuid'])
            continue
        posted += 1
        # first pass carries only the required/identifying fields
        post_first = {key: value for (key, value) in an_item.items() if key in first_fields}
        ff_utils.post_metadata(post_first, obj_type, key = transfer_key)

    second_round_items[a_type] = [i for i in store[a_type] if i['uuid'] not in remove_existing_items]
    print(posted, 'items posted,', skip_exist, 'existing items skipped')
    print(len(second_round_items[a_type]), 'items will be patched in second round')
    print()
    

user
17 items exist on source
11 items posted, 6 existing items skipped
17 items will be patched in second round

award
8 items exist on source
6 items posted, 2 existing items skipped
8 items will be patched in second round

lab
9 items exist on source
8 items posted, 1 existing items skipped
9 items will be patched in second round

static_section
1 items exist on source
0 items posted, 1 existing items skipped
1 items will be patched in second round

ontology
2 items exist on source
0 items posted, 2 existing items skipped
2 items will be patched in second round

ontology_term
10 items exist on source
3 items posted, 7 existing items skipped
10 items will be patched in second round

file_format
7 items exist on source
0 items posted, 7 existing items skipped
7 items will be patched in second round

organism
1 items exist on source
0 items posted, 1 existing items skipped
1 items will be patched in second round

target
4 items exist on source
4 items posted, 0 existing items skipped
4

In [11]:
# Round II - patch the full metadata onto every item kept for this round
for a_type in my_types:
    obj_type = rev_schema_name[a_type]
    items_to_patch = second_round_items[a_type]
    if not items_to_patch:
        print(a_type, '- no items to patch')
        continue
    # fastq items are patched without their 'extra_files' field
    strip_extra_files = (a_type == 'file_fastq')
    for an_item in items_to_patch:
        if strip_extra_files:
            # removes the key in place when present, no-op otherwise
            an_item.pop('extra_files', None)
        ff_utils.patch_metadata(an_item, obj_id = an_item['uuid'], key = transfer_key)
    print(a_type, '- patched')

user - patched
award - patched
lab - patched
static_section - patched
ontology - patched
ontology_term - patched
file_format - patched
organism - patched
target - patched
vendor - patched
protocol - patched
biosample_cell_culture - patched
individual_human - patched
biosource - patched
antibody - patched
biosample - patched
quality_metric_fastqc - patched
quality_metric_chipseq - patched
file_fastq - patched
file_processed - patched
file_reference - patched
experiment_seq - patched
experiment_set_replicate - patched
software - patched
workflow - patched
workflow_run_awsem - patched


In [12]:

# From here on my_key points at the local build; run_sets holds the metadata
# of the single set whose files will be moved.
my_key = get_key('local')
run_sets = [
    ff_utils.get_metadata('11f207be-ebc4-4622-8b42-02e7841d17db', key=my_key),
]


# Move files from opc to pc
from dcicutils import ff_utils
from functions.notebook_functions import *
from functions.wfr import *

# Read by move_opc_to_pc: the patch and release_files calls only run when this
# is truthy; set to False to only report which items would be moved.
action = True


# move other processed files to processed files field
def move_opc_to_pc(resp, move_title, con_key):
    """Move one titled group of other_processed_files into processed_files.

    Args:
        resp: item metadata dict; reads 'other_processed_files',
            'processed_files', 'accession' and 'uuid'.
        move_title: title of the other_processed_files group to move.
        con_key: connection key handed to ff_utils.patch_metadata and
            release_files.

    Returns:
        True when a group with `move_title` was found (the patch itself is
        only sent when the module-level `action` flag is truthy), False when
        processed_files is already populated, there are no other processed
        files, or no group carries `move_title`.

    Raises:
        AssertionError: if more than one group carries `move_title`, or the
            matching group's type is not 'preliminary'.
    """
    opc = resp.get('other_processed_files')
    pc = resp.get('processed_files')
    # if processed_files field already has values, exit
    if pc:
        print('There are files in processed_files field, expected empty')
        return False
    # are there files in opc
    if not opc:
        print('there are no other processed files, skipping')
        return False
    # see if there is a group with the requested title to move
    move_item = [i for i in opc if i.get('title') == move_title]
    if not move_item:
        return False
    print(resp['accession'], 'files will move')
    assert len(move_item) == 1
    assert move_item[0]['type'] == 'preliminary'
    new_pc = move_item[0]['files']
    new_opc = [i for i in opc if i.get('title') != move_title]
    # Time to patch
    patch_data = {'processed_files': new_pc}
    add_on = ""
    # if there is something left in opc, patch the remaining groups (not the
    # original list, which would keep the moved group in place);
    # if nothing is left, delete the field
    if new_opc:
        patch_data['other_processed_files'] = new_opc
    else:
        add_on = 'delete_fields=other_processed_files'
    if action:
        ff_utils.patch_metadata(patch_data, resp['uuid'], key = con_key, add_on = add_on)
        # update status of pc to status of set or exp
        release_files(resp['uuid'], new_pc, con_key)
    return True

        
    
# Tallies of sets / experiments whose files were moved
set_w_apf = 0
exp_w_apf = 0
counter = 0
#move_title = 'HiC Processing Pipeline - Preliminary Files'
move_title = "ENCODE ChIP-Seq Pipeline - Preliminary Files"

print(len(run_sets), 'experiment sets in scope')
for counter, a_set in enumerate(run_sets, start=1):
    # re-fetch each set with frame=raw before handing it to move_opc_to_pc
    set_resp = ff_utils.get_metadata(a_set['uuid'], key=my_key, add_on='frame=raw')
    print(counter, set_resp['accession'])
    exps = set_resp['experiments_in_set']

    # first the set itself ...
    res = move_opc_to_pc(set_resp, move_title, my_key)
    if res:
        set_w_apf += 1
        print(set_resp['accession'], 'moved to pc')

    # ... then every experiment listed in the set
    for exp in exps:
        exp_resp = ff_utils.get_metadata(exp, key=my_key, add_on='frame=raw')
        res_e = move_opc_to_pc(exp_resp, move_title, my_key)
        if res_e:
            exp_w_apf += 1
            print(exp_resp['accession'], 'moved to pc')
    print()

# totals: sets moved, then experiments moved
print(set_w_apf)
print(exp_w_apf)

1 experiment sets in scope
1 4DNES9WNNK52
4DNES9WNNK52 files will move
4DNES9WNNK52 moved to pc
4DNEX1UOUWPM files will move
4DNEX1UOUWPM moved to pc
4DNEXD6Z69AC files will move
4DNEXD6Z69AC moved to pc

1
2
