In [13]:
# Dekker lab: biological replicate addition to micro-C experiments
# `workon` below lists the new and old sets
# Beforehand, I updated the descriptions of the new sets and deleted an additional empty set that Ankita submitted
# All sets already have the correct replicate numbers, so they only need to be combined

from dcicutils import ff_utils
from functions.notebook_functions import *
import json
import time
from datetime import datetime

# Experiment sets to process. Each entry is a [new_set, old_set] pair, in the order the
# loops below unpack it; the old set's replicates are combined into the new set.
workon = [['4DNESYTWHUH6', '51d9ac15-e55e-4f64-85e5-01998e349cf1'], # hff6 ones
          ['4DNES21D8SP8', '998573fb-384e-4792-a621-254d705d4ec9']  # h1 ones
         ]

# Safety switch: the patch_metadata calls below are only made when this is True.
action = True

def conv_time(time_info):
    """Turn a date_created / date_modified string into a datetime object.

    The input is expected to be '<timestamp>+00:00'. The part before the '+'
    is parsed (with or without fractional seconds) and returned as a naive
    datetime; any offset other than '00:00' fails the assertion.
    """
    with_fraction = '%Y-%m-%dT%H:%M:%S.%f'
    whole_second = '%Y-%m-%dT%H:%M:%S'
    stamp, offset = time_info.split('+')
    assert offset == '00:00'
    try:
        return datetime.strptime(stamp, with_fraction)
    except ValueError:
        # items created at the perfect second carry no fractional part
        return datetime.strptime(stamp, whole_second)

def fetch_pf_associated(pf_id, my_key):
    """Given a file identifier, collect the identifiers of all related items.

    The result covers:
    1) the file itself and its quality metric, if it has one
    2) the workflow run (wfr) that produced the file, and the other items
       reported for that run by fetch_wfr_associated
    3) every wfr that used this file as input, and the items reported for
       those runs by fetch_wfr_associated

    Args:
        pf_id: identifier of the file, handed to ff_utils.get_metadata.
        my_key: access key handed to ff_utils.get_metadata.

    Returns:
        A de-duplicated list (in no particular order) of item identifiers.
        Most are uuids; entries coming from a wfr's 'value_qc' field are
        passed through in whatever form fetch_wfr_associated returns them.
    """
    pf_info = ff_utils.get_metadata(pf_id, my_key)
    related = [pf_info['uuid']]
    quality_metric = pf_info.get('quality_metric')
    if quality_metric:
        related.append(quality_metric['uuid'])
    # A file may have no workflow runs on either side (field missing or empty);
    # treat that as "nothing to follow" instead of failing.
    input_wfrs = pf_info.get('workflow_run_inputs') or []
    output_wfrs = pf_info.get('workflow_run_outputs') or []
    for input_wfr in input_wfrs:
        related.extend(fetch_wfr_associated(input_wfr['uuid'], my_key))
    # only the first listed producing workflow run is followed
    if output_wfrs:
        related.extend(fetch_wfr_associated(output_wfrs[0]['uuid'], my_key))
    return list(set(related))
        
                
def fetch_wfr_associated(wfr_uuid, my_key):
    """Collect the identifiers tied to one workflow run.

    Returns a list that starts with the run's own uuid, followed by one entry
    per output file (the file's uuid, or the 'value_qc' entry when the output
    has no file value), followed by the uuids of the output quality metrics.
    """
    run = ff_utils.get_metadata(wfr_uuid, my_key)
    collected = [run['uuid']]
    for output in run.get('output_files') or []:
        file_value = output.get('value')
        if file_value:
            collected.append(file_value['uuid'])
            continue
        qc_ref = output.get('value_qc')
        if qc_ref:
            # stored as-is; this is a @id
            collected.append(qc_ref)
    collected.extend(
        metric['value']['uuid']
        for metric in run.get('output_quality_metrics') or []
        if metric.get('value')
    )
    return collected


# Merge the replicates of each old set into its new set, then archive the
# processed files (and their associated wfrs/qcs) that hang off the old set.
my_key = get_key('koray_data')
for new_set, old_set in workon:
    print()
    # NOTE(review): presumably the raw frame is used so that replicate_exps and
    # other_processed_files can be patched back in the form they were read - confirm
    old_set_info = ff_utils.get_metadata(old_set, my_key, add_on='frame=raw')
    new_set_info = ff_utils.get_metadata(new_set, my_key, add_on='frame=raw')
    if old_set_info['status'] == 'replaced':
        print('old set already replaced, skipping')
        # actually skip: without this the replaced set would be merged/archived again
        continue
    print('Combining {} into {}'.format(old_set_info['accession'], new_set_info['accession']))
    # sanity check: the old set must have been created before the new one
    assert conv_time(old_set_info['date_created']) < conv_time(new_set_info['date_created'])
    # combine rep exps, ordered by biological then technical replicate number
    new_rep = new_set_info['replicate_exps'] + old_set_info['replicate_exps']
    new_rep = sorted(new_rep, key=lambda k: [k['bio_rep_no'], k['tec_rep_no']])
    # every bio/tec replicate number pair must be unique after combining
    tec_bio = [str(i['bio_rep_no']) + '_' + str(i['tec_rep_no']) for i in new_rep]
    # explicit check (not an assert) so it still runs when python is started with -O
    if len(new_rep) != len(set(tec_bio)):
        print('same rep numbers are used, either merged already happened, or conflicting numbers in both sets, skipping')
        continue
    ans = input('Continue with this rep numbers formatted b_t (y/n):\n{}\n'.format(tec_bio))
    if ans != 'y':
        # any answer other than 'y' stops the whole run, not just this pair
        break
    # patch the new set with the new rep info
    if action:
        ff_utils.patch_metadata({'replicate_exps': new_rep}, new_set, my_key)
        print(new_set, ' replicates are updated')

    # are there processed files/ other processed files and wfrs/qcs that need to be archived
    # will collect items on processed_files and other_processed_files fields, and their associated items
    # (only 1 level of wfrs)
    archive_files = []
    if old_set_info.get('other_processed_files'):
        for case in old_set_info['other_processed_files']:
            archive_files.extend(case['files'])  # add all files to archive_list
            case['type'] = 'archived'
        if action:
            ff_utils.patch_metadata({'other_processed_files': old_set_info['other_processed_files']},
                                    old_set, my_key)
    if old_set_info.get('processed_files'):
        archive_files.extend(old_set_info['processed_files'])
    archive_list = []
    for ar_file in archive_files:
        archive_list.extend(fetch_pf_associated(ar_file, my_key))
    # different files can share wfrs/qcs; drop repeats (keeping first-seen order)
    # so that each item is counted and patched only once
    archive_list = list(dict.fromkeys(archive_list))
    print(len(archive_list), 'associated items will be archived')
    if action:
        for an_item in archive_list:
            ff_utils.patch_metadata({'status': 'archived'}, an_item, my_key)

        



Combining 4DNESCZJD9KK into 4DNESYTWHUH6
same rep numbers are used, either merged already happened, or conflicting numbers in both sets, skipping

Combining 4DNESRCFT5AI into 4DNES21D8SP8
Continue with this rep numbers formatted b_t (y/n):
['1_1', '1_2', '1_3', '1_4', '2_1', '2_2', '2_3', '2_4', '2_5']
y
4DNES21D8SP8  replicates are updated
16 associated items will be archived


In [18]:
### PLEASE COPY NOTEBOOKS TO YOUR FOLDERS TO PREVENT COMMIT CONFLICTS

# For each [new_set, old_set] pair in workon:
#   1) post (or fetch, if already posted) a static header for the old and for the new set
#   2) set the old set to 'replaced' and attach its header
#   3) after waiting for indexing, add the old accession as an alternate accession
#      on the new set and attach its header

# will perform patches/posts if set to true
action = True
# reason for replacement
reason = 'new biological replicates were added'
# set statuses that are mapped to 'draft' for the static sections
unreleased_statuses = ['in review by lab', 'submission in progress', 'pre-release']
# seconds to wait after marking the old set as replaced, so the change can be indexed
index_wait_seconds = 60

for new_set, old_set in workon:
    old_set_info = ff_utils.get_metadata(old_set, my_key)
    new_set_info = ff_utils.get_metadata(new_set, my_key)
    old_acc = old_set_info['accession']
    new_acc = new_set_info['accession']
    old_status = old_set_info['status']
    new_status = new_set_info['status']

    # convert status for the static section
    if old_status in unreleased_statuses:
        old_status = 'draft'
    if new_status in unreleased_statuses:
        new_status = 'draft'

    # add headers to old and new set
    old_alias = "static_header:replaced_item_{}_by_{}".format(old_acc, new_acc)
    old_header = {
      "body": "This experiment set was replaced by [{0}](https://data.4dnucleome.org/experiment-set-replicates/{0}/) because {1}.".format(new_acc, reason),
      "award": old_set_info['award']['uuid'],
      "lab": old_set_info['lab']['uuid'],
      "name": "static-header.replaced_item_{}".format(old_acc),
      "section_type": "Item Page Header",
      "options": {"title_icon": "info", "default_open": True, "filetype": "md", "collapsible": False},
      "title": "Note: Replaced Item - {}".format(old_acc),
      "status": old_status,
      "aliases": [old_alias]
    }
    new_alias = "static_header:replacing_item_{}_old_{}".format(new_acc, old_acc)
    # the link to the old set uses its uuid rather than its accession
    new_header = {
      "body": "This experiment set supercedes [{0}](https://data.4dnucleome.org/experiment-set-replicates/{1}/) because {2}.".format(old_acc, old_set_info['uuid'], reason),
      "award": new_set_info['award']['uuid'],
      "lab": new_set_info['lab']['uuid'],
      "name": "static-header.replacing_item_{}".format(new_acc),
      "section_type": "Item Page Header",
      "options": {"title_icon": "info", "default_open": True, "filetype": "md", "collapsible": False},
      "title": "Note: Replacing Item - {}".format(new_acc),
      "status": new_status,
      "aliases": [new_alias]
    }

    if action:
        # post the static sections; a failed post is taken to mean the section was
        # posted earlier, in which case it is looked up by its alias. If the lookup
        # fails too, that error is raised.
        # `except Exception` (not a bare except) so Ctrl-C still interrupts the cell.
        try:
            old_h_resp = ff_utils.post_metadata(old_header, 'StaticSection', my_key)['@graph'][0]
            print(old_h_resp)
        except Exception:
            print('old header already in system')
            old_h_resp = ff_utils.get_metadata(old_alias, my_key)

        try:
            new_h_resp = ff_utils.post_metadata(new_header, 'StaticSection', my_key)['@graph'][0]
        except Exception:
            print('new header already in system')
            new_h_resp = ff_utils.get_metadata(new_alias, my_key)

        # start from the headers already on the sets, if any
        old_header_list = [i['uuid'] for i in old_set_info.get('static_headers') or []]
        new_header_list = [i['uuid'] for i in new_set_info.get('static_headers') or []]
        # add the new ones unless they are already listed
        if old_h_resp['uuid'] not in old_header_list:
            old_header_list.append(old_h_resp['uuid'])
        if new_h_resp['uuid'] not in new_header_list:
            new_header_list.append(new_h_resp['uuid'])

        # set the status of old set to replaced
        ff_utils.patch_metadata({'status': 'replaced', 'static_headers': old_header_list},
                                obj_id=old_set_info['uuid'], key=my_key)
        # wait for indexing to take place
        # you might need to repeat this last piece separately if indexing does not catch up
        # new status needs to be indexed for alternate accession to be patched
        time.sleep(index_wait_seconds)
        # set the alternate accession on the new set to the old one
        alt_ac = list(set((new_set_info.get('alternate_accessions') or []) + [old_acc]))
        ff_utils.patch_metadata({'alternate_accessions': alt_ac, 'static_headers': new_header_list},
                                obj_id=new_set_info['uuid'], key=my_key)

    print('DONE')
    print('CHECK THE OLD SET', 'https://data.4dnucleome.org/experiment-set-replicates/{}/'.format(old_set_info['uuid']))
    print('CHECK THE NEW SET', 'https://data.4dnucleome.org/experiment-set-replicates/{}/'.format(new_acc))

old header already in system
new header already in system
DONE
CHECK THE OLD SET https://data.4dnucleome.org/experiment-set-replicates/51d9ac15-e55e-4f64-85e5-01998e349cf1/
CHECK THE NEW SET https://data.4dnucleome.org/experiment-set-replicates/4DNESYTWHUH6/
old header already in system
new header already in system
DONE
CHECK THE OLD SET https://data.4dnucleome.org/experiment-set-replicates/998573fb-384e-4792-a621-254d705d4ec9/
CHECK THE NEW SET https://data.4dnucleome.org/experiment-set-replicates/4DNES21D8SP8/


In [19]:
# set the status of the combined (new) sets to pre-release
pre_release_patch = {'status': 'pre-release'}
for set_accession in ('4DNESYTWHUH6', '4DNES21D8SP8'):
    ff_utils.patch_metadata(pre_release_patch, set_accession, my_key)