In [None]:
from dcicutils import ff_utils
from functions.notebook_functions import *
from functions.wfr import *

# Connection settings: the access key is read from keypairs.json via get_key
my_env = 'data'
my_key = get_key('koray_data')

# Switch read by the audit cell below:
#   True  -> start a run for every pairs file that is missing one
#   False -> only report which files need a run
add_wfr = True

# Search url for hic exps: processed pairs files (contact list-replicate or
# contact list-combined) that are released, released to project or uploaded,
# filtered on quality_metric.uuid=No+value (presumably: no quality metric yet)
pairs_url = (
    "/search/?file_format=pairs"
    "&file_type=contact+list-replicate&file_type=contact+list-combined"
    "&status=released&status=released+to+project&status=uploaded"
    "&type=FileProcessed"
    "&quality_metric.uuid=No+value&limit=all"
)
pairs_files = ff_utils.search_metadata(pairs_url, key=my_key)
print(len(pairs_files))

# Subset of the search results that carry at least one source experiment
pairs_with_source = [
    pairs_item for pairs_item in pairs_files
    if pairs_item.get('source_experiments')
]
print(len(pairs_with_source))


In [None]:
# Audit the pairsqc-single status of every pairs file found by the search above.
# For each file that has a source experiment and a resolvable enzyme/chrsize:
#   - report status 'complete'  -> counted as okay
#   - report status 'running'   -> counted and reported as still running
#   - report status 'no...'     -> counted as not okay; a new run is started when
#                                  add_wfr is True, otherwise the file is only reported
# Files without source experiments or without RE/chrsize are skipped with a message.
# Any other report status stops the loop so it can be inspected by hand.

# chrsize file used for mouse; mouse runs get an additional max_distance parameter
MOUSE_CHRSIZE = '4DNFI3UBJ3HZ'
MOUSE_MAX_DISTANCE = 8.2

okay = 0
non = 0
running = 0
for a_pairs in pairs_files:
    accession = a_pairs['accession']

    # get experiment info
    exp_acc = a_pairs.get('source_experiments')
    if not exp_acc:
        print(accession, 'does not have source experiments, skipping')
        continue

    # get enzyme and chrsize files (looked up from the first source experiment)
    nz_num, chrsize = extract_nz_file(exp_acc[0], my_key)
    if not nz_num:
        print(nz_num, chrsize)
        print(accession, 'does not have RE/chrsize, skipping')
        continue

    # get report
    report = get_wfr_out(accession, 'pairsqc-single 0.2.5', my_key, md_qc=True)
    status = report['status']

    # if report does not provide a complete or running run
    if status.startswith('no'):
        non += 1
        if not add_wfr:
            print(accession, 'needs a run')
            continue

        parameters = {"enzyme": nz_num,
                      "sample_name": accession}
        # if mouse, parameters get an additional parameter
        if chrsize == MOUSE_CHRSIZE:
            parameters['max_distance'] = MOUSE_MAX_DISTANCE

        # attribution is only needed when a run is actually started
        run_guide = {'wf_name': 'pairsqc-single',
                     'wf_uuid': 'b8c533e0-f8c0-4510-b4a1-ac35158e27c3',
                     'parameters': parameters,
                     'wfr_meta': get_attribution(a_pairs)}

        print(run_guide)
        inp_f = {'input_pairs': accession, 'chromsizes': chrsize}
        print(accession, 'starting the run')
        run_missing_wfr(run_guide, inp_f, accession, my_key, my_env)

    # if there is a completed or still running run
    elif status == 'running':
        running += 1
        print(accession, 'still running')
    elif status == 'complete':
        okay += 1
    # if there is something funky
    else:
        # name the file and status so the stop can be diagnosed; the counts
        # printed below only cover the files processed before this point
        print('report status is not covered, please check:', accession, repr(status),
              '- stopping, counts below are partial')
        break

print("okay, not okay, running")
print(okay, non, running)

In [None]:
# Release pairs qc
# For every released / released to project pairs file that has a quality metric,
# check that the quality metric item and the workflow run that produced it carry
# the same status as the pairs file. Items that differ are counted, and patched to
# the pairs file status only when `patch` is True (dry run by default).
pairs_url = '/search/?file_format=pairs&status=released&status=released%20to%20project&type=FileProcessed&limit=all'
pairs_files = ff_utils.search_metadata(pairs_url , key=my_key)
pairs_files = [i for i in pairs_files if i.get('quality_metric')]
print(len(pairs_files))

patch = False
need_patch = 0
need_patch_run = 0
for pairs in pairs_files:
    pairs_status = pairs['status']
    if pairs_status not in ['released', 'released to project']:
        continue

    # the same patch body is used for the quality metric and for its workflow run
    patch_data = {"status": pairs_status}

    qc_uuid = pairs['quality_metric']['uuid']
    qc_status = ff_utils.get_metadata(qc_uuid, key=my_key)['status']
    if qc_status != pairs_status:
        need_patch += 1
        if patch:
            ff_utils.patch_metadata(patch_data, obj_id=qc_uuid, key=my_key)

    # find the workflow that produced this qc
    query = '/search/?type=WorkflowRunAwsem&output_quality_metrics.value.uuid=' + qc_uuid
    qc_runs = ff_utils.search_metadata(query, key=my_key)
    if not qc_runs:
        # report which file is affected instead of failing with a bare IndexError
        print(pairs.get('accession'), 'no workflow run found for quality metric', qc_uuid, 'skipping')
        continue
    qc_run = qc_runs[0]
    # sanity check: only a completed run should have produced a released qc
    assert qc_run['run_status'] == 'complete', \
        'workflow run {} for quality metric {} has run_status {!r}'.format(
            qc_run['uuid'], qc_uuid, qc_run['run_status'])
    if qc_run['status'] != pairs_status:
        need_patch_run += 1
        if patch:
            ff_utils.patch_metadata(patch_data, obj_id=qc_run['uuid'], key=my_key)

print(need_patch)
print(need_patch_run)