In [2]:
from dcicutils import ff_utils
from functions.notebook_functions import *
from tqdm import tqdm_notebook as tqdm
from functions.cleanup import *

my_env = 'data'
my_key = get_key('koray_data')

In [1]:
genome = 'None'
settings = [{
            "wf_name": "encode-chipseq-aln-chip",
            "wf_uuid": "4dn-dcic-lab:wf-encode-chipseq-aln-chip",
            "parameters": {},
            "config": {
                       "ebs_size": 0,
                       "ebs_type": "gp2",
                       "json_bucket": "4dn-aws-pipeline-run-json",
                       "EBS_optimized": "",
                       "ebs_iops": "",
                       "shutdown_min": "now",
                       "instance_type": "",
                       "password": "",
                       "log_bucket": "tibanna-output",
                       "key_name": "",
                       "cloudwatch_dashboard": True
            },
            'custom_pf_fields': {
                'chip.first_ta': {
                    'genome_assembly': genome,
                    'file_type': 'read positions',
                    'description': 'Positions of aligned reads in bed format, one line per read mate, for control experiment, from ENCODE ChIP-Seq Pipeline'},
                'chip.first_ta_xcor': {
                    'genome_assembly': genome,
                    'file_type': 'intermediate file',
                    'description': 'Counts file used only for QC'}
            }
        },
        {
            "wf_name": "encode-chipseq-aln-ctl",
            "wf_uuid": "4dn-dcic-lab:wf-encode-chipseq-aln-ctl",
            "parameters": {},
            "config": {
                "ebs_size": 0,
                "ebs_type": "gp2",
                "json_bucket": "4dn-aws-pipeline-run-json",
                "EBS_optimized": "",
                "ebs_iops": "",
                "shutdown_min": 'now',
                "instance_type": "",
                "password": "",
                "log_bucket": "tibanna-output",
                "key_name": "",
                "cloudwatch_dashboard": True
            },
            'custom_pf_fields': {
                'chip.first_ta_ctl': {
                    'genome_assembly': genome,
                    'file_type': 'read positions',
                    'description': 'Positions of aligned reads in bed format, one line per read mate, for control experiment, from ENCODE ChIP-Seq Pipeline'}
            }
        },
        {
            "wf_name": "encode-chipseq-postaln",
            "wf_uuid": "4dn-dcic-lab:wf-encode-chipseq-postaln",
            "parameters": {},
            "config": {
                "ebs_size": 0,
                "ebs_type": "gp2",
                "json_bucket": "4dn-aws-pipeline-run-json",
                "EBS_optimized": "",
                "ebs_iops": "",
                "shutdown_min": "now",
                "instance_type": "",
                "password": "",
                "log_bucket": "tibanna-output",
                "key_name": "",
                "cloudwatch_dashboard": True
            },
            'custom_pf_fields': {
                'chip.optimal_peak': {
                    'genome_assembly': genome,
                    'file_type': 'peaks',
                    'description': 'Peak calls from ENCODE ChIP-Seq Pipeline'},
                'chip.conservative_peak': {
                    'genome_assembly': genome,
                    'file_type': 'conservative peaks',
                    'description': 'Conservative peak calls from ENCODE ChIP-Seq Pipeline'},
                'chip.sig_fc': {
                    'genome_assembly': genome,
                    'file_type': 'signal fold change',
                    'description': 'ChIP-seq signal fold change over input control'}
            }
        },
        {
            "wf_name": "encode-atacseq-aln",
            "wf_uuid": "4dn-dcic-lab:wf-encode-atacseq-aln",
            "parameters": {},
            "config": {
                "ebs_size": 0,
                "ebs_type": "gp2",
                "json_bucket": "4dn-aws-pipeline-run-json",
                "EBS_optimized": "",
                "ebs_iops": "",
                "shutdown_min": 'now',
                "instance_type": "",
                "password": "",
                "log_bucket": "tibanna-output",
                "key_name": "",
                "cloudwatch_dashboard": True
            },
            'custom_pf_fields': {
                'atac.first_ta': {
                    'genome_assembly': genome,
                    'file_type': 'read positions',
                    'description': 'Positions of aligned reads in bed format, one line per read mate, from ENCODE ATAC-Seq Pipeline'}
            }
        },
        {
            "wf_name": "encode-atacseq-postaln",
            "wf_uuid": "4dn-dcic-lab:wf-encode-atacseq-postaln",
            "parameters": {},
            "config": {
                "ebs_size": 0,
                "ebs_type": "gp2",
                "json_bucket": "4dn-aws-pipeline-run-json",
                "EBS_optimized": "",
                "ebs_iops": "",
                "shutdown_min": "now",
                "instance_type": "",
                "password": "",
                "log_bucket": "tibanna-output",
                "key_name": "",
                "cloudwatch_dashboard": True
            },
            'custom_pf_fields': {
                'atac.optimal_peak': {
                    'genome_assembly': genome,
                    'file_type': 'peaks',
                    'description': 'Peak calls from ENCODE ATAC-Seq Pipeline'},
                'atac.conservative_peak': {
                    'genome_assembly': genome,
                    'file_type': 'conservative peaks',
                    'description': 'Conservative peak calls from ENCODE ATAC-Seq Pipeline'},
                'atac.sig_fc': {
                    'genome_assembly': genome,
                    'file_type': 'signal fold change',
                    'description': 'ATAC-seq signal fold change'}
            }
        },
        {
            "wf_name": "mergebed",
            "wf_uuid": "2b10e472-065e-43ed-992c-fccad6417b65",
            "parameters": {"sortv": "0"},
            "config": {
                "ebs_size": 0,
                "ebs_type": "gp2",
                "json_bucket": "4dn-aws-pipeline-run-json",
                "EBS_optimized": "",
                "ebs_iops": "",
                "shutdown_min": "now",
                "instance_type": "",
                "password": "",
                "log_bucket": "tibanna-output",
                "key_name": "",
                "cloudwatch_dashboard": True
            }
        }
    ]

In [13]:
for sett in settings:
    print()
    print(sett['wf_name'])
    url = '/search/?type=WorkflowRunAwsem&workflow.app_name=' + sett['wf_name']
    items = ff_utils.search_metadata(url, my_key)
    files_to_check = sett['custom_pf_fields']
    for i in items:
        print('+', end = '')
        for f in files_to_check:
            my_f = [o['value'] for o in i['output_files'] if o['workflow_argument_name'] == f][0]
            patch_data = {'file_type':files_to_check[f]['file_type'].strip(),
                          'description':files_to_check[f]['description'].strip()
                         }
            ff_utils.patch_metadata(patch_data, my_f['uuid'], my_key)
    print()


encode-chipseq-aln-chip
+++++++++++++++++++++++++++++++++++++++++++++++++

encode-chipseq-aln-ctl
+++++++++++++++++++++++++++++

encode-chipseq-postaln
++++++++++++++++++++++++++

encode-atacseq-aln
+++++++++++++++++++++++++

encode-atacseq-postaln
++++++++++++++++

mergebed
++
