In [None]:
import wolf
import pandas as pd
from matplotlib import pyplot as plt
import os

In [None]:
# MutSig2CV run settings; these are serialized to params.txt further down
# and that file is handed to the task as its `params_file` input.
params = dict(
    maxperm=1e6,
    penalty_per_strike=1.1,
    base_min_effect_size=1.01,
    pCL_min_effect_size=1.01,
    pFN_min_effect_size=1.01,
    pCF_min_effect_size=1.01,
    min_neighbors=3,
    max_neighbors=100,
    min_mutations_in_bagel=10,
)

In [None]:
# Turn the settings into a pandas Series (index = parameter name) and write
# it to params.txt as tab-separated text, one parameter per row.
# NOTE(review): mixing ints and floats yields a float64 Series, so integer
# settings are written as e.g. "3.0"; depending on the pandas version a
# header row may also be emitted. Confirm the params processor accepts both.
df_params = pd.Series(data=params)
df_params.to_csv(
    path_or_buf='params.txt',
    sep='\t',
)
# Bare expression so the notebook renders the Series as the cell output.
df_params

In [None]:
class MutSig2CV_v5(wolf.Task):
    """wolF task that runs the MutSig2CV v5 executable on a MAF file.

    The script first passes ``params_file`` to ``/app/process_params_file.py``
    and then calls ``/app/MutSig2CV_v5`` with the MAF, an output directory
    named ``outdir`` and ``params_remapped.txt`` (presumably written by the
    preceding step -- confirm against the image). ``outdir`` is archived as
    ``results.tgz``, which is collected as the ``mutsig_results`` output.

    ``maf`` and ``params_file`` are set to ``None`` here (presumably meaning
    the caller must supply them -- confirm against wolf.Task). Every other
    input defaults to a bucket path; the reference-specific defaults live
    under ``hg38/`` and can be overridden per run.
    """

    inputs = {
        "maf": None,
        "params_file": None,
        "patient_weights_file": "",  # <- MutSig will ignore this one

        # Reference-specific files (defaults point at the hg38/ directory).
        # TODO: update default params file to reference these locally
        "coverage_models_mat_file": "gs://getzlab-workflows-reference_files-oa/MutSig2CV/hg38/coverage_models.agilent.mat",
        "target_list_file": "gs://getzlab-workflows-reference_files-oa/MutSig2CV/hg38/target_list.gencode25.txt",
        "context_and_effect_fwb_file": "gs://getzlab-workflows-reference_files-oa/MutSig2CV/hg38/context_and_effect_track.fwb",
        "context_and_effect_fwi_file": "gs://getzlab-workflows-reference_files-oa/MutSig2CV/hg38/context_and_effect_track.fwi",
        "covariates_file": "gs://getzlab-workflows-reference_files-oa/MutSig2CV/hg38/covariates.agilent.txt",

        # NOTE: reference-agnostic files
        "context_and_effect_categs_file": "gs://getzlab-workflows-reference_files-oa/MutSig2CV/context_and_effect_dict.txt",
        "mutation_type_dictionary_file": "gs://getzlab-workflows-reference_files-oa/MutSig2CV/mutation_type_dictionary.v6.txt",
        "FixedWidthBinary_jar_file": "gs://getzlab-workflows-reference_files-oa/MutSig2CV/FixedWidthBinary.jar",

        # NOTE: MutSig ignores the following on hg38 runs
        "basewise_coverage_fwb_file": "gs://getzlab-workflows-reference_files-oa/MutSig2CV/coverage_basewise.fwb",
        "basewise_coverage_fwi_file": "gs://getzlab-workflows-reference_files-oa/MutSig2CV/coverage_basewise.fwi",
        "conservation_fwb_file": "gs://getzlab-workflows-reference_files-oa/MutSig2CV/conservation46.fwb",
        "conservation_fwi_file": "gs://getzlab-workflows-reference_files-oa/MutSig2CV/conservation46.fwi",
    }

    # Shell commands executed for the task; ${...} names refer to the inputs.
    script = """
# regenerate params file to point to inputs
/app/process_params_file.py ${params_file}

/app/MutSig2CV_v5 ${maf} outdir params_remapped.txt
tar czf results.tgz outdir
"""

    # The tarball produced by the last script line matches this glob.
    outputs = {"mutsig_results": "*.tgz"}

    docker = "gcr.io/broad-getzlab-workflows/mutsig2cv_v5:v33"

    resources = {"cpus-per-task": 2, "mem": "20G"}

In [None]:
# Launch MutSig2CV on the concatenated union MAF with the params file written
# above. The five reference-specific inputs are overridden with files from
# the hg19/ directory, replacing the task's hg38 defaults; all remaining
# inputs keep their class-level defaults.
results = MutSig2CV_v5(
    inputs=dict(
        maf='MutSig_matched_pairs_2024_03_10.union_mafs.concat.tsv',
        params_file='params.txt',
        coverage_models_mat_file="gs://getzlab-workflows-reference_files-oa/MutSig2CV/hg19/coverage_models.v5a.mat",
        target_list_file="gs://getzlab-workflows-reference_files-oa/MutSig2CV/hg19/target_list.gencode19.v1.txt",
        context_and_effect_fwb_file="gs://getzlab-workflows-reference_files-oa/MutSig2CV/hg19/context_and_effect.c65e29.gencode19.fwb",
        context_and_effect_fwi_file="gs://getzlab-workflows-reference_files-oa/MutSig2CV/hg19/context_and_effect.c65e29.gencode19.fwi",
        covariates_file="gs://getzlab-workflows-reference_files-oa/MutSig2CV/hg19/covariates_transformed.v5a.txt",
    ),
).run()