In [None]:
from mutagene.profiles import Sample
from mutagene.signatures import Identify
import json
import numpy as np
from glob import glob
from subprocess import Popen, PIPE

In [None]:
def get_sub_seq():
    """Return the 96 substitution labels of the form ``X[R>A]Y``.

    ``X`` and ``Y`` each range over ``ACGT`` and ``R>A`` over the six
    substitutions C>A, C>G, C>T, T>A, T>C and T>G. The labels are ordered
    with ``X`` varying slowest, then ``Y``, then the substitution, so the
    list starts ``A[C>A]A, A[C>G]A, ...`` and ends with ``T[T>G]T``.
    """
    bases = "ACGT"
    # Each two-character string unpacks into (reference base, alternate base).
    substitutions = ("CA", "CG", "CT", "TA", "TC", "TG")
    return [
        f"{leading}[{ref}>{alt}]{trailing}"
        for leading in bases
        for trailing in bases
        for ref, alt in substitutions
    ]

def deconstruct_sigs(profile, timeout=10):
    """Decompose a mutational profile with the R package ``deconstructSigs``.

    An R script is generated that turns ``profile`` into a one-row data
    frame whose columns are named after ``get_sub_seq()``, normalises it by
    its sum, calls ``whichSignatures`` against ``signatures.cosmic`` with
    ``signature.cutoff=0.00`` and prints the result as JSON. The script is
    piped to ``Rscript -`` and the JSON printed on stdout is parsed.

    Parameters
    ----------
    profile : iterable of numbers
        One value per label returned by ``get_sub_seq()``, in that order.
    timeout : float, optional
        Seconds to wait for Rscript before giving up (default 10).

    Returns
    -------
    list of dict
        One ``{'name': ..., 'score': ...}`` entry per key of the first
        ``weights`` row in the R output. Keys starting with ``_row`` are
        skipped and the ``Signature.`` prefix is stripped from the names.

    Raises
    ------
    ValueError
        If ``profile`` does not contain exactly one value per label.
    subprocess.TimeoutExpired
        If Rscript does not finish within ``timeout`` seconds; the child
        process is killed before the exception propagates.
    RuntimeError
        If Rscript exits with a non-zero status. The message carries the
        exit code together with the captured stdout and stderr.
    """
    labels = get_sub_seq()
    values = [float(v) for v in profile]
    if len(values) != len(labels):
        raise ValueError(
            "profile must contain {} values, got {}".format(len(labels), len(values)))

    # Build the R vectors explicitly rather than relying on Python's tuple
    # repr: NumPy scalars may render as e.g. ``np.int64(3)`` and a
    # one-element tuple renders with a trailing comma, neither of which is
    # valid R syntax.
    r_values = ", ".join(repr(v) for v in values)
    r_labels = ", ".join("'{}'".format(label) for label in labels)

    script = """
library(jsonlite)
library(deconstructSigs)
s <- data.frame(t(data.frame(sample=c({values}))))
colnames(s) <- c({labels})
w <- whichSignatures(
    tumor.ref = s / sum(s),
    sample.id = 'sample',
    signatures.ref=signatures.cosmic,
    signature.cutoff=0.00)
toJSON(w)
""".format(values=r_values, labels=r_labels).encode("utf-8")

    # The context manager closes the pipes and reaps the child on exit.
    with Popen(["Rscript", "-"], stdin=PIPE, stdout=PIPE, stderr=PIPE) as proc:
        try:
            out, err = proc.communicate(script, timeout=timeout)
        except BaseException:
            # communicate() leaves the child running on timeout or
            # interrupt, so stop it explicitly before re-raising.
            proc.kill()
            raise
        exitcode = proc.returncode

    if exitcode:
        raise RuntimeError(
            "R error (exit code {})\n\nOut:\n{}\n\nError:\n{}".format(
                exitcode,
                out.decode("utf-8", errors="replace"),
                err.decode("utf-8", errors="replace")))

    weights = json.loads(out.decode("utf-8"))['weights'][0]
    return [
        {'name': key.replace('Signature.', ''), 'score': score}
        for key, score in weights.items()
        if not key.startswith('_row')
    ]

In [None]:
# Find sample files.
# glob() returns matches in arbitrary, filesystem-dependent order, so the
# list is sorted to keep the positions shown below stable between runs and
# machines. Any hard-coded positional index into this list should be
# re-checked against this listing.
sample_files = sorted(glob('samples/*/data_mutations_mskcc.txt'))
list(enumerate(sample_files))

In [None]:
# Load the samples stored in one multisample mutation file, chosen by its
# position in `sample_files`. The second argument is the path to
# 'samples/hg19.2bit' (presumably the reference genome; confirm against
# the mutagene documentation).
mutation_file = sample_files[14]
samples = Sample.multisample(mutation_file, 'samples/hg19.2bit')

In [None]:
# Gather every sample's profile, then show the position of the sample
# whose profile ranks highest under Python's ordinary comparison.
# NOTE(review): the key compares the `profile` objects themselves, not
# their totals; if the intent is "sample with the most mutations" the key
# would need to be sum(sample.profile). Confirm before relying on this.
profs = [sample.profile for sample in samples]
sel_prof = max(samples, key=lambda sample: sample.profile)
samples.index(sel_prof)

In [None]:
# Report for a single sample: plot its profile, print the sum of the
# profile, then run both decomposition methods on that same profile.
sample_id = 6
chosen_sample = samples[sample_id]

print("PROFILE")
chosen_sample.plot_profile()
print("MUTATION COUNT:", sum(chosen_sample.profile))

print("MUTAGENE DECONSTRUCTION")
Identify(chosen_sample.profile, 30, bootstrap=True).write_results()

print("DECONSTRUCTSIGS DECONSTRUCTION")
deconstruct_sigs(chosen_sample.profile)