# make

Goal: Implement the very basic Snakemake idea in Lua for the sole purpose of learning Lua

The workflow definition is written in Lua itself so we get free linting

References:

- https://www.lua.org/manual/5.4/manual.html

Goals:
- Be able to execute simple things
- Support `script` and `shell`
- Support pattern matching

Non-goals:
- monitoring of resources
- parallel execution
- reentrancy


# helper

In [1]:
--- Read a whole file into a single string.
-- @param f  path of the file to read
-- @return   the complete contents of the file
-- Raises "Error opening file: <f>" when the file cannot be opened.
function readfile(f)
    local handle = io.open(f, "r")
    if not handle then
        -- level 0: raise the bare message, without position information
        error("Error opening file: " .. f, 0)
    end
    local contents = handle:read("*all")
    handle:close()
    return contents
end

In [2]:
readfile("make.in")


workflow{
	targets = {"joined_calls.vcf.gz"}
}
rule({
	name = "bwa map",
	input = {
		reffa = config.ref_fa,
		fastq = "fastq/{sample}.fastq.gz"
	},
	output = {
		"bam/{sample}.bam",
	},
	shell = [[
		touch {output[1]}
	]],
})

rule{
	name = "samtools index",
	input = {
		"bam/{sample}.bam",
	},
	output = {
		"bam/{sample}.bam.bai"
	},
	script = [[
		local f1, err1 = io.open(output[1], "a")
	]],
}

rule{
	name = "joined variant calling",
	input = {
		bams = expand{"bam/%s.bam", config.samples},
		bais = expand{"bam/%s.bam.bai", config.samples},
	},
	output = {
		"all.vcf",
	},
	shell = [[
		touch all.vcf
	]]
}

 


# exec

In [3]:
--[[
    Run a shell command through `sh -c` and capture its stdout and stderr.

    It would be nice to use the subprocess library, but that doesn't seem to
    be compatible with Lua 5.4 and creates extra funnies on OS X with clang
    (echo -n). So stdout/stderr are redirected into temp files instead.
    Redirection inside cmd itself is still supported.

    Returns:
        s  true when the command succeeded, nil otherwise (first result of
           closing the popen handle)
        o  everything the command wrote to stdout
        e  everything the command wrote to stderr
    If the pipe cannot be opened, returns nil, "Failed to open pipe".
--]]

-- Quote a string for POSIX sh: wrap it in single quotes and turn every
-- embedded single quote into '\''. Lua's %q must not be used for this: it
-- emits Lua syntax, where a newline becomes backslash-newline (which sh reads
-- as a line continuation) and $ / backticks stay live inside double quotes.
local function shell_quote(s)
    local escaped = tostring(s):gsub("'", [['\'']])
    return "'" .. escaped .. "'"
end

function exec(cmd)
    local f_o = os.tmpname()
    local f_e = os.tmpname()
    local full_cmd = string.format("sh -c %s > %s 2> %s",
        shell_quote(cmd), shell_quote(f_o), shell_quote(f_e))
    local p = io.popen(full_cmd)
    if not p then
        -- os.tmpname may already have created the files; don't leave them behind
        os.remove(f_o)
        os.remove(f_e)
        return nil, "Failed to open pipe"
    end
    local s = p:close() -- true or nil
    local o = readfile(f_o)
    local e = readfile(f_e)
    os.remove(f_o)
    os.remove(f_e)
    return s, o, e
end

In [4]:
-- Successful command: stdout is captured and the status is true.
local ok, out, err = exec("echo hello world")
print("stdout:", out)
print("stderr:", err)
print("exit status:", ok)

stdout: hello world
 
stderr:  
exit status: true 


In [5]:
-- Failing command: nothing is printed and the status comes back as nil.
local ok, out, err = exec("false")
print("stdout:", out)
print("stderr:", err)
print("exit status:", ok)

stdout:  
stderr:  
exit status: nil 


In [6]:
-- Redirection inside the command itself: the text goes to /tmp/meh.txt,
-- so the captured stdout stays empty.
local ok, out, err = exec("echo redirection works anyway > /tmp/meh.txt")
print("stdout:", out)
print("stderr:", err)
print("exit status:", ok)

stdout:  
stderr:  
exit status: true 


# rule

In [7]:
-- Global configuration table read by the workflow definition in make.in.
-- samples: the sample names that make.in passes to expand{} as config.samples.
-- NOTE(review): make.in also reads config.ref_fa, which is not set here, so
-- the `reffa` input of the "bwa map" rule evaluates to nil — confirm whether
-- a reference path should be added.
config = {
    samples = {'A', 'B', 'C'}
}

In [8]:
-- Placeholder expand(): ignores its arguments and always returns nil, so that
-- make.in can be loaded before the real implementation exists.
-- NOTE(review): make.in calls expand{...} with a single table argument, which
-- does not match this two-parameter signature; the definition in the
-- "# expand" section takes one table instead.
function expand(format_string, values)
    return nil
end

In [9]:
-- Workflow description most recently registered through workflow();
-- an empty table until workflow() has been called.
workflow_definition = {}
-- Store the table t as the global workflow_definition, replacing any previous
-- value. The table is kept by reference and is not validated.
-- make.in calls this as `workflow{ targets = {...} }`.
function workflow(t)
    workflow_definition = t
end

In [10]:
-- Registry of every rule declared so far, keyed by rule name.
rules = {}

--- Declare a workflow rule and add it to the global `rules` registry.
-- Intended to be called from the workflow definition file as `rule{ ... }`.
-- @param t  table describing the rule; required fields:
--           name   unique rule name
--           input  input file patterns
--           output output file patterns
--           shell or script  at least one of them, holding the action
-- Raises an error when a required field is missing or when a rule with the
-- same name has already been registered. The table is stored by reference.
function rule(t)
    assert(t.name ~= nil, "ERROR: no name defined for rule")
    -- tostring keeps the message construction safe for non-string names
    local label = "ERROR: rule '" .. tostring(t.name) .. "' "
    assert(rules[t.name] == nil, label .. "defined multiple times.")
    assert(t.input ~= nil, label .. "has no input defined.")
    assert(t.output ~= nil, label .. "has no output defined.")
    assert(t.shell ~= nil or t.script ~= nil, label .. "has neither script nor shell defined.")

    rules[t.name] = t
end
-- Run make.in: its workflow{...} and rule{...} calls fill workflow_definition
-- and rules through the functions defined above.
dofile("make.in")


In [11]:
rules

{ 
  [bwa map] = { 
    input = { 
      fastq = fastq/{sample}.fastq.gz 
    },
    name = bwa map,
    output = { bam/{sample}.bam },
    shell = 		touch {output[1]}
	 
  },
  [joined variant calling] = { 
    input = {},
    name = joined variant calling,
    output = { all.vcf },
    shell = 		touch all.vcf
	 
  },
  [samtools index] = { 
    input = { bam/{sample}.bam },
    name = samtools index,
    output = { bam/{sample}.bam.bai },
    script = 		local f1, err1 = io.open(output[1], "a")
	 
  } 
} 


In [12]:
workflow_definition

{ 
  targets = { joined_calls.vcf.gz } 
} 


# expand

In [13]:
--- Expand a format string once per value.
-- Called with a single table so it can be written as
-- `expand{"bam/%s.bam", config.samples}` in the workflow definition.
-- @param t  table with two positional elements:
--           t[1]  a string.format pattern taking one argument (e.g. "%s", "%d")
--           t[2]  a sequence of values to substitute, one per result
-- @return   a new sequence of formatted strings, in the same order as t[2]
-- Raises an error when t[1] is not a string or t[2] is not a table.
function expand(t)
    -- locals: without `local` these would leak into the global environment
    local format_string = t[1]
    local values = t[2]
    assert(type(format_string) == "string",
        "ERROR: expand expects a format string as its first element")
    assert(type(values) == "table",
        "ERROR: expand expects a table of values as its second element")
    -- debug tracing, kept so the notebook outputs stay as recorded
    print("Got format_string ", format_string)
    print("Got values ", values)
    local results = {}
    for i, v in ipairs(values) do
        results[i] = string.format(format_string, v)
    end
    return results
end

In [16]:
-- String placeholders: one path per sample name.
local sample_names = { "A", "B", "C" }
local expanded = expand({"sorted_reads/%s.bam", sample_names})
print(expanded)


Got format_string  sorted_reads/%s.bam 
Got values  { A, B, C } 
{ sorted_reads/A.bam, sorted_reads/B.bam, sorted_reads/C.bam } 


In [19]:
-- Integer placeholders: %d works the same way with numeric sample ids.
local sample_ids = { 1, 2, 3 }
local expanded = expand({"sorted_reads/%d.bam", sample_ids})
print(expanded)

Got format_string  sorted_reads/%d.bam 
Got values  { 1, 2, 3 } 
{ sorted_reads/1.bam, sorted_reads/2.bam, sorted_reads/3.bam } 


In [20]:
-- Start from a clean slate, then load make.in again so its expand{} calls
-- run against the current expand() definition.
rules = {}
workflow_definition = {}
dofile("make.in")

Got format_string  bam/%s.bam 
Got values  { A, B, C } 
Got format_string  bam/%s.bam.bai 
Got values  { A, B, C } 


# To continue

- Fix expand above
- Construct graph
- etc