This notebook shows how to use the SLiM script generator to create valid SLiM scripts that simulate admixture.

Jerome has suggested that it should be possible to run this in basic ways without knowing any SLiM, but with a 'SLiM hook' that lets more advanced SLiM users, like Peter Ralph, enter their own lines of code.

First, let's consider what all SLiM scripts will (need to) have.

At their core, SLiM scripts contain lines of code inside particular 'generations'. The SLiM code inserter should, at minimum, search for the relevant 'generations' and, if they don't exist, create them.

Note that all SLiM scripts must have an `initialize` block and a `1 early() {}` generation, so these should be there by default.

An example SLiM script to look at while I'm working on this: 

```
// Model setup: constants, model type, tree-sequence recording, a single neutral
// mutation type / genomic element, and a recombination map read from file.
// NOTE(review): input_chrom_length and input_p0Prop are not defined in this
// script; presumably they are supplied from outside (e.g. on the command line) —
// confirm.
initialize() {
	
	// Set constants.
	defineConstant("chromLength", input_chrom_length);
	defineConstant("p0Size", 1000);
	defineConstant("p1Size", 1000);
	defineConstant("p2Size", 1000);
	defineConstant("p0Prop", input_p0Prop);
	// p0Prop and p1Prop always sum to 1.
	defineConstant("p1Prop", 1 - p0Prop);

	initializeSLiMModelType("WF");
	initializeSLiMOptions(keepPedigrees = T); // Keep pedigree information
	initializeTreeSeq();
	// No mutations are simulated.
	initializeMutationRate(0);
	
	// m1 mutation type: neutral
	initializeMutationType("m1", 0.5, "f", 0.0);
	
	// g1 genomic element type: uses m1 for all mutations
	initializeGenomicElementType("g1", m1, 1.0);
	
	// one genomic element covering positions 0 to chromLength
	// NOTE(review): this comment used to say "length 100 kb", which does not
	// match the chr22 map read below (final coordinate 50 000 000) — confirm
	// the intended chromLength.
	initializeGenomicElement(g1, 0, chromLength);
	
	// read in chr22 recombination map.
	// See example 6.1.2 in the SLiM manual.
	lines = readFile("examples/chr22.recombination_map");
	lines = lines[1:(size(lines)-1)]; // remove header
	rates = NULL;
	ends = NULL;
	
	for (line in lines)
	{
		// Space-separated columns: index 1 is the interval end position,
		// index 2 is the recombination rate.
		components = strsplit(line, " ");
		ends = c(ends, asInteger(components[1]));
		rates = c(rates, asFloat(components[2]));
	}
	
	ends = ends - 1; // final coordinate in file is 50 000 000
	// Rescale the file's rates (presumably cM/Mb to per-base probability —
	// TODO confirm units).
	rates = rates * 1e-8;
	
	initializeRecombinationRate(rates, ends);
	
}

// Generation 1, early: create the two source populations p0 and p1.
1 early(){
	sim.addSubpop("p0", p0Size);
	sim.addSubpop("p1", p1Size);

//	sim.treeSeqRememberIndividuals(sim.subpopulations.individuals);
}

// Generation 1, late: create the admixed population p2 and open migration into it.
1 late() { 
	
	// Add admixing population.
	sim.addSubpop("p2", p2Size);
	// Source populations p0 and p1 contribute to p2 at rates p0Prop and p1Prop,
	// which sum to 1 (presumably the fraction of p2's offspring drawn from each
	// source under SLiM's WF migration model — confirm against the manual).
	p2.setMigrationRates(c(p0,p1),c(p0Prop, p1Prop));
	
}


// Generation 2, late: set both migration rates into p2 back to zero, so the
// migration opened in `1 late()` lasts for a single generation.
2 late() {

	p2.setMigrationRates(c(p0,p1),c(0.0,0.0));

}


// save the tree output after 200 generations.
200 late() {
 	sim.treeSeqOutput("examples/recent-history.trees");
	// End the run explicitly once the tree sequence has been written.
	sim.simulationFinished();
}
```

In [1]:
import re

In [2]:
class RecentHistory(object):
    """Build a SLiM script of recent history, one event at a time.

    The script is kept as a single string in ``self.script``. It starts with
    an ``initialize(){...}`` block and an empty ``1 early(){...}`` block.
    :meth:`add_event` inserts statements into ``<generation> early(){...}``
    and ``<generation> late(){...}`` blocks, creating a block when it does
    not exist yet and keeping blocks ordered by generation (``early`` before
    ``late`` within a generation).

    Parameters
    ----------
    outfile : str
        Path written into the ``sim.treeSeqOutput(...)`` call generated by
        :meth:`add_simulation_end`.
        NOTE(review): the default ends in ``.slim`` although it is used as
        the tree-sequence output path -- confirm the intended default.
    model_type : str
        Value passed to ``initializeSLiMModelType`` in the generated script.
    """

    _VALID_TIMES = ('early', 'late')
    # Matches any block header such as "12 late(){" and captures the
    # generation number and the time.
    _HEADER_REGEX = re.compile(r"(\d+) (early|late)\(\)\{")

    def __init__(self, outfile='recent-history.slim', model_type='WF'):
        self.outfile = outfile
        self.model_type = model_type
        # The trailing spaces after "}" and "{" are part of the original
        # template and are kept so the generated text does not change.
        self.script = (
            "\n"
            "initialize(){\n"
            "    initializeSLiMModelType(\"%s\");\n"
            "    initializeTreeSeq();\n"
            "}    \n"
            "\n"
            "1 early(){      \n"
            "}"
        ) % self.model_type

    def dump_script(self):
        """Return the current script as a string."""
        return self.script

    def print_script(self):
        """Print the current script to standard output."""
        print(self.script)

    def _check_time(self, time):
        """Raise ValueError unless ``time`` is 'early' or 'late'."""
        if time not in self._VALID_TIMES:
            raise ValueError(
                "The event time must be either 'early' or 'late'.")

    def _search_header(self, generation, time, with_brace=True):
        """Return a regex match for the block header, or None if absent."""
        header = "%i %s()" % (generation, time)
        if with_brace:
            header += "{"
        # The lookbehind stops "1 late()" from matching inside "11 late()".
        return re.search(r"(?<!\d)" + re.escape(header), self.script)

    def time_already_in_script(self, generation, time):
        """Return True if the script already has a block for this time.

        Raises
        ------
        ValueError
            If ``time`` is neither 'early' nor 'late'.
        """
        self._check_time(time)
        match = self._search_header(generation, time, with_brace=False)
        return match is not None

    def find_event_index(self, generation, time, INSERT_AT_START):
        """Find the script index at which a new event should be inserted.

        If ``INSERT_AT_START`` is true the index is just after the opening
        brace of the block; otherwise it is just before the newline that
        precedes the block's closing brace.

        Raises
        ------
        ValueError
            If the block, or its closing brace, cannot be found.
        """
        match = self._search_header(generation, time)
        if match is None:
            raise ValueError(
                "No '%i %s(){' block found in the script."
                % (generation, time))
        start_loc = match.end()
        if INSERT_AT_START:
            return start_loc
        end_loc = self.script.find('\n}', start_loc)
        if end_loc == -1:
            raise ValueError(
                "The '%i %s(){' block has no closing brace."
                % (generation, time))
        return end_loc

    def find_time_index(self, generation, time):
        """Find where a new generation/time block should be inserted.

        Returns
        -------
        tuple
            ``(index, found)``. ``found`` is 1 when an existing block must
            come after the new one, in which case ``index`` is the start of
            that block's header. Otherwise ``found`` is 0 and ``index`` is
            the length of the script (i.e. append at the end).
        """
        for match in self._HEADER_REGEX.finditer(self.script):
            current_gen = int(match.group(1))
            # A same-generation block only follows the new one when the new
            # one is 'early' (the existing block is then the 'late' one).
            if current_gen > generation or (
                    current_gen == generation and time == 'early'):
                # Use the match position directly; re-searching for the
                # header text could hit a longer generation number.
                return (match.start(), 1)
        return (len(self.script), 0)

    def add_event(self, generation, time, event, start=False):
        """Add ``event`` (with ';' appended) to the given block.

        The block is created first if it does not exist. With
        ``start=True`` the event becomes the first statement of the block;
        otherwise it is appended after the existing statements.

        Raises
        ------
        ValueError
            If ``time`` is neither 'early' nor 'late'.
        """
        if not self.time_already_in_script(generation, time):
            time_ind, NOT_AT_END = self.find_time_index(generation, time)
            empty_block = "%i %s(){\n}" % (generation, time)
            if NOT_AT_END:
                self.script = (self.script[:time_ind] + empty_block
                               + "\n\n" + self.script[time_ind:])
            else:
                self.script = self.script + "\n\n" + empty_block
        event_ind = self.find_event_index(generation, time, start)
        self.script = (self.script[:event_ind] + "\n    %s;" % (event,)
                       + self.script[event_ind:])

    def add_simulation_end(self, generation):
        """Write the tree sequence and end the simulation at ``generation``.

        Appends ``sim.treeSeqOutput("<outfile>")`` and
        ``sim.simulationFinished()`` to the ``<generation> late()`` block.
        """
        command_to_save_out = "sim.treeSeqOutput(\"%s\")" % self.outfile
        self.add_event(generation, "late", command_to_save_out)
        self.add_event(generation, "late", "sim.simulationFinished()")
        

In [3]:
# Build a generator with the defaults: outfile 'recent-history.slim', model type 'WF'.
scr = RecentHistory()

In [4]:
# Insert a statement into the `1 early()` block that every new script starts with.
# The event text is a placeholder rather than real SLiM code.
scr.add_event(1, 'early', 'start of simulation')

In [5]:
# Show the script so far.
# NOTE(review): add_event appends ';' to every event, but the recorded outputs in
# this notebook show none -- they look like they predate that change; re-run to refresh.
scr.print_script()


initialize(){
    initializeSLiMModelType("WF");
    initializeTreeSeq();
}    

1 early(){      
    start of simulation
}


In [6]:
# No `3 late()` block exists yet, so add_event first creates one at the end of the script.
scr.add_event(3, 'late', 'the end of the simulation')
scr.print_script()


initialize(){
    initializeSLiMModelType("WF");
    initializeTreeSeq();
}    

1 early(){      
    start of simulation
}

3 late(){
    the end of the simulation
}


In [7]:
# dump_script returns the raw string, so the notebook shows its repr with explicit '\n'.
scr.dump_script()

'\ninitialize(){\n    initializeSLiMModelType("WF");\n    initializeTreeSeq();\n}    \n\n1 early(){      \n    start of simulation\n}\n\n3 late(){\n    the end of the simulation\n}'

In [8]:
# A block for an earlier generation is inserted before the existing `3 late()` block.
scr.add_event(2, 'late', 'the middle of the script')
scr.print_script()


initialize(){
    initializeSLiMModelType("WF");
    initializeTreeSeq();
}    

1 early(){      
    start of simulation
}

2 late(){
    the middle of the script
}

3 late(){
    the end of the simulation
}


In [9]:
# Generation 5 is later than every existing block, so the new block is appended.
scr.add_event(5, 'late', 'the new end of the script')
scr.print_script()


initialize(){
    initializeSLiMModelType("WF");
    initializeTreeSeq();
}    

1 early(){      
    start of simulation
}

2 late(){
    the middle of the script
}

3 late(){
    the end of the simulation
}

5 late(){
    the new end of the script
}


In [10]:
# `4 early()` is placed between `3 late()` and `5 late()`, keeping generation order.
scr.add_event(4, 'early', 'more middle of the script')
scr.print_script()


initialize(){
    initializeSLiMModelType("WF");
    initializeTreeSeq();
}    

1 early(){      
    start of simulation
}

2 late(){
    the middle of the script
}

3 late(){
    the end of the simulation
}

4 early(){
    more middle of the script
}

5 late(){
    the new end of the script
}


### Next: let's add in some necessary bits.

- output

In [11]:
# Append the tree-sequence output and simulationFinished() calls to `5 late()`.
# NOTE(review): the output path is the default outfile, 'recent-history.slim',
# which is an odd name for a tree-sequence file -- confirm.
scr.add_simulation_end(5)
scr.print_script()


initialize(){
    initializeSLiMModelType("WF");
    initializeTreeSeq();
}    

1 early(){      
    start of simulation
}

2 late(){
    the middle of the script
}

3 late(){
    the end of the simulation
}

4 early(){
    more middle of the script
}

5 late(){
    the new end of the script
    sim.treeSeqOutput("recent-history.slim");
    sim.simulationFinished();
}


It would be very useful to be able to append statements to the end of an existing block here.
Let's experiment a bit.

In [12]:
# Experiment: try to capture whole event blocks with a single regex.
# With re.DOTALL the greedy `.+` runs from the first header to the last `}` in
# the script, so findall returns one big match rather than one match per block.
myscr = scr.dump_script()
regex = re.compile(r"\d+ \bearly\b\(\)\{.+\}|\d+ \blate\b\(\)\{.+\}", flags=re.DOTALL)
regex.findall(myscr)

['1 early(){      \n    start of simulation\n}\n\n2 late(){\n    the middle of the script\n}\n\n3 late(){\n    the end of the simulation\n}\n\n4 early(){\n    more middle of the script\n}\n\n5 late(){\n    the new end of the script\n    sim.treeSeqOutput("recent-history.slim");\n    sim.simulationFinished();\n}']

In [13]:
# Quick check of str.find: it returns the index of the first occurrence only.
str_to_search = "Bewere the werewolf."
str_to_search.find('we')

2

In [14]:
# start=False appends the event after the statements already in `5 late()`.
scr.add_event(5, 'late', 'at the end of the end', False)
scr.print_script()


initialize(){
    initializeSLiMModelType("WF");
    initializeTreeSeq();
}    

1 early(){      
    start of simulation
}

2 late(){
    the middle of the script
}

3 late(){
    the end of the simulation
}

4 early(){
    more middle of the script
}

5 late(){
    the new end of the script
    sim.treeSeqOutput("recent-history.slim");
    sim.simulationFinished();
    at the end of the end
}


In [22]:
# Scratch check of list helpers. sorted() returns a new list (discarded here) and
# leaves myl unchanged, so index(3) reports the first 3 in the original order.
myl = [1,2,3,4, 4,5, 3]
sorted(myl)
myl.index(3)

2

In [26]:
# Fresh script: add events out of generation order and check that the blocks end
# up sorted, with start=True placing 'first event' ahead of 'second event' and a
# later default add placing 'third event' last within `3 late()`.
scr = RecentHistory()
scr.add_event(1, 'early', 'start of simulation')
scr.add_event(3, 'late', 'second event')
scr.add_event(3, 'late', 'first event', start=True)
scr.add_event(2, 'late', 'the middle of the script')
scr.add_event(5, 'late', 'the end of the script')
scr.add_event(3, 'late', 'third event')
scr.add_event(4, 'early', 'more middle of the script')
scr.print_script()


initialize(){
    initializeSLiMModelType("WF");
    initializeTreeSeq();
}    

1 early(){      
    start of simulation
}

2 late(){
    the middle of the script
}

3 late(){
    first event
    second event
    third event
}

4 early(){
    more middle of the script
}

5 late(){
    the end of the script
}
