In [1]:
import flash
from Bio import SeqIO
from ortools.linear_solver import pywraplp
from common import Gene, Target

In [2]:
# FASTA file that read_genes_from_file() parses; the path is relative, not absolute.
input_file = '../../dashit_data/human/RN45s.fasta'

In [3]:
#class Gene:
#    def __init__(self, seq):
#        self.seq = seq
#        self.crispr_sites = []
#        
#    def add_crispr_site(self, i, direction):
#        self.crispr_sites.append((i, direction))
#
#    def get_crispr_site(self, i, direction):
#        if direction == 'F':
#            return self.seq[i:i+20]
#        elif direction == 'R':
#            return flash.reverse_complement(self.seq[i+3:i+23])
        
    
        
def read_genes_from_file(filename):
    """Parse a FASTA file into Gene objects annotated with candidate targets.

    For every record in the file a Gene is created, its ``seq`` is set to the
    record's sequence and its ``targets`` list is filled with one Target per
    candidate site found while scanning 23-base windows:

    * a window whose last two bases (offsets 21 and 22) are both 'G' yields a
      forward ('F') target;
    * a window whose first two bases (offsets 0 and 1) are both 'C' yields a
      reverse ('R') target.

    Args:
        filename: Path of the FASTA file to read.

    Returns:
        A list with exactly one Gene per FASTA record, in file order. A gene
        with no candidate sites is still returned (with an empty ``targets``).
    """
    genes = []

    # SeqIO.parse is lazy, so the handle must stay open while records are
    # consumed; the with-block guarantees it is closed afterwards.
    with open(filename) as handle:
        for sequence in SeqIO.parse(handle, 'fasta'):
            # NOTE(review): 'hi' is a placeholder name passed to Gene; the
            # real sequence is attached right below. Confirm whether Gene
            # should receive the record id instead.
            new_gene = Gene('hi')
            new_gene.seq = sequence.seq
            new_gene.targets = []

            # presumably kmers_range yields the start index of every 23-mer
            # in the sequence -- verify against flash.kmers_range.
            for i in flash.kmers_range(new_gene.seq, 23):
                # Window ends in "GG": forward-strand candidate.
                if 'G' == new_gene.seq[i+21] == new_gene.seq[i+22]:
                    new_gene.targets.append(Target(flash.forward_20mer_at(new_gene.seq, i, 'F'),
                                                   flash.cut_location((i, 'F'))))
                # Window starts with "CC": reverse-strand candidate.
                if 'C' == new_gene.seq[i] == new_gene.seq[i+1]:
                    new_gene.targets.append(Target(flash.forward_20mer_at(new_gene.seq, i, 'R'),
                                                   flash.cut_location((i, 'R'))))

            # Append once per record, after all of its windows were scanned.
            genes.append(new_gene)
    return genes

In [4]:
# Parse the FASTA file into Gene objects carrying their candidate targets.
genes = read_genes_from_file(input_file)

hi  is missing a fasta file.


In [5]:
# Work with the first parsed gene for the rest of the notebook and render its
# cuts (no cuts have been assigned to the gene yet at this point).
gene = genes[0]
gene.display_gene_cuts()

Need to cut gene first.


0

In [6]:
# Build the integer program: one 0/1 decision variable per candidate target.
solver = pywraplp.Solver('SolveIntegerProblem',
                         pywraplp.Solver.CBC_MIXED_INTEGER_PROGRAMMING)

# x_<index> corresponds to gene.targets[index]; the names are looked up again
# later via solver.LookupVariable when constraints are built.
site_variables = [
    solver.IntVar(0, 1, 'x_{}'.format(index))
    for index in range(len(gene.targets))
]

# Every variable has unit cost, so minimising the objective minimises the
# number of variables set to 1.
objective = solver.Objective()
for site_variable in site_variables:
    objective.SetCoefficient(site_variable, 1)

objective.SetMinimization()

In [7]:
# Interactive check of the concrete class of the solver object.
type(solver)

ortools.linear_solver.pywraplp.Solver

In [26]:
def create_minimum_spacing_constraints(gene, min_spacing, coverage_width, solver):
    """Add spacing and coverage rows to ``solver`` for the targets of ``gene``.

    For each target i, the targets whose cut location differs from i's and
    lies strictly within a given radius of it are collected, and i itself is
    appended to that group. Targets with no such neighbour produce no group.

    * Groups built with radius ``min_spacing`` become rows 0 <= sum(x) <= 1.
    * Groups built with radius ``coverage_width`` become rows sum(x) >= 1.

    Decision variables are fetched by name ('x_<index>') from ``solver``.

    Args:
        gene: Object whose ``targets`` is a sequence of (sequence, cut_location)
            pairs.
        min_spacing: Radius used for the "at most one" rows.
        coverage_width: Radius used for the "at least one" rows.
        solver: Solver providing LookupVariable, Constraint and Infinity.

    Returns:
        A pair ``(spacing_constraints, coverage_constraints)``; each is a set
        of tuples of solver variables, one tuple per row that was added.
    """
    cut_positions = [position for _, position in gene.targets]

    def variable_group(anchor, radius):
        # Neighbours of the anchor within the radius, followed by the anchor;
        # None when the anchor has no neighbour at all.
        here = cut_positions[anchor]
        members = [
            other
            for other, there in enumerate(cut_positions)
            if there != here and abs(there - here) < radius
        ]
        if not members:
            return None
        members.append(anchor)
        return tuple(solver.LookupVariable('x_{}'.format(k)) for k in members)

    def add_row(lower, upper, group):
        # One linear row with coefficient 1 on every variable of the group.
        row = solver.Constraint(lower, upper)
        for variable in group:
            row.SetCoefficient(variable, 1)

    spacing_constraints = set()
    coverage_constraints = set()

    for anchor in range(len(cut_positions)):
        too_close = variable_group(anchor, min_spacing)
        if too_close is not None:
            spacing_constraints.add(too_close)

        in_reach = variable_group(anchor, coverage_width)
        if in_reach is not None:
            coverage_constraints.add(in_reach)

    # Spacing rows first, then coverage rows.
    for group in spacing_constraints:
        add_row(0, 1, group)

    for group in coverage_constraints:
        add_row(1, solver.Infinity(), group)

    return spacing_constraints, coverage_constraints

In [27]:
# Add the constraint rows to the solver using radius 50 for spacing and 200
# for coverage; `hi` receives the spacing groups and `bye` the coverage groups.
hi, bye = create_minimum_spacing_constraints(gene, 50, 200, solver)

In [28]:
# Solve the model and keep the status code that Solve() returns.
result_status = solver.Solve()

In [76]:
# Echo the status code returned by solver.Solve().
# NOTE(review): compare against pywraplp.Solver.OPTIMAL rather than reading the raw integer.
result_status

0

In [19]:
# Objective value of the solved model. Every site variable carries coefficient
# 1 in the minimised objective, so this is the sum of the site variables.
solver.Objective().Value()

34.0

In [81]:
# Spot-check the solved value of one site variable (index 3).
site_variables[3].solution_value()

0.0

In [33]:
# Keep the targets whose decision variable the solver set to 1.
# solution_value() is a float coming out of the MIP solver, so an "off"
# variable may be a tiny positive residue rather than exactly 0.0; the 0/1
# variable is therefore tested against the midpoint 0.5 instead of 0.0.
gene.cuts = [
    target
    for target, site_variable in zip(gene.targets, site_variables)
    if site_variable.solution_value() > 0.5
]

gene.generate_fragments_from_cuts()

In [34]:
# Render the gene again now that gene.cuts has been populated from the solution.
gene.display_gene_cuts()

Mutation_ranges were never set.

[47mG[0m[47mC[0m[47mT[0m[47mG[0m[47mA[0m[47mC[0m[47mA[0m[47mC[0m[47mG[0m[47mC[0m[47mT[0m[47mG[0m[47mT[0m[47mC[0m[47mC[0m[47mT[0m[47mC[0m[47mT[0m[47mG[0m[47mG[0m[47mC[0m[47mG[0m[47mA[0m[47mC[0m[47mC[0m[47mT[0m[47mG[0m[47mT[0m[47mC[0m[47mG[0m[47mC[0m[47mT[0m[47mG[0m[47mG[0m[47mA[0m[47mG[0m[47mA[0m[47mG[0m[47mG[0m[47mT[0m[47mT[0m[47mG[0m[47mG[0m[47mG[0m[47mC[0m[47mC[0m[47mT[0m[47mC[0m[47mC[0m[47mG[0m[47mG[0m[47mA[0m[47mT[0m[47mG[0m[47mC[0m[47mG[0m[47mC[0m[47mG[0m[47mC[0m[47mG[0m[47mG[0m[47mG[0m[47mG[0m[47mC[0m[47mT[0m[47mC[0m[47mT[0m[47mG[0m[47mG[0m[47mC[0m[47mC[0m[47mT[0m[47mA[0m[47mC[0m[47mC[0m[47mG[0m[47mG[0m[47mT[0m[47mG[0m[47mA[0m[47mC[0m[47mC[0m[47mC[0m[47mG[0m[47mG[0m[47mC[0m[47mT[0m[47mA[0m[47mG[0m[47mC[0m[47mC[0m[47mG[0m[47mG[0m[47mC[0m[47mC[0m[47mG[0m[47mC

[42mC[0m[42mG[0m[42mC[0m[42mC[0m[42mC[0m[42mC[0m[42mT[0m[42mT[0m[42mG[0m[42mC[0m[42mC[0m[42mT[0m[42mC[0m[42mT[0m[42mC[0m[42mG[0m[42mG[0m[42mC[0m[42mG[0m[42mC[0m[42mC[0m[42mC[0m[42mC[0m[42mC[0m[42mT[0m[42mC[0m[42mG[0m[42mA[0m[42mT[0m[42mG[0m[42mC[0m[42mT[0m[42mC[0m[42mT[0m[42mT[0m[42mA[0m[42mG[0m[42mC[0m[42mT[0m[42mG[0m[42mA[0m[42mG[0m[42mT[0m[42mG[0m[42mT[0m[42mC[0m[42mC[0m[42mC[0m[42mG[0m[42mC[0m[42mG[0m[42mG[0m[42mG[0m[42mG[0m[42mC[0m[42mC[0m[42mC[0m[42mG[0m[42mA[0m[42mA[0m[42mG[0m[42mC[0m[42mG[0m[42mT[0m[42mT[0m[42mT[0m[42mA[0m[42mC[0m[42mT[0m[42mT[0m[42mT[0m[42mG[0m[42mA[0m[42mA[0m[42mA[0m[42mA[0m[42mA[0m[42mA[0m[42mT[0m[42mT[0m[42mA[0m[42mG[0m[42mA[0m[42mG[0m[42mT[0m[42mG[0m[42mT[0m[42mT[0m[42mC[0m[42mA[0m[42mA[0m[41m|[0m[42mA[0m[42mG[0m[42mC[0m[42mA[0m[42mG[0m[42mG[0m[42mC[0m[42mC[0m

[42mC[0m[42mG[0m[42mC[0m[42mG[0m[42mA[0m[42mG[0m[42mC[0m[42mC[0m[42mG[0m[42mG[0m[42mG[0m[42mC[0m[42mC[0m[42mC[0m[42mT[0m[42mT[0m[42mC[0m[42mC[0m[42mC[0m[42mG[0m[42mT[0m[42mG[0m[42mG[0m[42mA[0m[42mT[0m[42mC[0m[42mG[0m[42mC[0m[42mC[0m[42mC[0m[42mC[0m[42mA[0m[42mG[0m[42mC[0m[42mT[0m[42mG[0m[42mC[0m[42mG[0m[42mG[0m[42mC[0m[42mG[0m[42mG[0m[42mG[0m[42mC[0m[42mG[0m[42mT[0m[42mC[0m[42mG[0m[42mC[0m[42mG[0m[42mG[0m[42mC[0m[42mC[0m[42mG[0m[42mC[0m[42mC[0m[42mC[0m[42mC[0m[42mC[0m[42mG[0m[42mG[0m[42mG[0m[42mG[0m[42mA[0m[42mG[0m[42mC[0m[42mC[0m[42mC[0m[42mG[0m[42mG[0m[42mC[0m[42mG[0m[42mG[0m[42mG[0m[42mC[0m[42mG[0m[42mC[0m[42mC[0m[42mG[0m[42mG[0m[42mC[0m[42mG[0m[42mC[0m[42mG[0m[42mC[0m[42mC[0m[42mC[0m[42mC[0m[42mC[0m[42mC[0m[42mC[0m[42mC[0m[42mC[0m[42mC[0m[42mC[0m[42mA[0m[42mC[0m[42mC[0m[42mC[0m[42mC[0m

[47mC[0m[47mC[0m[47mG[0m[47mA[0m[47mG[0m[47mC[0m[47mC[0m[47mG[0m[47mG[0m[47mG[0m[47mC[0m[47mC[0m[47mC[0m[47mG[0m[47mT[0m[47mG[0m[47mG[0m[47mC[0m[47mC[0m[47mC[0m[47mG[0m[47mC[0m[47mC[0m[47mG[0m[47mG[0m[47mT[0m[47mC[0m[47mC[0m[47mC[0m[47mC[0m[47mG[0m[47mT[0m[47mC[0m[47mC[0m[47mC[0m[47mG[0m[47mG[0m[47mG[0m[47mG[0m[47mG[0m[47mT[0m[47mT[0m[47mG[0m[47mG[0m[47mC[0m[47mC[0m[47mG[0m[47mC[0m[47mG[0m[47mC[0m[47mG[0m[47mG[0m[47mG[0m[47mC[0m[47mC[0m[47mC[0m[47mC[0m[47mG[0m[47mG[0m[47mT[0m[47mG[0m[47mG[0m[47mG[0m[47mG[0m[47mC[0m[47mG[0m[47mG[0m[47mC[0m[47mC[0m[47mA[0m[47mC[0m[47mC[0m[47mC[0m[47mG[0m[47mG[0m[47mG[0m[47mG[0m[47mT[0m[47mC[0m[47mC[0m[47mC[0m[47mG[0m[47mG[0m[47mC[0m[47mC[0m[47mC[0m[47mT[0m[47mC[0m[47mG[0m[47mC[0m[47mG[0m


1

In [35]:
# Render the gene with its candidate targets (gene.targets) marked.
gene.display_gene_targets()

Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation

Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation_ranges were never set.
Mutation

[47mG[0m[47mG[0m[47mG[0m[46m|[0m[47mT[0m[46m|[0m[47mC[0m[47mG[0m[47mG[0m[47mT[0m[47mG[0m[47mG[0m[47mC[0m[47mG[0m[47mC[0m[47mC[0m[47mC[0m[47mC[0m[46m|[0m[47mG[0m[46m|[0m[47mC[0m[46m|[0m[46m|[0m[47mG[0m[46m|[0m[47mT[0m[46m|[0m[46m|[0m[47mG[0m[47mG[0m[47mG[0m[46m|[0m[47mG[0m[46m|[0m[47mC[0m[47mC[0m[47mC[0m[47mG[0m[47mG[0m[47mT[0m[46m|[0m[47mG[0m[46m|[0m[47mG[0m[47mG[0m[46m|[0m[47mC[0m[47mT[0m[47mT[0m[46m|[0m[47mC[0m[46m|[0m[47mC[0m[47mC[0m[47mG[0m[47mG[0m[47mA[0m[46m|[0m[47mG[0m[46m|[0m[46m|[0m[47mG[0m[46m|[0m[47mG[0m[46m|[0m[47mT[0m[46m|[0m[47mT[0m[47mC[0m[47mC[0m[47mG[0m[46m|[0m[47mG[0m[47mG[0m[47mG[0m[46m|[0m[47mG[0m[47mT[0m[47mC[0m[47mG[0m[47mG[0m[46m|[0m[47mC[0m[47mC[0m[47mT[0m[47mG[0m[47mC[0m[47mG[0m[46m|[0m[47mG[0m[47mC[0m[47mG[0m[46m|[0m[47mC[0m[46m|[0m[47mG[0m[46m|[0m[47mT[0m[46m|[0m

[47mG[0m[46m|[0m[47mA[0m[46m|[0m[47mG[0m[47mG[0m[47mG[0m[47mG[0m[47mG[0m[47mG[0m[47mC[0m[47mG[0m[47mC[0m[46m|[0m[47mG[0m[47mC[0m[47mC[0m[47mC[0m[47mG[0m[47mG[0m[47mC[0m[46m|[0m[47mT[0m[46m|[0m[47mG[0m[47mA[0m[47mG[0m[46m|[0m[47mA[0m[46m|[0m[47mG[0m[46m|[0m[47mA[0m[47mG[0m[47mA[0m[46m|[0m[47mC[0m[47mG[0m[46m|[0m[47mG[0m[47mG[0m[47mG[0m[47mA[0m[46m|[0m[47mG[0m[47mG[0m[47mG[0m[47mC[0m[47mG[0m[47mG[0m[47mC[0m[47mG[0m[47mC[0m[47mC[0m[47mG[0m[47mC[0m[47mC[0m[47mG[0m[46m|[0m[47mC[0m[47mC[0m[47mG[0m[46m|[0m[47mC[0m[47mC[0m[47mC[0m[46m|[0m[47mG[0m[46m|[0m[47mC[0m[47mG[0m[46m|[0m[47mA[0m[46m|[0m[47mA[0m[47mG[0m[46m|[0m[47mA[0m[46m|[0m[47mC[0m[47mG[0m[47mG[0m[47mA[0m[47mG[0m[47mA[0m[47mG[0m[47mG[0m[47mG[0m[47mA[0m[47mA[0m[47mA[0m[47mG[0m[47mA[0m[47mG[0m[47mA[0m[47mG[0m[46m|[0m[47mA[0m[47mG[0m[47mC[0m

[47mT[0m[47mC[0m[47mT[0m[47mA[0m[47mA[0m[47mT[0m[47mT[0m[47mA[0m[47mG[0m[47mT[0m[47mG[0m[47mA[0m[47mC[0m[47mG[0m[47mC[0m[47mG[0m[47mC[0m[47mA[0m[47mT[0m[46m|[0m[47mG[0m[47mA[0m[47mA[0m[47mT[0m[47mG[0m[47mG[0m[47mA[0m[47mT[0m[47mG[0m[47mA[0m[47mA[0m[47mC[0m[47mG[0m[47mA[0m[47mG[0m[47mA[0m[47mT[0m[47mT[0m[47mC[0m[47mC[0m[47mC[0m[47mA[0m[47mC[0m[47mT[0m[46m|[0m[47mG[0m[46m|[0m[47mT[0m[47mC[0m[47mC[0m[47mC[0m[47mT[0m[47mA[0m[47mC[0m[46m|[0m[47mC[0m[46m|[0m[47mT[0m[47mA[0m[47mC[0m[47mT[0m[46m|[0m[47mA[0m[47mT[0m[47mC[0m[47mC[0m[47mA[0m[47mG[0m[47mC[0m[47mG[0m[46m|[0m[47mA[0m[47mA[0m[47mA[0m[47mC[0m[47mC[0m[47mA[0m[47mC[0m[47mA[0m[46m|[0m[47mG[0m[46m|[0m[46m|[0m[47mC[0m[47mC[0m[47mA[0m[47mA[0m[47mG[0m[46m|[0m[47mG[0m[46m|[0m[46m|[0m[47mG[0m[47mA[0m[47mA[0m[47mC[0m[47mG[0m[46m|[0m[47mG[0m[47mG[0m

[47mG[0m[46m|[0m[47mG[0m[46m|[0m[47mC[0m[47mC[0m[46m|[0m[47mA[0m[47mC[0m[47mC[0m[47mC[0m[46m|[0m[47mG[0m[47mG[0m[47mG[0m[46m|[0m[47mG[0m[47mT[0m[47mC[0m[47mC[0m[47mC[0m[47mG[0m[47mG[0m[47mC[0m[47mC[0m[47mC[0m[47mT[0m[47mC[0m[47mG[0m[47mC[0m[47mG[0m


1

In [8]:
# List the attributes exposed by pywraplp.Solver (interactive API exploration).
dir(pywraplp.Solver)

['ABNORMAL',
 'AT_LOWER_BOUND',
 'AT_UPPER_BOUND',
 'Add',
 'BASIC',
 'BOP_INTEGER_PROGRAMMING',
 'BoolVar',
 'CBC_MIXED_INTEGER_PROGRAMMING',
 'CLP_LINEAR_PROGRAMMING',
 'Clear',
 'ComputeConstraintActivities',
 'ComputeExactConditionNumber',
 'Constraint',
 'EnableOutput',
 'ExportModelAsLpFormat',
 'ExportModelAsMpsFormat',
 'FEASIBLE',
 'FIXED_VALUE',
 'FREE',
 'GLOP_LINEAR_PROGRAMMING',
 'INFEASIBLE',
 'Infinity',
 'IntVar',
 'InterruptSolve',
 'Iterations',
 'LoadModelFromProto',
 'LookupConstraint',
 'LookupVariable',
 'Maximize',
 'Minimize',
 'NOT_SOLVED',
 'NumConstraints',
 'NumVar',
 'NumVariables',
 'OPTIMAL',
 'Objective',
 'RowConstraint',
 'SetSolverSpecificParametersAsString',
 'SetTimeLimit',
 'Solve',
 'Sum',
 'SupportsProblemType',
 'SuppressOutput',
 'UNBOUNDED',
 'VerifySolution',
 'WallTime',
 '__class__',
 '__del__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__gt__',
 '__hash_