<a href="https://colab.research.google.com/github/coywil26/DIMPLE/blob/master/DIMPLE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title 1. Install

#@markdown Please execute each cell by pressing the *Play* button on the left.

# download github "https://github.com/coywil26/DIMPLE" and install dependencies
!git clone https://github.com/coywil26/DIMPLE
%cd DIMPLE
!pip install -r requirements.txt

In [None]:
#@title 2. Upload target file
#@markdown Please execute to upload file
import os
import shutil

from google.colab import files

uploaded = files.upload()

# An empty result would otherwise surface later as a NameError on `fn`
if not uploaded:
  raise ValueError('No file was uploaded; run this cell again and choose a target file')

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))

# The last uploaded file is the one handed to step 3
target_file = fn
directory = "workspace/"

# Pure-Python replacements for `mkdir` / `mv`: re-running the cell does not
# fail on an existing folder, and file names containing spaces are handled.
os.makedirs(directory, exist_ok=True)
shutil.move(target_file, os.path.join(directory, target_file))

In [None]:
#@title 3. Generate Mutation Fragments and Primers
#@markdown Oligo Parameters
from google.colab import files
import os
import argparse
from DIMPLE.DIMPLE import align_genevariation, print_all, post_qc, addgene, DIMPLE, generate_DMS_fragments
from DIMPLE.utilities import parse_custom_mutations, codon_usage
from Bio.Seq import Seq
from google.colab import data_table
import ast
import pandas as pd


# If target_file doesn't exist (the upload cell was not run), use the default test file
if 'target_file' not in locals():
  target_file = "DIMPLE/tests/combined_fasta.fa"
  # Define `directory` here as well so that re-running this cell (which then
  # takes the else-branch because target_file exists) never hits an undefined name
  directory = "DIMPLE/tests/"
  wDir = directory
  print('test run')
else:
  #wDir = os.path.basename(target_file)
  wDir = directory

# The gene file is later opened as os.path.join(wDir, geneFile), so geneFile
# must be the bare file name; keeping a directory prefix here would duplicate
# it in the joined path (as happened for the default test file).
geneFile = os.path.basename(target_file)

oligoLen = 250 #@param {type:"integer"}
fragmentLen = "auto"  #@param {type:"string"}
# "auto" (any capitalisation, surrounding blanks ignored) or an empty field is
# normalised to "", which the configuration code below treats as
# "derive the fragment length from oligoLen"
if str(fragmentLen).strip().lower() in ("auto", ""):
  fragmentLen = ""
melting_temp_low = 58 #@param {type:"integer"}
melting_temp_high = 62 #@param {type:"integer"}
overlap = 4 #@param {type:"integer"}
barcode_start = 0 #@param {type:"integer"}
#@markdown Use the following format for restriction enzymes<br>
#@markdown BsmBI: CGTCTC(G)1/4<br>
#@markdown BsaI: GGTCTC(G)1/4
restriction_sequence = "CGTCTC(G)1/4" #@param {type:"string"}
sequences_to_avoid = "CGTCTC, GGTCTC" #@param {type:"string"}
# Split on commas and trim each entry so that "A,B", "A, B" and "A ,B" all
# give the same list; empty entries (e.g. from a trailing comma) are dropped
avoid_sequence = [site.strip() for site in sequences_to_avoid.split(",") if site.strip()]
matchSequences = False

#@markdown Codon Usage (If using custom usage you will see a button to upload file at the bottom)
usage = "human" #@param ["human", "ecoli"]
custom_codon_usage = False #@param {type:"boolean"}
# Optionally replace the named codon usage table with one read from an uploaded file
if custom_codon_usage:
  print("Look in DIMPLE/data/custom_codon_usage.txt for example file")
  uploaded = files.upload()
  if not uploaded:
    raise ValueError("No custom codon usage file was uploaded")
  usage_file = list(uploaded.keys())[0]
  # Only the first line of the file is used; it is parsed as a Python literal
  with open(usage_file) as f:
    usage_line = f.readline()
  usage = ast.literal_eval(usage_line.strip())
  # A bare expression inside an `if` block is not rendered by the notebook,
  # so show the parsed table explicitly
  display(pd.DataFrame(usage,index=[0]))

#@markdown Select Mutations (Select one or more)

select_domain_insertion = False #@param {type:"boolean"}
# Plain boolean flag later passed to generate_DMS_fragments
dis = bool(select_domain_insertion)
handle = "AGCGGGAGACCGGGGTCTCTGAGC" #@param {type:"string"}
select_insertions = False #@param {type:"boolean"}
insertions = "GGG,GGGGGG" #@param {type:"string"}
# Comma-separated entries become a list (blanks trimmed, empty entries dropped);
# an unselected option is represented by the empty string
if select_insertions:
  insertions = [seq.strip() for seq in insertions.split(",") if seq.strip()]
else:
  insertions = ""
select_deletions = False #@param {type:"boolean"}
deletions = "3,6" #@param {type:"string"}
# Kept as strings here; they are converted to integers just before the run
if select_deletions:
  deletions = [length.strip() for length in deletions.split(",") if length.strip()]
else:
  deletions = ""
select_deep_mutation_scan = True #@param {type:"boolean"}
# Plain boolean flag later passed to generate_DMS_fragments
dms = bool(select_deep_mutation_scan)
#@markdown Mutation Settings
maximize_nucleotide_change = True #@param {type:"boolean"}
amino_acid_substitutions = "Cys,Asp,Ser,Gln,Met,Asn,Pro,Lys,Thr,Phe,Ala,Gly,Ile,Leu,His,Arg,Trp,Val,Glu,Tyr" #@param {type:"string"}
include_stop_codons = False #@param {type:"boolean"}
include_synonymous_mutations = False #@param {type:"boolean"}

make_double_mutations = False #@param {type:"boolean"}
select_custom_mutations = False #@param {type:"boolean"}
if select_custom_mutations:
  # NOTE(review): this message points at the codon usage example file; it looks
  # copied from the codon usage section -- confirm the custom mutations example path
  print("Look in DIMPLE/data/custom_codon_usage.txt for example file")
  uploaded = files.upload()
  if not uploaded:
    raise ValueError("No custom mutations file was uploaded")
  custom_mutations_file = list(uploaded.keys())[0]
  with open(custom_mutations_file) as f:
    lines = f.readlines()
  # The first line of the file is skipped before parsing
  custom_mutations = parse_custom_mutations(lines[1:])
  # A bare expression inside an `if` block is not rendered by the notebook,
  # so show the parsed mutations explicitly
  display(pd.DataFrame(custom_mutations,index=[0]))
else:
  custom_mutations = {}
avoid_breaksites = False #@param {type:"boolean"}
avoid_breaksites_list = "" #@param {type:"string"}

#!python3 DIMPLE/run_dimple.py -geneFile=$target_file -oligoLen=$oligo_length -DMS=$select_DMS -usage=$codon_usage_bias -include_stop_codons=$include_stop_codons -restriction_sequence=$type_IIS_restriction_enzyme -avoid_sequence=$sequences_to_avoid -insertions=$insertions -deletions=$deletions -fragmentLen=$fragment_length -overlap=$fragment_overlap -barcode_start=$barcode_start_position -include_stop_codons=$include_stop_codons -usage=$custom_mutations


# Check genetic handle for non nucleic characters
if any(base not in 'ACGTacgt' for base in handle):
    raise ValueError('Genetic handle contains non nucleic bases')

# Copy the form settings onto the DIMPLE class attributes
DIMPLE.handle = handle
DIMPLE.synth_len = oligoLen
if fragmentLen:
    # The form delivers this value as text; store it as a number like the
    # automatically derived value in the else-branch
    DIMPLE.maxfrag = int(fragmentLen)
else:
    DIMPLE.maxfrag = oligoLen - 62 - overlap  # 62 allows for cutsites and barcodes

DIMPLE.dms = dms

# primerBuffer and the barcode lists are modified relative to their current
# class-level values. Remember the untouched values the first time this cell
# runs so that re-running it does not keep adding `overlap` or keep trimming
# the barcode lists.
if '_dimple_class_defaults' not in globals():
    _dimple_class_defaults = {
        'primerBuffer': DIMPLE.primerBuffer,
        'barcodeF': DIMPLE.barcodeF[:],
        'barcodeR': DIMPLE.barcodeR[:],
    }

# Adjust primer buffer by the requested overlap
DIMPLE.primerBuffer = _dimple_class_defaults['primerBuffer'] + overlap

# Skip the first `barcode_start` entries of both barcode lists
DIMPLE.barcodeF = _dimple_class_defaults['barcodeF'][int(barcode_start):]
DIMPLE.barcodeR = _dimple_class_defaults['barcodeR'][int(barcode_start):]

# Parse "<site>(<buffer>)<a>/<b>", e.g. "CGTCTC(G)1/4"
try:
    tmp_cutsite = restriction_sequence.split('(')
    cutsite_text = tmp_cutsite[0]
    buffer_text = tmp_cutsite[1].split(')')[0]
    tmp_overhang = tmp_cutsite[1].split(')')[1].split('/')
    overhang_length = int(tmp_overhang[1]) - int(tmp_overhang[0])
except (IndexError, ValueError) as err:
    raise ValueError(
        'restriction_sequence must look like "CGTCTC(G)1/4", got: '
        + repr(restriction_sequence)) from err
DIMPLE.cutsite = Seq(cutsite_text)
DIMPLE.cutsite_buffer = Seq(buffer_text)
DIMPLE.cutsite_overhang = overhang_length

DIMPLE.avoid_sequence = [Seq(x) for x in avoid_sequence]
DIMPLE.stop_codon = include_stop_codons
DIMPLE.make_double = make_double_mutations
DIMPLE.usage = codon_usage(usage)
DIMPLE.gene_primerTM = (melting_temp_low, melting_temp_high)
DIMPLE.maximize_nucleotide_change = maximize_nucleotide_change

OLS = addgene(os.path.join(wDir, geneFile).strip())
if avoid_breaksites_list and select_custom_mutations:
    OLS[0].problemsites = set(int(x) for x in custom_mutations.keys())
    # add extras
    if avoid_breaksites_list != '':
        OLS[0].problemsites.update([int(x) for x in avoid_breaksites_list.split(',')])
    for i in range(len(OLS[0].breaksites)):
        DIMPLE.switch_fragmentsize(OLS[0], 1, OLS)
if matchSequences == 'match':
    align_genevariation(OLS)
if deletions:
    deletions = [int(x) for x in deletions]
if not any([dis, dms, insertions, deletions]):
    raise ValueError("Didn't select any mutations to generate")
print(dms)
print(overlap)
generate_DMS_fragments(OLS, overlap, overlap, include_synonymous_mutations, custom_mutations, dms, insertions, deletions, dis, wDir)

post_qc(OLS)
print_all(OLS, wDir)

directory = wDir
!zip -r dimple_results.zip $directory
files.download("dimple_results.zip")