<a href="https://colab.research.google.com/github/coywil26/DIMPLE/blob/master/DIMPLE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title 1. Install

#@markdown Please execute each cell by pressing the *Play* button on the left.

# download github "https://github.com/coywil26/DIMPLE" and install dependencies
!git clone https://github.com/coywil26/DIMPLE --branch maximize-nucleotide-mutations
%cd DIMPLE
!pip install -r requirements.txt

Cloning into 'DIMPLE'...
remote: Enumerating objects: 575, done.[K
remote: Counting objects: 100% (220/220), done.[K
remote: Compressing objects: 100% (142/142), done.[K
remote: Total 575 (delta 152), reused 115 (delta 78), pack-reused 355[K
Receiving objects: 100% (575/575), 1.23 MiB | 3.14 MiB/s, done.
Resolving deltas: 100% (347/347), done.
/content/DIMPLE
Collecting biopython (from -r requirements.txt (line 1))
  Downloading biopython-1.83-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: biopython
Successfully installed biopython-1.83


In [2]:
#@title 2. Upload target file
#@markdown Please execute to upload file
import os
import shutil

from google.colab import files

uploaded = files.upload()
if not uploaded:
  # Without this check the cell would die later with a NameError on `fn`.
  raise ValueError('No file was uploaded')

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))

# As before, the last uploaded file is the one that gets processed.
target_file = fn
directory = "workspace/"
# Pure-Python file handling: tolerates re-runs (directory already present)
# and file names containing spaces or shell metacharacters.
os.makedirs(directory, exist_ok=True)
shutil.move(target_file, os.path.join(directory, target_file))

Saving ptwist_EF1a_IL7_FLAG_PDGFR.fa to ptwist_EF1a_IL7_FLAG_PDGFR.fa
User uploaded file "ptwist_EF1a_IL7_FLAG_PDGFR.fa" with length 5772 bytes


In [3]:
#@title 3. Generate Mutation Fragments and Primers
#@markdown Oligo Parameters
from google.colab import files
import os
import argparse
from DIMPLE.DIMPLE import align_genevariation, print_all, post_qc, addgene, DIMPLE, generate_DMS_fragments
from DIMPLE.utilities import parse_custom_mutations, codon_usage
from Bio.Seq import Seq
from google.colab import data_table
import ast
import pandas as pd

class arguments:
  """Empty attribute container; the settings below are attached to an instance."""
  pass
args = arguments()

# if target_file doesnt exist use default test file
if 'target_file' not in locals():
  target_file = "DIMPLE/tests/combined_fasta.fa"
  args.wDir = "DIMPLE/tests/"
  print('test run')
else:
  #args.wDir = os.path.basename(target_file)
  args.wDir = directory

# Keep only the file name: the gene file is later opened as
# os.path.join(args.wDir, args.geneFile), so leaving the directory part in
# here would duplicate it for the test file ("DIMPLE/tests/DIMPLE/tests/...").
args.geneFile = os.path.basename(target_file)

args.oligoLen = 250 #@param {type:"integer"}
args.fragmentLen = "auto"  #@param {type:"string"}
# "auto" (any case, surrounding blanks ignored) becomes an empty string, which
# the code further down treats as "derive the fragment length from oligoLen".
args.fragmentLen = str(args.fragmentLen).strip()
if args.fragmentLen.lower() == "auto":
  args.fragmentLen = ""
args.melting_temp_low = 58 #@param {type:"integer"}
args.melting_temp_high = 62 #@param {type:"integer"}
args.overlap = 4 #@param {type:"integer"}
args.barcode_start = 0 #@param {type:"integer"}
type_IIS_restriction_enzyme = "BsmBI" #@param ["BsmBI", "BsaI"]
# Recognition sequence of the selected enzyme; anything other than BsmBI maps to GGTCTC.
args.restriction_sequence = "CGTCTC" if type_IIS_restriction_enzyme == "BsmBI" else "GGTCTC"
sequences_to_avoid = "CGTCTC,GGTCTC" #@param {type:"string"}
# Split on the bare comma and strip each entry. Splitting on ", " left the
# default value "CGTCTC,GGTCTC" as one single (never matching) sequence.
args.avoid_sequence = [seq.strip() for seq in sequences_to_avoid.split(",") if seq.strip()]
args.matchSequences = False

#@markdown Codon Usage (If using custom usage you will see a button to upload file at the bottom)
args.usage = "human" #@param ["human", "ecoli"]
custom_codon_usage = False #@param {type:"boolean"}
# Optional custom codon usage: the first line of the uploaded file must be a
# Python literal; it replaces the preset name chosen above.
if custom_codon_usage:
  print("Look in DIMPLE/data/custom_codon_usage.txt for example file")
  uploaded = files.upload()
  if not uploaded:
    raise ValueError('No codon usage file was uploaded')
  usage_file = list(uploaded.keys())[0]
  with open(usage_file) as f:
    usage = f.readlines()
  if not usage:
    raise ValueError('Codon usage file is empty')
  # strip() also removes "\r" and leading blanks that literal_eval may reject
  args.usage = ast.literal_eval(usage[0].strip())
  # A bare expression in the middle of a cell is discarded; display() shows the table.
  display(pd.DataFrame(args.usage, index=[0]))

#@markdown Select Mutations (Select one or more)
select_domain_insertion = False #@param {type:"boolean"}
args.dis = bool(select_domain_insertion)
args.handle = "AGCGGGAGACCGGGGTCTCTGAGC" #@param {type:"string"}
select_insertions = False #@param {type:"boolean"}
insertions = "GGG,GGGGGG" #@param {type:"string"}
# Comma-separated list; blanks around entries and empty entries are dropped.
# An unselected mutation type is represented by an empty string (falsy).
if select_insertions:
  args.insertions = [seq.strip() for seq in insertions.split(",") if seq.strip()]
else:
  args.insertions = ""
select_deletions = False #@param {type:"boolean"}
deletions = "3,6" #@param {type:"string"}
# Kept as strings here; they are converted to int right before fragment generation.
if select_deletions:
  args.deletions = [size.strip() for size in deletions.split(",") if size.strip()]
else:
  args.deletions = ""
select_deep_mutation_scan = False #@param {type:"boolean"}
args.dms = bool(select_deep_mutation_scan)
#@markdown Mutation Settings
# NOTE(review): amino_acid_substitutions is not referenced anywhere else in this
# cell, so editing it currently has no effect - confirm how it should be wired in.
amino_acid_substitutions = "Cys,Asp,Ser,Gln,Met,Asn,Pro,Lys,Thr,Phe,Ala,Gly,Ile,Leu,His,Arg,Trp,Val,Glu,Tyr" #@param {type:"string"}
args.include_stop_codons = False #@param {type:"boolean"}
args.include_synonymous_mutations = False #@param {type:"boolean"}

args.make_double_mutations = False #@param {type:"boolean"}
select_custom_mutations = False #@param {type:"boolean"}
if select_custom_mutations:
  # The previous prompt pointed at the codon usage example file (copy-paste slip).
  print("Upload a custom mutations file (its first line is skipped when parsing)")
  uploaded = files.upload()
  if not uploaded:
    raise ValueError('No custom mutations file was uploaded')
  custom_mutations_file = list(uploaded.keys())[0]
  with open(custom_mutations_file) as f:
    lines = f.readlines()
  args.custom_mutations = parse_custom_mutations(lines[1:])
  # A bare expression in the middle of a cell is discarded; display() shows the table.
  display(pd.DataFrame(args.custom_mutations, index=[0]))
else:
  args.custom_mutations = {}
args.avoid_breaksites = False #@param {type:"boolean"}
args.avoid_breaksites_list = "" #@param {type:"string"}

#!python3 DIMPLE/run_dimple.py -geneFile=$target_file -oligoLen=$oligo_length -DMS=$select_DMS -usage=$codon_usage_bias -include_stop_codons=$include_stop_codons -restriction_sequence=$type_IIS_restriction_enzyme -avoid_sequence=$sequences_to_avoid -insertions=$insertions -deletions=$deletions -fragmentLen=$fragment_length -overlap=$fragment_overlap -barcode_start=$barcode_start_position -include_stop_codons=$include_stop_codons -usage=$custom_mutations


# Check genetic handle for non nucleic characters
if any([x not in ['A', 'C', 'G', 'T', 'a', 'c', 'g', 't'] for x in args.handle]):
    raise ValueError('Genetic handle contains non nucleic bases')

DIMPLE.handle = args.handle
DIMPLE.synth_len = args.oligoLen
if args.fragmentLen:
    DIMPLE.maxfrag = args.fragmentLen
else:
    DIMPLE.maxfrag = args.oligoLen - 62 - args.overlap  # 62 allows for cutsites and barcodes

DIMPLE.dms = args.dms

#  adjust primer primerBuffer
DIMPLE.primerBuffer += args.overlap

DIMPLE.avoid_sequence = args.avoid_sequence
DIMPLE.barcodeF = DIMPLE.barcodeF[int(args.barcode_start):]
DIMPLE.barcodeR = DIMPLE.barcodeR[int(args.barcode_start):]
DIMPLE.cutsite = Seq(args.restriction_sequence)
DIMPLE.avoid_sequence = [Seq(x) for x in args.avoid_sequence]
DIMPLE.stop_codon = args.include_stop_codons
DIMPLE.make_double = args.make_double_mutations
DIMPLE.usage = codon_usage(args.usage)

OLS = addgene(os.path.join(args.wDir, args.geneFile).strip())
if args.avoid_breaksites_list and select_custom_mutations:
    OLS[0].problemsites = set(int(x) for x in args.custom_mutations.keys())
    # add extras
    if app.avoid_others_list.get() != '':
        OLS[0].problemsites.update([int(x) for x in app.avoid_others_list.get().split(',')])
    for i in range(len(OLS[0].breaksites)):
        switch_fragmentsize(OLS[0], 1, OLS)
if args.matchSequences == 'match':
    align_genevariation(OLS)
if args.deletions:
    args.deletions = [int(x) for x in args.deletions]
if not any([args.dis, args.dms, args.insertions, args.deletions]):
    raise ValueError("Didn't select any mutations to generate")
print(args.dms)
print(args.overlap)
generate_DMS_fragments(OLS, args.overlap, args.overlap, args.include_synonymous_mutations, args.custom_mutations, args.dms, args.insertions, args.deletions, args.dis, args.wDir)

post_qc(OLS)
print_all(OLS, args.wDir)

directory = args.wDir
!zip -r file.zip $directory
files.download("file.zip")



Initial Fragment Sizes for:ptwist_EF1a_IL7_FLAG_PDGFR
[150, 150, 150]
False
4
New max fragment:154
Non specific Fragment:1
[147, 153, 150]
[[37, 181], [184, 334], [337, 484]]
--------------------------------- Analyzing Gene:ptwist_EF1a_IL7_FLAG_PDGFR ---------------------------------
Creating Fragment:ptwist_EF1a_IL7_FLAG_PDGFR --- Fragment #1 AA:2-49
found primers:  ATACGTCTCGCAGTCGGCCACTGCG 63.8 15
found primers:  ATACGTCTCGGGATGTTTTTGTTCCGCGCG 62.2 10
Found non-specific match at 11bp:
match: GTGGGAGCCGCAGTGGCCGACTGCG
primer:ATACGTCTCGCAGTCGGCCACTGCG Tm:31.4
Barcodes used:128
Barcodes Remaining:2872
Creating Fragment:ptwist_EF1a_IL7_FLAG_PDGFR --- Fragment #2 AA:51-100
found primers:  ATACGTCTCTTTATTCGCGTCGCAGATATGTCTC 64.4 6
found primers:  ATACGTCTCTTAGGAGAAGCACAGCCCACAAAG 61.6 7
Barcodes used:26
Barcodes Remaining:2846
Creating Fragment:ptwist_EF1a_IL7_FLAG_PDGFR --- Fragment #3 AA:102-150
found primers:  ATACGTCTCGGCGGGCTTTCTCCCC 60.9 15
found primers:  ATACGTCTCGGAGGCAGCGGTGGAGG

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>