In [14]:
#import modules
from Bio.Seq import Seq
import primer3
import os
import pandas as pd
import numpy as np
import requests
import sys
pd.set_option('display.max_rows', None)
import warnings
warnings.filterwarnings('ignore')

In [15]:
#read in dataframe containing predicted guide RNAs
gRNAs_synth_min_df = pd.read_csv('gRNAs_synth_min_df.csv') 

#replace U with T in gRNA sequences
gRNAs_synth_min_df["gRNA_sequence_T"] = gRNAs_synth_min_df["gRNA sequence"].str.replace("U",'T')
#add column to indicate forward/reverse orientation of gRNA in relation to gene sequence
gRNAs_synth_min_df["gRNA_sequence_T_forward_rel_to_gene"] = gRNAs_synth_min_df["gRNA_sequence_T"]

#sanity check the dataframe
#gRNAs_synth_min_df

In [16]:
#get gene sequence in plain text
#REST API python3 Ensembl
server = "https://rest.ensembl.org"
ext = "/sequence/id/" + gRNAs_synth_min_df["ENSG ID"].iloc[0] +"?"

#retrieve plain text gene sequence
r = requests.get(server+ext, headers={ "Content-Type" : "text/plain"})

#sanity check object r
if not r.ok:
  r.raise_for_status()
  sys.exit()

#print(r.text)

In [17]:
#sanity check for orientation and existence of gRNA_sequence_T in gene sequence 
for_rev_orientation_gRNA = []

#using .find to match gRNA sequence_T to gene sequence - if not found output will be -1.
#In case of output -1 use reverse complement gRNA to search for match
for row in range(len(gRNAs_synth_min_df)):
        orientation = r.text.find(gRNAs_synth_min_df["gRNA_sequence_T"].iloc[row])
        for_rev_orientation_gRNA.append(orientation)
print(for_rev_orientation_gRNA)

#check for_rev_orientation_gRNA for -1
reverse_strand_gRNAs = []
for entry in range(len(for_rev_orientation_gRNA)):
    if for_rev_orientation_gRNA[entry] == -1:
        reverse_strand_gRNAs.append(entry)

#sanity check reverse_strand_gRNAs        
#reverse_strand_gRNAs

[-1, -1, -1, 1523]


In [18]:
# reverse complement gRNA if gRNA_sequence_T is reverse complement in relation to gene sequence
for entry in reverse_strand_gRNAs:
    gRNAs_synth_min_df["gRNA_sequence_T_forward_rel_to_gene"].iloc[entry] = Seq(gRNAs_synth_min_df["gRNA_sequence_T"].iloc[entry]).reverse_complement()
    gRNAs_synth_min_df["gRNA_sequence_T_forward_rel_to_gene"].iloc[entry] = str(gRNAs_synth_min_df["gRNA_sequence_T_forward_rel_to_gene"].iloc[entry])

    
gRNAs_synth_min_df["gRNA_sequence_T_forward_rel_to_gene"].iloc[entry].replace(",","")
gRNAs_synth_min_df["gRNA_sequence_T_forward_rel_to_gene"].iloc[entry].replace(" ", "")
#gRNAs_synth_min_df

'CGTCCACCATGCCGGGCATG'

In [19]:
#Tidy up gRNAs_synth_min_df df
gRNAs_synth_min_df = gRNAs_synth_min_df.rename(columns={'Unnamed: 0': 'gRNA Name'})
gRNAs_synth_min_df = gRNAs_synth_min_df.drop(columns=['gRNA sequence'])

In [20]:
#forward oriented gRNA positions in relation to gene sequence
for_orientation_gRNA = []
for row in range(len(gRNAs_synth_min_df)):
        orientation = r.text.find(gRNAs_synth_min_df["gRNA_sequence_T_forward_rel_to_gene"].iloc[row])
        for_orientation_gRNA.append(orientation)
#print(for_orientation_gRNA)

In [21]:
gRNAs_synth_min_df["Orientation"] = (gRNAs_synth_min_df['gRNA_sequence_T'] == gRNAs_synth_min_df['gRNA_sequence_T_forward_rel_to_gene'])

mask = gRNAs_synth_min_df.applymap(type) != bool
values = {True: 'True', False: 'False'}
gRNAs_synth_min_df = gRNAs_synth_min_df.where(mask, gRNAs_synth_min_df.replace(values))

gRNAs_synth_min_df["Orientation"] = gRNAs_synth_min_df["Orientation"].str.replace("True", "Forward")
gRNAs_synth_min_df["Orientation"] = gRNAs_synth_min_df["Orientation"].str.replace("False", "Reverse") 
gRNAs_synth_min_df

Unnamed: 0,gRNA Name,ENSG ID,Gene Name,gRNA_sequence_T,gRNA_sequence_T_forward_rel_to_gene,Orientation
0,MYCN-gRNA1,ENSG00000134323,MYCN,GACGTGGAGCAGCTCGGCAT,ATGCCGAGCTGCTCCACGTC,Reverse
1,MYCN-gRNA2,ENSG00000134323,MYCN,ATGGTGGACGTGGAGCAGCT,AGCTGCTCCACGTCCACCAT,Reverse
2,MYCN-gRNA3,ENSG00000134323,MYCN,CATGCCCGGCATGGTGGACG,CGTCCACCATGCCGGGCATG,Reverse
3,MYCN-gRNA4,ENSG00000134323,MYCN,CTGCTCCACGTCCACCATGC,CTGCTCCACGTCCACCATGC,Forward


In [22]:
#Extract gRNA flanking sequences ~250 nt for primer design
#dict to hold sequences
sequences_for_primer_design = {}

#use index to retrieve name
i = 0
for entry in for_orientation_gRNA:
    beginning = entry - 250
    end = entry + 250
    seq_slice = r.text[beginning:end]
    sequences_for_primer_design["Sequence flanking " + gRNAs_synth_min_df["gRNA Name"].iloc[i]] = seq_slice
    i += 1

#sanity check dict
#sequences_for_primer_design

In [23]:
primer_designed = {}
for key in sequences_for_primer_design:
    seq_dict = {
        'SEQUENCE_ID': key,
        'SEQUENCE_TEMPLATE': sequences_for_primer_design[key],
    }
    primer_designed[key + " Primer"] = primer3.designPrimers(seq_dict,    
        {
            'PRIMER_OPT_SIZE': 20,
            'PRIMER_PICK_INTERNAL_OLIGO': 1,
            'PRIMER_INTERNAL_MAX_SELF_END': 8,
            'PRIMER_MIN_SIZE': 18,
            'PRIMER_MAX_SIZE': 25,
            'PRIMER_OPT_TM': 60.0,
            'PRIMER_MIN_TM': 57.0,
            'PRIMER_MAX_TM': 63.0,
            'PRIMER_MIN_GC': 20.0,
            'PRIMER_MAX_GC': 80.0,
            'PRIMER_MAX_POLY_X': 100,
            'PRIMER_INTERNAL_MAX_POLY_X': 100,
            'PRIMER_SALT_MONOVALENT': 50.0,
            'PRIMER_DNA_CONC': 50.0,
            'PRIMER_MAX_NS_ACCEPTED': 0,
            'PRIMER_MAX_SELF_ANY': 12,
            'PRIMER_MAX_SELF_END': 8,
            'PRIMER_PAIR_MAX_COMPL_ANY': 12,
            'PRIMER_PAIR_MAX_COMPL_END': 8,
            'PRIMER_PRODUCT_SIZE_RANGE': [[375, 500]],
        })

In [24]:
primer_df = pd.DataFrame.from_dict(primer_designed, orient='index')
primer_df

Unnamed: 0,PRIMER_LEFT_EXPLAIN,PRIMER_RIGHT_EXPLAIN,PRIMER_INTERNAL_EXPLAIN,PRIMER_PAIR_EXPLAIN,PRIMER_LEFT_NUM_RETURNED,PRIMER_RIGHT_NUM_RETURNED,PRIMER_INTERNAL_NUM_RETURNED,PRIMER_PAIR_NUM_RETURNED,PRIMER_PAIR_0_PENALTY,PRIMER_LEFT_0_PENALTY,...,PRIMER_RIGHT_4_SELF_END_TH,PRIMER_INTERNAL_4_SELF_END_TH,PRIMER_LEFT_4_HAIRPIN_TH,PRIMER_RIGHT_4_HAIRPIN_TH,PRIMER_INTERNAL_4_HAIRPIN_TH,PRIMER_LEFT_4_END_STABILITY,PRIMER_RIGHT_4_END_STABILITY,PRIMER_PAIR_4_COMPL_ANY_TH,PRIMER_PAIR_4_COMPL_END_TH,PRIMER_PAIR_4_PRODUCT_SIZE
Sequence flanking MYCN-gRNA1 Primer,"considered 1008, low tm 38, high tm 724, high ...","considered 1008, GC content failed 200, low tm...","considered 4785, GC content failed 601, low tm...","considered 66, unacceptable product size 56, o...",5,5,5,5,0.722896,0.396278,...,0.0,0.0,0.0,0.0,43.36735,4.79,4.02,0.0,0.0,429
Sequence flanking MYCN-gRNA2 Primer,"considered 1008, low tm 39, high tm 720, high ...","considered 1008, GC content failed 193, low tm...","considered 4785, GC content failed 601, low tm...","considered 6, ok 6",5,5,5,5,0.14393,0.035458,...,0.0,0.0,35.597197,0.0,43.36735,3.32,4.35,5.919788,1.406589,385
Sequence flanking MYCN-gRNA3 Primer,"considered 1008, GC content failed 58, low tm ...","considered 1008, GC content failed 121, low tm...","considered 4785, GC content failed 601, low tm...","considered 5, ok 5",5,5,5,5,0.072034,0.035458,...,0.0,0.0,35.597197,41.061095,43.36735,3.32,3.79,0.0,0.0,393
Sequence flanking MYCN-gRNA4 Primer,"considered 1008, low tm 39, high tm 724, high ...","considered 1008, GC content failed 182, low tm...","considered 4785, GC content failed 601, low tm...","considered 6, ok 6",5,5,5,5,0.14393,0.035458,...,0.0,0.0,35.597197,0.0,43.36735,3.32,4.35,5.919788,1.406589,385


In [25]:
primer_df = primer_df[["PRIMER_LEFT_0_SEQUENCE", "PRIMER_RIGHT_0_SEQUENCE"]]

In [26]:
primer_df

Unnamed: 0,PRIMER_LEFT_0_SEQUENCE,PRIMER_RIGHT_0_SEQUENCE
Sequence flanking MYCN-gRNA1 Primer,GCTTGGAGGGAAGATTGGGG,TCTTCCAGATGTCCTCCCCC
Sequence flanking MYCN-gRNA2 Primer,CATTGCCTATCCCCTCGGTC,CTCAAGCAGCATCTCCGTGA
Sequence flanking MYCN-gRNA3 Primer,CATTGCCTATCCCCTCGGTC,AGCTCGTTCTCAAGCAGCAT
Sequence flanking MYCN-gRNA4 Primer,CATTGCCTATCCCCTCGGTC,CTCAAGCAGCATCTCCGTGA
