___________________________________________________________________________________________________________________________

___________________________________________________________________________________________________________________________

### Set the project ID (the PO number) as a string

In [1]:
#provide the project ID as a string (the PO number in string format)
#it is used as the base name of the input file (<project_id>.csv) and of both output CSV files
project_id = 'project_test'

___________________________________________________________________________________________________________________________

___________________________________________________________________________________________________________________________

In [2]:
#import for retrieving predicted gRNAs
from pybiomart import Server
import pandas as pd
import urllib.request
import os
# import nest_asyncio
# nest_asyncio.apply()
from selenium import webdriver
import time
pd.set_option('display.max_rows', None)
pd.options.display.max_columns = None
#imports for primer design
from Bio.Seq import Seq
import primer3
import numpy as np
import requests
import sys
import warnings
warnings.filterwarnings('ignore')

In [3]:
# define functions
def predict_gRNA(df_genes,
                 geckodriver_path=r'D:\Alex\geckodriver-v0.31.0-win64\geckodriver.exe',
                 page_load_wait=30):
    """Scrape four Synthego-predicted gRNAs for every gene listed in ``df_genes``.

    Each Ensembl gene ID is first looked up through the module-level pybiomart
    ``dataset``. The Synthego design page of that gene is then opened in a
    headless Firefox session and the gRNA sequences are read from the rendered
    page. One row per gRNA (index ``<symbol>-gRNA1`` .. ``<symbol>-gRNA4``) is
    appended to the module-level ``gRNA_predicted`` dataframe, which has to exist
    before the first call.

    Genes whose page delivered fewer than four sequences get null entries in the
    'gRNA sequence' column for the missing guides, so that callers can detect the
    failed scrape and retry.

    Parameters
    ----------
    df_genes : pandas.DataFrame
        Needs a "GENE ID" column holding Ensembl gene IDs.
    geckodriver_path : str, optional
        Location of the geckodriver executable handed to Selenium.
    page_load_wait : int or float, optional
        Seconds to wait after requesting the page before reading it; increase
        this if pages come back empty.

    Raises
    ------
    SystemExit
        If the BioMart lookup for a gene ID does not return exactly one row.
    """
    #uses globally declared dataframe, declared prior to function calls
    global gRNA_predicted

    #suppress opening Firefox browser window (only has to be set once)
    os.environ['MOZ_HEADLESS'] = '1'

    guides_per_gene = 4

    for gene_ens_id in df_genes["GENE ID"]:
        #parse dataset for gene_id
        data_gene = dataset.query(attributes=['ensembl_gene_id', 'external_gene_name'],
                                  filters={'link_ensembl_gene_id': gene_ens_id})

        #only continue if the gene ID was found and is unique
        match_count = len(data_gene.index)
        if match_count != 1:
            raise SystemExit("Stop right there - ambiguous gene name! (" + str(gene_ens_id)
                             + " returned " + str(match_count) + " BioMart matches)")

        gene_id = data_gene.iat[0, 0]
        gene_symbol = data_gene.iat[0, 1]
        #construct Synthego url
        urlpage = 'https://design.synthego.com/#/design/results?genome=homo_sapiens_gencode_26_primary&nuclease=cas9&gene_id='+gene_id+'&symbol='+gene_symbol

        # run firefox webdriver from executable path of your choice
        driver = webdriver.Firefox(executable_path=geckodriver_path)
        try:
            driver.get(urlpage)
            #fixed wait to have page fully loaded - might need to increase time
            time.sleep(page_load_wait)
            results = driver.find_elements("xpath", "//*[@class='ng-binding']")
            #element text has to be read while the browser session is still alive
            gRNAs_synth = [result.text for result in results]
        finally:
            #always shut the browser down, otherwise one Firefox process per gene is left running
            driver.quit()

        #extract minimal information for predicted gRNAs: elements 5..8 of the scraped texts
        #(presumably the preceding elements are page header fields - TODO confirm against the page)
        sequences = gRNAs_synth[5:9]
        #pad incomplete scrapes with nulls instead of failing on a length mismatch
        sequences = sequences + [None] * (guides_per_gene - len(sequences))
        labels = [gene_symbol + '-gRNA' + str(number) for number in range(1, guides_per_gene + 1)]
        gRNAs_synth_min_df = pd.DataFrame({'gRNA sequence': sequences}, index=labels)
        #add column(s) with ENSG number/gene name to df
        gRNAs_synth_min_df['GENE ID'] = gene_id
        gRNAs_synth_min_df['GENE NAME'] = gene_symbol
        gRNA_predicted = pd.concat([gRNA_predicted, gRNAs_synth_min_df])

In [4]:
# Open the connection to the Ensembl BioMart server.
server = Server(host='http://www.ensembl.org')

# Pick the Homo sapiens gene dataset out of the main Ensembl mart.
# Tip: dataset.list_filters() shows which filter names dataset.query() accepts.
ensembl_mart = server.marts['ENSEMBL_MART_ENSEMBL']
dataset = ensembl_mart.datasets['hsapiens_gene_ensembl']

In [5]:
# Load the gene list of this project from "<project_id>.csv".
# Gene IDs are expected in Ensembl form, e.g. ENSG00000141510.
genes_csv_path = project_id + '.csv'
df_genes_id = pd.read_csv(genes_csv_path)
# Upper-case the headers so the columns can be addressed as "GENE ID" / "GENE NAME".
df_genes_id.columns = [column_name.upper() for column_name in df_genes_id.columns]
df_genes_id.head(10)

Unnamed: 0,GENE ID,GENE NAME
0,ENSG00000010404,IDS
1,ENSG00000150676,CCDC83
2,ENSG00000099308,MAST3
3,ENSG00000142676,RPL11
4,ENSG00000180776,ZDHHC20
5,ENSG00000108848,LUC7L3
6,ENSG00000103811,CTSH
7,ENSG00000101473,ACOT8
8,ENSG00000115414,FN1
9,ENSG00000156886,ITGAD


In [6]:
# Empty accumulator for the scraped gRNAs. predict_gRNA() appends to this global
# dataframe, so it has to exist before the first call.
# NOTE(review): predict_gRNA() fills 'GENE ID' / 'GENE NAME' (upper case); the two
# mixed-case columns declared here are dropped again after scraping.
predicted_columns = ['gRNA sequence', 'Gene ID', 'Gene Name']
gRNA_predicted = pd.DataFrame(columns=predicted_columns)

In [7]:
#call function to retrieve predicted gRNAs
#(one headless Firefox session is started per gene and each page gets a fixed loading wait, so this cell is slow)
predict_gRNA(df_genes_id)

In [8]:
#check if all genes have predicted gRNAs - if not, re-run predict_gRNA for the affected genes
#until all genes have predicted gRNAs (or the retry budget is used up)
MAX_SCRAPE_RETRIES = 5

retries_left = MAX_SCRAPE_RETRIES
while gRNA_predicted['gRNA sequence'].isnull().any():
    missing = gRNA_predicted['gRNA sequence'].isnull()
    print(gRNA_predicted[missing])
    if retries_left == 0:
        raise RuntimeError("gRNAs still missing after " + str(MAX_SCRAPE_RETRIES)
                           + " retries - check the Synthego page / increase the page load wait")
    retries_left -= 1
    #one row per gene that still lacks at least one gRNA
    nan_rows = gRNA_predicted.loc[missing, ['GENE ID', 'GENE NAME']].drop_duplicates(ignore_index=True)
    #predict_gRNA only appends rows, so every row of the failed genes has to be removed first;
    #otherwise the null rows stay in the dataframe and this loop can never finish
    gRNA_predicted = gRNA_predicted[~gRNA_predicted['GENE ID'].isin(nan_rows['GENE ID'])]
    predict_gRNA(nan_rows)

#should print an empty dataframe
print(gRNA_predicted[gRNA_predicted['gRNA sequence'].isnull()])

Empty DataFrame
Columns: [gRNA sequence, Gene ID, Gene Name, GENE ID, GENE NAME]
Index: []


In [9]:
# Tidy up: remove the two mixed-case ID/name columns.
gRNA_predicted = gRNA_predicted.drop(columns=['Gene ID', 'Gene Name'])
# Write the predicted gRNAs to "<project_id>_gRNA_predicted.csv".
predicted_csv_path = project_id + '_gRNA_predicted.csv'
gRNA_predicted.to_csv(predicted_csv_path)
# Visual check of the first rows.
gRNA_predicted.head(10)

Unnamed: 0,gRNA sequence,GENE ID,GENE NAME
IDS-gRNA1,GAACGUUCUUCUCAUCAUCG,ENSG00000010404,IDS
IDS-gRNA2,CCCCAUAACAGCCCAGGGAG,ENSG00000010404,IDS
IDS-gRNA3,GAUGACCUGCGCCCCUCCCU,ENSG00000010404,IDS
IDS-gRNA4,CUUAUCCCCAUAACAGCCCA,ENSG00000010404,IDS
CCDC83-gRNA1,AAAUCUGUUCUUCUUUUAAG,ENSG00000150676,CCDC83
CCDC83-gRNA2,GCUUAAAAGAAGAACAGAUU,ENSG00000150676,CCDC83
CCDC83-gRNA3,AAGAACAGAUUUGGCACAUA,ENSG00000150676,CCDC83
CCDC83-gRNA4,GCACAUACGGCAUCUACUAA,ENSG00000150676,CCDC83
MAST3-gRNA1,CCCGGGAGGAUGAGCUUGAC,ENSG00000099308,MAST3
MAST3-gRNA2,ACCAGUCAAGCUCAUCCUCC,ENSG00000099308,MAST3


In [10]:
# DNA-alphabet copy of every guide: each U is swapped for a T.
rna_guides = gRNA_predicted["gRNA sequence"]
gRNA_predicted["gRNA_sequence_T"] = rna_guides.str.replace("U",'T')
# This column will hold the guide as it reads along the gene sequence; it starts out
# as a plain copy and is overwritten later for guides that only match as reverse complement.
gRNA_predicted["gRNA_sequence_T_forward_rel_to_gene"] = gRNA_predicted["gRNA_sequence_T"]

# Visual check of the first rows.
gRNA_predicted.head(10)

Unnamed: 0,gRNA sequence,GENE ID,GENE NAME,gRNA_sequence_T,gRNA_sequence_T_forward_rel_to_gene
IDS-gRNA1,GAACGUUCUUCUCAUCAUCG,ENSG00000010404,IDS,GAACGTTCTTCTCATCATCG,GAACGTTCTTCTCATCATCG
IDS-gRNA2,CCCCAUAACAGCCCAGGGAG,ENSG00000010404,IDS,CCCCATAACAGCCCAGGGAG,CCCCATAACAGCCCAGGGAG
IDS-gRNA3,GAUGACCUGCGCCCCUCCCU,ENSG00000010404,IDS,GATGACCTGCGCCCCTCCCT,GATGACCTGCGCCCCTCCCT
IDS-gRNA4,CUUAUCCCCAUAACAGCCCA,ENSG00000010404,IDS,CTTATCCCCATAACAGCCCA,CTTATCCCCATAACAGCCCA
CCDC83-gRNA1,AAAUCUGUUCUUCUUUUAAG,ENSG00000150676,CCDC83,AAATCTGTTCTTCTTTTAAG,AAATCTGTTCTTCTTTTAAG
CCDC83-gRNA2,GCUUAAAAGAAGAACAGAUU,ENSG00000150676,CCDC83,GCTTAAAAGAAGAACAGATT,GCTTAAAAGAAGAACAGATT
CCDC83-gRNA3,AAGAACAGAUUUGGCACAUA,ENSG00000150676,CCDC83,AAGAACAGATTTGGCACATA,AAGAACAGATTTGGCACATA
CCDC83-gRNA4,GCACAUACGGCAUCUACUAA,ENSG00000150676,CCDC83,GCACATACGGCATCTACTAA,GCACATACGGCATCTACTAA
MAST3-gRNA1,CCCGGGAGGAUGAGCUUGAC,ENSG00000099308,MAST3,CCCGGGAGGATGAGCTTGAC,CCCGGGAGGATGAGCTTGAC
MAST3-gRNA2,ACCAGUCAAGCUCAUCCUCC,ENSG00000099308,MAST3,ACCAGTCAAGCTCATCCTCC,ACCAGTCAAGCTCATCCTCC


In [11]:
#REST API python3 Ensembl
#NOTE: this rebinds `server` (it held the pybiomart Server object before); `dataset` has already
#been created from that object at this point
server = "https://rest.ensembl.org"
#initiate dict to store gene sequences for all genes in gRNA_predicted (key: Ensembl gene ID)
gene_sequences = {}

# get unique gene IDs from gRNA_predicted
unique_gene_IDs = gRNA_predicted['GENE ID'].unique()

for entry in unique_gene_IDs:
    ext = "/sequence/id/" + entry +"?"
    # retrieve plain text gene sequence; the timeout keeps a stalled connection from
    # blocking the notebook forever
    r = requests.get(server+ext, headers={ "Content-Type" : "text/plain"}, timeout=60)
    # raises requests.HTTPError on any 4xx/5xx answer, which stops the cell
    r.raise_for_status()
    gene_sequences[entry] = r.text
#pd.DataFrame.from_dict(gene_sequences, orient='index')

In [12]:
# First pass: look for every DNA-alphabet guide in the sequence of its gene, exactly as given.
# str.find yields the 0-based start offset of the match, or -1 when the guide is not a substring.
for_rev_orientation_gRNA = [
    gene_sequences[gene_id].find(guide)
    for gene_id, guide in zip(gRNA_predicted["GENE ID"], gRNA_predicted["gRNA_sequence_T"])
]
print(for_rev_orientation_gRNA)

[16810, -1, 16832, -1, -1, 27419, 27428, 27441, -1, 25434, 25435, -1, 855, 863, -1, -1, -1, 38207, -1, -1, -1, 21547, 21605, 21616, -1, 12150, 12153, 12167, 2107, 2113, -1, 2121, -1, 1245, 1273, 1274, 884, -1, -1, 908, -1, 994, -1, 1010, -1, 66046, -1, 66102, 971, -1, -1, -1, 491, -1, -1, 524]


In [13]:
# keep the first-pass match offsets; -1 marks guides that were not found in the given
# orientation (those are searched again as reverse complement afterwards)
gRNA_predicted['gRNA reverse (-1 = True)'] = for_rev_orientation_gRNA
#gRNA_predicted

In [14]:
#In case of output -1 in for_rev_orientation_gRNA use reverse complement gRNA to search for match
#rows are selected by column name and written through .loc: chained assignment
#(df[col].iloc[i] = ...) is not guaranteed to reach the dataframe
not_found_forward = (gRNA_predicted['gRNA reverse (-1 = True)'] == -1).to_numpy()
if not_found_forward.any():
    reverse_complements = [
        str(Seq(guide).reverse_complement())
        for guide in gRNA_predicted.loc[not_found_forward, "gRNA_sequence_T"]
    ]
    #a plain list is assigned positionally, so duplicate index labels cannot misalign the values
    gRNA_predicted.loc[not_found_forward, "gRNA_sequence_T_forward_rel_to_gene"] = reverse_complements


In [15]:
# Second pass: repeat the search with the forward-oriented guides (reverse complement where
# the first pass failed). str.find yields the 0-based start offset, or -1 if there is no match.
for_orientation_gRNA = [
    gene_sequences[gene_id].find(forward_guide)
    for gene_id, forward_guide in zip(gRNA_predicted["GENE ID"],
                                      gRNA_predicted["gRNA_sequence_T_forward_rel_to_gene"])
]
print(for_orientation_gRNA)

[16810, 16846, 16832, 16851, 27420, 27419, 27428, 27441, 25438, 25434, 25435, 25452, 855, 863, 893, 894, 38215, 38207, 38225, 38237, 21536, 21547, 21605, 21616, 12164, 12150, 12153, 12167, 2107, 2113, 2135, 2121, 1252, 1245, 1273, 1274, 884, 898, 905, 908, 1002, 994, 1012, 1010, 66060, 66046, 66081, 66102, 971, 1003, 1018, 1019, 491, 526, 532, 524]


In [16]:
# 0-based offset of each forward-oriented guide within its gene sequence
# (-1 would mean the guide was found in neither orientation)
gRNA_predicted["Forward position gRNA"] = for_orientation_gRNA

In [17]:
#Extract gRNA flanking sequences for primer design: up to 250 nt before and 250 nt from the
#gRNA start position onwards
FLANK_NT = 250
#list to hold one template sequence per gRNA, in the row order of gRNA_predicted
sequences_for_primer_design = []

for row in range(len(gRNA_predicted)):
    position = gRNA_predicted['Forward position gRNA'].iloc[row]
    key = gRNA_predicted["GENE ID"].iloc[row]
    if position < 0:
        #-1 means the gRNA was not located on the gene sequence; slicing around it would
        #silently produce a meaningless template
        raise ValueError("gRNA " + str(gRNA_predicted.index[row]) + " was not found in the sequence of "
                         + str(key) + " - cannot extract a primer design template")
    value = gene_sequences[key]
    #clamp at 0: a negative start would make the slice count from the END of the sequence
    beginning = max(position - FLANK_NT, 0)
    end = position + FLANK_NT
    seq_slice = value[beginning:end]
    sequences_for_primer_design.append(seq_slice)
#sanity check list
#sequences_for_primer_design

In [18]:
# attach the flanking template sequence of every gRNA (list is in the row order of gRNA_predicted)
gRNA_predicted['Seq for primer design'] = sequences_for_primer_design
gRNA_predicted.head(10)

Unnamed: 0,gRNA sequence,GENE ID,GENE NAME,gRNA_sequence_T,gRNA_sequence_T_forward_rel_to_gene,gRNA reverse (-1 = True),Forward position gRNA,Seq for primer design
IDS-gRNA1,GAACGUUCUUCUCAUCAUCG,ENSG00000010404,IDS,GAACGTTCTTCTCATCATCG,GAACGTTCTTCTCATCATCG,16810,16810,CACTCACTATCTCGCTTCCTCACCCAGGTTGGAGGCCATGACCAAA...
IDS-gRNA2,CCCCAUAACAGCCCAGGGAG,ENSG00000010404,IDS,CCCCATAACAGCCCAGGGAG,CTCCCTGGGCTGTTATGGGG,-1,16846,CATGACCAAAGCCTAACCCTGCCACCCAGGACTCAGGCTTCCTCCT...
IDS-gRNA3,GAUGACCUGCGCCCCUCCCU,ENSG00000010404,IDS,GATGACCTGCGCCCCTCCCT,GATGACCTGCGCCCCTCCCT,16832,16832,CCCAGGTTGGAGGCCATGACCAAAGCCTAACCCTGCCACCCAGGAC...
IDS-gRNA4,CUUAUCCCCAUAACAGCCCA,ENSG00000010404,IDS,CTTATCCCCATAACAGCCCA,TGGGCTGTTATGGGGATAAG,-1,16851,CCAAAGCCTAACCCTGCCACCCAGGACTCAGGCTTCCTCCTCGAGC...
CCDC83-gRNA1,AAAUCUGUUCUUCUUUUAAG,ENSG00000150676,CCDC83,AAATCTGTTCTTCTTTTAAG,CTTAAAAGAAGAACAGATTT,-1,27420,TTTAATAATGTCATAAGACTCTAGATCTTATTTGTAATCTTCTGTT...
CCDC83-gRNA2,GCUUAAAAGAAGAACAGAUU,ENSG00000150676,CCDC83,GCTTAAAAGAAGAACAGATT,GCTTAAAAGAAGAACAGATT,27419,27419,TTTTAATAATGTCATAAGACTCTAGATCTTATTTGTAATCTTCTGT...
CCDC83-gRNA3,AAGAACAGAUUUGGCACAUA,ENSG00000150676,CCDC83,AAGAACAGATTTGGCACATA,AAGAACAGATTTGGCACATA,27428,27428,TGTCATAAGACTCTAGATCTTATTTGTAATCTTCTGTTTTACCTAG...
CCDC83-gRNA4,GCACAUACGGCAUCUACUAA,ENSG00000150676,CCDC83,GCACATACGGCATCTACTAA,GCACATACGGCATCTACTAA,27441,27441,TAGATCTTATTTGTAATCTTCTGTTTTACCTAGCTTTTACTGGCAT...
MAST3-gRNA1,CCCGGGAGGAUGAGCUUGAC,ENSG00000099308,MAST3,CCCGGGAGGATGAGCTTGAC,GTCAAGCTCATCCTCCCGGG,-1,25438,TCCTCAGCCTATGGGGTCAGGGCCTGACCAGAGGGGGACTTGGGGC...
MAST3-gRNA2,ACCAGUCAAGCUCAUCCUCC,ENSG00000099308,MAST3,ACCAGTCAAGCTCATCCTCC,ACCAGTCAAGCTCATCCTCC,25434,25434,GATATCCTCAGCCTATGGGGTCAGGGCCTGACCAGAGGGGGACTTG...


In [19]:
#use index to make column named guideRNA name
#(index labels have the form "<gene symbol>-gRNA<n>"; the column is used as primer3 SEQUENCE_ID)
gRNA_predicted['gRNA name'] = gRNA_predicted.index

In [20]:
# Primer3 settings used for every design run (identical for all gRNAs).
primer3_global_args = {
    'PRIMER_OPT_SIZE': 20,
    'PRIMER_PICK_INTERNAL_OLIGO': 1,
    'PRIMER_INTERNAL_MAX_SELF_END': 8,
    'PRIMER_MIN_SIZE': 18,
    'PRIMER_MAX_SIZE': 25,
    'PRIMER_OPT_TM': 60.0,
    'PRIMER_MIN_TM': 57.0,
    'PRIMER_MAX_TM': 63.0,
    'PRIMER_MIN_GC': 20.0,
    'PRIMER_MAX_GC': 80.0,
    'PRIMER_MAX_POLY_X': 100,
    'PRIMER_INTERNAL_MAX_POLY_X': 100,
    'PRIMER_SALT_MONOVALENT': 50.0,
    'PRIMER_DNA_CONC': 50.0,
    'PRIMER_MAX_NS_ACCEPTED': 0,
    'PRIMER_MAX_SELF_ANY': 12,
    'PRIMER_MAX_SELF_END': 8,
    'PRIMER_PAIR_MAX_COMPL_ANY': 12,
    'PRIMER_PAIR_MAX_COMPL_END': 8,
    'PRIMER_PRODUCT_SIZE_RANGE': [[375, 500]],
}

# Run primer3 once per gRNA on its flanking template; results are keyed by gRNA name.
primer_designed = {}
for guide_name, template in zip(gRNA_predicted['gRNA name'],
                                gRNA_predicted['Seq for primer design']):
    seq_dict = {
        'SEQUENCE_ID': guide_name,
        'SEQUENCE_TEMPLATE': template,
    }
    primer_designed[guide_name] = primer3.designPrimers(seq_dict, primer3_global_args)

In [21]:
# Turn the primer3 result dicts into a table (one row per gRNA) and keep only the
# fields of the first primer pair (suffix _0) that are reported further on.
primer_columns = [
    "PRIMER_LEFT_0_SEQUENCE",
    "PRIMER_RIGHT_0_SEQUENCE",
    "PRIMER_PAIR_0_PRODUCT_SIZE",
    "PRIMER_LEFT_0",
    "PRIMER_RIGHT_0",
    "PRIMER_INTERNAL_0",
    "PRIMER_LEFT_0_TM",
    "PRIMER_RIGHT_0_TM",
]
primer_df = pd.DataFrame.from_dict(primer_designed, orient='index')[primer_columns]
primer_df.head(10)

Unnamed: 0,PRIMER_LEFT_0_SEQUENCE,PRIMER_RIGHT_0_SEQUENCE,PRIMER_PAIR_0_PRODUCT_SIZE,PRIMER_LEFT_0,PRIMER_RIGHT_0,PRIMER_INTERNAL_0,PRIMER_LEFT_0_TM,PRIMER_RIGHT_0_TM
IDS-gRNA1,TGACCAAAGCCTAACCCTGC,CACACACCCACAGCTAGAGG,375,"(38, 20)","(412, 20)","(71, 20)",60.251165,60.036952
IDS-gRNA2,TGACCAAAGCCTAACCCTGC,CACACACCCACAGCTAGAGG,375,"(2, 20)","(376, 20)","(35, 20)",60.251165,60.036952
IDS-gRNA3,TGACCAAAGCCTAACCCTGC,CACACACCCACAGCTAGAGG,375,"(16, 20)","(390, 20)","(49, 20)",60.251165,60.036952
IDS-gRNA4,AGGACTCAGGCTTCCTCCTC,AGATGTCCCGCACAATCTGT,424,"(22, 20)","(445, 20)","(74, 20)",60.325203,59.384669
CCDC83-gRNA1,AAGTCCAGGCTTCCCACTTG,ACAGCATTTTGAACATATTGGAAGA,381,"(119, 20)","(499, 25)","(143, 22)",59.888747,57.769151
CCDC83-gRNA2,TTGCTCTGCCAGGGGAAGTA,TGGCACAACTCTATGATAGTTTTCT,379,"(67, 20)","(445, 25)","(121, 21)",60.840288,58.288723
CCDC83-gRNA3,AAGTCCAGGCTTCCCACTTG,ACAGCATTTTGAACATATTGGAAGA,381,"(111, 20)","(491, 25)","(135, 22)",59.888747,57.769151
CCDC83-gRNA4,CCCACTTGCCTTCCTGTGAT,GCCGTTTAAAATACACTTAACAGCA,388,"(110, 20)","(497, 25)","(130, 21)",59.961256,58.855825
MAST3-gRNA1,TGCATAGGAGCTAGGCAGGA,GTGAAGGAGGAGCCACAACT,375,"(112, 20)","(486, 20)","(194, 20)",60.105504,59.601682
MAST3-gRNA2,TGCATAGGAGCTAGGCAGGA,GTGAAGGAGGAGCCACAACT,375,"(116, 20)","(490, 20)","(198, 20)",60.105504,59.601682


In [23]:
# Combine gRNA data and primers (both are indexed by gRNA name) and remove the helper columns.
gRNA_primer_df = (
    gRNA_predicted
    .join(primer_df)
    .drop(columns=["gRNA name", "gRNA reverse (-1 = True)"])
)
# Report version without the long template sequences.
gRNA_primer_df_wo_seq = gRNA_primer_df.drop(columns=["Seq for primer design"])
gRNA_primer_df_wo_seq.head(10)

Unnamed: 0,gRNA sequence,GENE ID,GENE NAME,gRNA_sequence_T,gRNA_sequence_T_forward_rel_to_gene,Forward position gRNA,PRIMER_LEFT_0_SEQUENCE,PRIMER_RIGHT_0_SEQUENCE,PRIMER_PAIR_0_PRODUCT_SIZE,PRIMER_LEFT_0,PRIMER_RIGHT_0,PRIMER_INTERNAL_0,PRIMER_LEFT_0_TM,PRIMER_RIGHT_0_TM
IDS-gRNA1,GAACGUUCUUCUCAUCAUCG,ENSG00000010404,IDS,GAACGTTCTTCTCATCATCG,GAACGTTCTTCTCATCATCG,16810,TGACCAAAGCCTAACCCTGC,CACACACCCACAGCTAGAGG,375,"(38, 20)","(412, 20)","(71, 20)",60.251165,60.036952
IDS-gRNA2,CCCCAUAACAGCCCAGGGAG,ENSG00000010404,IDS,CCCCATAACAGCCCAGGGAG,CTCCCTGGGCTGTTATGGGG,16846,TGACCAAAGCCTAACCCTGC,CACACACCCACAGCTAGAGG,375,"(2, 20)","(376, 20)","(35, 20)",60.251165,60.036952
IDS-gRNA3,GAUGACCUGCGCCCCUCCCU,ENSG00000010404,IDS,GATGACCTGCGCCCCTCCCT,GATGACCTGCGCCCCTCCCT,16832,TGACCAAAGCCTAACCCTGC,CACACACCCACAGCTAGAGG,375,"(16, 20)","(390, 20)","(49, 20)",60.251165,60.036952
IDS-gRNA4,CUUAUCCCCAUAACAGCCCA,ENSG00000010404,IDS,CTTATCCCCATAACAGCCCA,TGGGCTGTTATGGGGATAAG,16851,AGGACTCAGGCTTCCTCCTC,AGATGTCCCGCACAATCTGT,424,"(22, 20)","(445, 20)","(74, 20)",60.325203,59.384669
CCDC83-gRNA1,AAAUCUGUUCUUCUUUUAAG,ENSG00000150676,CCDC83,AAATCTGTTCTTCTTTTAAG,CTTAAAAGAAGAACAGATTT,27420,AAGTCCAGGCTTCCCACTTG,ACAGCATTTTGAACATATTGGAAGA,381,"(119, 20)","(499, 25)","(143, 22)",59.888747,57.769151
CCDC83-gRNA2,GCUUAAAAGAAGAACAGAUU,ENSG00000150676,CCDC83,GCTTAAAAGAAGAACAGATT,GCTTAAAAGAAGAACAGATT,27419,TTGCTCTGCCAGGGGAAGTA,TGGCACAACTCTATGATAGTTTTCT,379,"(67, 20)","(445, 25)","(121, 21)",60.840288,58.288723
CCDC83-gRNA3,AAGAACAGAUUUGGCACAUA,ENSG00000150676,CCDC83,AAGAACAGATTTGGCACATA,AAGAACAGATTTGGCACATA,27428,AAGTCCAGGCTTCCCACTTG,ACAGCATTTTGAACATATTGGAAGA,381,"(111, 20)","(491, 25)","(135, 22)",59.888747,57.769151
CCDC83-gRNA4,GCACAUACGGCAUCUACUAA,ENSG00000150676,CCDC83,GCACATACGGCATCTACTAA,GCACATACGGCATCTACTAA,27441,CCCACTTGCCTTCCTGTGAT,GCCGTTTAAAATACACTTAACAGCA,388,"(110, 20)","(497, 25)","(130, 21)",59.961256,58.855825
MAST3-gRNA1,CCCGGGAGGAUGAGCUUGAC,ENSG00000099308,MAST3,CCCGGGAGGATGAGCTTGAC,GTCAAGCTCATCCTCCCGGG,25438,TGCATAGGAGCTAGGCAGGA,GTGAAGGAGGAGCCACAACT,375,"(112, 20)","(486, 20)","(194, 20)",60.105504,59.601682
MAST3-gRNA2,ACCAGUCAAGCUCAUCCUCC,ENSG00000099308,MAST3,ACCAGTCAAGCTCATCCTCC,ACCAGTCAAGCTCATCCTCC,25434,TGCATAGGAGCTAGGCAGGA,GTGAAGGAGGAGCCACAACT,375,"(116, 20)","(490, 20)","(198, 20)",60.105504,59.601682


In [25]:
# save the combined gRNA / primer table (without template sequences) as "<project_id>_gRNA_primer.csv"
gRNA_primer_df_wo_seq.to_csv(project_id + '_gRNA_primer.csv')