### Notebook to scrape gRNA predictions and design primers around the gRNA cut site
#### Requires gpe-module

In [1]:
from pathlib import Path

import pandas as pd

import gpe_module as gpe

### Insert the name of the csv file (for example, 'project_test.csv').
##### The csv file must contain a column of Ensembl gene IDs, labeled 'GENE ID'.

In [2]:
# Input csv file; this name is handed to the gpe extraction functions and is reused
# when naming the saved results file.
name_of_csv_file = "project_test.csv"

#### Execute functions

In [3]:
# Path to the geckodriver executable handed to the gRNA scraping step.
# Written as a raw string so the Windows backslashes stay literal: in a normal
# string `\A` and `\g` are invalid escape sequences and trigger a warning.
# NOTE(review): this is a machine-specific absolute path - adjust it for your setup.
path_to_geckodriver = r'D:\Alex\geckodriver-v0.31.0-win64\geckodriver.exe'

# create df with sequences from ids supplied in csv file
df_gen_seq_and_id = gpe.extract_gene_seq_from_ens_id(name_of_csv_file)
# create df with gene name from ids supplied in csv file
df_gen_name_and_id = gpe.extract_gene_name_from_id(name_of_csv_file)
# create Synthego URLs from gene IDs and gene names
df_url_and_id = gpe.construct_synthego_urls_gene_names_ids(df_gen_name_and_id)
# create df with scraped gRNAs (progress is printed per gene while this runs)
df_gRNA_predicted_and_id = gpe.predict_gRNA_from_urls(df_url_and_id, path_to_gecko=path_to_geckodriver)
# merge df with predicted gRNAs and gene sequence; the inner join keeps only
# 'GENE ID' values present in both frames
df_gRNA_predicted_and_gen_seq = pd.merge(df_gRNA_predicted_and_id, df_gen_seq_and_id, on='GENE ID', how='inner')
# create df with information of location and orientation of gRNA (this can be used for primer design)
df_gRNA_orientation = gpe.gRNA_hybridisation(df_gRNA_predicted_and_gen_seq)
df_label_gRNA = gpe.add_count_to_gRNA(df_gRNA_orientation)
# NOTE(review): the recorded run of this cell ended with
# "OSError: SEQUENCE_INCLUDED_REGION length < min PRIMER_PRODUCT_SIZE_RANGE";
# presumably raised in this primer design step - confirm and handle in gpe_module.
df_primer_design = gpe.primer3_primer_around_gRNA(df_label_gRNA)

Successfully processed csv file
Successfully retrieved gene sequences from IDs
Successfully processed csv file
Successfully identified gene names from IDs
Successfully constructed URLs from gene names and gene id's for scraping predicted gRNAs
Retrieving gRNAs for 14 gene(s) 
Working...
Remaining gRNA predictions: 13 genes 
Working...
Remaining gRNA predictions: 12 genes 
Working...
Remaining gRNA predictions: 11 genes 
Working...
Remaining gRNA predictions: 10 genes 
Working...
Remaining gRNA predictions: 9 genes 
Working...
Remaining gRNA predictions: 8 genes 
Working...
Remaining gRNA predictions: 7 genes 
Working...
Remaining gRNA predictions: 6 genes 
Working...
Remaining gRNA predictions: 5 genes 
Working...
Remaining gRNA predictions: 4 genes 
Working...
Remaining gRNA predictions: 3 genes 
Working...
Remaining gRNA predictions: 2 genes 
Working...
Retrieving gRNA predictions for last gene 
...almost there...
Predictions succesfully completed!
Starting gRNA analyses...
Analyses 

OSError: SEQUENCE_INCLUDED_REGION length < min PRIMER_PRODUCT_SIZE_RANGE

#### Tidy up the final df and save the results as a csv file

In [None]:
# Join the gRNA table with the primer design table on 'GRNA NAME' (inner join) and
# drop the intermediate columns that are not kept in the saved result.
final_gRNA_primer_df = (
    pd.merge(df_label_gRNA, df_primer_design, on='GRNA NAME', how='inner')
    .drop(columns=["GRNA CODING STRAND", "PREDICTED GRNA_T", "GENE SEQUENCES", "SEQ SLICE"])
)

# Derive the output name from the input name WITHOUT its extension, so the result is
# 'project_test_gRNA_primer_final.csv' rather than 'project_test.csv_gRNA_primer_final.csv'.
# with_name keeps the file next to the input csv if a directory was given.
input_csv_path = Path(name_of_csv_file)
output_csv_path = input_csv_path.with_name(input_csv_path.stem + '_gRNA_primer_final.csv')
final_gRNA_primer_df.to_csv(output_csv_path)

# Preview the first rows of the saved table.
final_gRNA_primer_df.head(15)