### Notebook to scrape gRNA predictions and design primers around the gRNA cut site
#### Requires gpe-module

In [None]:
import gpe_module as gpe
import pandas as pd
# Allow up to 500 rows to be shown when a DataFrame is displayed in the notebook.
pd.set_option('display.max_rows', 500)

### Insert the base name of the csv-file, without the '.csv' extension (for example - 'project_test' for 'project_test.csv').
##### The csv-file needs to contain a column with Ensembl gene IDs, labeled 'GENE ID'

In [None]:
# Base name of the run: the cells below read gene_id_scanned + '.csv' as input
# and prefix every output csv-file with this value.
gene_id_scanned = 'ZFAT'
# Disabled alternative that named the input csv-file directly.
#name_of_csv_file = 'ENSG00000130234_project_test.csv'

#### Execute functions

In [None]:
# Build a DataFrame of gene sequences for the IDs listed in the input csv-file
# ('<gene_id_scanned>.csv') and save it as '<gene_id_scanned>_gen_seq_id.csv'.
df_gen_seq_and_id = gpe.extract_gene_seq_from_ens_id(f"{gene_id_scanned}.csv")
df_gen_seq_and_id.to_csv(f"{gene_id_scanned}_gen_seq_id.csv")

In [None]:
#create df with gene name from ids supplied in csv file
#df_gen_name_and_id, df_no_gen_name_found_for_id = gpe.extract_gene_name_from_id(gene_id_scanned)

In [None]:
# Load the input csv-file and construct the Synthego URLs from its gene IDs and gene names.
df_gen_name_and_id = pd.read_csv(f"{gene_id_scanned}.csv")
df_url_and_id = gpe.construct_synthego_urls_gene_names_ids(df_gen_name_and_id)

In [None]:
# Save the constructed URLs as '<gene_id_scanned>_urls.csv'.
df_url_and_id.to_csv(f"{gene_id_scanned}_urls.csv")

In [None]:
# Scrape the predicted gRNAs for every URL. The call returns the DataFrame of
# predictions plus a second value (no_gRNA_predicted) that is written to its
# own csv-file at the end of the notebook.
# The geckodriver path is a raw string: in a normal string literal '\g' is an
# invalid escape sequence, which triggers a warning on current Python versions.
# The path value itself is unchanged.
df_gRNA_predicted_and_id, no_gRNA_predicted = gpe.predict_gRNA_from_urls(
    df_url_and_id,
    path_to_gecko=r'D:\geckodriver\geckodriver.exe',
)
df_gRNA_predicted_and_id.to_csv(gene_id_scanned + '_predicted_gRNA_id.csv')

In [None]:
# Display the scraped gRNA predictions for visual inspection.
df_gRNA_predicted_and_id

In [None]:
# Attach the gene sequence to every predicted gRNA. The inner join on
# 'GENE ID' keeps only genes present in both DataFrames.
df_gRNA_predicted_and_gen_seq = df_gRNA_predicted_and_id.merge(
    df_gen_seq_and_id,
    on='GENE ID',
    how='inner',
)

In [None]:
# Remove rows whose 'PREDICTED GRNA' entry is not a gRNA sequence: entries
# containing the text 'SYNTHEGO' or 'V1.3', and entries with a missing value.
# regex=False matches both markers literally (as a regex, the '.' in 'V1.3'
# would match any character). na=True flags missing values as matches so they
# are dropped as well, which is what the previous '== False' comparison did.
df_gRNA_predicted_and_gen_seq_curated = df_gRNA_predicted_and_gen_seq[
    ~(
        df_gRNA_predicted_and_gen_seq['PREDICTED GRNA'].str.contains('SYNTHEGO', regex=False, na=True)
        | df_gRNA_predicted_and_gen_seq['PREDICTED GRNA'].str.contains('V1.3', regex=False, na=True)
    )
]

In [None]:
# Run gpe.gRNA_hybridisation on the curated gRNA/sequence table.
# NOTE(review): presumably this locates each gRNA within its gene sequence and
# records its strand orientation — confirm against gpe_module.
df_gRNA_orientation = gpe.gRNA_hybridisation(df_gRNA_predicted_and_gen_seq_curated)

In [None]:
# Design primers around each gRNA. The call returns two tables: the designed
# primers and the entries for which no primers were designed.
# NOTE(review): presumably backed by Primer3, judging by the function name — confirm in gpe_module.
df_primer_design, df_no_primer_design = gpe.primer3_primer_around_gRNA(df_gRNA_orientation)

#### Tidy up the final df and save the results as csv-files

In [None]:
# Save the gRNA table and the table of entries without designed primers.
df_gRNA_orientation.to_csv(f"{gene_id_scanned}_project_gRNAs.csv")
df_no_primer_design.to_csv(f"{gene_id_scanned}_project_no_primers_designed.csv")

In [None]:
# Combine each gRNA with its designed primers (inner join on 'GRNA NAME'),
# drop the columns not wanted in the final table, and save the result.
final_gRNA_primer_df = (
    df_gRNA_orientation
    .merge(df_primer_design, on='GRNA NAME', how='inner')
    .drop(columns=["GRNA CODING STRAND", "PREDICTED GRNA_T", "GENE SEQUENCES", "SEQ SLICE"])
)
final_gRNA_primer_df.to_csv(f"{gene_id_scanned}_gRNA_primer_final.csv")
# Preview the first 25 rows of the final table.
final_gRNA_primer_df.head(25)

In [None]:
# If any entries came back without a predicted gRNA, put them into a
# single-column DataFrame ('GENE NAME') and save it as a csv-file.
if no_gRNA_predicted:
    df_no_gRNA_predicted = pd.DataFrame(data=no_gRNA_predicted, columns=['GENE NAME'])
    df_no_gRNA_predicted.to_csv(f"{gene_id_scanned}_no_gRNA_predicted_id.csv")