# DETECTR 005 Analysis
This notebook formats, analyzes, and plots the DETECTR_005 data, as practice for analyzing DETECTR data with this Python script (created by AL).

## Objective
Comparing DNAse vs non-DNAse treatment on guide RNAs. 

## Formatting to CSV file
Take the .txt file exported from the plate reader and convert it to tidy-data format.

In [None]:
#import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statistics

import holoviews as hv
import bokeh
import hvplot.pandas

In [None]:
#read in the raw plate-reader export: UTF-16 encoded, tab-separated, no header row,
#skipping the first three lines of the file
#NOTE: only `sep` is passed; `delimiter` is an alias for it and recent pandas versions
#raise "Specified a sep and a delimiter; you can only specify one." when both are given
DETECTR_raw = pd.read_csv(
    "../raw_txt_files/DETECTR_005.txt",
    encoding="utf-16",
    sep="\t",
    index_col=None,
    skiprows=3,
    header=None,
)

In [None]:
#collect the row labels whose first column holds the '~End' marker,
#then show the first one (used below as the cut-off row)
end_indices = DETECTR_raw.loc[DETECTR_raw[0] == '~End'].index.tolist()
end_indices[0]

In [None]:
#remove the unnecessary rows at the end of the file: the first '~End' marker row and everything after it
#what remains is the part of the file that carries the time information
DETECTR_raw = DETECTR_raw.drop(
    index=list(range(end_indices[0], len(DETECTR_raw)))
)

In [None]:
#keep only the columns at positions 0, 6 and 7
#column 0 is where the read time is taken from later on; 6 and 7 are the two data columns used in this analysis
#(presumably plate columns 5 and 6, since the tidy step records them as j-1 - TODO confirm against the plate layout)
DETECTR_raw = DETECTR_raw.iloc[:,[0,6,7]]

In [None]:
#how many plate reads did we have? each read is handled as a block of 16 rows (plate rows A-P)
#left as a true division so that an incomplete read shows up as a fraction
DETECTR_raw.shape[0] / 16

In [None]:
#store the number of complete 16-row plate reads as an integer
num_plate_reads = len(DETECTR_raw) // 16

In [None]:
#map the 1-based plate row number (1-16) to its row letter
#384-well layout: every row has its own letter, A through P
well_dict_384 = dict(enumerate("ABCDEFGHIJKLMNOP", start=1))
#96-well layout: two consecutive 384-well rows share one letter, A through H
well_dict_96 = {row_number: "ABCDEFGH"[(row_number - 1) // 2] for row_number in range(1, 17)}

In [None]:
#start the tidy data frame empty, with one column each for the read time,
#the 384-well row letter, the plate column and the RFU value
DETECTR_cleaned = pd.DataFrame(
    columns=["time", "row_384", "column_384", "rfu_value"],
)

In [None]:
#preview the first ten rows of the trimmed raw data before reshaping it
DETECTR_raw.head(n=10)

In [None]:
#iterate over each plate read and collect one tidy record per well:
#the read time, the 384-well row letter, the plate column and the RFU value
#the records are gathered in a list and the frame is built once at the end, because
#appending with .loc[len(df)] copies the frame on every row and, on a re-run of this
#cell, would add duplicate rows to the existing frame
tidy_records = []
for read_number in range(num_plate_reads):
    first_row = read_number * 16
    #the time stamp is taken from the first row of each 16-row block
    time = DETECTR_raw.iloc[first_row, 0]
    for raw_column in (6, 7):
        for raw_row in range(first_row, first_row + 16):
            tidy_records.append([
                time,
                well_dict_384[raw_row % 16 + 1],      #row letter A-P within the read
                raw_column - 1,                        #recorded plate column is the raw column label minus one
                DETECTR_raw.loc[raw_row, raw_column],  #RFU value, looked up by row/column label
            ])

DETECTR_cleaned = pd.DataFrame(
    tidy_records,
    columns=['time', 'row_384', 'column_384', 'rfu_value'],
)

In [None]:
#how many rows in our data frame now?
#the tidy step writes 16 rows x 2 columns per read, so this should be 32 * number of plate reads
DETECTR_cleaned.shape[0]

In [None]:
#save the tidy data frame as a CSV file (the row index is written as the first column)
DETECTR_cleaned.to_csv(path_or_buf="../tidy_data/DETECTR_005_tidy.csv")

In [None]:
#display the tidy data frame to check the result of the reshaping step
DETECTR_cleaned

## Filter the data frame
Select only the wells we are interested in.

In [None]:
def kinetics(DETECTR_cleaned, num_plate_reads):
    """Annotate the sample wells of every plate read for kinetic plotting.

    Keeps only the rows whose ``row_384`` is one of A, C, E, G, I, K, M, O and
    adds three columns: ``DNAse`` (treatment label), ``gRNA_stock_concentration``
    and ``well`` (row letter followed by column number). ``rfu_value`` is
    converted to float.

    The labels are assigned by position: within each read the first 8 kept rows
    are labelled "non-DNAse" and the next 8 "DNAse", each group running through
    the concentration series 110 ... 0. The input must therefore hold exactly
    16 kept rows per read, in that order.

    Parameters
    ----------
    DETECTR_cleaned : pandas.DataFrame
        Tidy data with columns ``row_384``, ``column_384`` and ``rfu_value``.
    num_plate_reads : int
        Number of plate reads contained in ``DETECTR_cleaned``.

    Returns
    -------
    pandas.DataFrame
        A new, annotated frame; the input frame is not modified.
    """
    sample_rows = ['A','C','E','G','I','K','M','O']
    treatment_per_read = ["non-DNAse"]*8 + ['DNAse']*8
    concentration_per_read = [110,55,27.5,13.75,6.875,3.43,1.72,0]*2

    sample_wells = DETECTR_cleaned[DETECTR_cleaned["row_384"].isin(sample_rows)]
    # the per-read label patterns are repeated once for every plate read
    annotated = sample_wells.assign(
        DNAse=num_plate_reads*treatment_per_read,
        gRNA_stock_concentration=num_plate_reads*concentration_per_read,
        well=sample_wells['row_384'].map(str) + sample_wells['column_384'].map(str),
        rfu_value=sample_wells['rfu_value'].map(float),
    )
    return annotated

In [None]:
#annotate the sample wells of all plate reads for the kinetics plot
DETECTR_kinetics = kinetics(
    DETECTR_cleaned=DETECTR_cleaned,
    num_plate_reads=num_plate_reads,
)

In [None]:
#scatter plot of RFU against read time, split by treatment ('DNAse')
#and grouped by gRNA stock concentration
DETECTR_kinetics.hvplot.scatter(
    x='time',
    y='rfu_value',
    by='DNAse',
    groupby='gRNA_stock_concentration',
    rot=90,
)

In [None]:
def filter_time(DETECTR_cleaned, time):
    """Annotate the sample wells of the single plate read taken at ``time``.

    Keeps only the rows whose ``row_384`` is one of A, C, E, G, I, K, M, O and
    whose ``time`` equals ``time``, then adds three columns: ``DNAse``
    (treatment label), ``gRNA_stock_concentration`` and ``well`` (row letter
    followed by column number). ``rfu_value`` is converted to float.

    The labels are assigned by position: the first 8 kept rows are labelled
    "non-DNAse" and the next 8 "DNAse", each group running through the
    concentration series 110 ... 0.

    Parameters
    ----------
    DETECTR_cleaned : pandas.DataFrame
        Tidy data with columns ``time``, ``row_384``, ``column_384`` and
        ``rfu_value``.
    time : str
        Time stamp to select; compared for exact equality with the ``time``
        column.

    Returns
    -------
    pandas.DataFrame
        A new, annotated 16-row frame; the input frame is not modified.

    Raises
    ------
    ValueError
        If the selection does not contain exactly 16 rows, e.g. because
        ``time`` does not occur in the data.
    """
    treatment_labels = ["non-DNAse"]*8 + ['DNAse']*8
    stock_concentrations = [110,55,27.5,13.75,6.875,3.43,1.72,0]*2

    in_sample_row = DETECTR_cleaned["row_384"].isin(['A','C','E','G','I','K','M','O'])
    at_requested_time = DETECTR_cleaned['time'] == time
    selected = DETECTR_cleaned.loc[in_sample_row & at_requested_time]

    # The labels below are positional, so the selection has to line up with them.
    # Checked explicitly because assigning a list to an empty selection does not
    # reliably fail with a helpful message.
    if len(selected) != len(treatment_labels):
        raise ValueError(
            f"expected {len(treatment_labels)} sample wells at time {time!r}, "
            f"found {len(selected)}; check that the time string matches the "
            "values in the 'time' column"
        )

    return selected.assign(
        DNAse=treatment_labels,
        gRNA_stock_concentration=stock_concentrations,
        well=selected['row_384'].map(str) + selected['column_384'].map(str),
        rfu_value=selected['rfu_value'].map(float),
    )

In [None]:
#annotated sample wells from the plate read with time stamp '02:00:00'
DETECTR_2hr = filter_time(DETECTR_cleaned, time='02:00:00')

In [None]:
#RFU against gRNA stock concentration for the '02:00:00' read, one series per treatment
DETECTR_2hr.hvplot(
    x='gRNA_stock_concentration',
    y='rfu_value',
    by='DNAse',
)

In [None]:
#annotated sample wells from the plate read with time stamp '04:00:00'
DETECTR_4hr = filter_time(DETECTR_cleaned, time='04:00:00')

In [None]:
#RFU against gRNA stock concentration for the '04:00:00' read, one series per treatment
DETECTR_4hr.hvplot(
    x='gRNA_stock_concentration',
    y='rfu_value',
    by='DNAse',
)

## Conclusions
All guides work, with a few failures: B3, E10, and G11.