# Step 1
### Import packages

In [1]:
import os
import pandas as pd
import numpy as np
import random
import scipy.stats as stats
import json

# Step 2
### Create an empty CSV file with a header row

In [100]:
import csv

# Start a fresh database file that contains only the header row.
# Opening in 'w' mode truncates any previous database, so re-running this cell
# resets the table before Step 3 appends one row per analysed file.
os.makedirs('database', exist_ok=True)  # open() does not create missing folders
with open('database/FRAP_database.csv', 'w', newline='') as file:
    fieldnames = ["filename", "experiment", "protein", 'tau', 't_half','t_half2', 'mobile_fraction', 'bleach_values', 'time_steps', 'sse', 'rsquare' ]
    # Use '\n' so the header has the same line ending as the rows appended in Step 3
    # (the csv module default is '\r\n', which would give a file with mixed endings).
    writer = csv.DictWriter(file, fieldnames=fieldnames, lineterminator='\n')
    writer.writeheader()

# Step 3

### Read each pyan.txt file and write tau, t-half, mobile fraction, bleach-curve values and time steps to the CSV file

In [101]:
# Root folder of the FRAP data. The driver loop further down treats every
# immediate sub-folder of this directory as one dataset and passes it to
# matlab_ana_values.
path = '.../FRAP/data'
######################################################

def walklevel(some_dir, level=1):
    """Walk a directory tree like ``os.walk`` but stop descending below ``level``.

    Parameters
    ----------
    some_dir : str
        Directory to start from. Trailing path separators are ignored.
    level : int, optional
        How many levels below ``some_dir`` to descend. ``0`` yields only
        ``some_dir`` itself, ``1`` also yields its immediate sub-directories,
        and so on.

    Yields
    ------
    tuple
        ``(root, dirs, files)`` exactly as produced by ``os.walk``.

    Raises
    ------
    AssertionError
        If ``some_dir`` is not an existing directory (raised on first iteration).
    """
    some_dir = some_dir.rstrip(os.path.sep)
    assert os.path.isdir(some_dir)
    base_depth = some_dir.count(os.path.sep)
    for root, dirs, files in os.walk(some_dir):
        yield root, dirs, files
        # Depth is measured by counting separators. Once the limit is reached,
        # empty `dirs` in place so os.walk (top-down) does not go any deeper.
        if base_depth + level <= root.count(os.path.sep):
            del dirs[:]

# function to extract the matlab values for mobile fraction, tau value, timesteps and bleach values from the pyan.txt
def _read_first_line(file_path):
    """Return the first line of ``file_path`` with surrounding whitespace removed."""
    with open(file_path) as handle:
        return handle.readline().strip()


def _write_lines(file_path, values):
    """Write ``str(value)`` for each value on its own line, replacing any existing file."""
    with open(file_path, 'w') as handle:
        for value in values:
            handle.write(str(value) + '\n')


def matlab_ana_values(path, database_csv='database/FRAP_database.csv',
                      sse_cutoff=0.4, rsquare_cutoff=0.6):
    """Collect the fit results of every ``*pyan.txt`` file in one dataset folder.

    For each ``<prefix>pyan.txt`` file in ``path`` (names starting with ``._``
    are skipped) the function reads:

    * ``<prefix>pyan.txt``: three leading float lines (tau, mobile fraction and
      a third value that is not used here), followed by one float per line. The
      first half of those floats is taken as the normalised bleach values and
      the second half as the time steps.
    * ``<prefix>gof.csv``: ``sse`` and ``rsquare`` are the first two fields of
      the second line.
    * ``<prefix>t_half_values.txt`` and ``<prefix>t_half_values_2.txt``: the
      first line of each file.

    Accepted files are appended as one row to ``database_csv`` (the ``protein``
    column is left empty). After all files are processed, four summary files
    are written into ``path``: ``<dataset>_mobs.txt``, ``<dataset>_taus.txt``,
    ``<dataset>_time_all.txt`` and ``<dataset>_bleachpoints_all.txt``.
    ``<dataset>`` is the folder name of ``path``; it is also written to the
    ``experiment`` column.

    Parameters
    ----------
    path : str
        Folder that holds the files of one dataset.
    database_csv : str, optional
        CSV file the rows are appended to. It must already contain the header.
    sse_cutoff, rsquare_cutoff : float, optional
        A file is rejected when ``sse > sse_cutoff`` AND
        ``rsquare < rsquare_cutoff``.

    Returns
    -------
    tuple of lists
        ``(tau_values, mobile_fraction, time_all, bleachpoints_all, t_half,
        t_half2)`` for the accepted files. The t-half entries are strings as
        read from the files.

    Raises
    ------
    OSError
        If a companion file is missing or ``database_csv`` cannot be opened.
    ValueError
        If a line that should hold a number cannot be parsed as float.
    """
    # Derive the dataset label from the folder itself instead of relying on a
    # global variable set by the calling loop.
    dataset_name = os.path.basename(os.path.normpath(path))
    fieldnames = ["filename", "experiment", "protein", 'tau', 't_half', 't_half2',
                  'mobile_fraction', 'bleach_values', 'time_steps', 'sse', 'rsquare']
    suffix = 'pyan.txt'

    bleachpoints_all = []
    time_all = []
    tau_values = []
    mobile_fraction = []
    t_half = []
    t_half2 = []

    # sorted() makes the row order independent of the file-system listing order.
    for files in sorted(os.listdir(path)):
        # Only the pyan.txt result files are analysed; '._' files are skipped.
        if not files.endswith(suffix) or files.startswith('._'):
            continue
        prefix = files[:-len(suffix)]

        with open(os.path.join(path, files), 'r', errors='ignore') as reader:
            tau = float(reader.readline())
            mob = float(reader.readline())
            float(reader.readline())  # third value is unused but must be consumed
            # Blank lines (e.g. a trailing newline at the end of the file) are ignored.
            time_and_bleach_values = [float(line) for line in reader if line.strip()]

        half = len(time_and_bleach_values) // 2
        bleachpoints_norm = time_and_bleach_values[:half]
        timesteps = time_and_bleach_values[half:]

        # Goodness-of-fit values: skip the header line, drop the last field of the value line.
        with open(os.path.join(path, prefix + "gof.csv")) as gof:
            gof.readline()
            each_value = gof.readline().split(",")[:-1]
        sse = each_value[0]
        rsquare = each_value[1]

        # NOTE(review): the original comment said "sse or rsquare", but the test
        # requires both conditions; the `and` is kept so results do not change.
        if float(sse) > sse_cutoff and float(rsquare) < rsquare_cutoff:
            print(sse, rsquare)
            print("File {} did not pass the cutoff".format(files))
            continue

        thalf = _read_first_line(os.path.join(path, prefix + "t_half_values.txt"))
        thalf2 = _read_first_line(os.path.join(path, prefix + "t_half_values_2.txt"))

        # A negative mobile fraction is rejected.
        if mob < 0:
            print(mob)
            print("File {} has a negative mobile fraction".format(files))
            continue

        bleachpoints_all.append(bleachpoints_norm)
        time_all.append(timesteps)
        tau_values.append(tau)
        mobile_fraction.append(mob)
        t_half.append(thalf)
        t_half2.append(thalf2)

        # Append immediately so rows written so far survive a failure on a later file.
        with open(database_csv, 'a', newline='') as file:
            writer = csv.DictWriter(file, fieldnames=fieldnames, delimiter=',', lineterminator='\n')
            writer.writerow({"filename": files, "experiment": dataset_name, 'tau': tau,
                             't_half': thalf, 't_half2': thalf2, 'mobile_fraction': mob,
                             'bleach_values': bleachpoints_norm, 'time_steps': timesteps,
                             'sse': sse, 'rsquare': rsquare})

    # Per-dataset summary files, one entry per accepted pyan.txt file.
    _write_lines(os.path.join(path, dataset_name + '_mobs.txt'), mobile_fraction)
    _write_lines(os.path.join(path, dataset_name + '_taus.txt'), tau_values)
    _write_lines(os.path.join(path, dataset_name + '_time_all.txt'), time_all)
    _write_lines(os.path.join(path, dataset_name + '_bleachpoints_all.txt'), bleachpoints_all)

    return tau_values, mobile_fraction, time_all, bleachpoints_all, t_half, t_half2
        


In [102]:
# Run matlab_ana_values for every dataset folder directly below `path`.
# level=0 restricts walklevel to `path` itself, so resultDirs holds its sub-folders.
for root, resultDirs, AppleTrug in walklevel(path, level=0):
    for dataSet in resultDirs:
        try:
            # Collect the values for plotting; this also writes the per-dataset
            # summary files and appends the rows to the database CSV.
            (tau_values, mobile_phase, time_all, bleachpoints, t_half, t_half2) = matlab_ana_values(os.path.join(path, dataSet))
        except OSError as err:
            # Report only real failures and continue with the next dataset
            # (a `finally` here would print the message after every run).
            print("File {} could not be opened: {}".format(dataSet, err))

0.40088 0.50103
File 200504_SPL8_hs1hr35_3hrRC20C_lif_-_FRAP_008_pyan.txt did not pass the cutoff
File 210312_spl15_bkg could not be openned.


# Step 4

#### Fill in the protein and condition of the table

In [103]:
# Load the FRAP database CSV into a DataFrame for annotation.
# NOTE(review): the preview shown below has 'Unnamed: 0' and 'condition' columns
# that the header written in Step 2 does not define - confirm this path points
# to the file produced by Step 3 and not to an older copy.
df = pd.read_csv('.../database/FRAP_database.csv')

In [104]:
# Preview the first rows of the loaded table.
df.head()

Unnamed: 0,filename,experiment,protein,condition,tau,t_half,t_half2,mobile_fraction,bleach_values,time_steps,sse,rsquare
0,200530_SPL8_hs1hr35_rc8hr20_lif_-_FRAP_006_pya...,200530_spl8,,,5.805892,4.18,3.3,0.374834,"[1.060889, 1.008881, 1.053333, 1.02661, 1.0139...","[0.22, 0.44, 0.66, 0.88, 1.1, 1.32, 1.54, 1.76...",0.24989,0.49793
1,200530_SPL8_hs1hr35_rc8hr20_lif_-_FRAP_007_pya...,200530_spl8,,,8.318542,5.94,3.96,0.264699,"[1.04635, 1.104421, 0.967013, 1.027364, 1.0217...","[0.22, 0.44, 0.66, 0.88, 1.1, 1.32, 1.54, 1.76...",0.17283,0.47933
2,200530_SPL8_hs1hr35_rc8hr20_lif_-_FRAP_009_pya...,200530_spl8,,,1.604541,1.32,0.88,0.203197,"[1.054365, 1.025518, 1.035296, 1.023328, 1.027...","[0.22, 0.44, 0.66, 0.88, 1.1, 1.32, 1.54, 1.76...",0.17522,0.19545
3,200530_SPL8_hs1hr35_rc8hr20_lif_-_FRAP_029_pya...,200530_spl8,,,2.41976,1.98,1.1,0.166205,"[1.037188, 1.056412, 1.042269, 0.994047, 1.031...","[0.22, 0.44, 0.66, 0.88, 1.1, 1.32, 1.54, 1.76...",0.12162,0.10477
4,200530_SPL8_hs1hr35_rc8hr20_lif_-_FRAP_030_pya...,200530_spl8,,,8.138622,5.94,4.18,0.133133,"[1.045889, 1.021138, 0.995914, 1.00209, 1.0034...","[0.22, 0.44, 0.66, 0.88, 1.1, 1.32, 1.54, 1.76...",0.074685,0.4125


In [105]:
# Create a column with the date: the first six characters of the experiment name
df['date'] = df['experiment'].str[0:6]

# Create a column with strain and condition: everything after the "<date>_" prefix
df['strain'] = df['experiment'].str[7:]

# Create a column with the immobile fraction
df['immobile_fraction'] = 1 - df['mobile_fraction']

# Fill in the column 'protein' from the strain name.
# The column is empty in the loaded table (see the preview above), so pandas gives
# it a numeric dtype; cast it to object first so that writing strings into it is
# not an incompatible-dtype assignment.
df['protein'] = df['protein'].astype(object)
strain_to_protein = {
    'spl8': 'DPY-27',
    'eg8899': 'free-GFP',
    'eg8961': 'H2B-GFP',
}
# Rows whose strain is not listed keep their existing 'protein' value.
for strain_name, protein_name in strain_to_protein.items():
    df.loc[df['strain'] == strain_name, 'protein'] = protein_name


df.head()

Unnamed: 0,filename,experiment,protein,condition,tau,t_half,t_half2,mobile_fraction,bleach_values,time_steps,sse,rsquare,date,strain,immobile_fraction
0,200530_SPL8_hs1hr35_rc8hr20_lif_-_FRAP_006_pya...,200530_spl8,DPY-27,,5.805892,4.18,3.3,0.374834,"[1.060889, 1.008881, 1.053333, 1.02661, 1.0139...","[0.22, 0.44, 0.66, 0.88, 1.1, 1.32, 1.54, 1.76...",0.24989,0.49793,200530,spl8,0.625166
1,200530_SPL8_hs1hr35_rc8hr20_lif_-_FRAP_007_pya...,200530_spl8,DPY-27,,8.318542,5.94,3.96,0.264699,"[1.04635, 1.104421, 0.967013, 1.027364, 1.0217...","[0.22, 0.44, 0.66, 0.88, 1.1, 1.32, 1.54, 1.76...",0.17283,0.47933,200530,spl8,0.735301
2,200530_SPL8_hs1hr35_rc8hr20_lif_-_FRAP_009_pya...,200530_spl8,DPY-27,,1.604541,1.32,0.88,0.203197,"[1.054365, 1.025518, 1.035296, 1.023328, 1.027...","[0.22, 0.44, 0.66, 0.88, 1.1, 1.32, 1.54, 1.76...",0.17522,0.19545,200530,spl8,0.796803
3,200530_SPL8_hs1hr35_rc8hr20_lif_-_FRAP_029_pya...,200530_spl8,DPY-27,,2.41976,1.98,1.1,0.166205,"[1.037188, 1.056412, 1.042269, 0.994047, 1.031...","[0.22, 0.44, 0.66, 0.88, 1.1, 1.32, 1.54, 1.76...",0.12162,0.10477,200530,spl8,0.833795
4,200530_SPL8_hs1hr35_rc8hr20_lif_-_FRAP_030_pya...,200530_spl8,DPY-27,,8.138622,5.94,4.18,0.133133,"[1.045889, 1.021138, 0.995914, 1.00209, 1.0034...","[0.22, 0.44, 0.66, 0.88, 1.1, 1.32, 1.54, 1.76...",0.074685,0.4125,200530,spl8,0.866867


In [106]:
# Save the annotated table under a new file name for plotting.
# index=False keeps the DataFrame row index out of the CSV.
df.to_csv('.../database/FRAP_database_2.csv', index=False)  