In [1]:
# Path to the experiment folder, e.g. r"F:\Light_curves\Experiment1".
# The folder should contain the report (CSV) and a sub-folder named exactly "FastKinetics"
# (one word, capital F and capital K).
path = r"/media/will/3930-2B1A/Light_curve_05_08_19/Experiment1"

# Prefix for the results file: the output is written to <path>/<name>_Results.CSV.
name = "Experiment1"

# Pulse width (ms).
pulse_width = 300

# Should the fitted kinetics be merged into the report file found in the path folder?
# Use the Python booleans True or False (capitalised, no quotes).
integrate_report = True

# Width of the initial-slope fitting window (ms): the P700 linear fit uses samples up to
# pulse_width + fitting_window.
fitting_window = 10

In [28]:

import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn import preprocessing
from scipy import stats, signal

# Names of the entries in the experiment folder; the report-loading cell scans this list for the report CSV.
dir_list = os.listdir(path)

def scale_fast_kinetics(df, pulse_width, columns):
    """Rescale the listed columns of ``df`` and return ``df``.

    Each column is shifted and scaled so that
      * the mean of the rows whose "time/ms" lies in [pulse_width - 5, pulse_width] maps to 1, and
      * the mean of the trailing ``[-100:]`` slice of the column maps to 0.

    The columns are overwritten on ``df`` itself; the returned object is the same frame.
    """
    for name in columns:
        trace = df[name]
        # The time column is re-read on every pass, exactly as the rows are selected per column.
        in_plateau = df["time/ms"].between(pulse_width - 5, pulse_width)
        upper = np.mean(trace[in_plateau])
        lower = np.mean(trace[-100:])
        span = upper - lower
        df[name] = (trace - lower) / span
    return df
    

def _natural_sort_key(file_name):
    """Sort key that orders embedded numbers numerically ("FK_2" sorts before "FK_10")."""
    import re  # local import so this block does not rely on a new top-of-file import

    # re.split with a capturing group alternates text / digit-run / text / ...,
    # so every odd position is a run of digits.
    parts = re.split(r"(\d+)", file_name)
    return [int(tok) if i % 2 else tok.lower() for i, tok in enumerate(parts)]


def _initial_rate_fit(window_df, column, pulse_width):
    """Least-squares line through ``column`` versus "time/ms" over the rows of ``window_df``.

    Returns ``[slope, r value, p value, std err, lag]`` where ``lag`` is the time at which the
    fitted line reaches 1, expressed relative to ``pulse_width``.
    """
    slope, intercept, r_value, p_value, std_err = stats.linregress(
        np.array(window_df["time/ms"]), np.array(window_df[column]))
    lag = ((1 - intercept) / slope) - pulse_width
    return [slope, r_value, p_value, std_err, lag]


def _observed_half_time(decay_df, column):
    """Mean "time/ms" of the rows where ``column`` lies within 0.45-0.55 (NaN when no row does)."""
    in_band = (decay_df[column] <= 0.55) & (decay_df[column] >= 0.45)
    times = np.array(decay_df.loc[in_band, "time/ms"])
    # Explicit empty check: same NaN result as np.mean([]) but without the RuntimeWarning.
    return np.mean(times) if times.size else np.nan


def Kinetics_Fitting(path, pulse_width = 300, integrate_report = False, Report_DF = None,fitting_window =10, plot_results=False):
    """Fit the post-pulse decay of every file in ``<path>/FastKinetics``.

    For each file the Fluo and Fd traces are smoothed (Savitzky-Golay), the four traces are
    normalised with ``scale_fast_kinetics``, a straight line is fitted to the first
    milliseconds after the pulse, and a half-time is read off the normalised decay.

    Parameters
    ----------
    path : str
        Experiment folder; must contain a ``FastKinetics`` sub-folder.
    pulse_width : number
        Pulse width in ms (same units as the "time/ms" column).
    integrate_report : bool
        When True, merge one result row per file into the ``ID == "SP"`` rows of ``Report_DF``.
    Report_DF : pandas.DataFrame or None
        Report table; required when ``integrate_report`` is True.
    fitting_window : number
        Upper end of the P700 fitting window, in ms after ``pulse_width``.
    plot_results : bool
        When True, save scatter plots of the fitted window to ``<path>/DIRK_Halftime``.

    Returns
    -------
    pandas.DataFrame
        The kinetics table (27 columns), merged with the report columns when
        ``integrate_report`` is True. If the FastKinetics folder is missing, or the number of
        files differs from the number of SP rows, a message is printed and ``Report_DF`` is
        returned unchanged.
    """
    fk_dir = path+"/FastKinetics"
    if not os.path.exists(fk_dir):
        print("The folder",str(fk_dir),"does not exist")
        return Report_DF

    # List the folder once and in a deterministic order: result rows are paired with report
    # rows by position, and os.listdir makes no promise about ordering.
    # presumably the file names encode acquisition order - verify against the instrument export
    fk_files = sorted(os.listdir(fk_dir), key=_natural_sort_key)

    # test the number of fast kinetic files = number of SP
    sp_report = None
    if integrate_report == True:
        sp_report = Report_DF[Report_DF["ID"]=="SP"]
        if len(sp_report) != len(fk_files):
            print("Integration with report not possible. The number of files in FastKinetics is not equal to the number of SPs")
            return Report_DF

    FK_columns = ["Fluo k","Fluo r value", "Fluo p value", "Fluo std err","Fluo lag",
                  "P700 k","P700 r value", "P700 p value", "P700 std err", "P700 lag",
                  "PC k","PC r value", "PC p value", "PC std err","PC lag",
                  "Fd k","Fd r value", "Fd p value", "Fd std err","Fd lag",
                  "P700 fit t1/2","PC fit t1/2","Fd fit t1/2","Fluo observed t1/2",
                  "P700 observed t1/2","PC observed t1/2","Fd observed t1/2"]
    cols_to_normalise = ["Fluo deltaI/I x10e3","PC rel.","P700 rel.","Fd rel."]

    FK_data = []
    for fk in fk_files:
        fastkin_df = pd.read_csv(fk_dir+"/"+fk,sep=';',skiprows=1)
        # Smooth the two noisier traces before normalising (window length, polynomial order).
        fastkin_df["Fluo deltaI/I x10e3"] = signal.savgol_filter(fastkin_df["Fluo deltaI/I x10e3"],21,1)
        fastkin_df["Fd rel."] = signal.savgol_filter(fastkin_df["Fd rel."],31,1)
        normalised_df = scale_fast_kinetics(fastkin_df,pulse_width,cols_to_normalise)
        time_ms = normalised_df["time/ms"]

        # DIRK initial rate (linear fit)
        # The fit starts 3 ms after the end of the pulse.
        # NOTE(review): the previous comment here spoke of a 2 ms lag - confirm which is intended.
        lag = 3
        fit_start = pulse_width + lag
        DIRK_df_P700 = normalised_df[(time_ms>=fit_start) & (time_ms<=pulse_width+fitting_window)]
        # NOTE(review): only the P700 fit honours fitting_window; the other traces use a fixed
        # 10 ms window. Kept as found - confirm this is deliberate.
        DIRK_df = normalised_df[(time_ms>=fit_start) & (time_ms<=pulse_width+10)]

        fluo_fit = _initial_rate_fit(DIRK_df, "Fluo deltaI/I x10e3", pulse_width)
        p700_fit = _initial_rate_fit(DIRK_df_P700, "P700 rel.", pulse_width)
        pc_fit = _initial_rate_fit(DIRK_df, "PC rel.", pulse_width)
        fd_fit = _initial_rate_fit(DIRK_df, "Fd rel.", pulse_width)

        # Half time determination
        # .copy() gives an independent frame, so overwriting "time/ms" below no longer raises
        # pandas' SettingWithCopyWarning.
        DIRK_HALFTIME_df = normalised_df[(time_ms>=pulse_width) & (time_ms<=pulse_width+100)].copy()
        # Re-base time to start at 0 in steps of 0.5.
        # assumes one sample every 0.5 ms - TODO confirm against the acquisition settings
        DIRK_HALFTIME_df["time/ms"] = np.arange(len(DIRK_HALFTIME_df)) / 2.0

        # Find Half-time (+/- 10 %)
        observed = [_observed_half_time(DIRK_HALFTIME_df, column)
                    for column in ("Fluo deltaI/I x10e3", "P700 rel.", "PC rel.", "Fd rel.")]
        # Half-times derived from the fitted slopes: ln(2) / |slope|.
        fitted = [np.log(2)/np.abs(fit[0]) for fit in (p700_fit, pc_fit, fd_fit)]

        # Same order as FK_columns.
        FK_data.append(fluo_fit + p700_fit + pc_fit + fd_fit + fitted + observed)

        if plot_results == True:
            # plot DIRK
            plot_dir = path+"/DIRK_Halftime"
            os.makedirs(plot_dir, exist_ok=True)
            for column in ["PC rel.","P700 rel.","Fd rel."]:
                fig, ax = plt.subplots()
                ax.set_xlabel("Time (ms)")
                ax.set_ylabel(column)
                ax.scatter(np.array(DIRK_df["time/ms"]),np.array(DIRK_df[column]),s=0.1,color='k')
                fig.savefig(plot_dir+"/"+column+os.path.splitext(fk)[0]+".png",format='png',dpi=600)
                # Close only this figure; other open figures in the notebook are left alone.
                plt.close(fig)

    # reshape keeps the table two-dimensional even when the folder held no files.
    fk_table = np.array(FK_data).reshape(-1, len(FK_columns))

    if integrate_report == True:
        report_columns = ['Date', 'Time', 'Rec.Time', 'Action', 'ID', 'Name', 'Temp',
               'PAR', 'F(I)', "Fo,Fo'", "Fm,Fm'", 'FMTm', 'F', 'Y(II)', 'Y(4S)',
               'ETR(II)', 'Y(NO)', 'Y(NPQ)', 'NPQ', 'qN', 'qP', 'qL', 'F/Fm', "Fm'/Fm",
               'P700ox', "P700m,P700m'", 'Y(I)', 'Y(ND)', 'Y(NA)', 'ETR(I)', 'PCox',
               "PCm,PCm'", 'Rel PCox', "Rel PCm'", 'Fdred', "Fdm,Fdm'", 'Rel Fdred',
               "Rel Fdm'"]
        # Pair result rows with the SP rows (the same rows that were counted above).
        DIRK_results_df = pd.DataFrame(columns = FK_columns, data = fk_table, index = sp_report.index )
        results_df = pd.merge(sp_report,DIRK_results_df,on = sp_report.index)
        results_df = results_df[report_columns + FK_columns]
    else:
        results_df = pd.DataFrame(columns = FK_columns, data = fk_table)
    return results_df




In [29]:
# load report
# Candidate report files: every CSV in the experiment folder except the outputs of earlier
# runs of this notebook (those end in "Results.CSV" / "Results.csv").
report_files = [f for f in dir_list
                if f.endswith((".CSV", ".csv")) and not f.endswith(("Results.CSV", "Results.csv"))]

Report_DF = None
if report_files:
    if len(report_files) > 1:
        # As before, the last matching file is the one that ends up being used.
        print("More than one report CSV found in", path, "- using", report_files[-1])
    file_path = path+"/"+str(report_files[-1])
    Report_DF = pd.read_csv(file_path,sep=';',skiprows=1)
    # Keep only the rows with ID == "SP". The .copy() makes this an independent frame, so
    # adding columns to it further down cannot raise SettingWithCopyWarning.
    Report_DF = Report_DF[Report_DF["ID"]=="SP"].copy()
elif integrate_report:
    raise FileNotFoundError("No report CSV found in " + str(path))


results_df = Kinetics_Fitting(path, pulse_width = pulse_width, integrate_report = integrate_report, Report_DF = Report_DF,fitting_window =fitting_window,plot_results=True)

if results_df is None:
    raise RuntimeError("Nothing to save: no report was loaded and no fast-kinetics results were produced")

# The columns below are derived from report columns, so they are only added when the report
# is part of the results (with integrate_report = False the table holds the kinetics only).
if "Time" in results_df.columns:
    # Make a time column in seconds/minutes
    # The "Time" strings are split on ":" and read as hours, minutes, seconds.
    # NOTE(review): assumes an H:M:S layout - confirm against the report format. The first field
    # used to be dropped, which made the values wrap around at every full hour. Values are
    # counted from the zero of the "Time" column; subtract the first row if time elapsed since
    # the first SP is wanted instead.
    clock_fields = results_df["Time"].str.split(":", expand=True)
    hours, minutes, seconds = (clock_fields[i].astype(int) for i in range(3))
    results_df["Time/s"] = hours*3600 + minutes*60 + seconds
    results_df["Time/min"] = results_df["Time/s"] / 60.0

    # other engineered features (1-qL, YI/YII, ETR(I)-ETR(II))
    results_df["1-qL"] = 1 - results_df["qL"]
    results_df["Y(I)/Y(II)"] = results_df["Y(I)"] / results_df["Y(II)"]
    results_df["ETR(I)-ETR(II)"] = results_df["ETR(I)"] - results_df["ETR(II)"]

results_df.to_csv(path+"/"+name+"_"+"Results.CSV")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
