In [None]:
import os
import pandas as pd
import tkinter as tk
from tkinter import filedialog
"""
This script is part of the publication:  Martinek J. et al., "ARP2/3 complex associates with peroxisomes to participate in pexophagy in plants"
This script is available under the CC-BY-4.0 License

This script loops through all the sample-containing subfolders, finds the TrackMate and ComDet files and processes them to obtain colocalization results that are subsequently exported to a csv file.

It requires the following file structure and minimum content:
    > Parent_folder
        >Sample_folder
            *coloc_results.csv
            *_param.txt
            ch00_spots.csv
            ch01_spots.csv

*coloc_results.csv and *_param.txt files are obtained after running the AutoThr_Coloc.ijm ImageJ macro.
ch00_spots.csv and ch01_spots.csv files are obtained during the tracking process using the TrackMate ImageJ plugin, as described in the corresponding code documentation.

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Author: Judith García-González
Year: 2023
License: BSD-3

Copyright (c) 2023, Judith García-González, Department of Experimental Plant Biology (Faculty of Science, Charles University in Prague)
All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
"""
#This function iterates through the subfolders to find the required files and once they are found, it processes them further and stores them to a dataframe.
def find_files_and_process(folder_path):
    """Process every sample subfolder of *folder_path* and tabulate the results.

    Each direct subfolder is searched recursively for four files, recognised
    by the end of their file name: ``coloc_results.csv``, ``_param.txt``,
    ``ch00_spots.csv`` (green channel) and ``ch01_spots.csv`` (red channel).
    As soon as all four have been seen, they are handed to ``process_files``
    and one result row is recorded for that subfolder. Subfolders in which
    the four files are never all found are skipped silently.

    Args:
        folder_path: Path of the parent folder containing the sample folders.

    Returns:
        pandas.DataFrame with one row per processed subfolder and the columns
        ``subfolder_name``, ``spots_green``, ``spots_red``, ``coloc_num``,
        ``coloc_freq_green`` and ``coloc_freq_red``. The frame is empty (but
        keeps these columns) when no subfolder could be processed.
    """
    columns = ['subfolder_name', 'spots_green', 'spots_red', 'coloc_num', 'coloc_freq_green', 'coloc_freq_red']

    # Rows are collected in a plain list and turned into a dataframe once at
    # the end: DataFrame.append no longer exists in pandas >= 2.0.
    rows = []

    # Sorted so that the row order does not depend on the file system.
    for subfolder_name in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subfolder_name)

        # Plain files in the parent folder (e.g. a previously exported
        # results_df.csv) are not samples.
        if not os.path.isdir(subfolder_path):
            continue

        # Paths of the desired files, reset for every sample subfolder
        coloc_results_path = None
        param_path = None
        spots_green_path = None
        spots_red_path = None

        # Walk the subfolder and its own subfolders
        for root, dirs, files in os.walk(subfolder_path):
            for file in files:
                if file.endswith('coloc_results.csv'):
                    coloc_results_path = os.path.join(root, file)
                elif file.endswith('_param.txt'):
                    param_path = os.path.join(root, file)
                elif file.endswith('ch00_spots.csv'):
                    spots_green_path = os.path.join(root, file)
                elif file.endswith('ch01_spots.csv'):
                    spots_red_path = os.path.join(root, file)

            # Checked after each visited directory: once all four files are
            # known, process them and stop walking this sample.
            if all([coloc_results_path, param_path, spots_green_path, spots_red_path]):
                subfolder_results = process_files(coloc_results_path, param_path, spots_green_path, spots_red_path)
                rows.append([subfolder_name, *subfolder_results])
                break

    return pd.DataFrame(rows, columns=columns)

# Columns used to match a TrackMate spot with a coloc detection
_SPOT_MERGE_KEYS = ['X_(px)', 'Y_(px)', 'FRAME']
# Columns identifying one colocalization event in the coloc table
_COLOC_INDEX_COLUMNS = ['ColocIndCh1', 'ColocIndCh2']


def _load_trackmate_spots(spots_path, scale):
    """Read a TrackMate spots table and add the columns used for merging.

    File rows 1-3 (the three lines after the header line) are skipped.
    ``POSITION_X`` / ``POSITION_Y`` are multiplied by *scale* and rounded to
    whole pixels, and ``FRAME`` is shifted by +1 so that it can be compared
    with the coloc table's ``Slice`` numbering (presumably 0-based vs.
    1-based counting; confirm against the exporting tools).
    """
    spots = pd.read_csv(spots_path, skiprows=[1, 2, 3])
    spots["X_(px)"] = (spots["POSITION_X"] * scale).astype('float64').round(0)
    spots["Y_(px)"] = (spots["POSITION_Y"] * scale).astype('float64').round(0)
    spots["FRAME"] = spots["FRAME"] + 1
    return spots


def _match_long_tracks(trackmate_spots, coloc_channel, min_frames=3):
    """Merge spots with coloc detections and keep tracks seen in >= *min_frames* frames.

    The returned frame is an independent copy carrying an extra ``COLOC_bool``
    column that is 1 where both ``ColocCh1`` and ``ColocCh2`` equal 1 and 0
    otherwise.
    """
    merged = pd.merge(trackmate_spots, coloc_channel, on=_SPOT_MERGE_KEYS)
    frames_per_track = merged['TRACK_ID'].value_counts()
    long_track_ids = frames_per_track[frames_per_track >= min_frames].index

    # .copy() so the new column is written to an own frame, not to a slice of
    # `merged`; the vectorized comparison also works on an empty frame.
    filtered = merged[merged['TRACK_ID'].isin(long_track_ids)].copy()
    filtered['COLOC_bool'] = ((filtered['ColocCh1'] == 1) & (filtered['ColocCh2'] == 1)).astype(int)
    return filtered


def _colocalized_track_pairs(filtered_spots, track_column):
    """Return one row per unique colocalization index pair, with its track id.

    ``TRACK_ID`` is renamed to *track_column* so that the red and green
    tables can be merged without a name clash.
    """
    colocalized = filtered_spots[filtered_spots['COLOC_bool'] == 1]
    # Deduplicate on the (Ch1, Ch2) index pair; the same pair is the key of
    # the later red/green merge.
    unique_events = colocalized.drop_duplicates(subset=_COLOC_INDEX_COLUMNS)
    return unique_events[['TRACK_ID'] + _COLOC_INDEX_COLUMNS].rename(columns={'TRACK_ID': track_column})


def process_files(coloc_results_path, param_path, spots_green_path, spots_red_path):
    """Compute the colocalization statistics of one sample.

    TrackMate spots of each channel are matched to the detections of the coloc
    table by rounded pixel coordinates and frame. Only tracks matched in at
    least three frames are counted as spots. A (red track, green track) pair
    counts as one colocalization when the number of colocalization events the
    two tracks share is the maximum for that red track and also the maximum
    for that green track.

    Args:
        coloc_results_path: CSV with the coloc results; must provide the
            columns ``X_(px)``, ``Y_(px)``, ``Slice``, ``Channel``,
            ``ColocCh1``, ``ColocCh2``, ``ColocIndCh1`` and ``ColocIndCh2``.
        param_path: Tab-separated file without header, read as the columns
            ``param``, ``unit``, ``value``; the ``value`` of its first row is
            used as the scale factor converting positions to pixels.
        spots_green_path: TrackMate spots CSV of the green channel.
        spots_red_path: TrackMate spots CSV of the red channel.

    Returns:
        list: ``[spots_green_num, spots_red_num, coloc_num, coloc_freq_green,
        coloc_freq_red]``. A frequency is ``coloc_num`` divided by the number
        of spots of that channel, or 0 when the channel has no spots.
    """
    # Scale stored in the first row of the param file
    param = pd.read_csv(param_path, sep="\t", names=['param', 'unit', 'value'])
    scale = float(param.iloc[0]['value'])

    trackmate_red = _load_trackmate_spots(spots_red_path, scale)
    trackmate_green = _load_trackmate_spots(spots_green_path, scale)

    # Rename "Slice" to "FRAME" and round the pixel coordinates so that the
    # coloc table shares its merge keys with the TrackMate tables
    coloc_results = pd.read_csv(coloc_results_path).rename(columns={"Slice": "FRAME"})
    coloc_results["X_(px)"] = coloc_results["X_(px)"].astype('float64').round(0)
    coloc_results["Y_(px)"] = coloc_results["Y_(px)"].astype('float64').round(0)

    # Split coloc results per channel.
    # NOTE(review): red spots are matched with Channel 1 and green spots with
    # Channel 2 -- confirm this agrees with the channel order of the macro
    # that produced the coloc table.
    coloc_red = coloc_results[coloc_results['Channel'] == 1]
    coloc_green = coloc_results[coloc_results['Channel'] == 2]

    red_filtered = _match_long_tracks(trackmate_red, coloc_red)
    green_filtered = _match_long_tracks(trackmate_green, coloc_green)

    # Number of unique spots (tracks) per channel
    spots_red_num = red_filtered['TRACK_ID'].nunique()
    spots_green_num = green_filtered['TRACK_ID'].nunique()

    # Pair red and green tracks through the colocalization events they share
    red_pairs = _colocalized_track_pairs(red_filtered, 'TRACK_ID_RED')
    green_pairs = _colocalized_track_pairs(green_filtered, 'TRACK_ID_GREEN')
    coloc_tracks = pd.merge(red_pairs, green_pairs, on=_COLOC_INDEX_COLUMNS)

    if coloc_tracks.empty:
        coloc_num = 0
    else:
        coloc_counts = coloc_tracks.groupby(['TRACK_ID_RED', 'TRACK_ID_GREEN']).size().reset_index(name='counts')
        # Keep a pair only if its count is the highest one of its red track
        # AND the highest one of its green track
        best_for_red = coloc_counts['counts'] == coloc_counts.groupby('TRACK_ID_RED')['counts'].transform('max')
        best_for_green = coloc_counts['counts'] == coloc_counts.groupby('TRACK_ID_GREEN')['counts'].transform('max')
        coloc_num = len(coloc_counts[best_for_red & best_for_green])

    # Frequency of detected colocalizations; 0 when a channel has no spots
    coloc_freq_red = coloc_num / spots_red_num if spots_red_num else 0
    coloc_freq_green = coloc_num / spots_green_num if spots_green_num else 0

    return [spots_green_num, spots_red_num, coloc_num, coloc_freq_green, coloc_freq_red]

# Prompt user to choose a folder
root = tk.Tk()
root.withdraw() # hide the root window

folder_path = filedialog.askdirectory()
#folder_path = input("Please enter the path of the folder you'd like to process: ")

# The hidden root window is only needed for the dialog; release it afterwards
root.destroy()

if folder_path:
    # Call function to find files and process them
    results_df = find_files_and_process(folder_path)

    # Display results dataframe and export it next to the sample folders
    print(results_df)
    results_df.to_csv(os.path.join(folder_path,r'results_df.csv'))
else:
    # The dialog was cancelled: there is no folder to list, so stop here
    # instead of failing inside os.listdir with an empty path
    print("No folder selected; nothing was processed.")