# create combined files after manual checking

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.optimize import curve_fit
import glob
import os
import matplotlib as mpl
from itertools import cycle
import re

In [2]:
experiments = ['23.11.14-15', '23.12.22-23', '24.04.01', '24.07.08', '24.11.09', '24.11.14']
scan_rates = {'23.11.14-15':15, '23.12.22-23':15, '24.04.01':10, '24.07.08':10, '24.11.09':10, '24.11.14':10}  # in sec

In [3]:
column_labels = ['particle', 'x' ,'time(sec)', 'pH', 'pHlow', 'pHhigh', 'delta', 'epsilon', 'vacuole', 'radius', 'pH_vfirst', 'vsignal' ,'trackID', 'f_location', 'particleID_original', 'exp_date']

In [4]:
for exp in experiments:
    # Get all CSV files for the current experiment
    data_files = glob.glob(f'24.10.13 manual_vcheck v1/output vfirst files ({exp}) manual_vcheck/*.csv')

    print(exp)

    # Initialize a list to store dataframes
    df_list = []

    # Variable to keep track of unique particle IDs across files
    particle_offset = 0

    # different locations, some can have multiple files
    f_location = [re.search(fr' f(\d+)', path).group(1) for path in data_files]

    # Loop through each file and process it
    for f_idx, file in enumerate(data_files):

        print(file)
        # Read the CSV file
        df = pd.read_csv(file)

        # Extract the filename (without path and extension) to use in trackID
        file_name = os.path.basename(file).replace('.csv', '')

        # Save the original particle IDs before adjusting them
        original_particle_ids = df['particle'].copy()
        
        # Select the relevant columns
        df_selected = df[['particle', 'x', 'time_label', 'pH', 'pHlow', 'pHhigh', 'delta', 'epsilon', 'vacuole', 'radius', 'vsignal']].copy()

        ###
        # This section is super critical
        # The manual vcheck files has a smaller summary data on the right which contains all the particles which were manually checked
        ###
        v_particles = df['particle_vfirst'].values
        particle_type_manual = df['manual_vcheck'].values
        pH_v = df['pH_vfirst'].values
        particle_array = df_selected['particle'].values

        pH_vfirst_array = np.full(particle_array.shape, np.nan, dtype=np.float64)
        manual_vacuole = np.full(particle_array.shape, 0.0, dtype=np.float64)

        # Loop through each particle in df_selected
        for i, particle in enumerate(particle_array):
            # Check if the particle is in the v_particles array
            if particle in v_particles:
                # Get the index of the matching particle in v_particles
                idx = np.where(v_particles == particle)[0][0]
                if particle_type_manual[idx] == 0.5:
                    manual_vacuole[i] = 0.5
                elif particle_type_manual[idx] == 1:
                    # print(file, idx,i,pH_v[idx])
                    # Set the corresponding pH_vfirst value in pH_vfirst_array
                    manual_vacuole[i] = 1
                    pH_vfirst_array[i] = pH_v[idx]
        ###
        #
        #
        ###


        # Create 'pH_vfirst' column, filling with pH_vfirst values where particle is in v_particles
        df_selected['pH_vfirst'] = pH_vfirst_array

        # creating vacuoles column (0 or 1)
        df_selected['vacuole'] = manual_vacuole

        # Calculate 'time(sec)' as 'time_label' * scan_rate for the current experiment
        df_selected['time(sec)'] = df_selected['time_label'] * scan_rates[exp]
        
        # Adjust 'particle' to make IDs unique across files
        df_selected['particle'] += particle_offset

        # Update the particle offset for the next file
        particle_offset += df_selected['particle'].max() + 1

        # Create the 'trackID' column using the original particle ID and filename
        df_selected['trackID'] = file_name + '_particle' + original_particle_ids.astype(str)

        # f location 
        df_selected['f_location'] = f_location[f_idx]

        # experiment date
        df_selected['exp_date'] = exp

        # particleID in original file
        df_selected['particleID_original'] = original_particle_ids
        
        # Append the dataframe to the list
        df_list.append(df_selected)
    
    # Concatenate all dataframes for the current experiment
    combined_df = pd.concat(df_list, ignore_index=True)
    
    # Reorder the columns to match the desired order
    combined_df = combined_df[column_labels]

    # Sort by 'particle' and 'time_label'
    combined_df = combined_df.sort_values(by=['particle', 'time(sec)'])

    # Save the combined dataframe to a new CSV file
    combined_df.to_csv(f'combined_output_{exp}_manual_vcheck.csv', index=False)


23.11.14-15
24.10.13 manual_vcheck v1/output vfirst files (23.11.14-15) manual_vcheck/24.09.23 updating pH calibration v3 for drops dpHdt (23.11.14-15 f05 1000-2080) vfirst manual_vcheck.csv
24.10.13 manual_vcheck v1/output vfirst files (23.11.14-15) manual_vcheck/24.09.23 updating pH calibration v3 for drops dpHdt (23.11.14-15 f01 0-2080) vfirst manual_vcheck.csv
24.10.13 manual_vcheck v1/output vfirst files (23.11.14-15) manual_vcheck/24.09.23 updating pH calibration v3 for drops dpHdt (23.11.14-15 f04 1010-2080) vfirst manual_vcheck.csv
24.10.13 manual_vcheck v1/output vfirst files (23.11.14-15) manual_vcheck/24.09.23 updating pH calibration v3 for drops dpHdt (23.11.14-15 f012 0-999) vfirst manual_vcheck.csv
24.10.13 manual_vcheck v1/output vfirst files (23.11.14-15) manual_vcheck/24.09.23 updating pH calibration v3 for drops dpHdt (23.11.14-15 f012 1000-2080) vfirst manual_vcheck.csv
24.10.13 manual_vcheck v1/output vfirst files (23.11.14-15) manual_vcheck/24.09.23 updating pH cal