In [1]:
'''Run all to update the analysis queue - any manual changes to status should be retained'''

import os
import sys

def get_git_root():
    """Return the top-level directory of the git work tree containing the cwd.

    Runs ``git rev-parse --show-toplevel`` and returns its output with the
    trailing line terminator removed.

    Returns:
        str: Path printed by git for the work tree's top-level directory.

    Raises:
        RuntimeError: If git cannot be executed or the current working
            directory is not inside a git work tree. Failing loudly here
            avoids silently continuing with an empty root, which would turn
            every path derived from it into a cwd-relative path.
    """
    import subprocess  # local import so this cell does not depend on another cell's imports

    try:
        completed = subprocess.run(
            ['git', 'rev-parse', '--show-toplevel'],
            capture_output=True,
            text=True,
            check=True,
        )
    except (OSError, subprocess.CalledProcessError) as err:
        raise RuntimeError(
            'Could not determine the git root: run this notebook from inside the repository '
            'and make sure git is installed'
        ) from err

    # Strip only the line terminator; slicing off the last character blindly
    # would eat a real path character if git ever printed no trailing newline.
    return completed.stdout.rstrip('\r\n')

# Resolve the repository root once; the paths below are anchored to it
git_root = get_git_root()

# Directory that holds the per-project queue CSVs: <git_root>/analysis_queue
analysis_queue_dir = os.path.join(git_root, 'analysis_queue')

# Put the repository root first on the import path so repo-local packages can be
# imported (assumes packages such as `modules` live at the repo root - TODO confirm)
sys.path.insert(0, git_root)

In [2]:
from modules import lib_helper_functions as helper
import os
import pandas as pd
from modules import mea_processing_library as mpl

# Scan type forwarded to the helper when searching for recordings
# NOTE(review): this is a single string although the name is plural - confirm the
# helper accepts a bare string as well as a list.
allowed_scan_types = 'AxonTracking'

# Root directories searched for .h5 recordings (commented-out entries are kept for future use)
h5_parent_dirs = [
    # --- data backup locations ---
    '/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/',  # maxtwo computer backup
    '/mnt/disk20tb/',  # old server

    # --- specific data locations ---
    #"/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/B6J_DensityTest_10012024_AR/B6J_DensityTest_10012024_AR", #AR's density testing
]

# Collect the candidate .h5 files found under the parent directories
h5_files = helper.get_list_of_h5_files(h5_parent_dirs, allowed_scan_types=allowed_scan_types)

# Build one record per recording: project, path, size on disk, initial status
file_sizes = []
for h5_file in h5_files:
    # Skip any path containing '/analysis/'
    # (a "'Footprint' not in h5_file" filter also existed and is currently disabled)
    if '/analysis/' in h5_file:
        continue

    details = mpl.extract_recording_details(h5_file)
    project_name = details[0]['projectName']
    file_size = os.path.getsize(h5_file)
    print(f"{h5_file} - {file_size} bytes")
    file_sizes.append({
        'project_name': project_name,
        'file': h5_file,
        'size': file_size,
        'analysis_status': 'Not Started',
    })

# Tabulate the collected records
df_files = pd.DataFrame(file_sizes)

# Process each project separately: merge newly discovered recordings into the
# project's queue CSV while leaving rows that are already queued (including any
# manually edited analysis_status) untouched.

# to_csv does not create missing parent directories, so make sure the queue folder exists
os.makedirs(analysis_queue_dir, exist_ok=True)

if df_files.empty:
    # A scan with no hits yields a column-less DataFrame; grouping it by
    # 'project_name' would raise KeyError, so there is nothing to update.
    print('No matching .h5 files found - analysis queue left unchanged')
    project_groups = []
else:
    project_groups = df_files.groupby('project_name')

for project_name, project_df in project_groups:
    # Define CSV file path for the project
    csv_file = f'{analysis_queue_dir}/{project_name}.csv'

    if os.path.exists(csv_file):
        # Load existing CSV data
        existing_df = pd.read_csv(csv_file)

        # Index the queued rows by file path for O(1) lookups; setdefault keeps the
        # first row when a path is listed more than once in the CSV.
        queued_by_file = {}
        for queued_row in existing_df.to_dict('records'):
            queued_by_file.setdefault(queued_row['file'], queued_row)

        new_rows = []
        for row in project_df.to_dict('records'):
            queued_row = queued_by_file.get(row['file'])
            if queued_row is not None:
                # Already queued: report it and keep its stored status
                print(f"{queued_row['file']} - {queued_row['size']} bytes - Status: {queued_row['analysis_status']}")
            else:
                # Not queued yet: schedule it for appending and remember it so a
                # repeated path in this scan is not added twice
                new_rows.append(row)
                queued_by_file[row['file']] = row
                print(f"{row['file']} - {row['size']} bytes - Status: {row['analysis_status']}")

        if new_rows:
            # DataFrame.append was removed in pandas 2.0; concatenate all new rows once
            existing_df = pd.concat([existing_df, pd.DataFrame(new_rows)], ignore_index=True)

        # Write the updated DataFrame back to CSV
        existing_df.to_csv(csv_file, index=False)
    else:
        # No queue file for this project yet: create it from the scan results
        project_df.to_csv(csv_file, index=False)

    print(f"Project: {project_name} - {len(project_df)} files")
    print('csv_file:', csv_file)

  from .autonotebook import tqdm as notebook_tqdm
2024-11-01 12:22:18,880 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:18,888 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:18,892 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:18,896 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:18,901 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:18,905 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:18,909 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 1

/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/ADNPTherapy_T2_08022024/ADNPTherapy_T2_08022024/240809/M08022/AxonTracking/000031/data.raw.h5 - 16475138226 bytes
/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/ADNPTherapy_T2_08022024/ADNPTherapy_T2_08022024/240813/M08022/AxonTracking/000047/data.raw.h5 - 10883816178 bytes
/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/ADNPTherapy_T2_08022024/ADNPTherapy_T2_08022024/240816/M08022/AxonTracking/000064/data.raw.h5 - 11342205677 bytes
/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/ADNPTherapy_T2_08022024/ADNPTherapy_T2_08022024/240820/M08022/AxonTracking/000082/data.raw.h5 - 7845689690 bytes
/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/ADNPTherapy_T2_08022024/ADNPTherapy_T2_08022024/240823/M08022/AxonTracking/000104/data.raw.h5 - 7045938482 bytes
/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/ADNPTherapy_T2_08022024/ADNPTherapy_T2_08022024/240827/M08022/AxonTracki

2024-11-01 12:22:19,090 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:19,093 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:19,099 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:19,102 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:19,108 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:19,112 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:19,117 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:19,122 - INFO - Extracting recording details 

/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/B6J_DensityTest_10012024_AR/B6J_DensityTest_10012024_AR/241031/M08029/AxonTracking/000159/data.raw.h5 - 16401229923 bytes
/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/B6J_DensityTest_10012024_AR/B6J_DensityTest_10012024_AR/241031/M08029/AxonTracking/000160/data.raw.h5 - 543539459 bytes
/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/B6J_Stimulations_07102024_Xi_M_Christina_J_Xula/B6J_Stimulations_07102024_Xi_M_Christina_J_Xula/240718/M08020/AxonTracking/000011/data.raw.h5 - 543337 bytes
/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/B6J_Stimulations_07102024_Xi_M_Christina_J_Xula/B6J_Stimulations_07102024_Xi_M_Christina_J_Xula/240718/M08020/AxonTracking/000012/data.raw.h5 - 4695723955 bytes
/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/CDKL5-E6D_T1_C1_05152024/CDKL5-E6D_T1_C1_05152024/240520/M08018/AxonTracking/000014/data.raw.h5 - 2531179488 bytes
/mnt/ben-shalom_nas/rbsma

2024-11-01 12:22:19,292 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:19,296 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:19,301 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:19,305 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:19,310 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:19,315 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:19,320 - INFO - Extracting recording details from h5 directories: - mea_processing_library.extract_recording_details
2024-11-01 12:22:19,326 - INFO - Extracting recording details 

/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/KCNT1_T4_C1_04122024/KCNT1_T4_C1_04122024/240423/M08034/AxonTracking/000043/data.raw.h5 - 14754691683 bytes
/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/KCNT1_T4_C1_04122024/KCNT1_T4_C1_04122024/240426/M08034/AxonTracking/000055/data.raw.h5 - 19443957906 bytes
/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/KCNT1_T4_C1_04122024/KCNT1_T4_C1_04122024/240430/M08034/AxonTracking/000067/data.raw.h5 - 29758355893 bytes
/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/KCNT1_T4_C1_04122024/KCNT1_T4_C1_04122024/240503/M08034/AxonTracking/000082/data.raw.h5 - 27127693817 bytes
/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/KCNT1_T4_C1_04122024/KCNT1_T4_C1_04122024/240507/M08034/AxonTracking/000094/data.raw.h5 - 23897765260 bytes
/mnt/ben-shalom_nas/rbsmaxtwo/media/rbs-maxtwo/harddisk20tb/KCNT1_T4_C1_04122024/KCNT1_T4_C1_04122024/240510/M08034/AxonTracking/000106/data.raw.h5 - 2297719079