In [1]:
%%capture
%load_ext autoreload
%autoreload 2
import os
import shutil
from natsort import natsorted
import cv2
import data_organization2_functions as dataOrg
from dask.distributed import Client, LocalCluster
import pandas as pd

## 1. Set directory and save parameters:

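Set `parent_folders` to the list of folders to process. Each parent folder should contain one or more subfolders, and each subfolder should contain the imaging and behavior data folders. The data folders must be named consistently across all subfolders; list those names in `data_folder_names` (such as 'behav' and 'img').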

<div class="alert alert-info">
<strong>folder structure</strong>

```
Mouse1  [parent_folder]
└─────11_30_00   [subfolder within parent_folder]
│       │   behav  [data_folder_names]
│       │   img    [data_folder_names]
│    
└─────11_45_00   [subfolder within parent_folder]
│       │   behav  [data_folder_names]
│       │   img    [data_folder_names]

```
</div>

In [None]:
# Root folder whose immediate subdirectories are the parent folders to process.
# To process a hand-picked set instead, assign the list directly, e.g.
#   parent_folders = ['Z:\\CHRYSA\\WMT_UCLA\\R2_DAT2']
root_folder = 'Z:\\CHRYSA\\WMT_UCLA\\LRD'

# Build the full path of every entry in root_folder and keep directories only.
candidate_paths = (os.path.join(root_folder, entry) for entry in os.listdir(root_folder))
all_subfolders = list(filter(os.path.isdir, candidate_paths))

# Every subfolder of the root is treated as a parent folder.
parent_folders = all_subfolders

# Names of the data folders looked up inside the parent folders.
data_folder_names = ['behav', 'img']

# Name of the folder, created inside each parent folder, that collects the consolidated files.
output_folder_name = "all"

# Extension of the files that are merged in section 4.
filetype = 'avi'

# If saveas_name isn't right, try an alternate folder-name extraction to get a
# different part of the path name.
saveas_name = dataOrg.get_basenames_of_folders_within_parent_folder_chrysa(parent_folders)

print(saveas_name)

## 2. Rename behavior and imaging files: 

Subfolders within `parent_folders` will be categorized and given an index. Within each subfolder, ensure consistent naming of subfolders using `data_folder_names` (such as 'behav' and 'img') where the files requiring renaming are stored. The renaming process will be based on the assigned subfolder index.

In [None]:
# Rename the files found in the `data_folder_names` folders of every parent folder.
# NOTE(review): the helper is defined in data_organization2_functions and is not
# visible here; per the section text it renames files based on the subfolder index.
# Presumably this modifies files on disk in place — confirm it is safe to re-run
# on files that were already renamed.
dataOrg.rename_files_with_prefix(parent_folders, data_folder_names)

## 3. Consolidate re-named files into `output_folder`:

In [None]:
# Same values as assigned in the section 1 configuration cell.
data_folder_names = ['behav','img']
output_folder_name = "all"

# The module-level implementation is disabled; the notebook-local
# consolidate_files() defined in the next cell is called instead.
#dataOrg.consolidate_files(parent_folders, data_folder_names, output_folder_name)

In [None]:
def consolidate_files(parent_folders, subfolder_names, output_folder_name):
    """Copy the contents of every matching data folder into one output folder.

    For each folder in ``parent_folders`` the directory tree below it is
    walked. Whenever a directory named like one of ``subfolder_names`` is
    found, its entries are copied into
    ``<parent_folder>/<output_folder_name>/<subfolder_name>``.

    Directories named ``output_folder_name`` (and the output folder itself)
    are pruned from the walk, so files that were already consolidated are
    never used as a source. The comparison is done on directory names, not on
    a substring of the full path, so a parent path that merely contains the
    output name (e.g. "Wally" with output name "all") is still processed.

    Args:
        parent_folders: Iterable of paths of the parent folders to process.
        subfolder_names: Names of the data folders to collect, e.g.
            ``['behav', 'img']``.
        output_folder_name: Name of the folder created inside each parent
            folder that receives the copies.

    Returns:
        None. Files are copied on disk; an entry that cannot be copied is
        reported with ``print`` and skipped.
    """
    skip_name = os.path.normcase(output_folder_name)

    for parent_folder in parent_folders:
        output_folder = os.path.join(parent_folder, output_folder_name)
        output_root = os.path.normcase(os.path.abspath(output_folder))

        # Create the destination tree once, before walking, instead of
        # re-checking it for every visited directory.
        os.makedirs(output_folder, exist_ok=True)
        output_subfolders = {}
        for subfolder_name in subfolder_names:
            output_subfolder = os.path.join(output_folder, subfolder_name)
            os.makedirs(output_subfolder, exist_ok=True)
            output_subfolders[subfolder_name] = output_subfolder

        for root, dirs, _files in os.walk(parent_folder):
            # Prune in place so os.walk neither reports nor descends into
            # output folders. The full-path test also covers an
            # output_folder_name that contains a path separator.
            dirs[:] = [
                name for name in dirs
                if os.path.normcase(name) != skip_name
                and os.path.normcase(os.path.abspath(os.path.join(root, name))) != output_root
            ]

            for subfolder_name in subfolder_names:
                if subfolder_name not in dirs:
                    continue

                subfolder_path = os.path.join(root, subfolder_name)
                output_subfolder = output_subfolders[subfolder_name]

                # Copy every entry of the data folder; a failure on one entry
                # (e.g. a nested directory) must not abort the others.
                for filename in os.listdir(subfolder_path):
                    file_path = os.path.join(subfolder_path, filename)
                    try:
                        shutil.copy(file_path, output_subfolder)
                    except Exception as e:
                        print(f"Error copying {file_path}: {e}")

# Same values as assigned in the section 1 configuration cell.
data_folder_names = ['behav','img']
output_folder_name = "all"

# Run the notebook-local consolidate_files() defined earlier in this cell
# on the parent folders chosen in the section 1 configuration cell.
consolidate_files(parent_folders, data_folder_names, output_folder_name)

## 4. Combine (merge) `filetype` files, save the resulting concatenated file as `saveas_name`, and remove the original files that were used to create the merged file. (Optional, recommended) merge corresponding timestamps

**Option to merge corresponding timestamps and position (e.g., DeepLabCut) data**

In [2]:
# Number of Dask workers: read from the DATA_ORG environment variable, defaulting to 4.
n_workers = int(os.environ.get("DATA_ORG", 4))

# Settings for the local Dask cluster.
# NOTE(review): Dask documents memory_limit as a per-worker limit, so the total
# may reach n_workers x 30GB — confirm the machine has that much memory.
cluster_options = dict(
    n_workers=n_workers,
    memory_limit="30GB",
    resources={"MEM": 1},
    threads_per_worker=2,
    dashboard_address=":8787",
)
cluster = LocalCluster(**cluster_options)

# Client connected to the cluster created above.
client = Client(cluster)

In [None]:
# Merge the `filetype` files of one consolidated folder and, depending on the
# flags below, the matching timestamp and position files.
#
# Batch alternative (disabled): process the 'all/behav' folder of every
# subfolder of a root folder, using each subfolder's basename as saveas_name.
#
#root_folder = 'Z:\\CHRYSA\\WMT_UCLA'
#all_subfolders = [os.path.join(root_folder, folder) for folder in os.listdir(root_folder) if os.path.isdir(os.path.join(root_folder, folder))]
#
#for folder_path in all_subfolders:
#    saveas_name = os.path.basename(folder_path)
#    folders_with_data_to_merge = os.path.join(folder_path, 'all', 'behav')  # Adjust the path as needed
#
#    dataOrg.concatenate_videos(folders_with_data_to_merge, filetype, saveas_name)
#
#    if merge_timestamps:
#        dataOrg.concatenate_timestamp_files(folders_with_data_to_merge, file_extension='_timeStamps.csv')
#
#    if merge_position_data:
#        dataOrg.concatenate_h5_files(folders_with_data_to_merge, file_extension='.h5')

# Folder holding the consolidated files to merge.
folders_with_data_to_merge = 'Z:\\CHRYSA\\WMT_UCLA\\B2\\B2_DAT2\\all\\behav'

filetype = 'avi'
saveas_name = ['B2_DAT2']
merge_timestamps = True       # set to False to skip merging the timestamp files
merge_position_data = False   # position data, e.g. from DeepLabCut

dataOrg.concatenate_videos(folders_with_data_to_merge, filetype, saveas_name)

# The flag decides whether timestamps are merged; with the call commented out
# the flag had no effect.
# NOTE(review): section 5 reads a 'merged_timestamps' column from timeStamps.csv,
# presumably written by this helper — confirm.
if merge_timestamps:
    dataOrg.concatenate_timestamp_files(folders_with_data_to_merge, file_extension='_timeStamps.csv')

if merge_position_data:
    dataOrg.concatenate_h5_files(folders_with_data_to_merge, file_extension='.h5')



['1_0.avi', '1_1.avi', '1_2.avi', '1_3.avi', '1_4.avi', '1_5.avi', '1_6.avi', '1_7.avi', '1_8.avi', '1_9.avi', '1_10.avi', '1_11.avi', '2_0.avi', '2_1.avi', '2_2.avi', '2_3.avi', '2_4.avi', '2_5.avi', '2_6.avi', '2_7.avi', '2_8.avi', '2_9.avi', '2_10.avi', '2_11.avi', '2_12.avi', '2_13.avi', '3_0.avi', '3_1.avi', '3_2.avi', '3_3.avi', '3_4.avi', '3_5.avi', '3_6.avi', '3_7.avi', '3_8.avi', '3_9.avi', '3_10.avi', '3_11.avi', '3_12.avi', '3_13.avi', '3_14.avi']



## 5. Downsample timeStamps.csv for LocationTracking alignment:


In [None]:
# Keep every DOWNSAMPLE_FACTOR-th row of the timestamp table.
DOWNSAMPLE_FACTOR = 2

# Input CSV and the path the downsampled copy is written to.
file_path = 'Z:\\CHRYSA\\WMT_UCLA\\R2_DAT1\\all\\behav\\timeStamps.csv'
downsampled_file_path = 'Z:\\CHRYSA\\WMT_UCLA\\R2_DAT1\\all\\behav\\timeStamps_downsampled.csv'

# Read the whole timestamp table.
df = pd.read_csv(file_path)

# Select the 'merged_timestamps' column; this raises KeyError if the column is
# missing. No type conversion is performed and the variable is not used below.
timestamps = df['merged_timestamps']

# Positional slice with a step: rows 0, DOWNSAMPLE_FACTOR, 2*DOWNSAMPLE_FACTOR, ...
df_downsampled = df.iloc[::DOWNSAMPLE_FACTOR]

# Write the reduced table without the index column.
df_downsampled.to_csv(downsampled_file_path, index=False)

## 6. (Optional) Move data:

In [None]:
# Optional step: fill in both paths to move the data folders.
input_directory = ''
output_directory = ''

# Call the helper only when both paths are set, so running the whole notebook
# with the placeholders left empty does not invoke a move on empty paths.
if input_directory and output_directory:
    dataOrg.move_data_folders(input_directory, output_directory)
else:
    print("Skipping move_data_folders: set input_directory and output_directory first.")
