In [1]:
import os
import re
import pandas as pd
import numpy as np

# Root directory that the cells below pass to find_csv_files() to locate the grid CSVs.
directory_path = '/Volumes/group/LiDAR/LidarProcessing/changedetection_m3c2/grid_output/delmar_grid_run_20240927' # second run


In [6]:
def find_csv_files(directory, pattern="grid_1m.csv", erosion=True):
    """
    Recursively collect the CSV files under `directory` whose names end with `pattern`.

    Parameters:
    directory (str): Root directory to walk (subdirectories are included).
    pattern (str): Suffix that the filename must end with.
    erosion (bool): If True, keep only filenames that do NOT contain 'acc'.
                    If False, keep only filenames that DO contain 'acc'.

    Returns:
    list of str: Full file paths sorted by the first 8-digit run (YYYYMMDD) found in
    each filename. Files that share the same date are ordered by path, so the result
    does not depend on the order in which the filesystem lists directory entries.
    Files whose name contains no 8-digit run are skipped. The list is empty when
    nothing matches.
    """
    # Regex pattern to extract date from the filename (first 8 consecutive digits)
    date_pattern = re.compile(r"(\d{8})")

    dated_paths = []

    # Walk through the directory and subdirectories
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if not file.endswith(pattern):
                continue

            # erosion=True wants names without 'acc'; erosion=False wants names with it.
            # So a file is rejected exactly when the two flags agree.
            if ("acc" in file) == bool(erosion):
                continue

            match = date_pattern.search(file)
            if match is None:
                # No date in the filename: it cannot be placed in the time series
                continue

            dated_paths.append((match.group(1), os.path.join(root, file)))

    # Tuples sort by date first, then by path as a deterministic tie-breaker
    dated_paths.sort()

    # Return only the paths, now sorted by date
    return [path for _date, path in dated_paths]


# ------------------------------------------------------------------------------------------------------------------ # 

def load_csv_to_numpy(file_list):
    """
    Given a list of CSV file paths, load them into a 3D NumPy array, ignoring row and column labels.
    Each CSV is expected to have the same dimensions once the labels are dropped.

    Parameters:
    file_list (list of str): List of file paths to the CSV files.

    Returns:
    numpy.ndarray: A 3D NumPy array of shape (n_files, n_rows, n_cols); entry i along
    axis 0 is the 2D grid read from file_list[i].

    Raises:
    ValueError: If file_list is empty, or if a CSV's grid shape differs from the
    first file's shape (the message names the offending file).
    """
    grid_list = []

    for file in file_list:
        # The first row holds the column labels (header=0) and the first column holds
        # the row labels (index_col=0); neither ends up in the returned values.
        grid = pd.read_csv(file, index_col=0, header=0).to_numpy()

        # Fail on the first mismatching file instead of after loading everything,
        # and say which file it was.
        if grid_list and grid.shape != grid_list[0].shape:
            raise ValueError(
                f"{file} has shape {grid.shape}, expected {grid_list[0].shape} "
                "(all CSV grids must have the same dimensions)"
            )

        grid_list.append(grid)

    # np.stack would fail here with an unhelpful message; an empty list usually
    # means the file search found nothing.
    if not grid_list:
        raise ValueError("file_list is empty: there are no CSV grids to stack")

    # Stack all the 2D grids into a 3D NumPy array
    return np.stack(grid_list, axis=0)

In [7]:
# get the cleaned csv files
# erosion=False keeps only filenames that contain 'acc'; each list comes back
# ordered by the first 8-digit date found in the filename.
acc_grid_files_10x10cm_cleaned = find_csv_files(directory_path, pattern="acc_grid_cleaned.csv", erosion=False)
acc_cluster_files_10x10cm_cleaned = find_csv_files(directory_path, pattern="acc_clusters_cleaned.csv", erosion=False)

In [8]:
# load the clusters to numpy arrays
# One 2D grid per file, stacked along axis 0 in the same order as the file list.
acc_clusters_10x10cm_cleaned = load_csv_to_numpy(acc_cluster_files_10x10cm_cleaned)

In [10]:
# load the grids to numpy arrays
# One 2D grid per file, stacked along axis 0 in the same order as the file list.
acc_grids_10x10cm_cleaned = load_csv_to_numpy(acc_grid_files_10x10cm_cleaned)

In [11]:
# crop them both and save
# Both stacks are cropped to the same window along axis 1 (indices 4745 up to,
# but not including, 27595); axes 0 and 2 are kept whole.
acc_cropped_grid_cleaned = acc_grids_10x10cm_cleaned[:, 4745:27595, :]
acc_cropped_clusters_cleaned = acc_clusters_10x10cm_cleaned[:, 4745:27595, :]

# NOTE(review): the first two archives store the file-path lists returned by
# find_csv_files (acc_*_files_*), not the uncropped arrays (acc_grids_* / acc_clusters_*).
# The archive names suggest this is intended — confirm.
np.savez("/Users/cjmack/Documents/Papers/Cliffs/Code/local_grids/cleaned/acc_grid_files_cleaned.npz", data=acc_grid_files_10x10cm_cleaned)
np.savez("/Users/cjmack/Documents/Papers/Cliffs/Code/local_grids/cleaned/acc_cluster_files_cleaned.npz", data=acc_cluster_files_10x10cm_cleaned)
np.savez("/Users/cjmack/Documents/Papers/Cliffs/Code/local_grids/cleaned/acc_cropped_grid_cleaned.npz", data=acc_cropped_grid_cleaned)
np.savez("/Users/cjmack/Documents/Papers/Cliffs/Code/local_grids/cleaned/acc_cropped_clusters_cleaned.npz", data=acc_cropped_clusters_cleaned)

In [None]:
# repeat for the erosion grids
# erosion=True keeps only filenames that do NOT contain 'acc'; each list comes
# back ordered by the first 8-digit date found in the filename.
ero_grid_files_10x10cm_cleaned = find_csv_files(directory_path, pattern="ero_grid_cleaned.csv", erosion=True)
ero_cluster_files_10x10cm_cleaned = find_csv_files(directory_path, pattern="ero_clusters_cleaned.csv", erosion=True)

In [14]:
# Load the erosion cluster CSVs: one 2D grid per file, stacked along axis 0
# in the same order as the file list.
ero_clusters_10x10cm_cleaned = load_csv_to_numpy(ero_cluster_files_10x10cm_cleaned)

In [15]:
# Load the erosion grid CSVs: one 2D grid per file, stacked along axis 0
# in the same order as the file list.
ero_grids_10x10cm_cleaned = load_csv_to_numpy(ero_grid_files_10x10cm_cleaned)

In [16]:
# Crop both erosion stacks to the same window along axis 1 (indices 4745 up to,
# but not including, 27595); axes 0 and 2 are kept whole. The accumulation
# arrays are cropped with the same window.
ero_cropped_grid_cleaned = ero_grids_10x10cm_cleaned[:, 4745:27595, :]
ero_cropped_clusters_cleaned = ero_clusters_10x10cm_cleaned[:, 4745:27595, :]

In [17]:
# Write the erosion results to .npz archives, each under the key "data".
# NOTE(review): the first two archives store the file-path lists returned by
# find_csv_files (ero_*_files_*), not the uncropped arrays (ero_grids_* / ero_clusters_*).
# The archive names suggest this is intended — confirm.
np.savez("/Users/cjmack/Documents/Papers/Cliffs/Code/local_grids/cleaned/ero_grid_files_cleaned.npz", data=ero_grid_files_10x10cm_cleaned)
np.savez("/Users/cjmack/Documents/Papers/Cliffs/Code/local_grids/cleaned/ero_cluster_files_cleaned.npz", data=ero_cluster_files_10x10cm_cleaned)
np.savez("/Users/cjmack/Documents/Papers/Cliffs/Code/local_grids/cleaned/ero_cropped_grid_cleaned.npz", data=ero_cropped_grid_cleaned)
np.savez("/Users/cjmack/Documents/Papers/Cliffs/Code/local_grids/cleaned/ero_cropped_clusters_cleaned.npz", data=ero_cropped_clusters_cleaned)
