In [1]:
import numpy as np
import polars as pl
from sklearn.decomposition import PCA
from sklearn.neighbors import NearestNeighbors
from featureHelpers import process_dataframe
import os
import glob
from tqdm.notebook import tqdm
import gc

In [4]:
# Batch feature extraction: run `process_dataframe` on every CSV found under
# `base_path` and write one `<stem>_features.csv` per input into `output_base`.
# Inputs whose output file already exists are skipped, so the cell can be
# re-run to resume an interrupted batch.

# NOTE(review): machine-specific absolute paths -- adjust for your environment.
base_path = r'C:\Users\USER\Downloads\adoquin'
output_base = r'C:\Users\USER\PEF\Terrain-Traversability-Analysis\adoquin_features'
class_label = 'adoquin'

# Create output directory
os.makedirs(output_base, exist_ok=True)

# Get all CSV files (recursively). glob returns matches in arbitrary order,
# so sort them to make the processing order and the log reproducible.
csv_files = sorted(glob.glob(os.path.join(base_path, '**', '*.csv'), recursive=True))

print("="*80)
print(f"Found {len(csv_files)} CSV files")
print("="*80)

# Track progress and errors
successful = 0
failed = 0
errors = []

# Output path -> input file that claimed it during this run. The search is
# recursive but outputs are named from the basename only, so two inputs in
# different subfolders can map to the same output file.
claimed_outputs = {}

for csv_file in tqdm(csv_files, desc="Processing files", unit="file"):
    filename = os.path.basename(csv_file)

    # Create output filename from the stem: only the real extension is
    # dropped, whatever its case and wherever else '.csv' appears in the name.
    stem, _ = os.path.splitext(filename)
    output_filename = f"{stem}_features.csv"
    output_path = os.path.join(output_base, output_filename)

    # Report basename collisions instead of letting the second input be
    # silently treated as "already processed" by the existence check below.
    output_key = os.path.normcase(os.path.abspath(output_path))
    if output_key in claimed_outputs:
        failed += 1
        error_msg = (
            f"{filename}: output {output_filename} is already used by "
            f"{claimed_outputs[output_key]} (source: {csv_file})"
        )
        errors.append(error_msg)
        print(f"Error: {error_msg}")
        continue
    claimed_outputs[output_key] = csv_file

    # Skip if already processed (counted as successful, as a resumed run
    # should report the whole batch as done).
    if os.path.exists(output_path):
        print(f"Skipping (already exists): {filename}")
        successful += 1
        continue

    print(f"Processing: {filename}")

    completed = False
    try:
        # The returned frame is not used here; leaving it unbound lets it be
        # released as soon as the call returns.
        process_dataframe(csv_file, output_path, class_label)
        completed = True
        successful += 1
        print(f"Completed: {filename}")

    except Exception as e:
        failed += 1
        error_msg = f"{filename}: {str(e)}"
        errors.append(error_msg)
        print(f"Error: {e}")

    finally:
        # output_path did not exist before the call, so a file present after
        # an unfinished call (exception or KeyboardInterrupt) was left by that
        # call. Remove it so the existence check above does not mistake it for
        # a finished result on the next run.
        # NOTE(review): presumes process_dataframe writes to output_path -- confirm.
        if not completed and os.path.exists(output_path):
            try:
                os.remove(output_path)
            except OSError as cleanup_error:
                print(f"Warning: could not remove partial output {output_path}: {cleanup_error}")

        # Free memory after each file, on both the success and failure paths
        gc.collect()

# Summary
print("\n" + "="*80)
print("All files processed!")
print(f"Successful: {successful}/{len(csv_files)}")
print(f"Failed: {failed}/{len(csv_files)}")

if errors:
    print("\nFailed files:")
    for error in errors:
        print(f"  - {error}")

print("="*80)

# Save error log if any
if errors:
    error_log_path = os.path.join(output_base, 'processing_errors.txt')
    # Explicit encoding so file names or messages with non-ASCII characters
    # are written the same way regardless of the platform default.
    with open(error_log_path, 'w', encoding='utf-8') as f:
        f.write("Processing Errors\n")
        f.write("="*80 + "\n\n")
        for error in errors:
            f.write(f"{error}\n")
    print(f"\nError log saved to: {error_log_path}")

Found 100 CSV files


Processing files:   0%|          | 0/100 [00:00<?, ?file/s]

Skipping (already exists): adoquin_atras_de_rectoria_frame_15.csv
Skipping (already exists): adoquin_atras_de_rectoria_frame_2.csv
Skipping (already exists): adoquin_atras_de_rectoria_frame_33.csv
Skipping (already exists): adoquin_atras_de_rectoria_frame_46.csv
Skipping (already exists): adoquin_atras_de_rectoria_frame_48.csv
Skipping (already exists): adoquin_atras_de_rectoria_frame_54.csv
Skipping (already exists): adoquin_atras_de_rectoria_frame_6.csv
Skipping (already exists): adoquin_atras_de_rectoria_frame_61.csv
Skipping (already exists): adoquin_atras_de_rectoria_frame_70.csv
Skipping (already exists): adoquin_atras_de_rectoria_frame_72.csv
Skipping (already exists): adoquin_cancha_de_futbol_10.csv
Processing: adoquin_cancha_de_futbol_18.csv
Processing 20,831 points with label 'adoquin'...


  0%|          | 0/20831 [00:00<?, ?it/s]

KeyboardInterrupt: 