In [74]:
from pathlib import Path
import pandas as pd
import os

def load_dataframes_from_folder(folder_path, extension=".csv", encoding="utf-8", use_full_path=False,
                                fallback_encodings=()):
    """
    Recursively loads all files with the given extension from a folder into pandas DataFrames.

    Args:
        folder_path (str | os.PathLike): Folder to search. May be a Windows-style path
            (``C:\\Users\\...``); when not running on Windows, a leading drive letter is
            rewritten to the WSL mount point (``/mnt/c/...``). Native POSIX paths and
            relative paths are used unchanged.
        extension (str): File-name suffix to search for (e.g., ".csv", ".txt").
        encoding (str): Encoding tried first when reading each file.
        use_full_path (bool): If True, keys will be full paths; otherwise, just filenames.
        fallback_encodings (iterable of str): Extra encodings tried, in order, only when a
            file raises UnicodeDecodeError with ``encoding``. Empty by default, i.e. no fallback.

    Returns:
        dict: Dictionary of DataFrames keyed by filename or full path. Files are processed
        shallowest-first and then in sorted path order, so the dictionary order is
        deterministic. If two files share a filename (and ``use_full_path`` is False) the
        later one is stored under its path relative to ``folder_path`` (with "/" separators)
        instead of overwriting the first. Files that cannot be read are reported and skipped.
    """
    folder_path = os.fspath(folder_path)

    # Convert a Windows drive path to WSL format if needed
    if os.name != "nt":  # If running in WSL/Linux
        folder_path = folder_path.replace("\\", "/")
        # Only rewrite paths that really start with "<letter>:"; anything else
        # (e.g. "/home/me/data", "/mnt/c/...", "data/raw") is already usable as-is.
        has_drive = (
            len(folder_path) >= 2
            and folder_path[1] == ":"
            and "a" <= folder_path[0].lower() <= "z"
            and (len(folder_path) == 2 or folder_path[2] == "/")
        )
        if has_drive:
            folder_path = f"/mnt/{folder_path[0].lower()}{folder_path[2:]}"

    folder = Path(folder_path)
    if not folder.is_dir():
        print(f"⚠️ Folder not found: {folder_path}")
        return {}

    # Recursively find all matching files (directories whose name matches are ignored).
    # Shallow files come first so that they keep their bare filename as key when a
    # deeper file has the same name.
    files = sorted(
        (path for path in folder.rglob(f"*{extension}") if path.is_file()),
        key=lambda path: (len(path.parts), path),
    )

    if not files:
        print(f"⚠️ No files with extension '{extension}' found in {folder_path}")
        return {}

    # Primary encoding first, then any distinct fallbacks in the order given
    encodings = [encoding]
    for candidate in (fallback_encodings or ()):
        if candidate not in encodings:
            encodings.append(candidate)

    # Load each file into a DataFrame
    dataframes = {}
    for file in files:
        reader = pd.read_csv if file.suffix.lower() == ".csv" else pd.read_table

        df = None
        error = None
        for candidate in encodings:
            try:
                df = reader(file, encoding=candidate)
            except UnicodeDecodeError as e:
                # Wrong encoding for this file: remember the error and try the next one
                error = e
                continue
            except Exception as e:
                # Any other problem will not be cured by a different encoding
                error = e
            else:
                if candidate != encoding:
                    print(f"ℹ️ Loaded {file} with fallback encoding '{candidate}'")
            break

        if df is None:
            print(f"❌ Failed to load {file}: {error}")
            continue

        if use_full_path:
            key = str(file)
        else:
            key = file.name
            if key in dataframes:
                # Same filename already loaded from another subfolder: keep both
                key = file.relative_to(folder).as_posix()
                print(f"⚠️ Duplicate filename '{file.name}'; stored under key '{key}'")
        dataframes[key] = df

    return dataframes


In [75]:
# Source folder, written as a Windows path.
folder = r"C:\Users\bhuns\OneDrive\___Health Data\__DD studies\InBody CSV\ib97"
# Load every .csv found under the folder (recursively) into a dict of DataFrames keyed by filename.
# NOTE(review): the recorded run reports utf-8 decode errors (byte 0xb2) for these files, so the
# exports are presumably not UTF-8 — confirm the real encoding and pass it via `encoding=`.
dfs = load_dataframes_from_folder(folder, extension=".csv")


❌ Failed to load /mnt/c/Users/bhuns/OneDrive/___Health Data/__DD studies/InBody CSV/ib97/091725-1_20250918074512.csv: 'utf-8' codec can't decode byte 0xb2 in position 15: invalid start byte
❌ Failed to load /mnt/c/Users/bhuns/OneDrive/___Health Data/__DD studies/InBody CSV/ib97/091725-1_20250918080917.csv: 'utf-8' codec can't decode byte 0xb2 in position 15: invalid start byte
❌ Failed to load /mnt/c/Users/bhuns/OneDrive/___Health Data/__DD studies/InBody CSV/ib97/091725-1_20250918224425.csv: 'utf-8' codec can't decode byte 0xb2 in position 15: invalid start byte
❌ Failed to load /mnt/c/Users/bhuns/OneDrive/___Health Data/__DD studies/InBody CSV/ib97/091725-1_20250919065043.csv: 'utf-8' codec can't decode byte 0xb2 in position 15: invalid start byte
❌ Failed to load /mnt/c/Users/bhuns/OneDrive/___Health Data/__DD studies/InBody CSV/ib97/091725-1_20250922081410.csv: 'utf-8' codec can't decode byte 0xb2 in position 15: invalid start byte
❌ Failed to load /mnt/c/Users/bhuns/OneDrive/___He

In [76]:
# Look up one file's DataFrame by its filename key.
# NOTE(review): the recorded run raises KeyError here — `dfs` has no such key.
dfs['506352275 2025.10.06 04.58.44-2025.10.06 08.27.15.csv']

KeyError: '506352275 2025.10.06 04.58.44-2025.10.06 08.27.15.csv'

In [77]:
# Display the dict of loaded DataFrames (the recorded output is an empty dict: nothing was loaded).
dfs

{}

In [None]:
def add_serial_to_dfs(dfs):
    """
    Wraps every DataFrame of a dictionary in a record that also carries a serial number.

    Args:
        dfs (dict): Dictionary with keys as filenames and values as DataFrames.

    Returns:
        dict: Dictionary with the same keys, in the same order, whose values are dicts with:
              - 'name': the original key (filename)
              - 'df': the DataFrame stored under that key
              - 'serial': zero-based position of the entry in the iteration order of ``dfs``
    """
    return {
        filename: {"name": filename, "df": frame, "serial": position}
        for position, (filename, frame) in enumerate(dfs.items())
    }


In [None]:
# Wrap each loaded DataFrame in a {'name', 'df', 'serial'} record, keyed by filename.
dfs_enriched = add_serial_to_dfs(dfs)

In [None]:
# Pick the DataFrame whose record has serial 2 (serials start at 0, so this is the
# third entry); next() raises StopIteration when no record matches.
df2 = next(
    record["df"]
    for record in dfs_enriched.values()
    if record["serial"] == 2
)

In [None]:
# Display the DataFrame bound to `df2`.
df2

In [None]:
# Display the enriched mapping (filename -> {'name', 'df', 'serial'}).
dfs_enriched

In [None]:
# Optional reload step, left disabled.
# NOTE(review): `folder_path` is not defined in the visible cells (the path variable is `folder`).
#dfs = load_dataframes_from_folder(folder_path, extension=".csv")
dfs_enriched = add_serial_to_dfs(dfs)

# Example (disabled): access a DataFrame by filename
#dfs_enriched["data1.csv"]["df"]

# Example (disabled): access a DataFrame by serial; raises StopIteration if no entry has serial 2
#df2 = next(d["df"] for d in dfs_enriched.values() if d["serial"] == 2)


In [None]:
 # Select the DataFrame of the record with serial 2; next() raises StopIteration
 # when no record matches.
 df2 = next(
     record["df"]
     for record in dfs_enriched.values()
     if record["serial"] == 2
 )

In [None]:
# Display the DataFrame bound to `df2`.
df2

In [None]:
# Locate the full record (name, df, serial) whose serial is 2;
# next() raises StopIteration when there is no such record.
entry = next(
    record
    for record in dfs_enriched.values()
    if record["serial"] == 2
)

# Report which file that record came from
name_of_df2 = entry["name"]
print(name_of_df2)


In [None]:
# Direct lookup of one file's DataFrame by its filename key; raises KeyError if `dfs` has no such key.
dfs["506352275 2025.10.10 16.44.27-2025.10.10 18.46.13.csv"]