In [None]:
import os
import re

def remove_any_parentheses(base_path):
    # Matches: optional space + (anything) 
    pattern = re.compile(r"\s*\([^)]*\)")

    for dirpath, _, filenames in os.walk(base_path):
        for filename in filenames:
            if pattern.search(filename):
                old_path = os.path.join(dirpath, filename)
                new_filename = pattern.sub('', filename).strip()
                new_path = os.path.join(dirpath, new_filename)

                if not os.path.exists(new_path):
                    os.rename(old_path, new_path)
                    print(f"✅ Renamed: {old_path} → {new_path}")
                else:
                    print(f"⚠️ Skipped (target exists): {old_path} → {new_path}")

    print("\n🎉 All matching files processed.")

# =======================
# Run
# =======================
if __name__ == "__main__":
    base_path = r"C:\Users\hp\Downloads\DS_AF - Copy\DS_AF"
    if os.path.exists(base_path):
        print(f"\n🔄 Renaming files in: {base_path}")
        remove_any_parentheses(base_path)
        print("\n✅ All done.")
    else:
        print("❌ The path does not exist.")


# Create log for duplicate

In [9]:
import os
import re

def find_duplicate_named_files(base_path):
    duplicate_log = r"E:\DS_RAW_COMPLETE\logs\5-actual_duplicates_after_deletion_log.txt"
    orphan_log = r"E:\DS_RAW_COMPLETE\logs\orphan_duplicate_named_log.txt"

    original_files = set()
    duplicate_named_files = []

    # Walk and collect
    for dirpath, _, filenames in os.walk(base_path):
        for filename in filenames:
            full_path = os.path.join(dirpath, filename)
            original_files.add(full_path)

            # Match files with suffix like (1), (2), (3)
            match = re.search(r"(.*?)(?: ?\((\d+)\))(\.\w+)$", filename)
            if match:
                duplicate_named_files.append((match.group(1) + match.group(3), full_path))

    # Compare with originals
    with open(duplicate_log, 'w') as dlog, open(orphan_log, 'w') as olog:
        for original_name, dup_full_path in duplicate_named_files:
            original_full_path = os.path.join(os.path.dirname(dup_full_path), original_name)
            if os.path.exists(original_full_path):
                dlog.write(f"{dup_full_path}\n")
            else:
                olog.write(f"{dup_full_path}\n")

    print("✅ Scanning complete.")
    print(f"📄 Actual duplicates logged in: {duplicate_log}")
    print(f"📄 Orphan-like duplicate names logged in: {orphan_log}")

# =======================
# Run
# =======================
if __name__ == "__main__":
    base_path = r"E:\DS_RAW_COMPLETE\DS_Structured"
    if os.path.exists(base_path):
        print(f"🔍 Checking for duplicate-named files in: {base_path}")
        find_duplicate_named_files(base_path)
    else:
        print("❌ The path does not exist.")


🔍 Checking for duplicate-named files in: E:\DS_RAW_COMPLETE\DS_Structured
✅ Scanning complete.
📄 Actual duplicates logged in: E:\DS_RAW_COMPLETE\logs\5-actual_duplicates_after_deletion_log.txt
📄 Orphan-like duplicate names logged in: E:\DS_RAW_COMPLETE\logs\orphan_duplicate_named_log.txt


# Remove duplicate Files and Keep their original copies

In [7]:
import os
import re

def remove_any_parentheses(base_path):
    # Matches: optional space + (anything) 
    pattern = re.compile(r"\s*\([^)]*\)")

    for dirpath, _, filenames in os.walk(base_path):
        for filename in filenames:
            if pattern.search(filename):
                old_path = os.path.join(dirpath, filename)
                new_filename = pattern.sub('', filename).strip()
                new_path = os.path.join(dirpath, new_filename)

                if os.path.abspath(old_path) == os.path.abspath(new_path):
                    continue  # already clean, skip

                # If the clean file already exists — remove it to avoid duplicates
                if os.path.exists(new_path):
                    os.remove(new_path)
                    print(f"🗑️ Removed existing file to overwrite: {new_path}")

                os.rename(old_path, new_path)
                print(f"✅ Renamed: {old_path} → {new_path}")

    print("\n🎉 All matching files processed.")

# =======================
# Run
# =======================
if __name__ == "__main__":
    base_path = r"E:\DS_RAW_COMPLETE\DS_Structured"
    if os.path.exists(base_path):
        print(f"\n🔄 Renaming files in: {base_path}")
        remove_any_parentheses(base_path)
        print("\n✅ All done.")
    else:
        print("❌ The path does not exist.")



🔄 Renaming files in: E:\DS_RAW_COMPLETE\DS_Structured
🗑️ Removed existing file to overwrite: E:\DS_RAW_COMPLETE\DS_Structured\aliza\open_big_box\phone_accelerometer_e0.csv
✅ Renamed: E:\DS_RAW_COMPLETE\DS_Structured\aliza\open_big_box\phone_accelerometer_e0(1).csv → E:\DS_RAW_COMPLETE\DS_Structured\aliza\open_big_box\phone_accelerometer_e0.csv
🗑️ Removed existing file to overwrite: E:\DS_RAW_COMPLETE\DS_Structured\aliza\open_big_box\phone_accelerometer_e1.csv
✅ Renamed: E:\DS_RAW_COMPLETE\DS_Structured\aliza\open_big_box\phone_accelerometer_e1(1).csv → E:\DS_RAW_COMPLETE\DS_Structured\aliza\open_big_box\phone_accelerometer_e1.csv
🗑️ Removed existing file to overwrite: E:\DS_RAW_COMPLETE\DS_Structured\aliza\open_big_box\phone_accelerometer_e2.csv
✅ Renamed: E:\DS_RAW_COMPLETE\DS_Structured\aliza\open_big_box\phone_accelerometer_e2(1).csv → E:\DS_RAW_COMPLETE\DS_Structured\aliza\open_big_box\phone_accelerometer_e2.csv
🗑️ Removed existing file to overwrite: E:\DS_RAW_COMPLETE\DS_Structur

🗑️ Removed existing file to overwrite: E:\DS_RAW_COMPLETE\DS_Structured\bcsf24m530\quick_walk\watch_magnetometer_uncalibrated.csv
✅ Renamed: E:\DS_RAW_COMPLETE\DS_Structured\bcsf24m530\quick_walk\watch_magnetometer_uncalibrated(1).csv → E:\DS_RAW_COMPLETE\DS_Structured\bcsf24m530\quick_walk\watch_magnetometer_uncalibrated.csv
🗑️ Removed existing file to overwrite: E:\DS_RAW_COMPLETE\DS_Structured\bcsf24m530\reading\watch_gyroscope.csv
✅ Renamed: E:\DS_RAW_COMPLETE\DS_Structured\bcsf24m530\reading\watch_gyroscope(1).csv → E:\DS_RAW_COMPLETE\DS_Structured\bcsf24m530\reading\watch_gyroscope.csv
🗑️ Removed existing file to overwrite: E:\DS_RAW_COMPLETE\DS_Structured\bcsf24m530\reading\watch_linear_acceleration.csv
✅ Renamed: E:\DS_RAW_COMPLETE\DS_Structured\bcsf24m530\reading\watch_linear_acceleration(1).csv → E:\DS_RAW_COMPLETE\DS_Structured\bcsf24m530\reading\watch_linear_acceleration.csv
🗑️ Removed existing file to overwrite: E:\DS_RAW_COMPLETE\DS_Structured\bcsf24m530\standing\watch_gr

🗑️ Removed existing file to overwrite: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m542\open_door\phone_gyroscope_uncalibrated_e2.csv
✅ Renamed: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m542\open_door\phone_gyroscope_uncalibrated_e2(1).csv → E:\DS_RAW_COMPLETE\DS_Structured\bitf21m542\open_door\phone_gyroscope_uncalibrated_e2.csv
🗑️ Removed existing file to overwrite: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m542\open_door\phone_gyroscope_uncalibrated_e7.csv
✅ Renamed: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m542\open_door\phone_gyroscope_uncalibrated_e7(1).csv → E:\DS_RAW_COMPLETE\DS_Structured\bitf21m542\open_door\phone_gyroscope_uncalibrated_e7.csv
🗑️ Removed existing file to overwrite: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m549\bending\phone_magnetometer_e0.csv
✅ Renamed: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m549\bending\phone_magnetometer_e0(1).csv → E:\DS_RAW_COMPLETE\DS_Structured\bitf21m549\bending\phone_magnetometer_e0.csv
🗑️ Removed existing file to overwrite: E:\DS_RAW_COMPLETE\DS_

🗑️ Removed existing file to overwrite: E:\DS_RAW_COMPLETE\DS_Structured\mahjabeen\close_door\phone_accelerometer_calibrated_e7.csv
✅ Renamed: E:\DS_RAW_COMPLETE\DS_Structured\mahjabeen\close_door\phone_accelerometer_calibrated_e7(1).csv → E:\DS_RAW_COMPLETE\DS_Structured\mahjabeen\close_door\phone_accelerometer_calibrated_e7.csv
🗑️ Removed existing file to overwrite: E:\DS_RAW_COMPLETE\DS_Structured\mahjabeen\close_door\phone_gyroscope_e2.csv
✅ Renamed: E:\DS_RAW_COMPLETE\DS_Structured\mahjabeen\close_door\phone_gyroscope_e2(1).csv → E:\DS_RAW_COMPLETE\DS_Structured\mahjabeen\close_door\phone_gyroscope_e2.csv
🗑️ Removed existing file to overwrite: E:\DS_RAW_COMPLETE\DS_Structured\mahjabeen\close_door\phone_gyroscope_uncalibrated_e0.csv
✅ Renamed: E:\DS_RAW_COMPLETE\DS_Structured\mahjabeen\close_door\phone_gyroscope_uncalibrated_e0(1).csv → E:\DS_RAW_COMPLETE\DS_Structured\mahjabeen\close_door\phone_gyroscope_uncalibrated_e0.csv
🗑️ Removed existing file to overwrite: E:\DS_RAW_COMPLETE\

# Remove unwanted and keep only prescribed ones

In [4]:
import os

def log_unwanted_files(base_path):
    allowed_keywords = ["gyroscope", "magnetometer", "accelerometer", "gravity", "linear_acceleration"]

    unwanted_keywords = [
        "phone_TMD4906_Proximity_Sensor",
        "phone_Step_Detector_Sensor",
        "phone_Screen_Orientation_Sensor",
        "phone_Tilt_Detector",
        "phone_Game_Rotation_Vector"
    ]

    log_path = r"E:\DS_RAW_COMPLETE\logs\filtered_files_log.txt"
    with open(log_path, 'w') as log:
        for dirpath, _, filenames in os.walk(base_path):
            for filename in filenames:
                file_path = os.path.join(dirpath, filename)

                if any(kw in filename for kw in unwanted_keywords):
                    log.write(f"{file_path}\n")
                    os.remove(file_path)
                    print(f"📝 Logged (unwanted): {file_path}")
                    continue

                if not any(kw in filename for kw in allowed_keywords):
                    log.write(f"{file_path}\n")
                    os.remove(file_path)
                    print(f"📝 Logged (not allowed): {file_path}")
                    continue

    print(f"\n📄 Log file saved to: {log_path}")
    print("✅ All files scanned.")

# =======================
# Run
# =======================
if __name__ == "__main__":
    base_path = r"E:\DS_RAW_COMPLETE\DS_Structured"
    if os.path.exists(base_path):
        print(f"\n🔍 Scanning files in: {base_path}")
        log_unwanted_files(base_path)
    else:
        print("❌ The path does not exist.")



🔍 Scanning files in: E:\DS_RAW_COMPLETE\DS_Structured
📝 Logged (not allowed): E:\DS_RAW_COMPLETE\DS_Structured\filtered_files_log.txt
📝 Logged (not allowed): E:\DS_RAW_COMPLETE\DS_Structured\bcsf22m527\fall_backward\phone_Orientation_Sensor_e0.csv
📝 Logged (unwanted): E:\DS_RAW_COMPLETE\DS_Structured\bcsf22m527\fall_backward\phone_Screen_Orientation_Sensor_e0.csv
📝 Logged (unwanted): E:\DS_RAW_COMPLETE\DS_Structured\bcsf22m527\squatting\phone_TMD4906_Proximity_Sensor_e0.csv
📝 Logged (unwanted): E:\DS_RAW_COMPLETE\DS_Structured\bcsf22m527\squatting\phone_TMD4906_Proximity_Sensor_e1.csv
📝 Logged (unwanted): E:\DS_RAW_COMPLETE\DS_Structured\guljanan\bending\phone_Game_Rotation_Vector_e0.csv
📝 Logged (unwanted): E:\DS_RAW_COMPLETE\DS_Structured\guljanan\bending\phone_Game_Rotation_Vector_e1.csv
📝 Logged (unwanted): E:\DS_RAW_COMPLETE\DS_Structured\guljanan\bending\phone_Game_Rotation_Vector_e2.csv
📝 Logged (unwanted): E:\DS_RAW_COMPLETE\DS_Structured\guljanan\bending\phone_Game_Rotation_V

📝 Logged (unwanted): E:\DS_RAW_COMPLETE\DS_Structured\maham\open_big_box\phone_Game_Rotation_Vector_e2.csv
📝 Logged (unwanted): E:\DS_RAW_COMPLETE\DS_Structured\maham\open_big_box\phone_Game_Rotation_Vector_e3.csv
📝 Logged (unwanted): E:\DS_RAW_COMPLETE\DS_Structured\maham\open_big_box\phone_Game_Rotation_Vector_e4.csv
📝 Logged (unwanted): E:\DS_RAW_COMPLETE\DS_Structured\maham\open_big_box\phone_Game_Rotation_Vector_e5.csv
📝 Logged (unwanted): E:\DS_RAW_COMPLETE\DS_Structured\maham\open_big_box\phone_Game_Rotation_Vector_e6.csv
📝 Logged (unwanted): E:\DS_RAW_COMPLETE\DS_Structured\maham\open_big_box\phone_Game_Rotation_Vector_e7.csv
📝 Logged (unwanted): E:\DS_RAW_COMPLETE\DS_Structured\maham\open_door\phone_Game_Rotation_Vector_e0.csv
📝 Logged (unwanted): E:\DS_RAW_COMPLETE\DS_Structured\maham\open_door\phone_Game_Rotation_Vector_e1.csv
📝 Logged (unwanted): E:\DS_RAW_COMPLETE\DS_Structured\maham\open_door\phone_Game_Rotation_Vector_e2.csv
📝 Logged (unwanted): E:\DS_RAW_COMPLETE\DS_Str


📄 Log file saved to: E:\DS_RAW_COMPLETE\logs\filtered_files_log.txt
✅ All files scanned.


# creates Log File

In [9]:
def prep_data(feat_filename):
    errors = []  # list of errors to return

    try:
        X = pd.read_csv(feat_filename, delimiter=",", decimal='.')
    except Exception as e:
        errors.append(f"Error reading {feat_filename}.")
        return errors
    if "(" in feat_filename:
        errors.append(f"Error parsing event number in {feat_filename}.")
    # If the file has 4 or more columns, use only the first 4 columns.
    if len(X.columns) >= 4:
        X = X.iloc[:, :4]
        X.columns = ['Timestamp', 'X', 'Y', 'Z']
    elif len(X.columns) >= 1:
        # You had this part commented — it does nothing
        # Possibly rename columns here if needed
        pass
    else:
        errors.append(f"Unexpected number of columns in {feat_filename}.")
    
    # (Your normal processing would continue here...)

    return errors


In [17]:
import os
import glob
import pandas as pd

error_log = []  # collect all errors here


base_path = r"C:\Users\Malik Haider\Documents\HUMCARE\DS_AF"
subject_folders = glob.glob(os.path.join(base_path, "*"))

for subject in subject_folders:
    print("🚀 Processing subject ", subject);
    if not os.path.isdir(subject):
        continue

    activity_folders = glob.glob(os.path.join(subject, "*"))
    for activity_folder in activity_folders:
        if not os.path.isdir(activity_folder):
            continue

        event_files = glob.glob(os.path.join(activity_folder, "*.csv"))
        for f in event_files:
            errors = prep_data(f)
            if errors:
                error_log.extend(errors)

# Write all errors to a log file
with open(r"C:\Users\Malik Haider\Documents\HUMCARE\preprocessing_errors_log_af_after_clean.txt", "w") as log_file:
    for line in error_log:
        log_file.write(line + "\n")

print(f"✅ Finished. Logged {len(error_log)} issues to 'preprocessing_errors_log.txt'")


🚀 Processing subject  C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub1
🚀 Processing subject  C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub10
🚀 Processing subject  C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub11
🚀 Processing subject  C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub12
🚀 Processing subject  C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub13
🚀 Processing subject  C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub14
🚀 Processing subject  C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub15
🚀 Processing subject  C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub16
🚀 Processing subject  C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub17
🚀 Processing subject  C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub18
🚀 Processing subject  C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub19
🚀 Processing subject  C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub2
🚀 Processing subject  C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub20
🚀 Processing subject  C:\Users\Malik Hai

# delete files with particular event no

In [None]:
import os
import re

def delete_files_with_same_event(log_path, dry_run=True):
    with open(log_path, 'r') as log_file:
        lines = log_file.readlines()

    event_files = set()

    # Extract unique (folder, event_suffix) pairs
    for line in lines:
        match = re.search(r'(C:.*?)(_e\d+\.csv)', line.strip(), re.IGNORECASE)
        if match:
            base_path = match.group(1)
            event_suffix = match.group(2)  # e.g., _e0.csv
            event_files.add((os.path.dirname(base_path), event_suffix))

    for folder, event_suffix in event_files:
        print(f"\n📂 Looking in: {folder}")
        pattern = re.compile(re.escape(event_suffix) + r'$', re.IGNORECASE)

        for fname in os.listdir(folder):
            if pattern.search(fname):
                fpath = os.path.join(folder, fname)
                if dry_run:
                    print(f"🔎 Would delete: {fpath}")
                else:
                    os.remove(fpath)
                    print(f"🗑️ Deleted: {fpath}")

    if dry_run:
        print("\n✅ Dry-run completed. No files were actually deleted.")
    else:
        print("\n✅ All matching event files removed.")

# ================================
# Run
# ================================
if __name__ == "__main__":
    log_file_path = r"C:\Users\Malik Haider\Documents\HUMCARE\DS_AF_LOGS\missing_event_file.txt"
    dry_run = False  # set to False to actually delete

    if os.path.exists(log_file_path):
        delete_files_with_same_event(log_file_path, dry_run=dry_run)
    else:
        print("❌ Log file not found!")


# Delete files without event number

In [15]:
import os
import re

def delete_files_without_event(base_path, dry_run=True):
    """
    Deletes files that do NOT contain _e<number> in their name
    """
    event_pattern = re.compile(r'_e\d+', re.IGNORECASE)

    for dirpath, _, filenames in os.walk(base_path):
        for fname in filenames:
            if not event_pattern.search(fname):
                fpath = os.path.join(dirpath, fname)
                if dry_run:
                    print(f"🔎 Would delete (no event): {fpath}")
                else:
                    os.remove(fpath)
                    print(f"🗑️ Deleted (no event): {fpath}")

    if dry_run:
        print("\n✅ Dry-run completed. No files were actually deleted.")
    else:
        print("\n✅ All files without _e<number> have been removed.")

# ================================
# Run
# ================================
if __name__ == "__main__":
    base_path = r"C:\Users\Malik Haider\Documents\HUMCARE\DS_AF"
    dry_run = False  # set to False to actually delete

    if os.path.exists(base_path):
        delete_files_without_event(base_path, dry_run=dry_run)
    else:
        print("❌ Base path does not exist!")


🗑️ Deleted (no event): C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub10\standing\glass_accelerometer.csv
🗑️ Deleted (no event): C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub10\standing\glass_gyroscope.csv
🗑️ Deleted (no event): C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub10\standing\glass_magnetometer.csv
🗑️ Deleted (no event): C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub12\downstairs\phone_accelerometer.csv
🗑️ Deleted (no event): C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub12\downstairs\phone_gyroscope.csv
🗑️ Deleted (no event): C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub12\downstairs\phone_magnetometer.csv
🗑️ Deleted (no event): C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub26\clean_the_table\phone_accelerometer.csv
🗑️ Deleted (no event): C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub26\clean_the_table\phone_gyroscope.csv
🗑️ Deleted (no event): C:\Users\Malik Haider\Documents\HUMCARE\DS_AF\sub26\clean_the_table\phone_magnetometer.csv
🗑️ Deleted (no e

# Separate the Fall data from ADL

In [None]:
import os
import shutil

# List of exact fall activity folder names to match
fall_activities = [
    "fall_backward",
    "fall_backward_when_trying_to_sit_down",
    "fall_backward_while_trying_to_sit_down",
    "fall_backward_when_trying_to_stand_up",
    "fall_backward_while_trying_to_stand_up",
    "fall_forward",
    "fall_forward_when_trying_to_sit_down",
    "fall_forward_while_trying_to_sit_down",
    "fall_forward_when_trying_to_stand_up",
    "fall_forward_while_trying_to_stand_up",
    "fall_left",
    "fall_right"
]

def copy_and_remove_fall_folders(src_root, dest_root):
    if not os.path.exists(src_root):
        print("❌ Source path does not exist.")
        return

    # Create destination if it doesn't exist
    if not os.path.exists(dest_root):
        os.makedirs(dest_root)

    # Iterate through each user folder
    for user_folder in os.listdir(src_root):
        user_path = os.path.join(src_root, user_folder)

        if os.path.isdir(user_path):
            # Create user folder in destination
            dest_user_path = os.path.join(dest_root, user_folder)
            os.makedirs(dest_user_path, exist_ok=True)

            # List all subfolders (activities) inside user folder
            for activity_folder in os.listdir(user_path):
                activity_path = os.path.join(user_path, activity_folder)

                if os.path.isdir(activity_path) and activity_folder in fall_activities:
                    dest_activity_path = os.path.join(dest_user_path, activity_folder)
                    print(f"📁 Copying {activity_path} to {dest_activity_path}")
                    try:
                        shutil.copytree(activity_path, dest_activity_path, dirs_exist_ok=True)
                        shutil.rmtree(activity_path)  # ✅ Delete after copy
                        print(f"🗑️ Deleted source folder: {activity_path}")
                    except Exception as e:
                        print(f"⚠️ Error copying/deleting {activity_path}: {e}")

    print("✅ Copy and delete operation completed.")

# Main execution
if __name__ == "__main__":
    src = r"C:\Users\hp\Downloads\DS_AF - Copy - Copy\DS_AF"
    dst = r"C:\Users\hp\Downloads\DS_AF - Copy - Copy\DS_FALL"
    copy_and_remove_fall_folders(src, dst)


# remove empty folder and rename them

In [None]:
import os
import shutil

def delete_empty_and_rename(base_path):
    """
    Delete empty folders and rename remaining subfolders sequentially as sub1, sub2, …
    """
    # Step 1: Delete empty folders
    for root, dirs, _ in os.walk(base_path, topdown=False):
        for d in dirs:
            folder_path = os.path.join(root, d)
            if not os.listdir(folder_path):
                os.rmdir(folder_path)
                print(f"🗑️ Deleted empty folder: {folder_path}")

    # Step 2: Get remaining folders
    remaining = [d for d in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, d))]
    remaining.sort()  # optional: sort alphabetically

    # Step 3: Rename them to sub1, sub2, …
    for idx, folder in enumerate(remaining, 1):
        old_path = os.path.join(base_path, folder)
        new_name = f"sub{idx}"
        new_path = os.path.join(base_path, new_name)

        if old_path != new_path:
            if os.path.exists(new_path):
                print(f"⚠️ Cannot rename {old_path} to {new_path} (already exists!)")
            else:
                os.rename(old_path, new_path)
                print(f"✅ Renamed: {old_path} → {new_path}")

    print("\n🎉 Done: Empty folders deleted and remaining renamed sequentially.")

# ===================================
# Run
# ===================================
if __name__ == "__main__":
    base_path = r"C:\Users\hp\Downloads\DS_AF - Copy - Copy\DS_FALL"

    if os.path.exists(base_path):
        delete_empty_and_rename(base_path)
    else:
        print("❌ Base path does not exist!")


In [18]:
import os

def delete_empty_and_rename(base_path):
    """
    Delete empty folders and rename remaining subfolders sequentially as sub1, sub2, …
    """
    # Step 1: Delete empty folders
    for root, dirs, _ in os.walk(base_path, topdown=False):
        for d in dirs:
            folder_path = os.path.join(root, d)
            if not os.listdir(folder_path):
                os.rmdir(folder_path)
                print(f"🗑️ Deleted empty folder: {folder_path}")

    # Step 2: Get remaining folders
    remaining = [d for d in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, d))]
    remaining.sort()  # sort alphabetically

    # Step 3: Rename to temporary names to avoid conflicts
    temp_names = []
    for idx, folder in enumerate(remaining):
        old_path = os.path.join(base_path, folder)
        tmp_name = f"tmp_rename_{idx}"
        tmp_path = os.path.join(base_path, tmp_name)
        os.rename(old_path, tmp_path)
        temp_names.append(tmp_name)
        print(f"🔄 Renamed to temp: {folder} → {tmp_name}")

    # Step 4: Rename temp folders to final sequential names
    for idx, tmp_name in enumerate(temp_names, 1):
        tmp_path = os.path.join(base_path, tmp_name)
        final_name = f"sub{idx}"
        final_path = os.path.join(base_path, final_name)
        os.rename(tmp_path, final_path)
        print(f"✅ Renamed: {tmp_name} → {final_name}")

    print("\n🎉 Done: Empty folders deleted and remaining renamed sequentially.")

# ===================================
# Run
# ===================================
if __name__ == "__main__":
    base_path = r"C:\Users\hp\Downloads\DS_AF - Copy - Copy\DS_AF"

    if os.path.exists(base_path):
        delete_empty_and_rename(base_path)
    else:
        print("❌ Base path does not exist!")


🔄 Renamed to temp: sub1 → tmp_rename_0
🔄 Renamed to temp: sub10 → tmp_rename_1
🔄 Renamed to temp: sub11 → tmp_rename_2
🔄 Renamed to temp: sub12 → tmp_rename_3
🔄 Renamed to temp: sub13 → tmp_rename_4
🔄 Renamed to temp: sub14 → tmp_rename_5
🔄 Renamed to temp: sub15 → tmp_rename_6
🔄 Renamed to temp: sub16 → tmp_rename_7
🔄 Renamed to temp: sub17 → tmp_rename_8
🔄 Renamed to temp: sub18 → tmp_rename_9
🔄 Renamed to temp: sub19 → tmp_rename_10
🔄 Renamed to temp: sub2 → tmp_rename_11
🔄 Renamed to temp: sub20 → tmp_rename_12
🔄 Renamed to temp: sub21 → tmp_rename_13
🔄 Renamed to temp: sub22 → tmp_rename_14
🔄 Renamed to temp: sub23 → tmp_rename_15
🔄 Renamed to temp: sub24 → tmp_rename_16
🔄 Renamed to temp: sub25 → tmp_rename_17
🔄 Renamed to temp: sub26 → tmp_rename_18
🔄 Renamed to temp: sub27 → tmp_rename_19
🔄 Renamed to temp: sub28 → tmp_rename_20
🔄 Renamed to temp: sub29 → tmp_rename_21
🔄 Renamed to temp: sub3 → tmp_rename_22
🔄 Renamed to temp: sub30 → tmp_rename_23
🔄 Renamed to temp: sub31 → tm

# Verify The Dataset for 9 sensors for each event

In [71]:
import os,glob,re
from collections import defaultdict

base_path = r"C:\Users\Malik Haider\Documents\HUMCARE\DS_AF"
log_path = r'C:\Users\Malik Haider\Documents\HUMCARE\DS_AF_LOGS\missing_event_file.txt'
sensor9 = [
    'glass_accelerometer','glass_gyroscope','glass_magnetometer',
      'phone_accelerometer','phone_gyroscope','phone_magnetometer',
      'watch_accelerometer','watch_gyroscope','watch_magnetometer'
]
sensor6 = [ 
    'phone_accelerometer', 'phone_gyroscope', 'phone_magnetometer',
    'watch_accelerometer', 'watch_gyroscope', 'watch_magnetometer'
]
sixSensorActivities = {
    'fall_forward','fall_right','fall_backward','fall_left',
    'fall_forward_when_trying_to_sit_down','fall_backward_while_trying_to_sit_down',
    'fall_forward_while_trying_to_stand_up','fall_backward_while_trying_to_stand_up'
}

pattern = re.compile(r'^(glass|phone|watch)_(accelerometer|gyroscope|magnetometer)_e(\d+)\.csv$', re.I)

open(log_path,'w').close()

for subj in glob.glob(os.path.join(base_path, '*')):
    if not os.path.isdir(subj): continue
    for act in glob.glob(os.path.join(subj, '*')):
        if not os.path.isdir(act): continue
        req = sensor6 if os.path.basename(act) in sixSensorActivities else sensor9
        evs = defaultdict(set)
        fm = {}
        for fp in glob.glob(os.path.join(act, '*.csv')):
            m = pattern.match(os.path.basename(fp))
            if m:
                k = f"{m.group(1).lower()}_{m.group(2).lower()}"
                e = int(m.group(3))
                evs[e].add(k)
                fm[(e, k)] = fp
        with open(log_path, 'a') as log:
            for e, seen in sorted(evs.items()):
                if len(seen) < len(req):
                    for k in seen:
                        log.write(f"{fm[(e, k)]}\n")



# Get Folders with Imbalance events

In [90]:
import os,glob

base_path = r"C:\Users\Malik Haider\Documents\HUMCARE\DS_AF"
six = {
    'fall_forward','fall_right','fall_backward','fall_left',
    'fall_forward_when_trying_to_sit_down','fall_backward_while_trying_to_sit_down',
    'fall_forward_while_trying_to_stand_up','fall_backward_while_trying_to_stand_up'
}

for subj in glob.glob(os.path.join(base_path,'*')):
    if not os.path.isdir(subj): continue
    for act in glob.glob(os.path.join(subj,'*')):
        if not os.path.isdir(act): continue
        req = 6 if os.path.basename(act) in six else 9
        cnt = len(glob.glob(os.path.join(act,'*.csv')))
        if cnt % req != 0:
            print(act, cnt)
print("Complete")

Complete


# Get DS stats

In [1]:
import os
import glob
from collections import defaultdict

# Define sensor list
sensor_order = [
    'glass_accelerometer',
    'glass_gyroscope',
    'glass_magnetometer',
    'phone_accelerometer_calibrated',
    'phone_accelerometer',
    'phone_gravity',
    'phone_linear_acceleration',
    'phone_gyroscope_uncalibrated',
    'phone_gyroscope',
    'phone_magnetometer_uncalibrated',
    'phone_magnetometer',
    'phone_interrupt_gyroscope',
    'watch_accelerometer',
    'watch_gyroscope_uncalibrated',
    'watch_linear_acceleration',
    'watch_gyroscope',
    'watch_magnetometer_uncalibrated',
    'watch_magnetometer',
    'watch_gravity',
]

# Initialize counters
sensor_file_counts = defaultdict(int)
total_event_instances = 0

# Path to dataset
base_path = r"C:\Users\Malik Haider\Documents\HUMCARE\DATASET_FILLING\test_data"

# Traverse all subject folders
subject_folders = glob.glob(os.path.join(base_path, "*"))

for subject in subject_folders:
    if not os.path.isdir(subject):
        continue
    activity_folders = glob.glob(os.path.join(subject, "*"))
    
    for activity_folder in activity_folders:
        if not os.path.isdir(activity_folder):
            continue

        event_files = glob.glob(os.path.join(activity_folder, "*.csv"))
        event_dict = defaultdict(dict)

        for f in event_files:
            filename = os.path.basename(f).lower()
            if "_e" in filename:
                try:
                    event_num = int(filename.split("_e")[1].split(".")[0])
                except Exception:
                    continue

                for sensor in sensor_order:
                    if sensor in filename:
                        sensor_file_counts[sensor] += 1
                        event_dict[event_num][sensor] = f
                        break
#         if(sensor_file_counts["watch_accelerometer"] != sensor_file_counts["phone_accelerometer"] or sensor_file_counts["watch_gyroscope"] != sensor_file_counts["watch_accelerometer"]):
            
#             print(sensor_file_counts["watch_accelerometer"])
#             print(sensor_file_counts["phone_accelerometer"])
#             print(activity_folder)
        # Count events where at least one sensor file exists
        total_event_instances += len(event_dict)
   
    
# Display Results
print("📊 Sensor File Counts:")
for sensor in sensor_order:
    print(f"  {sensor:25s}: {sensor_file_counts[sensor]} files")

print(f"\n📦 Total Distinct Event Instances: {total_event_instances}")


📊 Sensor File Counts:
  glass_accelerometer      : 639 files
  glass_gyroscope          : 639 files
  glass_magnetometer       : 639 files
  phone_accelerometer_calibrated: 910 files
  phone_accelerometer      : 910 files
  phone_gravity            : 396 files
  phone_linear_acceleration: 910 files
  phone_gyroscope_uncalibrated: 910 files
  phone_gyroscope          : 903 files
  phone_magnetometer_uncalibrated: 903 files
  phone_magnetometer       : 910 files
  phone_interrupt_gyroscope: 790 files
  watch_accelerometer      : 908 files
  watch_gyroscope_uncalibrated: 908 files
  watch_linear_acceleration: 908 files
  watch_gyroscope          : 908 files
  watch_magnetometer_uncalibrated: 907 files
  watch_magnetometer       : 907 files
  watch_gravity            : 286 files

📦 Total Distinct Event Instances: 911


# Generate log file for empty folders

In [1]:
import os

base_path = r"E:\DS_RAW_COMPLETE\DS_Structured"
log_path = r'E:\DS_RAW_COMPLETE\logs\empty_folders.txt'

def is_empty(dir_path):
    return not any(os.scandir(dir_path))

open(log_path, 'w').close()

for subject in os.listdir(base_path):
    subj_path = os.path.join(base_path, subject)
    if os.path.isdir(subj_path):
        if is_empty(subj_path):
            with open(log_path, 'a') as log:
                log.write(f"{subj_path}\n")
        else:
            for activity in os.listdir(subj_path):
                act_path = os.path.join(subj_path, activity)
                if os.path.isdir(act_path) and is_empty(act_path):
                    with open(log_path, 'a') as log:
                        log.write(f"{act_path}\n")
                    os.rmdir(act_path)


# Generate log file with less number of required samples for 4 seconds

In [None]:
import os
import glob
import pandas as pd
from collections import defaultdict

# ========== Configuration ==========
base_path = r"E:\DS_RAW_COMPLETE\DS_Structured"
log_dir =  r"E:\DS_RAW_COMPLETE\logs\7-less_sample_logs"
os.makedirs(log_dir, exist_ok=True)

# ========== Minimum expected samples ==========
expected_samples = {
    'glass_accelerometer':  20,
    'glass_gyroscope':      20,
    'glass_magnetometer':   20,
    'phone_accelerometer_calibrated' : 400,
    'phone_accelerometer': 2000,
    'phone_gravity': 800,
    'phone_linear_acceleration': 400,
    'phone_gyroscope_uncalibrated': 2000,
    'phone_gyroscope':     2000,
    'phone_magnetometer_uncalibrated': 400,
    'phone_magnetometer':  400,
    'phone_interrupt_gyroscope': 100,
    'watch_accelerometer': 400,
    'watch_gyroscope_uncalibrated': 800,
    'watch_linear_acceleration': 400,
    'watch_gyroscope':     400,
    'watch_magnetometer_uncalibrated': 400,
    'watch_magnetometer':  400,
    'watch_gravity': 400,
}

# ========== Detect sensors from filenames ==========
sensor_keys = list(expected_samples.keys())

# ========== Track undersampled files ==========
lengths = defaultdict(dict)

# ========== Processing ==========
for subj in glob.glob(os.path.join(base_path, '*')):
    if not os.path.isdir(subj): continue
    for act in glob.glob(os.path.join(subj, '*')):
        if not os.path.isdir(act): continue
        for fp in glob.glob(os.path.join(act, '*.csv')):
            fname = os.path.basename(fp).lower()

            # Filter only files with '_e' in name
            if "_e" not in fname:
                continue

            sensor = next((s for s in sensor_keys if s in fname), None)
            if not sensor:
                continue

            try:
                df = pd.read_csv(fp)
                cnt = len(df)
            except Exception as e:
                print(f"❌ Error reading: {fp} — {e}")
                continue

            lengths[sensor][fp] = cnt

            if cnt < expected_samples[sensor]:
                log_file = os.path.join(log_dir, f"{sensor}_undersample_log.txt")
                with open(log_file, 'a', encoding='utf-8') as log:
                    log.write(f"{fp}: {cnt} rows (expected ≥ {expected_samples[sensor]})\n")

print("✅ All undersampled logs written per sensor.")


❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bcsf24m535\fall_forward\watch_accelerometer_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bcsf24m535\fall_forward\watch_gyroscope_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bcsf24m535\fall_forward\watch_gyroscope_uncalibrated_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bcsf24m535\fall_forward\watch_linear_acceleration_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bcsf24m535\fall_forward\watch_magnetometer_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bcsf24m535\fall_forward\watch_magnetometer_uncalibrated_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bcsf24m535\fall_forward_while_trying_to_sit_down\watch_accelerometer_e0.csv — No columns to parse from file
❌ Error reading: E:\DS

❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bcsf24m554\plugin\phone_interrupt_gyroscope_e3.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m502\eat_small_things\watch_accelerometer_e3.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m502\eat_small_things\watch_gyroscope_e3.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m502\eat_small_things\watch_gyroscope_uncalibrated_e3.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m502\eat_small_things\watch_linear_acceleration_e3.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m502\eat_small_things\watch_magnetometer_e3.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m502\eat_small_things\watch_magnetometer_uncalibrated_e3.csv — No columns to parse from file
❌ Error reading: E:\DS_

❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m536\close_lid_by_rotation\watch_accelerometer_e5.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m536\close_lid_by_rotation\watch_gyroscope_e5.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m536\close_lid_by_rotation\watch_gyroscope_uncalibrated_e5.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m536\close_lid_by_rotation\watch_linear_acceleration_e5.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m536\close_lid_by_rotation\watch_magnetometer_e5.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m536\close_lid_by_rotation\watch_magnetometer_uncalibrated_e5.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m536\fall_right\watch_accelerometer_e1.csv — No columns to parse from 

❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m542\standing_up_from_laying\watch_linear_acceleration_e5.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m542\standing_up_from_laying\watch_magnetometer_e5.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m542\standing_up_from_laying\watch_magnetometer_uncalibrated_e5.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m549\bending\watch_linear_acceleration_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m549\bending\watch_magnetometer_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m549\bending\watch_magnetometer_uncalibrated_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bitf21m549\fall_backward\watch_accelerometer_e2.csv — No columns to parse from file
❌ Error reading:

❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bsef21m541\open_bag\phone_interrupt_gyroscope_e0.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bsef21m541\open_bag\phone_interrupt_gyroscope_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bsef21m541\open_bag\phone_interrupt_gyroscope_e2.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bsef21m541\plugin\phone_interrupt_gyroscope_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bsef21m541\throw_out\watch_accelerometer_e6.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bsef21m541\throw_out\watch_gyroscope_e6.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bsef21m541\throw_out\watch_gyroscope_uncalibrated_e6.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bsef21m541\throw_

❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bsef21m558\fall_backward\watch_linear_acceleration_e5.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bsef21m558\fall_backward\watch_magnetometer_e5.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bsef21m558\fall_backward\watch_magnetometer_uncalibrated_e5.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bsef21m558\fall_forward\watch_accelerometer_e2.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bsef21m558\fall_forward\watch_gyroscope_e2.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bsef21m558\fall_forward\watch_gyroscope_uncalibrated_e2.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\bsef21m558\fall_forward\watch_linear_acceleration_e2.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS

❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\hassankaramat\close_lid_by_rotation\watch_gyroscope_uncalibrated_e0.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\hassankaramat\fall_backward_while_trying_to_sit_down\watch_accelerometer_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\hassankaramat\fall_backward_while_trying_to_sit_down\watch_gyroscope_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\hassankaramat\fall_backward_while_trying_to_sit_down\watch_gyroscope_uncalibrated_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\hassankaramat\fall_backward_while_trying_to_sit_down\watch_linear_acceleration_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\hassankaramat\fall_backward_while_trying_to_sit_down\watch_magnetometer_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW

❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\msdsf23m015\sitting_down_from_standing\watch_linear_acceleration_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\msdsf23m015\sitting_down_from_standing\watch_magnetometer_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\msdsf23m015\sitting_down_from_standing\watch_magnetometer_uncalibrated_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\msdsf23m015\standing_up_from_laying\phone_interrupt_gyroscope_e3.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\qaim\bending\watch_gyroscope_e0.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\qaim\bending\watch_gyroscope_uncalibrated_e0.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\qaim\bending\watch_linear_acceleration_e0.csv — No columns to parse from file
❌ Error rea

❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\raahim\standing_up_from_laying\watch_accelerometer_e5.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\raahim\standing_up_from_laying\watch_gyroscope_e5.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\raahim\standing_up_from_laying\watch_gyroscope_uncalibrated_e5.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\raahim\standing_up_from_laying\watch_linear_acceleration_e5.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\raahim\standing_up_from_laying\watch_magnetometer_e5.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\raahim\standing_up_from_laying\watch_magnetometer_uncalibrated_e5.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\raahim\throw_out\watch_gyroscope_uncalibrated_e1.csv — No columns to parse from file
❌ E

❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\shahbaz\open_big_box\watch_magnetometer_e0.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\shahbaz\open_big_box\watch_magnetometer_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\shahbaz\open_big_box\watch_magnetometer_uncalibrated_e0.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\shahbaz\open_big_box\watch_magnetometer_uncalibrated_e1.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\shahbaz\open_door\watch_accelerometer_e7.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\shahbaz\open_door\watch_gyroscope_e7.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\shahbaz\open_door\watch_gyroscope_uncalibrated_e7.csv — No columns to parse from file
❌ Error reading: E:\DS_RAW_COMPLETE\DS_Structured\shahbaz\open_door\watc

In [10]:
# print(lengths['glass_accelerometer'].values())
sum = 0
count = 0
for val in lengths['phone_accelerometer'].values():
    sum += val
    if(val < 100):
        count+=1
        

print(count)

NameError: name 'lengths' is not defined

# Generate logs for empty and invalid and no rows files

In [13]:
import os
import pandas as pd

def log_empty_csvs(base_path):
    empty_log = r"E:\DS_RAW_COMPLETE\logs\6-empty_csv_files_log.txt"
    corrupt_log = r"E:\DS_RAW_COMPLETE\logs\6-corrupt_csv_files_log.txt"
    no_rows_log = r"E:\DS_RAW_COMPLETE\logs\6-no_rows_csv_files_log.txt"

    # clear or create
    open(empty_log, 'w').close()
    open(corrupt_log, 'w').close()
    open(no_rows_log, 'w').close()

    for dirpath, _, filenames in os.walk(base_path):
        for fname in filenames:
            if not fname.lower().endswith('.csv'):
                continue
            full_path = os.path.join(dirpath, fname)

            # size zero
            try:
                if os.path.getsize(full_path) == 0:
                    with open(empty_log, 'a') as f:
                        f.write(f"{full_path}\n")
                    continue
            except OSError:
                # cannot stat file; treat as corrupt
                with open(corrupt_log, 'a') as f:
                    f.write(f"STAT_ERROR: {full_path}\n")
                continue

            # try reading header / rows
            try:
                df = pd.read_csv(full_path, nrows=5, low_memory=False)
            except Exception as e:
                with open(corrupt_log, 'a') as f:
                    f.write(f"{full_path}  ERROR: {e}\n")
                continue

            # check if it has zero data rows (read full if small)
            try:
                # get actual row count efficiently
                row_count = sum(1 for _ in open(full_path, 'r', errors='ignore')) - 1  # subtract header
            except Exception:
                row_count = df.shape[0]

            if row_count <= 0:
                with open(no_rows_log, 'a') as f:
                    f.write(f"{full_path}\n")
                continue

            # optionally: if all values in first few rows are NaN (poor data)
            if df.shape[0] > 0 and df.dropna(how='all').shape[0] == 0:
                with open(no_rows_log, 'a') as f:
                    f.write(f"{full_path}  ALL_NAN\n")

    print("Done. Logs:")
    print(f"  empty zero-byte files: {empty_log}")
    print(f"  corrupt/unreadable files: {corrupt_log}")
    print(f"  no-data / all-NaN files: {no_rows_log}")


if __name__ == "__main__":
    base_path = r"E:\DS_RAW_COMPLETE\DS_Structured"
    if os.path.exists(base_path):
        log_empty_csvs(base_path)
    else:
        print("❌ Base path does not exist!")


Done. Logs:
  empty zero-byte files: E:\DS_RAW_COMPLETE\logs\6-empty_csv_files_log.txt
  corrupt/unreadable files: E:\DS_RAW_COMPLETE\logs\6-corrupt_csv_files_log.txt
  no-data / all-NaN files: E:\DS_RAW_COMPLETE\logs\6-no_rows_csv_files_log.txt


In [None]:
sensor_frequency = {
    'glass_accelerometer': 5,
    'glass_gyroscope': 5,
    'glass_magnetometer': 5,
    'phone_acceleromter_calibrated' : 100,
    'phone_accelerometer': 500,
    'phone_gravity': 200,
    'phone_linear_acceleration': 100,
    'phone_gyroscope_uncalibrated': 500,
    'phone_gyroscope': 500,
    'phone_magnetometer_uncalibrated': 100,
    'phone_magnetometer':  100,
    'phone_interrupt_gyroscope': 100,
    'watch_accelerometer': 100,
    'watch_gyroscope_uncalibrated': 200,
    'watch_linear_acceleration': 100,
    'watch_gyroscope': 100,
    'watch_magnetometer_uncalibrated': 100,
    'watch_magnetometer': 100,
    'watch_gravity': 100,
}