In [10]:
import numpy as np

In [20]:
# Per-feature bounds applied to stance-phase files. classify_threshold() treats
# a value outside [min, max] as "normal", so a row is flagged abnormal only
# when every feature listed here lies inside its range. Units are not stated
# in this notebook.
stance_thresholds = {
    feature: {'min': lower, 'max': upper}
    for feature, lower, upper in (
        ('GRF', 2.45, 4.89),
        ('MeanAcceleration', 1.66, 2.30),
        ('SDAcceleration', 0.35, 1.66),
        ('PercentStance', 4.62, 5.45),
        ('GeneralVelocity', 1.67, 2.33),
    )
}

# Same layout for swing-phase files; only three features are thresholded.
swing_thresholds = {
    feature: {'min': lower, 'max': upper}
    for feature, lower, upper in (
        ('MeanAcceleration', 1.9, 2.33),
        ('SDAcceleration', 0.27, 1.58),
        ('GeneralVelocity', 1.9, 2.34),
    )
}

In [21]:
import os
import pandas as pd
import numpy as np

# Simplified classify_threshold function
def classify_threshold(features, thresholds):
    """Classify one sample against per-feature bounds.

    Args:
        features: Object indexable by feature name (a dict or a DataFrame row).
        thresholds: Dict mapping feature name -> {'min': low, 'max': high}.

    Returns:
        0 (normal) as soon as one thresholded feature is strictly below its
        'min' or strictly above its 'max'; 1 (abnormal) when no feature is
        outside its range. Bounds are inclusive, and an empty ``thresholds``
        dict yields 1.
    """
    def is_outside(name, bounds):
        value = features[name]
        # Written as two strict comparisons on purpose: a NaN value fails both
        # and is therefore NOT counted as outside the range.
        return value < bounds['min'] or value > bounds['max']

    any_outside = any(is_outside(name, bounds) for name, bounds in thresholds.items())
    return 0 if any_outside else 1

# Function to process each CSV and apply thresholding
def apply_thresholds_to_csv(input_folder, output_folder):
    """Save a copy of every stance/swing feature CSV with a file-level prediction.

    Walks ``input_folder`` recursively. A CSV whose name contains 'stance' is
    checked against the module-level ``stance_thresholds``; otherwise a CSV
    whose name contains 'swing' is checked against ``swing_thresholds``; any
    other CSV is skipped without being opened. Each row is classified with
    ``classify_threshold``. The file is predicted normal (0) if at least one
    row is classified normal, and abnormal (1) otherwise -- so a CSV with no
    rows is predicted abnormal. The prediction is written to a
    'FilePrediction' column (same value on every row) and the frame is saved
    under ``output_folder`` at the same relative path.

    Args:
        input_folder: Root directory containing the feature CSVs.
        output_folder: Root directory that receives the annotated copies;
            sub-directories are created as needed.

    Returns:
        None. Progress is reported with print().

    Raises:
        KeyError: Presumably, when a thresholded feature column is missing
            from a CSV (the lookup happens inside classify_threshold).
    """
    for root, _, files in os.walk(input_folder):
        for file_name in files:
            if not file_name.endswith('.csv'):
                print(f"Skipping non-CSV file: {file_name}")  # Debugging print
                continue

            print(f"Processing file: {file_name}")  # Debugging print

            # Pick the phase from the file name BEFORE touching the file, so
            # that unrelated (possibly empty or malformed) CSVs are skipped
            # instead of aborting the whole walk inside pd.read_csv.
            if 'stance' in file_name:
                phase, thresholds = 'stance', stance_thresholds
            elif 'swing' in file_name:
                phase, thresholds = 'swing', swing_thresholds
            else:
                continue  # Neither stance nor swing: nothing to do

            df = pd.read_csv(os.path.join(root, file_name))
            print(f"Applying thresholds for phase: {phase}")  # Debugging print

            # any() stops at the first normal row, like the original break.
            file_is_normal = any(
                classify_threshold(row, thresholds) == 0
                for _index, row in df.iterrows()
            )

            # File-level prediction: 0 (normal) or 1 (abnormal), broadcast to all rows.
            df['FilePrediction'] = 0 if file_is_normal else 1

            # Mirror the input directory structure below output_folder.
            relative_path = os.path.relpath(root, input_folder)
            output_dir = os.path.join(output_folder, relative_path)
            os.makedirs(output_dir, exist_ok=True)
            output_file = os.path.join(output_dir, file_name)

            df.to_csv(output_file, index=False)
            print(f"Processed and saved predictions for {file_name}")


In [22]:
# Example usage: run the thresholding step over the whole feature tree.
# NOTE(review): both paths are absolute and machine-specific; the cell only
# works as-is on the author's Windows profile. input_folder/output_folder are
# notebook-level names that later cells rebind to different directories.
input_folder = r"C:\Users\diyav\.jupyter\Feature Data"  # Root folder of the per-phase feature CSVs
output_folder = r"C:\Users\diyav\.jupyter\Threshold Data"  # Receives copies carrying the 'FilePrediction' column

apply_thresholds_to_csv(input_folder, output_folder)

Skipping non-CSV file: High-Low-Mean.xlsx
Processing file: stance_features_1.csv
Applying thresholds for phase: stance
Processed and saved predictions for stance_features_1.csv
Processing file: stance_features_10.csv
Applying thresholds for phase: stance
Processed and saved predictions for stance_features_10.csv
Processing file: stance_features_11.csv
Applying thresholds for phase: stance
Processed and saved predictions for stance_features_11.csv
Processing file: stance_features_2.csv
Applying thresholds for phase: stance
Processed and saved predictions for stance_features_2.csv
Processing file: stance_features_3.csv
Applying thresholds for phase: stance
Processed and saved predictions for stance_features_3.csv
Processing file: stance_features_4.csv
Applying thresholds for phase: stance
Processed and saved predictions for stance_features_4.csv
Processing file: stance_features_5.csv
Applying thresholds for phase: stance
Processed and saved predictions for stance_features_5.csv
Processin

In [9]:
import os
import pandas as pd

def label_files(input_folder, output_folder):
    """Copy every CSV under ``input_folder`` with a ground-truth 'label' column.

    The label is derived from the lower-cased name of the folder that holds
    the file, combined with the file name:

    * folder contains 'abnormal_stance': 'stance' files -> 1, 'swing' files -> 0
    * folder contains 'abnormal_swing':  'swing' files -> 1, 'stance' files -> 0
    * folder contains 'normal' (and neither of the above): every file -> 0

    The abnormal rules are tested first because the substring 'normal' also
    occurs in 'abnormal_*'; testing 'normal' first would label every file 0.
    Files that match no rule are skipped and not copied. Any existing 'label'
    column is overwritten.

    Args:
        input_folder: Root directory to walk recursively.
        output_folder: Root directory for the labeled copies; the relative
            directory structure of ``input_folder`` is preserved.

    Returns:
        None. Progress is reported with print().
    """
    for root, _, files in os.walk(input_folder):
        for file_name in files:
            if not file_name.endswith('.csv'):
                continue

            # Lower-case the folder name for case-insensitive matching.
            folder_name = os.path.basename(root).lower()
            print(f"Processing folder: {folder_name}")  # Debugging print

            # Reset for every file so a label can never leak from the previous one.
            label = None
            if 'abnormal_stance' in folder_name:
                print(f"Labeling files in {folder_name} with abnormal stance logic.")  # Debugging print
                if 'stance' in file_name:
                    label = 1  # Stance files are the abnormal ones here
                elif 'swing' in file_name:
                    label = 0  # Swing files stay normal
            elif 'abnormal_swing' in folder_name:
                print(f"Labeling files in {folder_name} with abnormal swing logic.")  # Debugging print
                if 'swing' in file_name:
                    label = 1  # Swing files are the abnormal ones here
                elif 'stance' in file_name:
                    label = 0  # Stance files stay normal
            elif 'normal' in folder_name:
                label = 0  # Everything in a normal folder is normal

            if label is None:
                print(f"Skipping {file_name}: no labeling rule matches folder '{folder_name}'.")
                continue

            df = pd.read_csv(os.path.join(root, file_name))

            # Overwrite the 'label' column with the new value
            df['label'] = label

            # Save under output_folder, preserving the folder structure
            relative_path = os.path.relpath(root, input_folder)
            output_dir = os.path.join(output_folder, relative_path)
            os.makedirs(output_dir, exist_ok=True)
            output_file = os.path.join(output_dir, file_name)

            df.to_csv(output_file, index=False)
            print(f"Updated and saved {file_name} with label {label}")

# Example usage: label the thresholded CSVs.
# NOTE(review): absolute, machine-specific paths. This rebinds the
# notebook-level input_folder/output_folder; the input here is the directory
# the thresholding cell wrote to, so that cell must have run first.
input_folder = r"C:\Users\diyav\.jupyter\Threshold Data"  # CSVs that already carry 'FilePrediction'
output_folder = r"C:\Users\diyav\.jupyter\Labeled Data"  # New folder for labeled files
label_files(input_folder, output_folder)



Processing folder: sub10_2_normal
Updated and saved stance_features_1.csv with label 0
Processing folder: sub10_2_normal
Updated and saved stance_features_10.csv with label 0
Processing folder: sub10_2_normal
Updated and saved stance_features_11.csv with label 0
Processing folder: sub10_2_normal
Updated and saved stance_features_2.csv with label 0
Processing folder: sub10_2_normal
Updated and saved stance_features_3.csv with label 0
Processing folder: sub10_2_normal
Updated and saved stance_features_4.csv with label 0
Processing folder: sub10_2_normal
Updated and saved stance_features_5.csv with label 0
Processing folder: sub10_2_normal
Updated and saved stance_features_6.csv with label 0
Processing folder: sub10_2_normal
Updated and saved stance_features_7.csv with label 0
Processing folder: sub10_2_normal
Updated and saved stance_features_8.csv with label 0
Processing folder: sub10_2_normal
Updated and saved stance_features_9.csv with label 0
Processing folder: sub10_2_normal
Updated

In [10]:
import os
import pandas as pd

def label_files(input_folder, output_folder):
    """Copy every CSV under ``input_folder`` with a ground-truth 'label' column.

    Rules, evaluated in this order against the lower-cased name of the folder
    that holds the file:

    * 'abnormal_stance' in folder: 'stance' files -> 1, 'swing' files -> 0
    * 'abnormal_swing' in folder:  'swing' files -> 1, 'stance' files -> 0
    * 'normal' in folder:          every file -> 0

    A file that matches no rule (unrecognised folder, or a file in an abnormal
    folder that is neither stance nor swing) is skipped and not copied. Without
    that guard ``label`` would be undefined for the first such file, or would
    silently reuse the label of the previously processed file. Any existing
    'label' column is overwritten.

    Args:
        input_folder: Root directory to walk recursively.
        output_folder: Root directory for the labeled copies; the relative
            directory structure of ``input_folder`` is preserved.

    Returns:
        None. Progress is reported with print().
    """
    for root, _, files in os.walk(input_folder):
        for file_name in files:
            if not file_name.endswith('.csv'):
                continue

            # Lower-case the folder name for case-insensitive matching.
            folder_name = os.path.basename(root).lower()
            print(f"Processing file: {file_name} in folder: {folder_name}")  # Debugging print

            # Reset for every file so a label can never leak from the previous one.
            label = None

            # Abnormal folders first: 'normal' is a substring of 'abnormal_*'.
            if 'abnormal_stance' in folder_name:
                if 'stance' in file_name:
                    label = 1  # Label stance files as abnormal (1)
                    print(f"Labeling as ABNORMAL (stance): {file_name}")
                elif 'swing' in file_name:
                    label = 0  # Label swing files as normal (0)
                    print(f"Labeling as NORMAL (swing): {file_name}")
            elif 'abnormal_swing' in folder_name:
                if 'swing' in file_name:
                    label = 1  # Label swing files as abnormal (1)
                    print(f"Labeling as ABNORMAL (swing): {file_name}")
                elif 'stance' in file_name:
                    label = 0  # Label stance files as normal (0)
                    print(f"Labeling as NORMAL (stance): {file_name}")
            elif 'normal' in folder_name:
                label = 0  # Label everything as normal (0)
                print(f"Labeling as NORMAL: {file_name}")

            if label is None:
                print(f"Skipping {file_name}: no labeling rule matched.")
                continue

            df = pd.read_csv(os.path.join(root, file_name))

            # Overwrite the 'label' column with the new value
            df['label'] = label

            # Save under output_folder, preserving the folder structure
            relative_path = os.path.relpath(root, input_folder)
            output_dir = os.path.join(output_folder, relative_path)
            os.makedirs(output_dir, exist_ok=True)
            output_file = os.path.join(output_dir, file_name)

            df.to_csv(output_file, index=False)
            print(f"Updated and saved {file_name} with label {label}")

# Example usage: label the thresholded CSVs.
# NOTE(review): absolute, machine-specific paths. The output directory is the
# same "Labeled Data" folder used by the earlier labeling cell, so files with
# the same relative path written by that cell are overwritten by this run.
input_folder = r"C:\Users\diyav\.jupyter\Threshold Data"  # CSVs that already carry 'FilePrediction'
output_folder = r"C:\Users\diyav\.jupyter\Labeled Data"  # New folder for labeled files
label_files(input_folder, output_folder)


Processing file: stance_features_1.csv in folder: sub10_2_normal
Labeling as NORMAL: stance_features_1.csv
Updated and saved stance_features_1.csv with label 0
Processing file: stance_features_10.csv in folder: sub10_2_normal
Labeling as NORMAL: stance_features_10.csv
Updated and saved stance_features_10.csv with label 0
Processing file: stance_features_11.csv in folder: sub10_2_normal
Labeling as NORMAL: stance_features_11.csv
Updated and saved stance_features_11.csv with label 0
Processing file: stance_features_2.csv in folder: sub10_2_normal
Labeling as NORMAL: stance_features_2.csv
Updated and saved stance_features_2.csv with label 0
Processing file: stance_features_3.csv in folder: sub10_2_normal
Labeling as NORMAL: stance_features_3.csv
Updated and saved stance_features_3.csv with label 0
Processing file: stance_features_4.csv in folder: sub10_2_normal
Labeling as NORMAL: stance_features_4.csv
Updated and saved stance_features_4.csv with label 0
Processing file: stance_features_5

In [21]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Function to compare thresholding predictions with actual labels
def compare_with_actual_labels(predictions_folder):
    """Print accuracy and confusion matrix of 'FilePrediction' against 'label'.

    Only CSV files located directly in ``predictions_folder`` are read;
    sub-directories are not descended into and non-CSV files are ignored, so a
    stray spreadsheet or notes file cannot break the comparison. CSVs lacking
    either required column are reported and skipped.

    Every row of every accepted CSV contributes one (prediction, label) pair,
    so a file with more rows weighs more in the reported metrics.

    Args:
        predictions_folder: Directory holding CSVs that carry both a
            'FilePrediction' and a 'label' column.

    Returns:
        None. Results (or a notice that nothing could be compared) are printed.
    """
    predicted_labels = []
    actual_labels = []

    # Loop through the entries directly inside the predictions folder
    for file_name in os.listdir(predictions_folder):
        file_path = os.path.join(predictions_folder, file_name)

        # Skip directories and anything that is not a CSV
        if os.path.isdir(file_path) or not file_name.endswith('.csv'):
            continue

        # Load the CSV with predictions and actual labels
        df = pd.read_csv(file_path)

        # Ensure both 'FilePrediction' and 'label' columns exist
        if 'FilePrediction' in df.columns and 'label' in df.columns:
            predicted_labels.extend(df['FilePrediction'].values)
            actual_labels.extend(df['label'].values)
        else:
            print(f"Skipping {file_name}: Missing required columns.")

    # Check if there are any labels to compare
    if not predicted_labels or not actual_labels:
        print("No predictions or actual labels found for comparison.")
        return

    # Calculate and display accuracy
    accuracy = accuracy_score(actual_labels, predicted_labels)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    # Display the confusion matrix
    cm = confusion_matrix(actual_labels, predicted_labels)
    print("Confusion Matrix:")
    print(cm)


In [24]:
import os
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix

# Function to compare thresholding predictions with actual labels
def compare_with_actual_labels(predictions_folder):
    """Print accuracy and confusion matrix of 'FilePrediction' against 'label'.

    Recursively collects every ``.csv`` file below ``predictions_folder``.
    Files lacking either required column are reported and skipped. Every row
    of every accepted file contributes one (prediction, label) pair.

    Args:
        predictions_folder: Root directory of CSVs carrying both a
            'FilePrediction' and a 'label' column.

    Returns:
        None. Results (or a notice that nothing could be compared) are printed.
    """
    required_columns = ('FilePrediction', 'label')
    y_pred, y_true = [], []

    for current_dir, _subdirs, names in os.walk(predictions_folder):
        for name in names:
            if not name.endswith('.csv'):
                continue  # Only CSV files are considered

            frame = pd.read_csv(os.path.join(current_dir, name))

            if not all(column in frame.columns for column in required_columns):
                print(f"Skipping {name}: Missing required columns.")
                continue

            y_pred.extend(frame['FilePrediction'].values)
            y_true.extend(frame['label'].values)

    # Nothing collected: report and bail out before touching the metrics.
    if not (y_pred and y_true):
        print("No predictions or actual labels found for comparison.")
        return

    accuracy = accuracy_score(y_true, y_pred)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    matrix = confusion_matrix(y_true, y_pred)
    print("Confusion Matrix:")
    print(matrix)


In [25]:
# Example usage:
# Root folder of the labeled CSVs (the directory the labeling cells write to).
# NOTE(review): absolute, machine-specific path.
predictions_folder = r"C:\Users\diyav\.jupyter\Labeled Data"

# Prints accuracy and the confusion matrix for FilePrediction vs. label
compare_with_actual_labels(predictions_folder)

Accuracy: 61.83%
Confusion Matrix:
[[333 213]
 [ 45  85]]
