In [1]:
# Mount Google Drive at /content/drive so later cells can read and write files
# under that path. The google.colab package is only importable inside a
# Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os
import shutil
# Source folder on the mounted Drive; presumably one sub-folder per subject
# (the printed listing shows entries such as 'sub0' ... 'sub44').
shared_folder_path = '/content/drive/MyDrive/EJUST-GAIT-2/Visual_Sensors_Gait/subjects'
# Snapshot of the entry names in the source folder, printed below for inspection.
files = os.listdir(shared_folder_path)
# Destination folder that the following copy cell writes into.
new_folder_path = '/content/drive/MyDrive/gait'
# exist_ok=True makes this cell safe to re-run when the folder already exists.
os.makedirs(new_folder_path, exist_ok=True)
print(files)


['.DS_Store', 'sub29', 'sub43', 'sub17', 'sub32', 'sub12', 'sub40', 'sub36', 'sub42', 'sub11', 'sub16', 'sub20', 'sub18', 'sub27', 'sub44', 'sub19', 'sub26', 'sub21', 'sub28', 'sub10', 'sub1', 'sub6', 'sub8', 'sub9', 'sub7', 'sub0', 'sub35', 'sub34', 'sub33', 'sub15', 'sub24', 'sub23', 'sub41', 'sub22', 'sub25', 'sub13', 'sub14', 'sub5', 'sub2', 'sub3', 'sub4', 'sub31', 'sub38', 'sub39', 'sub30', 'sub37']


In [None]:
# Mirror every entry of the shared folder into the working folder:
# plain files are copied with their metadata, directories are copied
# recursively (merging into an existing destination directory on re-run).
for entry_name in os.listdir(shared_folder_path):
    source_path = os.path.join(shared_folder_path, entry_name)
    target_path = os.path.join(new_folder_path, entry_name)

    if not os.path.isdir(source_path):
        # Regular file: copy2 keeps timestamps and permission bits.
        shutil.copy2(source_path, target_path)
        continue

    # Directory: copy the whole tree, tolerating an existing target.
    shutil.copytree(source_path, target_path, dirs_exist_ok=True)

# Show what ended up in the working folder.
print("Files in the new folder:")
print(os.listdir(new_folder_path))


Files in the new folder:
['.DS_Store', 'sub29', 'sub43', 'sub17', 'sub32', 'sub12', 'sub40', 'sub36', 'sub42', 'sub11', 'sub16', 'sub20', 'sub18', 'sub27', 'sub44', 'sub19', 'sub26', 'sub21', 'sub28', 'sub10', 'sub1', 'sub6', 'sub8', 'sub9', 'sub7', 'sub0', 'sub35', 'sub34', 'sub33', 'sub15', 'sub24', 'sub23', 'sub41', 'sub22', 'sub25', 'sub13', 'sub14', 'sub5', 'sub2', 'sub3', 'sub4', 'sub31', 'sub38', 'sub39', 'sub30', 'sub37']


In [7]:
import os
import re
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Dataset locations on the mounted Drive: per-subject inputs are read from
# root_dir, processed results are written to the "Output" folder inside it.
root_dir = '/content/drive/MyDrive/gait/'
output_root_dir = os.path.join(root_dir, 'Output/')

# Function to load, round time, and aggregate within each second
def load_and_aggregate(file_path, sensor_type, time_col='timestamp (+0200)'):
    """Load one sensor CSV and average its x/y/z readings per rounded second.

    Timestamps written with dots in the time part (``YYYY-MM-DDTHH.MM.SS.fff``)
    are rewritten to use colons, parsed, rounded to the nearest second, and the
    three axis columns are averaged over all rows sharing a rounded timestamp.

    Parameters
    ----------
    file_path : str
        Path of the CSV file to read.
    sensor_type : str
        ``'Accelerometer'``, ``'Gyroscope'`` or ``'Magnetometer'``. Selects the
        axis column names expected in the file and the prefix of the output
        columns.
    time_col : str, optional
        Name of the timestamp column in the CSV.

    Returns
    -------
    pandas.DataFrame
        Columns ``time_col``, ``{sensor_type}_x``, ``{sensor_type}_y`` and
        ``{sensor_type}_z``, one row per rounded second. A column-less empty
        DataFrame is returned (after printing a warning) when the file is
        empty, holds no parsable data, or lacks a required column.

    Raises
    ------
    ValueError
        If ``sensor_type`` is not one of the supported sensor names.
    """
    # Axis column names as they appear in the raw CSV for each sensor.
    axis_columns = {
        'Accelerometer': ('x-axis (g)', 'y-axis (g)', 'z-axis (g)'),
        'Gyroscope': ('x-axis (deg/s)', 'y-axis (deg/s)', 'z-axis (deg/s)'),
        'Magnetometer': ('x-axis (T)', 'y-axis (T)', 'z-axis (T)'),
    }
    # Fail fast with a clear message instead of hitting unbound column names later.
    if sensor_type not in axis_columns:
        raise ValueError(
            f"Unsupported sensor_type {sensor_type!r}; expected one of {sorted(axis_columns)}."
        )
    x_col, y_col, z_col = axis_columns[sensor_type]

    # Check if the file is empty or unreadable
    if os.path.getsize(file_path) == 0:
        print(f"Warning: {file_path} is empty. Skipping this file.")
        return pd.DataFrame()  # Return an empty DataFrame

    try:
        df = pd.read_csv(file_path)
    except pd.errors.EmptyDataError:
        # Non-zero-byte file without any parsable columns (e.g. only blank lines).
        print(f"Warning: {file_path} contains no data. Skipping this file.")
        return pd.DataFrame()
    if df.empty:
        print(f"Warning: {file_path} contains no data. Skipping this file.")
        return pd.DataFrame()

    # Validate the schema before touching any column so that a missing
    # timestamp column is reported the same way as a missing axis column.
    if not all(col in df.columns for col in [time_col, x_col, y_col, z_col]):
        print(f"Warning: {file_path} missing expected columns. Skipping this file.")
        return pd.DataFrame()

    # Replace periods in the time component with colons to match standard ISO format.
    # astype(str) keeps the .str accessor usable even if the column was not read as text.
    timestamps = df[time_col].astype(str).str.replace(
        r"(\d{4}-\d{2}-\d{2}T\d{2})\.(\d{2})\.(\d{2})\.(\d+)",
        r"\1:\2:\3.\4",
        regex=True,
    )

    # Convert to datetime (unparsable values become NaT) and round to the
    # nearest second (lowercase 's' avoids a FutureWarning).
    df[time_col] = pd.to_datetime(timestamps, errors='coerce').dt.round('s')

    # Drop rows where datetime conversion failed (if any)
    df = df.dropna(subset=[time_col])

    # Group by the rounded time and take the mean for each second
    df_agg = df.groupby(time_col)[[x_col, y_col, z_col]].mean().reset_index()

    # Rename columns for clarity
    return df_agg.rename(columns={
        x_col: f'{sensor_type}_x',
        y_col: f'{sensor_type}_y',
        z_col: f'{sensor_type}_z',
    })

# Function to standardize and normalize a DataFrame
def standardize_and_normalize(df, columns):
    """Rescale the given columns of ``df`` in place and return ``df``.

    Two passes are applied in order, each fitted on the current values of
    ``df[columns]`` and written back into the same columns: first a
    ``StandardScaler`` fit/transform, then a ``MinMaxScaler`` fit/transform.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are overwritten with the scaled values.
    columns : list-like of str
        Labels of the columns to rescale.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    # Standardize first, then normalize; every pass refits on the output of the previous one.
    for scaler_cls in (StandardScaler, MinMaxScaler):
        df[columns] = scaler_cls().fit_transform(df[columns])

    return df

# Loop through each subdirectory (from sub0 to sub44)
def _find_sensor_file(file_names, body_part, sensor_type):
    """Return the first name containing body_part followed later by sensor_type (case-insensitive), or None."""
    pattern = re.compile(f"{re.escape(body_part)}.*{re.escape(sensor_type)}", re.IGNORECASE)
    return next((name for name in file_names if pattern.search(name)), None)


for i in range(45):
    sub_dir = f"sub{i}"
    sub_dir_path = os.path.join(root_dir, sub_dir)

    # A missing subject folder should not abort the whole run.
    if not os.path.isdir(sub_dir_path):
        print(f"Skipping {sub_dir}: directory not found at {sub_dir_path}")
        continue

    output_sub_dir = os.path.join(output_root_dir, sub_dir)
    os.makedirs(output_sub_dir, exist_ok=True)

    # List the folder once per subject. os.listdir returns names in arbitrary
    # order, so sort to make the "first match" choice below reproducible.
    sub_dir_files = sorted(os.listdir(sub_dir_path))

    for body_part in ['knee', 'Arm', 'Thigh']:
        # Identify files by body part and sensor type, allowing any text before and between
        accel_file = _find_sensor_file(sub_dir_files, body_part, 'Accelerometer')
        gyro_file = _find_sensor_file(sub_dir_files, body_part, 'Gyroscope')
        magnet_file = _find_sensor_file(sub_dir_files, body_part, 'Magnetometer')

        # Continue if any of the files are missing
        if not all([accel_file, gyro_file, magnet_file]):
            print(f"Missing files for {body_part} in {sub_dir}")
            continue

        # Load and aggregate data for each sensor
        accel_df = load_and_aggregate(os.path.join(sub_dir_path, accel_file), 'Accelerometer')
        gyro_df = load_and_aggregate(os.path.join(sub_dir_path, gyro_file), 'Gyroscope')
        magnet_df = load_and_aggregate(os.path.join(sub_dir_path, magnet_file), 'Magnetometer')

        # Skip if any DataFrame is empty
        if accel_df.empty or gyro_df.empty or magnet_df.empty:
            print(f"Skipping {body_part} in {sub_dir} due to missing or empty data.")
            continue

        # Merge DataFrames on rounded timestamp if the column exists in all DataFrames
        if not all('timestamp (+0200)' in frame.columns for frame in (accel_df, gyro_df, magnet_df)):
            print(f"Skipping {body_part} in {sub_dir} due to missing timestamp column.")
            continue

        # Outer joins keep seconds that only some sensors recorded (others become NaN).
        combined_df = pd.merge(accel_df, gyro_df, on='timestamp (+0200)', how='outer')
        combined_df = pd.merge(combined_df, magnet_df, on='timestamp (+0200)', how='outer')

        # Standardize and normalize the data
        data_columns = combined_df.columns.drop('timestamp (+0200)')
        combined_df = standardize_and_normalize(combined_df, data_columns)

        # Define output file path and save combined data
        output_path = os.path.join(output_sub_dir, f"{body_part.capitalize()}_combined.csv")
        combined_df.to_csv(output_path, index=False)

        print(f"Standardized and normalized combined data for {body_part} saved to: {output_path}")


Standardized and normalized combined data for knee saved to: /content/drive/MyDrive/gait/Output/sub0/Knee_combined.csv
Skipping Arm in sub0 due to missing or empty data.
Skipping Thigh in sub0 due to missing or empty data.
Standardized and normalized combined data for knee saved to: /content/drive/MyDrive/gait/Output/sub1/Knee_combined.csv
Standardized and normalized combined data for Arm saved to: /content/drive/MyDrive/gait/Output/sub1/Arm_combined.csv
Standardized and normalized combined data for Thigh saved to: /content/drive/MyDrive/gait/Output/sub1/Thigh_combined.csv
Standardized and normalized combined data for knee saved to: /content/drive/MyDrive/gait/Output/sub2/Knee_combined.csv
Skipping Arm in sub2 due to missing or empty data.
Skipping Thigh in sub2 due to missing or empty data.
Standardized and normalized combined data for knee saved to: /content/drive/MyDrive/gait/Output/sub3/Knee_combined.csv
Standardized and normalized combined data for Arm saved to: /content/drive/MyD