In [1]:
# Import the required libraries
!pip install natsort
!pip install pandas
import os
from natsort import natsorted
from pathlib import Path
import shutil
import pandas as pd



Change the patient ID to uppercase.
The consistency of IDs is very important!
Always check that every file follows the format below.

fileName: P002_031220_w_0001_nonleg_imu_knee_angle_moment.csv

meaning:  patientID_visitDate_speed_Numbering_measuredLeg_imu_knee_angle_moment.csv

In [3]:
# Directory containing the CSV files whose patient-ID prefix must be upper-cased.
# Raw string: the Windows path contains backslash sequences (\K, \P, \w, ...)
# that are not valid escapes in a normal string literal and trigger a
# warning on recent Python versions.
targetDir = r"R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\CSV\exported_csv"

# Loop through all entries in the target directory.
# natsorted() ensures natural sorting (p1, p2, p10 instead of p1, p10, p2)
for filename in natsorted(os.listdir(targetDir)):

    # Only rename entries that actually start with a lowercase 'p'.
    # Names already starting with 'P' need no change, and any other entry
    # must not have its first character overwritten.
    if not filename.startswith('p'):
        continue

    # Full path to the current file
    old_name = os.path.join(targetDir, filename)

    # Same name with the leading 'p' replaced by 'P'
    # (filename[1:] is everything after the first character)
    new_name = os.path.join(targetDir, 'P' + filename[1:])

    # Rename the file from old_name to new_name
    os.rename(old_name, new_name)
    

# txt To csv conversion
# Essential
# Always perform when receiving data


In [4]:
# txt to csv convert: read every tab-separated .txt file in txtDir and write
# it as a .csv file with the same base name into csvDir.
txtDir = r"R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\20220325_raw_byDeepak"
csvDir = r"R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\20220325_raw_byDeepak_csv"
# Make sure the output folder exists; to_csv() does not create missing folders
os.makedirs(csvDir, exist_ok=True)
# Naturally sorted list of the .txt files in the source directory
# (any other entry in the folder is skipped instead of being parsed)
dataList = natsorted([name for name in os.listdir(txtDir) if name.lower().endswith(".txt")])
for Name_datum in dataList:
    # Read the .txt file as tab-separated values
    read_file = pd.read_csv(os.path.join(txtDir, Name_datum), sep='\t')
    # Swap only the extension; str.replace(".txt", ".csv") would also rewrite
    # a ".txt" that happens to appear in the middle of the name
    csvName = os.path.splitext(Name_datum)[0] + ".csv"
    # Save as .csv (the DataFrame index is written as the first column, as before)
    read_file.to_csv(os.path.join(csvDir, csvName))
    

When converting TXT to CSV, append the oaleg ‘Side’ value (looked up in the demographics file) to the very end of each output file name.

In [10]:
# txt to csv convert, appending each patient's 'Side' value (from the
# demographics sheet) to the end of the output file name.
txtDir = r"R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\20220325_raw_byDeepak"
csvDir = r"R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\allnew_20220325_raw_byDeepak_csv"
dgDir = r"R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\demographics.xlsx"
# Make sure the output folder exists; to_csv() does not create missing folders
os.makedirs(csvDir, exist_ok=True)
# Naturally sorted list of the .txt files in the source directory
dataList = natsorted([name for name in os.listdir(txtDir) if name.lower().endswith(".txt")])
# Load the demographics Excel file; the 'ID' and 'Side' columns are used below
list_demograph = pd.read_excel(dgDir, engine='openpyxl')
# Files whose patient ID has no row in the demographics sheet
missingSide = []
for Name_datum in dataList:
    # Patient ID is the part of the file name before the first underscore;
    # look up the matching 'Side' entries
    sideInfo = list_demograph.loc[list_demograph['ID'] == Name_datum.split('_')[0], 'Side']
    if sideInfo.empty:
        # No demographics row for this ID: skip and report below instead of
        # failing with an IndexError on sideInfo.values[0]
        missingSide.append(Name_datum)
        continue
    # Read the .txt file as tab-separated values
    read_file = pd.read_csv(os.path.join(txtDir, Name_datum), sep='\t')
    # Build "<base name>_<Side>.csv"; splitext keeps any extra dots in the
    # base name intact, unlike splitting the whole name on '.'
    stem = os.path.splitext(Name_datum)[0]
    filename = f'{stem}_{sideInfo.values[0]}.csv'
    # Save the file under the new name that includes the side information
    read_file.to_csv(os.path.join(csvDir, filename))

if missingSide:
    print(f'skipped {len(missingSide)} file(s) with no matching ID in demographics: {missingSide}')

In [11]:
# Show the first 'Side' value of the lookup currently held in `sideInfo`.
# NOTE(review): relies on `sideInfo` still being in the kernel from the
# conversion loop, i.e. it reflects the last file that loop looked up.
sideInfo.values[0]

'R'

Save the CSV files into separate sub-folders within one main folder.
Not needed at the moment.

In [7]:
# Declare the necessary functions.
def ensure_dir(file_path):
    """Create the directory ``file_path``, including missing parents, if needed.

    Calling it on a directory that already exists is a no-op. Using
    ``exist_ok=True`` avoids the check-then-create race of testing
    ``os.path.exists`` first.

    Args:
        file_path: Path of the directory to create.

    Raises:
        FileExistsError: If ``file_path`` exists but is not a directory.
    """
    os.makedirs(file_path, exist_ok=True)

# Locations: exported CSVs are read from <dataDir>/exported_csv and copied
# into a nested folder tree under <dataDir>/<sortedFolder>.
dataDir = r"R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\CSV"
sortedFolder = "sorted_csv"
dataExt = r".csv"

# Naturally sorted names of the entries in exported_csv that end with dataExt.
exportedDir = os.path.join(dataDir, "exported_csv")
dataList = natsorted([name for name in os.listdir(exportedDir) if name.endswith(dataExt)])
countForErr = 0

# Copy each file into a folder derived from the underscore-separated
# fields of its name.
for datum in dataList:
    sep_datum = datum.split("_")

    # Names with fewer than five fields cannot be placed: count and report them.
    if len(sep_datum) < 5:
        countForErr += 1
        print(f'pass the weird file:{countForErr}')
        continue

    # Fields 1-5 of the name. The third field is read but not used in the path.
    # When the name has exactly five fields, the fifth still carries the
    # file extension.
    paName, paVisitDate, paAffectedside, paDataside, paSpeed = sep_datum[:5]

    # Destination folder: <dataDir>/<sortedFolder>/<field1>/<field2>/<field4>/<field5>
    saveDir = os.path.join(dataDir, sortedFolder, paName, paVisitDate, paDataside, paSpeed)
    ensure_dir(saveDir)

    # Copy (not move) the file, unless the destination already has it.
    destPath = os.path.join(saveDir, datum)
    if os.path.exists(destPath):
        continue
    shutil.copyfile(os.path.join(exportedDir, datum), destPath)

# Combine data classified into folders into one (just in case)
# Currently not needed

In [8]:
# Flatten the sorted folder tree: copy every .csv file found anywhere under
# dataDir into the single folder exportFolder.
# NOTE(review): these paths sit directly under ...\Data, whereas the sorting
# cell works under ...\Data\CSV - confirm which location is intended.
dataDir = r"R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\sorted_csv"
exportFolder = r"R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\exported_csv"

# Make sure the destination exists; copyfile() does not create folders
os.makedirs(exportFolder, exist_ok=True)

# Walk through all subdirectories and files in the sorted folder structure.
# The sub-directory list is unused; it is named _subdirs so the builtin
# dir() is not shadowed for the rest of the kernel session.
for path, _subdirs, files in os.walk(dataDir):
    for filename in files:
        # Extract the file extension to check if it's a CSV file
        ext = os.path.splitext(filename)[-1]
        if ext == '.csv':
            # Copy the CSV file from its nested folder into the flat export
            # folder; files sharing a name overwrite one another
            shutil.copyfile(os.path.join(path, filename), os.path.join(exportFolder, filename))

# Data to exclude - first filter out columns with anomalies


In [9]:
# Cases where column length is short
# Continuing from the next code...