# Data_Checker
### Purpose
- Save data step by step
- Identify data to exclude
- Automatically exclude data that should be removed
- Data to exclude is located in "20220325_raw_byDeepak_csv" folder
### Data needs to be modified based on which foot was stepped on
### Output Example
- Data_Checked
    - list of data.xlsx
    - [folder] include
    - [folder] exclude


In [1]:
# Import necessary libraries
import os
from natsort import natsorted
from pathlib import Path
import shutil
import pandas as pd
import numpy as np
!pip install tqdm
!pip install matplotlib
from tqdm.notebook import tqdm
from mpl_toolkits.axes_grid1 import host_subplot
import mpl_toolkits.axisartist as AA
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages



In [2]:
# Declare necessary functions
def ensure_dir(file_path):
    """Creates a directory if it doesn't already exist"""
    if not os.path.exists(file_path):
        os.makedirs(file_path)


# Exclude_insufficient_length and step-wise saving
# - Exclude data that doesn't meet minimum length requirements
# - Classify and save data into 1-step and 2-step categories

In [3]:
# First, divide data by steps
# Load complete data list
# Specify target directories
dataDir =       r"R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\allnew_20220325_raw_byDeepak_csv"
rawDir =        r"R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\allnew_20220325_raw_byDeepak_csv\RAW"
includeDir =    r"R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\allnew_20220325_raw_byDeepak_csv\INC_ByStep\RAW"
excludeDir =    r"R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\allnew_20220325_raw_byDeepak_csv\EXC"
dataExt = r".csv"


In [4]:

# Create directories if they don't exist
ensure_dir(includeDir)
ensure_dir(excludeDir)

# Get list of all files in raw directory, filter for CSV files only
dataList = natsorted([_ for _ in os.listdir(rawDir) if _.endswith(dataExt)])
excluded_len = []  # Track number of steps extracted per file (0 = excluded)

# Process each file to check data quality and extract individual steps
# Quality checks:
# 1) Does the file have complete data? Must have at least 1 step of IMU + mechanics data (minimum 75 columns)
# 2) Does the data contain repeated zeros indicating corrupted/incomplete recordings?

Num_column_start = 2  # Skip first 2 empty columns before data begins

for datum in tqdm(dataList):
    # Read individual CSV file
    read_file = pd.read_csv(os.path.join(rawDir, datum))
    
    # Check if file has valid data structure (columns must be multiple of 75)
    if (len(read_file.columns)-2) % 75 == 0 and (len(read_file.columns)-2) // 75 > 0:
        # File has proper structure - calculate maximum number of steps available
        MAXstepCount = (len(read_file.columns)-2) // 75  # Each step = 75 columns of data
        
        # Extract data for each individual step
        for stepcount in range(0, MAXstepCount):
            # Extract IMU sensor data for this step (63 columns per step)
            # Formula: [column_index * total_steps + current_step + offset]
            DFperStep_imu = read_file.iloc[:,[idx*MAXstepCount+stepcount+Num_column_start for idx in range(0,63)]]
            
            # Extract biomechanics/spatial-temporal data for this step (12 columns per step)
            # This data starts after all IMU columns and has different indexing pattern
            DFperStep_SD = read_file.iloc[:, [(63*MAXstepCount)+idx+stepcount*3+Num_column_start for idx in range(0,12*(MAXstepCount)) if idx%(3*MAXstepCount) <3]]
            
            # Combine IMU and biomechanics data, remove completely empty rows
            DFperStep = pd.concat([DFperStep_imu, DFperStep_SD], axis=1).dropna(how='all')
            
            # Create filename for individual step data
            nametoSave = datum.split(".")[0] + "_" + str(stepcount+1) + "_Step.csv"
            
            # Save individual step data to include directory
            DFperStep.to_csv(os.path.join(includeDir, nametoSave), index=False)
        
        # Record how many steps were successfully extracted from this file
        excluded_len.append(stepcount+1)
    else:
        # File doesn't meet quality standards - move to exclude directory
        read_file.to_csv(os.path.join(excludeDir, datum), index=False)
        excluded_len.append(0)  # 0 indicates file was excluded

# Create summary report of processing results
listofname = pd.DataFrame(dataList, columns=["fileName"])
listofExcluded = pd.DataFrame(excluded_len, columns=["No. of Included"])
result = pd.concat([listofname, listofExcluded], axis=1)

# Calculate statistics
numofexcluded = (listofExcluded[listofExcluded.columns[0]] == 0).sum()  # Files completely excluded
numofonestep = (listofExcluded[listofExcluded.columns[0]] == 1).sum()   # Files with 1 step extracted
numoftwostep = (listofExcluded[listofExcluded.columns[0]] == 2).sum()   # Files with 2 steps extracted

# Save processing summary to Excel file
result.to_excel(os.path.join(dataDir, "list_Excluded_DataByStep.xlsx"))

  0%|          | 0/1304 [00:00<?, ?it/s]

In [5]:
# Data verification summary
Numtotal = numofexcluded+numofonestep+numoftwostep
print(f"No. Excluded: {numofexcluded} \nNo. 1 step Included: {numofonestep} \nNo. 2 step Included: {numoftwostep} \nNo. total(verify): {Numtotal} \nNo. total(trueval): {len(dataList)}")
print("==========================")
print(f"INC_ByStep contains {numofonestep + 2*numoftwostep} files = No. 1 step included + 2 * No. 2 step included(s)")

No. Excluded: 391 
No. 1 step Included: 828 
No. 2 step Included: 85 
No. total(verify): 1304 
No. total(trueval): 1304
INC_ByStep contains 998 files = No. 1 step included + 2 * No. 2 step included(s)


# Exclude columns filled with only zeros

In [6]:
# Filter out step-wise data that contains columns with only zeros
# Reference: https://stackoverflow.com/questions/21164910/how-do-i-delete-a-column-that-contains-only-zeros-in-pandas

# Define directory paths for zero-column filtering process
dataDir =       r'R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\allnew_20220325_raw_byDeepak_csv\INC_ByStep'
rawDir =       r'R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\allnew_20220325_raw_byDeepak_csv\INC_ByStep\RAW'
includeDir =    r'R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\allnew_20220325_raw_byDeepak_csv\INC_ByStep\INC_ByZero\RAW'
excludeDir =    r'R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\allnew_20220325_raw_byDeepak_csv\INC_ByStep\EXC'

# Create output directories if they don't exist
ensure_dir(includeDir)
ensure_dir(excludeDir)
dataExt = r".csv"

# Get list of all CSV files from the step-classified directory
dataList = natsorted([_ for _ in os.listdir(rawDir) if _.endswith(dataExt)])
excluded_zero = []  # Track which files pass/fail zero-column test (1=pass, 0=fail)

# Process each file to identify and remove zero-only columns
for datum in tqdm(dataList):
    # Read individual step file
    read_file = pd.read_csv(os.path.join(rawDir, datum))
    
    # Create working copy starting from row 4 (skip header/metadata rows)
    tmp = read_file.iloc[4:,:].copy()
    
    # Convert all data to float for numerical analysis
    tmp = tmp.astype(float)
    
    # Remove columns that contain only zeros
    # (tmp != 0).any(axis=0) returns True for columns with at least one non-zero value
    tmp = tmp.loc[:,(tmp != 0).any(axis=0)]
    
    # Check if remaining data still has valid structure (multiple of 75 columns)
    if (len(tmp.columns)) % 75 == 0 and (len(tmp.columns)) // 75 > 0:
        # Data is still valid after removing zero columns - save to include directory
        read_file.to_csv(os.path.join(includeDir, datum), index=False)
        excluded_zero.append(1)  # Mark as included
    else:
        # Removing zero columns broke the data structure - exclude this file
        read_file.to_csv(os.path.join(excludeDir, datum), index=False)
        excluded_zero.append(0)  # Mark as excluded

# Create summary report of zero-column filtering results
listofname = pd.DataFrame(dataList, columns=["fileName"])
listofExcluded = pd.DataFrame(excluded_zero, columns=["No. of Included"])

# Calculate statistics
numofexcluded = (listofExcluded[listofExcluded.columns[0]] == 0).sum()  # Files excluded due to zero columns
numofincluded = (listofExcluded[listofExcluded.columns[0]] == 1).sum()  # Files that passed zero-column test

# Save filtering results to Excel file
result = pd.concat([listofname, listofExcluded], axis=1)
result.to_excel(os.path.join(dataDir, "list_Excluded_byZero.xlsx"))

  0%|          | 0/998 [00:00<?, ?it/s]

In [7]:
Numtotal = numofexcluded+numofincluded
print(f"No. Excluded: {numofexcluded} \nNo.Included: {numofincluded} \nNo. total(verify): {Numtotal} \nNo. total(truevl): {len(dataList)}")
print("==========================")
print(f"includ folder contain {numofincluded} file(s)")

No. Excluded: 95 
No.Included: 903 
No. total(verify): 998 
No. total(truevl): 998
includ folder contain 903 file(s)


# Axis conversion based on which foot was stepped
# - oaleg - R : Right foot moment measurement
# - oaleg - L : Left foot moment measurement  
# - nonleg- R : Left foot moment measurement
# - nonleg- L : Right foot moment measurement

In [8]:
rawDir =        r'R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\allnew_20220325_raw_byDeepak_csv\INC_ByStep\INC_ByZero\RAW'
targetDir =    r'R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\allnew_20220325_raw_byDeepak_csv\INC_ByStep\INC_ByZero\RAW_AXIS_corrected'

In [9]:
def nonlegisLeft(sideInfo):
    if (sideInfo == 'R'):
        return True
    else:
        return False

In [10]:
ensure_dir(targetDir)
dataExt = r".csv"

# Get list of all CSV files from the directory
dataList = natsorted([_ for _ in os.listdir(rawDir) if _.endswith(dataExt)])
excluded_zero = []

for datum in tqdm(dataList):
    # Read individual file
    read_file = pd.read_csv(os.path.join(rawDir, datum))
    
    # Convert data rows (starting from row 4) to float for numerical processing
    read_file.iloc[4:,:] = read_file.iloc[4:,:].astype(float)
    
    # Extract side and leg information from filename
    sideInfo = datum.split('_')[-3]  # Get affected side info
    legInfo = datum.split('_')[4]    # Get leg info
    
    # If nonleg is the left foot
    if nonlegisLeft(sideInfo):
        # Define target columns for axis conversion (skip certain column ranges)
        # Excludes columns where (col%9 >0 and col%9<4) or (col%9 >5 and col%9<9)
        targetcol = [_ for _ in range(0,27) if not ((_%9 >0) and (_%9<4)) or ((_%9 >5) and (_%9<9))]
        
        # Apply negative transformation to selected columns
        for col in targetcol:
            read_file.iloc[4:,col] = read_file.iloc[4:,col].apply(lambda x:x*-1)
        # Modify nonleg data!
    
    # If nonleg is not the left foot (i.e., nonleg is right foot)
    else:
        # Define target columns with offset of 27 for the other leg's data
        targetcol = [_ + 27 for _ in range(0,27) if not ((_%9 >0) and (_%9<4)) or ((_%9 >5) and (_%9<9))]
        
        # Apply negative transformation to selected columns
        for col in targetcol:
            read_file.iloc[4:,col] = read_file.iloc[4:,col].apply(lambda x:x*-1)
        # Modify oaleg data!
    
    # Save the processed file with axis corrections
    read_file.to_csv(os.path.join(targetDir,datum),index=False)

  0%|          | 0/903 [00:00<?, ?it/s]

# PDF plotting for visual inspection
# - Generate PDF plots for manual visual verification

In [11]:
# Define necessary functions
def axis3plot(position, data, name):
    """
    Creates a 3-axis plot with X, Y, Z data on separate y-axes
    Args:
        position: subplot position for matplotlib
        data: DataFrame with 3 columns (X, Y, Z axis data)
        name: label for the plot/sensor type
    """
    # Plot layout parameters
    subAJ_right = 0.5      # Right margin adjustment
    subAJ_top = 1.5        # Top margin adjustment  
    subAJ_wspace = 0.5     # Width spacing between subplots
    sub_offset = 40        # Offset for third y-axis
    
    # Convert data to float for plotting
    data = data.astype(float)
    
    # Create main plot host with custom axes
    host = host_subplot(position, axes_class=AA.Axes)
    plt.subplots_adjust(right=subAJ_right, top=subAJ_top, wspace=subAJ_wspace)
    
    # Create two additional y-axes (twin axes) for Y and Z data
    par1 = host.twinx()  # Second y-axis for Y data
    par2 = host.twinx()  # Third y-axis for Z data
    
    # Position the third y-axis with an offset to avoid overlap
    offset = sub_offset
    new_fixed_axis = par2.get_grid_helper().new_fixed_axis
    par2.axis["right"] = new_fixed_axis(loc="right",
                                        axes=par2,
                                        offset=(offset, 0))
    
    # Enable visibility for the right-side y-axes
    par1.axis["right"].toggle(all=True) 
    par2.axis["right"].toggle(all=True)
    
    # Set axis labels
    host.set_xlabel(f"Time/{name}")  # X-axis shows time with sensor name
    
    # Plot the three data series (X, Y, Z) on their respective axes
    p1, = host.plot(range(0, len(data)), np.array(data.iloc[:,0]), label="X_axis")  # X data on main axis
    p2, = par1.plot(range(0, len(data)), np.array(data.iloc[:,1]), label="Y_axis")  # Y data on second axis
    p3, = par2.plot(range(0, len(data)), np.array(data.iloc[:,2]), label="Z_axis")  # Z data on third axis
    
    # Add legend to identify the three lines
    host.legend()
    
    # Color-code the y-axis labels, ticks, and tick labels to match their data lines
    host.axis["left"].label.set_color(p1.get_color())           # X-axis (left) matches X data color
    par1.axis["right"].label.set_color(p2.get_color())         # Y-axis (right) matches Y data color  
    par2.axis["right"].label.set_color(p3.get_color())         # Z-axis (offset right) matches Z data color
    
    host.axis["left"].major_ticks.set_color(p1.get_color())     # Color the tick marks
    par1.axis["right"].major_ticks.set_color(p2.get_color())
    par2.axis["right"].major_ticks.set_color(p3.get_color())
    
    host.axis["left"].major_ticklabels.set_color(p1.get_color()) # Color the tick labels
    par1.axis["right"].major_ticklabels.set_color(p2.get_color())
    par2.axis["right"].major_ticklabels.set_color(p3.get_color())

In [12]:
# Generate images and visually assess data to determine what should be excluded
# Here we only generate the images
# Visual inspection and exclusion will be done separately in a .py script

# Define necessary directories
dataDir =       r'R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\allnew_20220325_raw_byDeepak_csv\INC_ByStep\INC_ByZero'
rawDir =        r'R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\allnew_20220325_raw_byDeepak_csv\INC_ByStep\INC_ByZero\AXIS_correction'
figDir =        r'R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\allnew_20220325_raw_byDeepak_csv\INC_ByStep\INC_ByZero\FIG'
includeDir =    r'R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\allnew_20220325_raw_byDeepak_csv\INC_ByStep\INC_ByZero\Included_checked\RAW'
excludeDir =    r'R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\allnew_20220325_raw_byDeepak_csv\INC_ByStep\INC_ByZero\EXC'

# Create output directories if they don't exist
ensure_dir(figDir)
ensure_dir(includeDir)
ensure_dir(excludeDir)
dataExt = r".csv"

# Get list of all CSV files from the axis-corrected directory
dataList = natsorted([_ for _ in os.listdir(rawDir) if _.endswith(dataExt)])
excluded_zero = []

for datum in tqdm(dataList):
    # Read data file
    read_file = pd.read_csv(os.path.join(rawDir, datum))
    
    # Generate figure only if PDF doesn't already exist
    if not os.path.exists(os.path.join(figDir, f'{datum.split(".")[0]}.pdf')):
        pp = PdfPages(os.path.join(figDir, f'{datum.split(".")[0]}.pdf'))
        
        ################################# Page 1 ###############
        # Plot first 9 sensor groups (columns 0-26)
        plt.figure(figsize=(15, 8))
        for i in range(0, 9):
            axis3plot(331+i, read_file.iloc[4:, 3*i:3+i*3], '_'.join(read_file.iloc[0, 3*i:3+i*3][0].split('_')[:-1]))
        plt.tight_layout()
        pp.savefig()
        plt.close()
        
        ################################# Page 2 ###############
        # Starting column position for second leg data
        col_startForFig = 27
        plt.figure(figsize=(15, 8))
        for i in range(0, 9):
            axis3plot(331+i, read_file.iloc[4:, col_startForFig+3*i:col_startForFig+3+i*3], '_'.join(read_file.iloc[0, col_startForFig+3*i:col_startForFig+3+i*3][0].split('_')[:-1]))
        plt.tight_layout()
        pp.savefig()
        plt.close()
        
        ################################# Page 3 ###############
        # Starting column position for additional sensor data
        col_startForFig = 54
        plt.figure(figsize=(15, 8))
        for i in range(0, 3):
            axis3plot(331+i, read_file.iloc[4:, col_startForFig+3*i:col_startForFig+3+i*3], '_'.join(read_file.iloc[0, col_startForFig+3*i:col_startForFig+3+i*3][0].split('_')[:-1]))
        plt.tight_layout()
        pp.savefig()
        plt.close()
        
        ################################# Page 4 ###############
        col_startForFig = 63
        plt.figure(figsize=(15, 8))
        for i in range(0, 4):
            axis3plot(331+i, read_file.iloc[4:, col_startForFig+3*i:col_startForFig+3+i*3], '_'.join(read_file.iloc[0, col_startForFig+3*i:col_startForFig+3+i*3][0].split('_')[:-1]))
        plt.tight_layout()
        pp.savefig()
        plt.close()
        
        ############################ After all plots are generated ############################
        pp.close()
        
        # Note: Creating 903 PDFs takes about 2 hours - is this correct?
        # Processing speed is fast, but reading/writing data over network seems slow
        # About 3 seconds per file

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'R:\\KumarLab3\\PROJECTS\\wesens\\Data\\Analysis\\smith_dl\\IMU Deep Learning\\Data\\allnew_20220325_raw_byDeepak_csv\\INC_ByStep\\INC_ByZero\\AXIS_correction'

# 끝

## Why is there a size difference between the original data (908 files) and the newly created data (903 files)?
- The data count of 903 files is correct
- But the original dataset contains 5 extra files
- Let's find the culprit!

In [None]:
# Define paths to compare PDF files between new and original datasets
allnew = r'R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\allnew_20220325_raw_byDeepak_csv\INC_ByStep\INC_ByZero\FIG'
origin = r'R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\20220325_raw_byDeepak_csv\INC_ByStep\INC_ByZero\FIG'
dataExt = r".pdf"

# Get complete file lists and filter for PDF files only
allnewdataList = natsorted([_ for _ in os.listdir(allnew) if _.endswith(dataExt)])  # New dataset PDF files
origindataList = natsorted([_ for _ in os.listdir(origin) if _.endswith(dataExt)])  # Original dataset PDF files

In [None]:
# Process filename to remove step information for comparison
# Take the first filename and split it by underscores
str_name = np.array(allnewdataList[0].split('_'))

# Remove the third-to-last element (likely the step number like "1" or "2")
str_name = np.delete(str_name, -3)

# Rejoin the parts back into a filename string
str_name = '_'.join(str_name)

# Display the processed filename
str_name

In [None]:
# Process all filenames to create standardized names for comparison
renamed = []
for data in allnewdataList:
    # Split filename by underscores into array
    str_name = np.array(data.split('_'))
    
    # Remove the third-to-last element (step number) from the filename
    str_name = np.delete(str_name, -3)
    
    # Rejoin the parts back into a standardized filename
    str_name = '_'.join(str_name)
    
    # Add the processed filename to the list
    renamed.append(str_name)


In [None]:
# Find files that exist in one dataset but not the other (symmetric difference)
# This will show files that are only in the original dataset OR only in the new dataset
set(origindataList) ^ set(renamed)

# Supplementary - Newly learned code this time

In [None]:
############# Scalable code!!
## We did it!

# Configuration parameters
Num_column_start = 2    # Number of columns to skip at the beginning (empty columns)
MAXstepCount = 3        # Maximum number of steps in the data
stepcount = 2           # Current step to extract (0-indexed, so this is step 3)

# Complex column indexing formula for extracting biomechanics data for a specific step
# This extracts spatial-temporal/biomechanics data (12 columns per step) for the specified step
[(63*MAXstepCount) + idx + stepcount*3 + Num_column_start 
 for idx in range(0, 12*(MAXstepCount)) 
 if idx%(3*MAXstepCount) < 3]

# Breakdown of the formula:
# - (63*MAXstepCount): Skip all IMU data columns (63 columns × number of steps)
# - idx + stepcount*3: Navigate to the correct step's data within the biomechanics section
# - Num_column_start: Account for initial empty columns
# - idx%(3*MAXstepCount) < 3: Filter condition to select only the first 3 columns of each group

In [None]:
# Example of creating multiple 3-axis plots in a grid layout
# Reference: https://matplotlib.org/3.5.0/gallery/axisartist/demo_parasite_axes2.html

from mpl_toolkits.axes_grid1 import host_subplot
import mpl_toolkits.axisartist as AA
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# Plot layout parameters
subAJ_right = 1.5      # Right margin adjustment
subAJ_top = 1.5        # Top margin adjustment  
subAJ_wspace = 0.4     # Width spacing between subplots
sub_offset = 15        # Offset for third y-axis

# =============== First subplot (position 331) ===============
host = host_subplot(331, axes_class=AA.Axes)
plt.subplots_adjust(right=subAJ_right, top=subAJ_top, wspace=subAJ_wspace)

# Create twin axes for Y and Z data
par1 = host.twinx()  # Second y-axis
par2 = host.twinx()  # Third y-axis

# Position the third y-axis with offset to avoid overlap
offset = sub_offset
new_fixed_axis = par2.get_grid_helper().new_fixed_axis
par2.axis["right"] = new_fixed_axis(loc="right", axes=par2, offset=(offset, 0))

# Enable visibility for right-side axes
par1.axis["right"].toggle(all=True) 
par2.axis["right"].toggle(all=True)

# Set labels and plot sample data
host.set_xlabel("Time")
p1, = host.plot([0, 1, 2], [0, 1, 2], label="X_axis")     # X data on main axis
p2, = par1.plot([0, 1, 2], [0, 3, 2], label="Y_axis")    # Y data on second axis
p3, = par2.plot([0, 1, 2], [50, 30, 15], label="Z_axis") # Z data on third axis

# Add legend
host.legend()

# Color-code axes to match their data lines
host.axis["left"].label.set_color(p1.get_color())
par1.axis["right"].label.set_color(p2.get_color())
par2.axis["right"].label.set_color(p3.get_color())

host.axis["left"].major_ticks.set_color(p1.get_color())
par1.axis["right"].major_ticks.set_color(p2.get_color())
par2.axis["right"].major_ticks.set_color(p3.get_color())

host.axis["left"].major_ticklabels.set_color(p1.get_color())
par1.axis["right"].major_ticklabels.set_color(p2.get_color())
par2.axis["right"].major_ticklabels.set_color(p3.get_color())

# =============== Second subplot (position 332) ===============
host = host_subplot(332, axes_class=AA.Axes)
plt.subplots_adjust(right=subAJ_right, top=subAJ_top, wspace=subAJ_wspace)

# Create twin axes
par1 = host.twinx()
par2 = host.twinx()

# Position third y-axis
offset = sub_offset
new_fixed_axis = par2.get_grid_helper().new_fixed_axis
par2.axis["right"] = new_fixed_axis(loc="right", axes=par2, offset=(offset, 0))

par1.axis["right"].toggle(all=True) 
par2.axis["right"].toggle(all=True)

# Plot different sample data for this subplot
host.set_xlabel("Time")
p1, = host.plot([0, 1, 2], [0, 2, 1], label="X_axis")
p2, = par1.plot([0, 1, 2], [0, 5, 2], label="Y_axis")
p3, = par2.plot([0, 1, 2], [50, 2, 15], label="Z_axis")

host.legend()

# Color-code axes
host.axis["left"].label.set_color(p1.get_color())
par1.axis["right"].label.set_color(p2.get_color())
par2.axis["right"].label.set_color(p3.get_color())

host.axis["left"].major_ticks.set_color(p1.get_color())
par1.axis["right"].major_ticks.set_color(p2.get_color())
par2.axis["right"].major_ticks.set_color(p3.get_color())

host.axis["left"].major_ticklabels.set_color(p1.get_color())
par1.axis["right"].major_ticklabels.set_color(p2.get_color())
par2.axis["right"].major_ticklabels.set_color(p3.get_color())

# =============== Third subplot (position 333) ===============
host = host_subplot(333, axes_class=AA.Axes)
plt.subplots_adjust(right=subAJ_right, top=subAJ_top, wspace=subAJ_wspace)

par1 = host.twinx()
par2 = host.twinx()

offset = sub_offset
new_fixed_axis = par2.get_grid_helper().new_fixed_axis
par2.axis["right"] = new_fixed_axis(loc="right", axes=par2, offset=(offset, 0))

par1.axis["right"].toggle(all=True) 
par2.axis["right"].toggle(all=True)

host.set_xlabel("Time")
p1, = host.plot([0, 1, 2], [0, 1, 2], label="X_axis")
p2, = par1.plot([0, 1, 2], [0, 3, 2], label="Y_axis")
p3, = par2.plot([0, 1, 2], [50, 30, 15], label="Z_axis")

host.legend()

# Color-code axes
host.axis["left"].label.set_color(p1.get_color())
par1.axis["right"].label.set_color(p2.get_color())
par2.axis["right"].label.set_color(p3.get_color())

host.axis["left"].major_ticks.set_color(p1.get_color())
par1.axis["right"].major_ticks.set_color(p2.get_color())
par2.axis["right"].major_ticks.set_color(p3.get_color())

host.axis["left"].major_ticklabels.set_color(p1.get_color())
par1.axis["right"].major_ticklabels.set_color(p2.get_color())
par2.axis["right"].major_ticklabels.set_color(p3.get_color())

# =============== Fourth subplot (position 334) ===============
host = host_subplot(334, axes_class=AA.Axes)
plt.subplots_adjust(right=subAJ_right, top=subAJ_top, wspace=subAJ_wspace)

par1 = host.twinx()
par2 = host.twinx()

offset = sub_offset
new_fixed_axis = par2.get_grid_helper().new_fixed_axis
par2.axis["right"] = new_fixed_axis(loc="right", axes=par2, offset=(offset, 0))

par1.axis["right"].toggle(all=True) 
par2.axis["right"].toggle(all=True)

host.set_xlabel("Time")
p1, = host.plot([0, 1, 2], [0, 2, 1], label="X_axis")
p2, = par1.plot([0, 1, 2], [0, 5, 2], label="Y_axis")
p3, = par2.plot([0, 1, 2], [50, 2, 15], label="Z_axis")

host.legend()

# Color-code axes
host.axis["left"].label.set_color(p1.get_color())
par1.axis["right"].label.set_color(p2.get_color())
par2.axis["right"].label.set_color(p3.get_color())

host.axis["left"].major_ticks.set_color(p1.get_color())
par1.axis["right"].major_ticks.set_color(p2.get_color())
par2.axis["right"].major_ticks.set_color(p3.get_color())

host.axis["left"].major_ticklabels.set_color(p1.get_color())
par1.axis["right"].major_ticklabels.set_color(p2.get_color())
par2.axis["right"].major_ticklabels.set_color(p3.get_color())

# Finalize layout and save to PDF
plt.tight_layout()

result_fig = plt.draw()
# Save the complete figure to PDF
pp = PdfPages(os.path.join(r'Z:\PROJECTS\iwalqq\Data\V3D\Output\IMU Deep Learning\Data\20220325_raw_byDeepak_csv\INC_ByStep\INC_ByZero\FIG','line_plot.pdf'))
pp.savefig(result_fig)
pp.close()


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# sinusoidal sample data
sample_length = range(1, 15+1)
rads = np.arange(0, 2*np.pi, 0.01)
data = np.array([np.sin(t*rads) for t in sample_length])
df = pd.DataFrame(data.T, index=pd.Series(rads.tolist(), name='radians'), columns=[f'freq: {i}x' for i in sample_length])


# default plot with subplots; each column is a subplot
axes = df.plot(subplots=True)

In [None]:
axes = df.plot(subplots=True, layout=(3, 5), figsize=(25, 16), sharex=True, sharey=True)

# flatten the axes array to easily access any subplot
axes = axes.flat

# extract the figure object
fig = axes[0].get_figure()

# use tight_layout
fig.tight_layout()

In [None]:
# 함수화
def axis3plot(position,data,name):
    subAJ_right = 0.5
    subAJ_top = 1.5
    subAJ_wspace = 0.5
    sub_offset= 40

    data = data.astype(float)
    
    host = host_subplot(position, axes_class=AA.Axes)
    plt.subplots_adjust(right=subAJ_right,top=subAJ_top, wspace=subAJ_wspace)

    par1 = host.twinx()
    par2 = host.twinx()

    offset = sub_offset
    new_fixed_axis = par2.get_grid_helper().new_fixed_axis
    par2.axis["right"] = new_fixed_axis(loc="right",
                                        axes=par2,
                                        offset=(offset, 0))

    par1.axis["right"].toggle(all=True) 
    par2.axis["right"].toggle(all=True)

    # host.set_xlim(0, 2)
    # host.set_ylim(0, 2)

    host.set_xlabel(f"Time/{name}")
    # host.set_ylabel("X_axis")
    # par1.set_ylabel("Y_axis")
    # par2.set_ylabel("Z_axis")

    p1, = host.plot(range(0,len(data)), np.array(data.iloc[:,0]), label="X_axis")
    p2, = par1.plot(range(0,len(data)),  np.array(data.iloc[:,1]), label="Y_axis")
    p3, = par2.plot(range(0,len(data)),  np.array(data.iloc[:,2]), label="Z_axis")

    # par1.set_ylim(0, 4)
    # par2.set_ylim(1, 65)

    host.legend()

    host.axis["left"].label.set_color(p1.get_color())
    par1.axis["right"].label.set_color(p2.get_color())
    par2.axis["right"].label.set_color(p3.get_color())

    host.axis["left"].major_ticks.set_color(p1.get_color())
    par1.axis["right"].major_ticks.set_color(p2.get_color())
    par2.axis["right"].major_ticks.set_color(p3.get_color())

    host.axis["left"].major_ticklabels.set_color(p1.get_color())
    par1.axis["right"].major_ticklabels.set_color(p2.get_color())
    par2.axis["right"].major_ticklabels.set_color(p3.get_color())

plt.figure(figsize=(15, 8))
axis3plot(331,non_shank_acc,'non_shank_acc')
axis3plot(332,non_shank_acc,'non_shank_acc')
axis3plot(333,non_shank_acc,'non_shank_acc')
axis3plot(334,non_shank_acc,'non_shank_acc')
axis3plot(335,non_shank_acc,'non_shank_acc')
axis3plot(336,non_shank_acc,'non_shank_acc')
axis3plot(338,non_shank_acc,'non_shank_acc')
plt.tight_layout()

ressult_fig = plt.draw()
# plt.show()
pp = PdfPages(os.path.join(r'Z:\PROJECTS\iwalqq\Data\V3D\Output\IMU Deep Learning\Data\20220325_raw_byDeepak_csv\INC_ByStep\INC_ByZero\FIG','line_plot.pdf'))
pp.savefig(ressult_fig)
pp.close()

In [None]:
import re
A = 'W002_20210715_isok_concon60_0001_Miqus_21_25762.avi'
p = re.compile(r'_([0-9]{5})\.')
p.search(A)[0][1:-1]