# Get the motion data

The InHARD dataset contains motion data of operators executing assembly tasks. The motion data contains the joint positions and joint rotations at each frame.

The data is stored in BVH files, which also include the hierarchy of the skeleton.

In [1]:
import pandas as pd

# Path to one sample BVH recording
bvh_file_path = "./InHARD_13/Skeleton/P01_R01.bvh"

# Skeleton joints, in the order used to label the motion columns.
# NOTE(review): presumably this matches the joint order of the BVH hierarchy - confirm.
Joint = [
    # Root and legs
    'Hips',
    'RightUpLeg', 'RightLeg', 'RightFoot',
    'LeftUpLeg', 'LeftLeg', 'LeftFoot',
    # Spine, neck and head
    'Spine', 'Spine1', 'Spine2', 'Spine3', 'Neck', 'Head',
    # Right arm and hand
    'RightShoulder', 'RightArm', 'RightForeArm', 'RightHand',
    'RightHandThumb1', 'RightHandThumb2', 'RightHandThumb3',
    'RightInHandIndex', 'RightHandIndex1', 'RightHandIndex2', 'RightHandIndex3',
    'RightInHandMiddle', 'RightHandMiddle1', 'RightHandMiddle2', 'RightHandMiddle3',
    'RightInHandRing', 'RightHandRing1', 'RightHandRing2', 'RightHandRing3',
    'RightInHandPinky', 'RightHandPinky1', 'RightHandPinky2', 'RightHandPinky3',
    # Left arm and hand
    'LeftShoulder', 'LeftArm', 'LeftForeArm', 'LeftHand',
    'LeftHandThumb1', 'LeftHandThumb2', 'LeftHandThumb3',
    'LeftInHandIndex', 'LeftHandIndex1', 'LeftHandIndex2', 'LeftHandIndex3',
    'LeftInHandMiddle', 'LeftHandMiddle1', 'LeftHandMiddle2', 'LeftHandMiddle3',
    'LeftInHandRing', 'LeftHandRing1', 'LeftHandRing2', 'LeftHandRing3',
    'LeftInHandPinky', 'LeftHandPinky1', 'LeftHandPinky2', 'LeftHandPinky3',
]

print("nbr of joints:", len(Joint))

# Six channels per joint, in this exact order: x/y/z position, then y/x/z for the
# "_o" channels (presumably the rotations - TODO confirm against the BVH CHANNELS lines).
CHANNEL_SUFFIXES = ('_xp', '_yp', '_zp', '_yo', '_xo', '_zo')

# One column label per (joint, channel) pair, joint-major
joints = [
    joint_name + suffix
    for joint_name in Joint
    for suffix in CHANNEL_SUFFIXES
]

print("nbr of columns:", len(joints))

nbr of joints: 59
nbr of columns: 354


In [2]:
import os
import pandas as pd
from itertools import islice

directory_path = './InHARD_13/Skeleton'  # Replace with the actual path to your directory

# 1-based number of the first line that holds frame values; every line before it
# is skipped as header.
# NOTE(review): 353 presumably matches the header length of the InHARD skeleton
# files - confirm before reusing this cell with another skeleton.
FIRST_MOTION_LINE = 353

# Create a dictionary to store DataFrames (one per recording, keyed by file stem)
dataframes_dict = {}


def read_motion_rows(file_path, first_line=FIRST_MOTION_LINE):
    """Read the frame rows of one BVH file.

    Parameters
    ----------
    file_path : str
        Path of the BVH file to read.
    first_line : int
        1-based number of the first line to keep.

    Returns
    -------
    list of list of str
        The whitespace-separated values of every non-blank line from
        ``first_line`` onwards. Values are kept as strings.
    """
    with open(file_path, 'r') as file:
        return [
            line.split()
            for line_number, line in enumerate(file, start=1)
            # Blank lines (e.g. a trailing newline) would otherwise become
            # all-None rows in the DataFrame.
            if line_number >= first_line and line.strip()
        ]


# os.listdir returns entries in arbitrary order; sorting makes the "first
# recording" used by later cells and the printed order reproducible.
for filename in sorted(os.listdir(directory_path)):
    if not filename.endswith(".bvh"):
        continue

    file_path = os.path.join(directory_path, filename)

    # Read the frame rows of the bvh file into a DataFrame
    data = read_motion_rows(file_path)
    skeleton = pd.DataFrame(data, columns=joints)

    # Name the DataFrame after the file name (text before the first dot)
    skeleton_name = f"{filename.split('.')[0]}"

    # Store the DataFrame in the dictionary
    dataframes_dict[skeleton_name] = skeleton
    print(skeleton_name)

P01_R01
P01_R02
P01_R03
P02_R01
P02_R02
P03_R01
P03_R03
P03_R04
P04_R01
P04_R02
P05_R01
P05_R02
P05_R03
P05_R04
P06_R01
P07_R01
P07_R02
P08_R01
P08_R02
P08_R03
P08_R04
P09_R01
P09_R02
P09_R03
P10_R01
P10_R02
P10_R03
P11_R01
P11_R02
P12_R01
P12_R02
P13_R02
P14_R01
P14_R02
P15_R01
P15_R02
P16_R01
P16_R02


In [4]:
# Use the first loaded recording as the reference for the column names
first_recording_name = list(dataframes_dict)[0]
data = dataframes_dict[first_recording_name]

Get only joint positions (_xp, _yp, _zp)

In [5]:
# Keep only the position channels of every joint

# Column names ending with '_xp', '_yp' or '_zp', taken from the reference recording
position_suffixes = ('_xp', '_yp', '_zp')
selected_columns = [col for col in data.columns if col.endswith(position_suffixes)]

# Replace each recording with the DataFrame restricted to the selected columns
for key in list(dataframes_dict):
    dataframes_dict[key] = dataframes_dict[key][selected_columns]

In [6]:
# Report, per column, whether the first recording contains any missing value
first_recording = dataframes_dict[list(dataframes_dict)[0]]
first_recording.isnull().any()

Hips_xp              False
Hips_yp              False
Hips_zp              False
RightUpLeg_xp        False
RightUpLeg_yp        False
                     ...  
LeftHandPinky2_yp    False
LeftHandPinky2_zp    False
LeftHandPinky3_xp    False
LeftHandPinky3_yp    False
LeftHandPinky3_zp    False
Length: 177, dtype: bool

# Define resolution (30fps)
The default frame rate of the recordings is 120 fps; keeping one frame out of every 4 gives 30 fps.

In [7]:
import pandas as pd

step = 4 # for 30fps: the default frame rate is 120fps (120 / 4 = 30)

# Define a function to create the new dataframe
def create_new_dataframe(old_df, interval):
    """Downsample a DataFrame by keeping the last row of every chunk of rows.

    The rows are cut into consecutive chunks of ``interval`` rows and the last
    row of each chunk is kept. When the number of rows is not a multiple of
    ``interval``, the final, shorter chunk contributes the very last row.

    Parameters
    ----------
    old_df : pandas.DataFrame
        Frame to downsample.
    interval : int
        Chunk length in rows; must be at least 1.

    Returns
    -------
    pandas.DataFrame
        The kept rows with their original index labels, columns and dtypes.
        An empty input gives an empty frame with the same columns.

    Raises
    ------
    ValueError
        If ``interval`` is smaller than 1.
    """
    if interval < 1:
        raise ValueError("interval must be a positive integer")

    n_rows = len(old_df)
    # Position of the last row of every complete chunk
    keep_positions = list(range(interval - 1, n_rows, interval))
    # A trailing partial chunk ends on the last row of the frame
    if n_rows % interval:
        keep_positions.append(n_rows - 1)

    # One positional selection instead of building the frame row by row
    return old_df.iloc[keep_positions]

# Downsample every recording and store the result back under the same key.
# Note: running this cell again downsamples the already reduced frames once more.
for key in list(dataframes_dict):
    dataframes_dict[key] = create_new_dataframe(dataframes_dict[key], step)

In [8]:
# Preview the first recording after downsampling
first_key = list(dataframes_dict)[0]
dataframes_dict[first_key]

Unnamed: 0,Hips_xp,Hips_yp,Hips_zp,RightUpLeg_xp,RightUpLeg_yp,RightUpLeg_zp,RightLeg_xp,RightLeg_yp,RightLeg_zp,RightFoot_xp,...,LeftInHandPinky_zp,LeftHandPinky1_xp,LeftHandPinky1_yp,LeftHandPinky1_zp,LeftHandPinky2_xp,LeftHandPinky2_yp,LeftHandPinky2_zp,LeftHandPinky3_xp,LeftHandPinky3_yp,LeftHandPinky3_zp
3,21.217239,94.664818,23.341904,-9.283305,-1.613265,-0.035834,-0.053209,-41.864525,-0.146822,-0.359916,...,-1.201789,4.141052,-0.022105,-1.090526,2.756526,0.000000,0.000000,1.741895,0.000000,0.000000
7,21.362213,94.663223,23.430948,-9.285585,-1.610352,-0.036885,-0.071158,-41.869015,-0.126640,-0.463801,...,-1.201789,4.141052,-0.022105,-1.090526,2.756526,0.000000,0.000000,1.741895,0.000000,0.000000
11,21.613129,94.656242,23.567194,-9.284847,-1.612040,-0.039106,-0.056709,-41.868813,-0.124166,-0.594668,...,-1.201789,4.141052,-0.022105,-1.090526,2.756526,0.000000,0.000000,1.741895,0.000000,0.000000
15,21.738607,94.654129,23.584167,-9.283081,-1.611117,-0.022285,-0.061013,-41.872177,-0.078517,-0.618199,...,-1.201789,4.141052,-0.022105,-1.090526,2.756526,0.000000,0.000000,1.741895,0.000000,0.000000
19,21.697111,94.656281,23.424967,-9.285315,-1.615747,-0.008895,-0.063805,-41.874130,-0.056839,-0.469790,...,-1.201789,4.141052,-0.022105,-1.090526,2.756526,0.000000,0.000000,1.741895,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62491,-38.897144,94.459602,-42.383083,-9.223318,-1.599159,0.002280,0.048439,-41.871193,-0.021455,-0.393953,...,-1.201789,4.141052,-0.022105,-1.090526,2.756526,0.000000,0.000000,1.741895,0.000000,0.000000
62495,-38.984474,94.477135,-42.230938,-9.212724,-1.590879,-0.014128,0.083330,-41.863171,-0.092265,-0.349498,...,-1.201789,4.141052,-0.022105,-1.090526,2.756526,0.000000,0.000000,1.741895,0.000000,0.000000
62499,-39.115993,94.478752,-41.993748,-9.226039,-1.587976,-0.013244,0.058597,-41.855625,-0.107192,-0.412221,...,-1.201789,4.141052,-0.022105,-1.090526,2.756526,0.000000,0.000000,1.741895,0.000000,0.000000
62503,-38.827045,94.459450,-41.863350,-9.242977,-1.584826,-0.021892,0.040896,-41.848766,-0.142575,-0.287669,...,-1.201789,4.141052,-0.022105,-1.090526,2.756526,0.000000,0.000000,1.741895,0.000000,0.000000


# Filter the dataframe
Select the 21 joints that are not zeros: the finger joints of both hands (joints 17 to 35 and 40 to 58 in the joint list) are dropped.

In [None]:
import pandas as pd

# Drop the joints at positions 17 to 35 and 40 to 58 of the joint list (0-based).
# This assumes each frame holds only the 3 position columns per joint
# (177 columns), so joint j owns columns 3*j .. 3*j + 2: 51-107 and 120-176.
# Note: the positions are only valid on the 177-column frames; running this
# cell a second time fails because the reduced frames are narrower.
columns_to_drop = [*range(51, 108), *range(120, 177)]

for key in list(dataframes_dict):
    recording = dataframes_dict[key]
    dataframes_dict[key] = recording.drop(columns=recording.columns[columns_to_drop])

In [None]:
# Preview the first recording after the column drop
first_key = list(dataframes_dict)[0]
dataframes_dict[first_key]

# Normalize the data

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Create a MinMaxScaler
scaler = MinMaxScaler()

# Normalize every recording and store it back under the same key.
# fit_transform is called once per DataFrame, so each recording is scaled with
# its own per-column minimum and maximum, not with dataset-wide statistics.
for key, df in dataframes_dict.items():
    # Convert every column to numbers on a new frame, leaving the stored frame
    # untouched; entries that cannot be parsed become NaN.
    df = df.apply(pd.to_numeric, errors='coerce')

    # Select only numeric columns for normalization
    numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns

    # Normalize the numeric columns using MinMaxScaler
    df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

    # Replace the stored DataFrame with its normalized version
    dataframes_dict[key] = df

In [None]:
# Preview the first recording after normalization
first_key = list(dataframes_dict)[0]
dataframes_dict[first_key]

In [None]:
# Number of recordings held in the dictionary
len(dataframes_dict)

# Save the data into npy and csv files

In [14]:
import numpy as np

# Output directories for the .npy and .csv exports
output_directory = 'npy_30fps_p_21'
output_csv_directory = 'csv_30fps_p_21'

# Create both directories; exist_ok avoids the separate existence check
for directory in (output_directory, output_csv_directory):
    os.makedirs(directory, exist_ok=True)

print('30 fps InHARD 13 features: ')
for key, df in dataframes_dict.items():

    # Save the DataFrame as a CSV file (header row, no index column)
    csv_filepath = os.path.join(output_csv_directory, f"{key}.csv")
    df.to_csv(csv_filepath, index=False)

    # Save the values of the DataFrame as a .npy file
    output_filepath = os.path.join(output_directory, f"{key}.npy")
    np.save(output_filepath, df.to_numpy())

    # Read the array back to confirm the file loads, and report its shape
    d = np.load(output_filepath)
    print(d.shape)

30 fps InHARD 13 features: 
(15627, 63)
(13113, 63)
(12561, 63)
(12623, 63)
(10404, 63)
(18822, 63)
(11757, 63)
(12397, 63)
(18724, 63)
(13731, 63)
(24193, 63)
(16898, 63)
(15170, 63)
(12308, 63)
(26342, 63)
(10379, 63)
(9781, 63)
(13082, 63)
(10767, 63)
(8765, 63)
(8422, 63)
(8363, 63)
(7361, 63)
(7377, 63)
(14914, 63)
(11222, 63)
(10219, 63)
(16405, 63)
(13165, 63)
(14589, 63)
(11118, 63)
(13689, 63)
(9737, 63)
(7949, 63)
(10342, 63)
(8669, 63)
(19329, 63)
(12052, 63)


In [15]:
# Reload one of the saved arrays from disk and display its shape
d=np.load('npy_30fps_p_21/P01_R01.npy')
d.shape

(15627, 63)