# Segmentation of MOCAP Data

In [None]:
import pandas as pd
from scipy.interpolate import interp1d
import numpy as np
from scipy.signal import butter, filtfilt
#Interpolation
def interpolate_column(col):
    """Fill the gaps of one data column by linear interpolation.

    NaN entries are replaced by 0 and then *every* 0 is treated as a missing
    sample, so a genuinely measured 0 is re-estimated as well.  Missing
    samples are interpolated linearly over the row position; gaps before the
    first or after the last valid sample are linearly extrapolated.

    Parameters
    ----------
    col : pandas.Series
        The column to repair.

    Returns
    -------
    numpy.ndarray or pandas.Series
        A float array with one value per row.  When the column holds no
        valid sample at all, the zero-filled Series is returned unchanged.
    """
    # Fill missing values with 0 so NaN and 0 share a single "missing" marker.
    col_filled = col.fillna(0)
    valid = (col_filled != 0).to_numpy()
    x = np.flatnonzero(valid)

    if len(x) == 0:
        # Nothing to interpolate from.
        return col_filled

    y = col_filled.to_numpy()[valid]
    if len(x) == 1:
        # A single sample defines no slope; extrapolating from it yields NaN
        # (or an error on older SciPy), so hold the one known value instead.
        return np.full(len(col_filled), y[0], dtype=float)

    f = interp1d(x, y, kind='linear', fill_value='extrapolate')
    return f(np.arange(len(col_filled)))


#Smoothing
def butter_lowpass(cutoff, fs, order=5):
    """Design a digital Butterworth low-pass filter.

    Parameters
    ----------
    cutoff : float
        Cut-off frequency, in the same unit as ``fs``.
    fs : float
        Sampling frequency.
    order : int, optional
        Filter order (default 5).

    Returns
    -------
    tuple
        ``(b, a)`` numerator and denominator coefficients as returned by
        ``scipy.signal.butter``.
    """
    # The critical frequency is passed to SciPy as a fraction of the Nyquist
    # frequency (half the sampling rate).
    half_rate = 0.5 * fs
    numerator, denominator = butter(
        order, cutoff / half_rate, btype='low', analog=False
    )
    return numerator, denominator

def apply_lowpass_filter(data, cutoff_freq, fs=1, order=5):
    """Smooth a signal with a Butterworth low-pass filter.

    The coefficients come from ``butter_lowpass`` and are applied with
    ``scipy.signal.filtfilt``, which runs the filter forward and backward
    over the data.

    Parameters
    ----------
    data : array-like
        The samples to smooth.
    cutoff_freq : float
        Cut-off frequency, in the same unit as ``fs``.
    fs : float, optional
        Sampling frequency (default 1).
    order : int, optional
        Filter order (default 5).

    Returns
    -------
    numpy.ndarray
        The filtered samples.
    """
    numerator, denominator = butter_lowpass(cutoff_freq, fs, order=order)
    return filtfilt(numerator, denominator, data)

In [None]:
import pandas as pd
import os
import time

# Row of the timestamp table that belongs to each task file
# (key = file name without its 4-character extension).
_TASK_ROWS = {
    'task_normal': 0,
    'task_cup': 1,
    'task_text': 2,
    'task_web': 3,
}


def _mark_to_epoch(mark):
    """Convert one timestamp string of the timestamp table to epoch seconds.

    Everything except the last seven characters is parsed with the format
    ``%Y-%m-%d-%H:%M:%S`` (interpreted as local time by ``time.mktime``).
    NOTE(review): assumes the string ends with a 7-character fraction such
    as ``.123456``; only its first four characters (``.123``) are added.
    """
    whole_seconds = time.mktime(time.strptime(mark[:-7], "%Y-%m-%d-%H:%M:%S"))
    return whole_seconds + float(mark[-7:-3])


def _cut_window(frame, lower, upper, time_col=2):
    """Return the rows of *frame* lying between two time marks, indexed from 0.

    The window opens at the first row whose time is ``>= lower`` and closes
    just before the first following row whose time is ``> upper``.  When no
    row reaches ``lower`` nothing is removed from the front, and when no row
    exceeds ``upper`` nothing is removed from the back (this mirrors the
    behaviour of the original hand-written trimming loops).
    """
    stamps = frame.iloc[:, time_col].to_numpy()
    reached = np.flatnonzero(stamps >= lower)
    first = int(reached[0]) if reached.size else 0
    passed = np.flatnonzero(stamps[first:] > upper)
    last = first + int(passed[0]) if passed.size else len(stamps)
    return frame.iloc[first:last].reset_index(drop=True)


def _cut_segment(frame, lower, upper):
    """Cut one time window out of *frame* and store its 'Timestamp' column as text."""
    segment = _cut_window(frame, lower, upper)
    segment['Timestamp'] = segment['Timestamp'].astype(str)
    return segment


def _write_segment(segment, output_root, dataset, time_name, phase, task, subject):
    """Write *segment* to ``<output_root>/<dataset>/<time>/<phase>/<task>/<subject>_<task>.csv``."""
    target_dir = os.path.join(output_root, dataset, time_name, phase, task)
    # The output tree is created on demand so a fresh run does not fail.
    os.makedirs(target_dir, exist_ok=True)
    segment.to_csv(os.path.join(target_dir, subject + '_' + task + '.csv'), index=False)


def mocap_preprocess(folder_path, timestamp_path='\\Data_new\\Raw\\Timestamp',
                     output_root='\\Data_new\\Processed',
                     cutoff_frequency=7.5, sampling_frequency=100, filter_order=4):
    """Clean every MOCAP recording under *folder_path* and split it into phases.

    Expected layout (as read by the code):

    * ``<folder_path>/<item>/<t>/<task>.csv`` -- one recording per task, where
      ``<task>`` is one of the keys of ``_TASK_ROWS``.  Column 2 holds the
      time and columns 3 onward hold the data that is interpolated and
      filtered.  The recording must also have a column named ``Timestamp``.
    * ``<timestamp_path>/<item>/<t>.csv`` -- one row per task; columns 1..6
      hold the marks start, start_kinect, start_turn, end_turn, end_kinect
      and end as timestamp strings.

    For each recording the function

    1. divides the time column by 1000 (presumably ms -> s; confirm against
       the recorder's output),
    2. keeps only the samples between ``start`` and ``end``,
    3. fills gaps with ``interpolate_column`` and smooths every data column
       with ``apply_lowpass_filter``,
    4. divides the data columns by 1000 (mm -> m according to the original
       comment) and coerces everything to numeric,
    5. writes the phases go / turn / back below ``<output_root>/MOCAP_5m`` and
       the Kinect-limited variants below ``<output_root>/MOCAP_2m``.

    Parameters
    ----------
    folder_path : str
        Root folder of the raw MOCAP recordings.
    timestamp_path : str, optional
        Root folder of the timestamp tables.
    output_root : str, optional
        Folder below which ``MOCAP_5m`` and ``MOCAP_2m`` are written.
    cutoff_frequency, sampling_frequency, filter_order : optional
        Parameters of the Butterworth low-pass filter.

    Returns
    -------
    None
        Results are written to disk; progress is printed.
    """
    # filtfilt's default padding needs strictly more samples than
    # 3 * (number of filter coefficients), i.e. 3 * (order + 1).
    min_samples = 3 * (filter_order + 1)

    for item in os.listdir(folder_path):
        item_path = os.path.join(folder_path, item)

        # Timestamp folder of the same item.
        item_timestamp = os.path.join(timestamp_path, item)
        print('folder：' + item_timestamp)

        # Stray files next to the item folders are ignored instead of
        # crashing os.listdir below.
        if not os.path.isdir(item_path):
            continue
        print('folder：' + item_path)

        # Sub-folders of the item (one per recording session).
        for t in os.listdir(item_path):
            item_path_time = os.path.join(item_path, t)            # recordings of this session
            item_timestamp_time = os.path.join(item_timestamp, t)  # timestamp table of this session
            print('folder：' + item_path_time)
            print('folder：' + item_timestamp_time)

            if not os.path.isdir(item_path_time):
                continue

            timestamp = pd.read_csv(item_timestamp_time + '.csv')

            # Every file is looked at; those not named after a known task are
            # skipped, so the result no longer depends on directory order.
            for file in os.listdir(item_path_time):
                task = file[:-4]
                row = _TASK_ROWS.get(task)
                if row is None:
                    continue

                # Read the recording.
                file_path = os.path.join(item_path_time, file)
                print('file：' + file_path)
                df_2 = pd.read_csv(file_path)

                # Convert the six marks of this task to epoch seconds without
                # modifying the timestamp table itself.
                marks = [_mark_to_epoch(timestamp.iloc[row, col]) for col in range(1, 7)]
                start, start_kinect, start_turn, end_turn, end_kinect, end = marks

                # MOCAP time divided by 1000, done for the whole column at once.
                # Replacing the column by name lets an integer column become
                # float without an in-place dtype conflict.
                time_name = df_2.columns[2]
                df_2[time_name] = df_2[time_name] / 1000

                # Drop everything recorded before `start` and after `end`.
                mocap_new = _cut_window(df_2, start, end)
                if len(mocap_new) <= min_samples:
                    # Too few samples for the low-pass filter.
                    print("错误")
                    continue

                # Interpolation followed by filtering.  The filtered values are
                # stored back; previously the filter output was computed and
                # then thrown away.
                df_3 = mocap_new.copy()
                for col_pos in range(3, len(mocap_new.columns)):
                    filled = interpolate_column(mocap_new.iloc[:, col_pos])
                    df_3.iloc[:, col_pos] = apply_lowpass_filter(
                        np.asarray(filled, dtype=float), cutoff_frequency,
                        fs=sampling_frequency, order=filter_order)

                # /1000: mm -> m
                df_3.iloc[:, 3:] = df_3.iloc[:, 3:] / 1000
                df_3 = df_3.apply(pd.to_numeric, errors='coerce')

                # go: start ~ start_turn
                mocap_go = _cut_segment(df_3, start, start_turn)
                if len(mocap_go) < 2:
                    # An unusable "go" phase skips the rest of this file.
                    print("错误")
                    continue
                _write_segment(mocap_go, output_root, 'MOCAP_5m', t, 'go', task, item)

                # go_kinect: start_kinect ~ start_turn
                mocap_go_kinect = _cut_segment(df_3, start_kinect, start_turn)
                _write_segment(mocap_go_kinect, output_root, 'MOCAP_2m', t, 'go', task, item)

                # turn: start_turn ~ end_turn (identical in both data sets)
                mocap_turn = _cut_segment(df_3, start_turn, end_turn)
                if len(mocap_turn) < 2:
                    # Only reported; the file is still written.
                    print("错误")
                _write_segment(mocap_turn, output_root, 'MOCAP_5m', t, 'turn', task, item)
                _write_segment(mocap_turn, output_root, 'MOCAP_2m', t, 'turn', task, item)

                # back: end_turn ~ end
                mocap_back = _cut_segment(df_3, end_turn, end)
                _write_segment(mocap_back, output_root, 'MOCAP_5m', t, 'back', task, item)

                # back_kinect: end_turn ~ end_kinect
                mocap_back_kinect = _cut_segment(df_3, end_turn, end_kinect)
                _write_segment(mocap_back_kinect, output_root, 'MOCAP_2m', t, 'back', task, item)

                        

                
                
        
       
            

In [None]:
# Entry point of the notebook: process every recording found under the raw MOCAP folder.
mocap_preprocess("\\Data_new\\Raw\\MOCAP")