In [None]:
import h5py
import numpy as np
import os
import glob
import subprocess
import sys
# Make the working directory, its parent and the ZeroEGGS source folder importable
# (presumably where the `anim` package imported below lives).
sys.path.extend(['.', '..', '../ubisoft-laforge-ZeroEGGS/ZEGGS'])

from anim import bvh, quat, txform

# from process_zeggs_bvh import preprocess_animation

In [3]:
# Synthetic demo data for the HDF5 round trip below: 1000 samples x 50 features,
# plus one integer label in [0, 10) per sample.
# A seeded generator makes the cell reproducible under Restart & Run All.
rng = np.random.default_rng(42)
data = rng.random((1000, 50))
labels = rng.integers(0, 10, size=(1000,))

In [4]:
# Persist the demo arrays to 'dataset.h5': one HDF5 dataset per array,
# followed by two file-level attributes describing the content.
with h5py.File('dataset.h5', 'w') as h5_out:
    for dataset_name, array in (('features', data), ('labels', labels)):
        h5_out.create_dataset(dataset_name, data=array)

    # File-level metadata (attributes)
    file_attrs = h5_out.attrs
    file_attrs['description'] = 'Sample dataset with random data'
    file_attrs['version'] = 1.0


In [5]:
# Read the file back to check the round trip.
with h5py.File('dataset.h5', 'r') as h5_in:
    # Slicing with [:] pulls each dataset fully into memory before the file is closed
    features, labels = h5_in['features'][:], h5_in['labels'][:]

    # File-level metadata
    stored_attrs = h5_in.attrs
    description, version = stored_attrs['description'], stored_attrs['version']

# Everything needed was read above, so reporting can happen after the file is closed.
print(f"Description: {description}, Version: {version}")
print(f"Features shape: {features.shape}, Labels shape: {labels.shape}")

Description: Sample dataset with random data, Version: 1.0
Features shape: (1000, 50), Labels shape: (1000,)


In [6]:
# Input and output roots for the ZeroEGGS preprocessing, relative to the
# notebook's working directory. Both live under the same processed_v1 folder.
_zeggs_v1_root = '../ubisoft-laforge-ZeroEGGS/data/processed_v1/'
source_path = _zeggs_v1_root + 'trimmed/'
target = _zeggs_v1_root + 'processed/'

In [18]:
import re
import numpy as np

def load_bvh_file(filename, start=None, end=None, order=None):
    """Parse a BVH motion-capture file into numpy arrays.

    The header (HIERARCHY section plus the "Frames:" / "Frame Time:" lines) is
    read line by line to collect joint names, parent indices and rest offsets;
    after the "Frame Time:" line every non-empty line is read as one frame.

    Args:
        filename: Path of the BVH file to read.
        start: Optional index of the first frame to keep. Only honoured when
            `end` is given as well; 0 is a valid value.
        end: Optional upper frame bound. When both `start` and `end` are given,
            frames ``start .. end - 2`` are kept (``end - start - 1`` frames).
        order: Euler rotation order such as ``'zyx'``. When None it is taken
            from the first CHANNELS line whose rotation slots contain only
            Xrotation/Yrotation/Zrotation.

    Returns:
        dict with the keys
            'rotations': float32 array (frames, joints, 3), values as stored in the file
            'positions': float32 array (frames, joints, 3), initialised from the offsets
            'offsets':   float32 array (joints, 3)
            'parents':   int32 array (joints,), -1 for the root
            'names':     list of joint names
            'order':     rotation order string (None if it could not be determined)
            'frametime': float value of the "Frame Time:" line

    Raises:
        Exception: if the most recently parsed CHANNELS line declared a channel
            count other than 3, 6 or 9.
    """
    channelmap = {
        'Xrotation': 'x',
        'Yrotation': 'y',
        'Zrotation': 'z',
    }

    # Trimming is active only when both bounds are supplied. Comparing against
    # None (instead of truthiness) keeps start=0 usable as a first frame.
    trim = start is not None and end is not None

    i = 0                 # index of the current frame line in the motion body
    active = -1           # index of the joint whose block is currently open
    end_site = False      # True while inside an "End Site" block
    state = 'definition'  # switches to 'body' after the "Frame Time:" line

    names = []
    offsets = np.empty(shape=[0, 3], dtype=np.float32)
    parents = np.empty(shape=[0], dtype=np.int32)

    # The context manager closes the file even if parsing raises.
    with open(filename, "r") as f:
        for line in f:

            if state == 'definition':

                if "HIERARCHY" in line:
                    continue
                if "MOTION" in line:
                    continue

                rmatch = re.match(r"ROOT (\w+)", line)
                if rmatch:
                    names.append(rmatch.group(1))
                    offsets = np.append(offsets, np.array([[0, 0, 0]], dtype=np.float32), axis=0)
                    parents = np.append(parents, np.array([active], dtype=np.int32))
                    active = parents.shape[0] - 1
                    continue

                if "{" in line:
                    continue

                if "}" in line:
                    # Closing an End Site block does not change the active joint;
                    # closing a joint block returns to its parent.
                    if end_site:
                        end_site = False
                    else:
                        active = parents[active]
                    continue

                offmatch = re.match(r"\s*OFFSET\s+([\-\d\.e]+)\s+([\-\d\.e]+)\s+([\-\d\.e]+)", line)
                if offmatch:
                    # End Site offsets are ignored: end sites are not stored as joints.
                    if not end_site:
                        offsets[active] = np.array(list(map(float, offmatch.groups())))
                    continue

                chanmatch = re.match(r"\s*CHANNELS\s+(\d+)", line)
                if chanmatch:
                    channels = int(chanmatch.group(1))
                    if order is None:
                        # With 3 channels the rotations are the first three names,
                        # otherwise they are names 4..6 (after three other channels).
                        channelis = 0 if channels == 3 else 3
                        channelie = 3 if channels == 3 else 6
                        parts = line.split()[2 + channelis:2 + channelie]
                        if any(p not in channelmap for p in parts):
                            continue
                        order = "".join(channelmap[p] for p in parts)
                    continue

                jmatch = re.match(r"\s*JOINT\s+(\w+)", line)
                if jmatch:
                    names.append(jmatch.group(1))
                    offsets = np.append(offsets, np.array([[0, 0, 0]], dtype=np.float32), axis=0)
                    parents = np.append(parents, np.array([active], dtype=np.int32))
                    active = parents.shape[0] - 1
                    continue

                if "End Site" in line:
                    end_site = True
                    continue

                fmatch = re.match(r"\s*Frames:\s+(\d+)", line)
                if fmatch:
                    if trim:
                        fnum = (end - start) - 1
                    else:
                        fnum = int(fmatch.group(1))
                    jnum = parents.shape[0]
                    # Every frame starts from the rest offsets; animated values overwrite them below.
                    positions = offsets[np.newaxis].repeat(fnum, axis=0)
                    rotations = np.zeros([fnum, jnum, 3], dtype=np.float32)
                    continue

                fmatch = re.match(r"\s*Frame Time:\s+([\d\.]+)", line)
                if fmatch:
                    frametime = float(fmatch.group(1))
                    state = 'body'
                    continue

            else:
                # state == 'body': one frame per non-empty line

                if trim and (i < start or i >= end - 1):
                    i += 1
                    continue

                dmatch = line.strip().split()
                if dmatch:

                    # Row of the output arrays that receives this frame
                    fi = i - start if trim else i
                    data_block = np.asarray(tuple(map(float, dmatch)))
                    N = parents.shape[0]
                    if channels == 3:
                        # Root position followed by three rotation values per joint
                        positions[fi, 0] = data_block[0:3]
                        rotations[fi, :] = data_block[3:].reshape([N, 3])
                    elif channels == 6:
                        # Position and rotation for every joint
                        data_block = data_block.reshape([N, 6])
                        positions[fi, :] = data_block[:, 0:3]
                        rotations[fi, :] = data_block[:, 3:6]
                    elif channels == 9:
                        # Root position, then nine values for each non-root joint:
                        # columns 0:3 and 6:9 are multiplied and added to the offset,
                        # columns 3:6 are the rotation.
                        positions[fi, 0] = data_block[0:3]
                        data_block = data_block[3:].reshape([N - 1, 9])
                        rotations[fi, 1:] = data_block[:, 3:6]
                        positions[fi, 1:] = positions[fi, 1:] + data_block[:, 0:3] * data_block[:, 6:9]
                    else:
                        raise Exception("Too many channels! %i" % channels)

                    i += 1

    return {
        'rotations': rotations,
        'positions': positions,
        'offsets': offsets,
        'parents': parents,
        'names': names,
        'order': order,
        'frametime': frametime
    }

In [19]:
def preprocess_animation(animation_file, fps=60, visualize_gaze=False):
    """Convert one BVH clip into a per-frame feature matrix expressed relative to a ground-projected root.

    Args:
        animation_file: Path of the BVH file; it is read with `load_bvh_file`.
        fps: Target frame rate. The clip is treated as 60 fps and decimated by
            keeping every ``60 // fps``-th frame; for values above 60 every
            frame is kept (no upsampling).
        visualize_gaze: When True, scatter-plot the gaze targets and the root
            trajectory with matplotlib, save the figure as '1.jpg' and show it.

    Returns:
        tuple ``(all_poses, parents, dt, order, njoints)`` where
            all_poses: 2-D array with one row per frame. The columns are the
                concatenation of root_pos, root_rot, root_vel, root_vrt, lpos,
                ltxy, lvel, lvrt and gaze_dir (per-joint arrays flattened per frame).
            parents: parent index array of the skeleton.
            dt: time step between two consecutive returned frames.
            order: Euler rotation order string read from the file.
            njoints: number of joints.

    Raises:
        ValueError: if `fps` is not positive, if fewer than 4 frames remain
            after decimation, or if the skeleton has no joint named "Spine2",
            "Hips" or "Head".
    """
    if fps <= 0:
        raise ValueError("fps must be positive, got %r" % (fps,))

    # NOTE(review): the clips are assumed to be recorded at 60 fps — confirm against 'frametime'.
    source_fps = 60

    # anim_data example: 'rotations' (8116, 75, 3), 'positions', 'offsets' (75, 3), 'parents',
    # 'names' (75,), 'order' 'zyx', 'frametime' 0.016667
    anim_data = load_bvh_file(animation_file)
    nframes = len(anim_data["rotations"])

    if fps != source_fps:
        # Keep every `rate`-th frame. The clamp avoids a zero slice step for fps > 60
        # and the int() cast keeps the step valid when fps is passed as a float.
        rate = max(1, int(source_fps // fps))
        anim_data["rotations"] = anim_data["rotations"][0:nframes:rate]
        anim_data["positions"] = anim_data["positions"][0:nframes:rate]
        # Time step of the frames actually kept. This equals 1 / fps whenever fps
        # divides 60 and stays consistent with the data when it does not.
        dt = rate / source_fps
        nframes = anim_data["positions"].shape[0]
    else:
        dt = anim_data["frametime"]

    # The first-frame velocities below are extrapolated from frames 1..3.
    if nframes < 4:
        raise ValueError("preprocess_animation needs at least 4 frames, got %d" % nframes)

    njoints = len(anim_data["parents"])
    names = anim_data["names"]

    # Local rotations as quaternions (the file values are converted from degrees)
    lrot = quat.unroll(quat.from_euler(np.radians(anim_data["rotations"]), anim_data["order"]))
    lpos = anim_data["positions"]
    grot, gpos = quat.fk(lrot, lpos, anim_data["parents"])
    # Find root (the Spine2 joint projected on the ground, i.e. Y zeroed)
    root_pos = gpos[:, names.index("Spine2")] * np.array([1, 0, 1])
    # Root direction: the Hips Z axis flattened onto the ground plane and normalised
    root_fwd = quat.mul_vec(grot[:, names.index("Hips")], np.array([[0, 0, 1]]))
    root_fwd[:, 1] = 0
    root_fwd = root_fwd / np.sqrt(np.sum(root_fwd * root_fwd, axis=-1))[..., np.newaxis]
    # Root rotation: rotation taking the +Z axis onto the root direction
    root_rot = quat.normalize(
        quat.between(np.array([[0, 0, 1]]).repeat(len(root_fwd), axis=0), root_fwd)
    )

    # Find look at direction (the Head Z axis flattened onto the ground plane)
    gaze_lookat = quat.mul_vec(grot[:, names.index("Head")], np.array([0, 0, 1]))
    gaze_lookat[:, 1] = 0
    gaze_lookat = gaze_lookat / np.sqrt(np.sum(np.square(gaze_lookat), axis=-1))[..., np.newaxis]
    # Find gaze position: a single target for the whole clip, taken as the
    # per-axis median of the per-frame targets
    gaze_distance = 100  # Assume other actor is one meter away (positions presumably in centimetres)
    gaze_pos_all = root_pos + gaze_distance * gaze_lookat
    gaze_pos = np.median(gaze_pos_all, axis=0)
    gaze_pos = gaze_pos[np.newaxis].repeat(nframes, axis=0)

    # Visualize Gaze Pos
    if visualize_gaze:
        import matplotlib.pyplot as plt

        plt.scatter(gaze_pos_all[:, 0], gaze_pos_all[:, 2], s=0.1, marker=".")
        plt.scatter(gaze_pos[0, 0], gaze_pos[0, 2])
        plt.scatter(root_pos[:, 0], root_pos[:, 2], s=0.1, marker=".")
        plt.quiver(root_pos[::60, 0], root_pos[::60, 2], root_fwd[::60, 0], root_fwd[::60, 2])
        plt.gca().set_aspect("equal")
        plt.savefig('1.jpg')
        plt.show()

    # Compute local gaze dir (deliberately left un-normalised)
    gaze_dir = gaze_pos - root_pos
    gaze_dir = quat.mul_vec(quat.inv(root_rot), gaze_dir)

    # Make relative to root
    lrot[:, 0] = quat.mul(quat.inv(root_rot), lrot[:, 0])
    lpos[:, 0] = quat.mul_vec(quat.inv(root_rot), lpos[:, 0] - root_pos)

    # Local velocities: finite differences, with frame 0 extrapolated from frames 1..3
    lvel = np.zeros_like(lpos)
    lvel[1:] = (lpos[1:] - lpos[:-1]) / dt
    lvel[0] = lvel[1] - (lvel[3] - lvel[2])

    lvrt = np.zeros_like(lpos)
    lvrt[1:] = quat.to_helical(quat.abs(quat.mul(lrot[1:], quat.inv(lrot[:-1])))) / dt
    lvrt[0] = lvrt[1] - (lvrt[3] - lvrt[2])

    # Root velocities, rotated by the inverse root rotation of the previous frame
    root_vrt = np.zeros_like(root_pos)
    root_vrt[1:] = quat.to_helical(quat.abs(quat.mul(root_rot[1:], quat.inv(root_rot[:-1])))) / dt
    root_vrt[0] = root_vrt[1] - (root_vrt[3] - root_vrt[2])
    root_vrt[1:] = quat.mul_vec(quat.inv(root_rot[:-1]), root_vrt[1:])
    root_vrt[0] = quat.mul_vec(quat.inv(root_rot[0]), root_vrt[0])

    root_vel = np.zeros_like(root_pos)
    root_vel[1:] = (root_pos[1:] - root_pos[:-1]) / dt
    root_vel[0] = root_vel[1] - (root_vel[3] - root_vel[2])
    root_vel[1:] = quat.mul_vec(quat.inv(root_rot[:-1]), root_vel[1:])
    root_vel[0] = quat.mul_vec(quat.inv(root_rot[0]), root_vel[0])

    # Compute 2-axis transforms: the rotated X and Y axes of every local rotation
    ltxy = np.zeros(dtype=np.float32, shape=[len(lrot), njoints, 2, 3])
    ltxy[..., 0, :] = quat.mul_vec(lrot, np.array([1.0, 0.0, 0.0]))
    ltxy[..., 1, :] = quat.mul_vec(lrot, np.array([0.0, 1.0, 0.0]))

    # Flatten the per-joint features to one row per frame
    lpos = lpos.reshape(nframes, -1)
    ltxy = ltxy.reshape(nframes, -1)
    lvel = lvel.reshape(nframes, -1)
    lvrt = lvrt.reshape(nframes, -1)

    all_poses = np.concatenate((root_pos, root_rot, root_vel, root_vrt, lpos, ltxy, lvel, lvrt, gaze_dir), axis=1)

    return all_poses, anim_data["parents"], dt, anim_data["order"], njoints

In [20]:
# Smoke test of the preprocessing on a single validation clip.
# The path is relative to the notebook's working directory, using the same layout
# as the other paths in this notebook, so it does not depend on one user's home directory.
animation_file_path = "../ubisoft-laforge-ZeroEGGS/data/processed_v1/trimmed/valid/005_Neutral_4_x_0_9.bvh"

all_poses, parents, dt, order, njoints = preprocess_animation(animation_file_path, fps=60)

In [7]:
def make_zeggs_dataset(source_path, target):
    """Build the processed ZeroEGGS dataset from paired .wav/.bvh clips.

    The 'train' and 'valid' sub-folders of `source_path` are processed in turn.
    For every ``<name>.wav`` found there the following files are written under
    ``<target>/<split>/``:

        normalize_audio/<name>.wav      output of ``ffmpeg-normalize ... -ar 16000``
        normalize_audio_npz/<name>.npz  key 'wav': samples read from the normalised file
        mfcc/<name>.npz                 key 'mfcc': MFCC features without their last two columns
        gesture_npz/<name>.npz          key 'gesture': output of `preprocess_animation`
                                        for ``<name>.bvh`` at 20 fps

    Finally the per-column mean and standard deviation over all gesture frames
    of both splits are stored in ``<target>/mean.npz`` and ``<target>/std.npz``.

    NOTE(review): this function relies on `MFCC`, `sf` and `preprocess_animation`
    being available in the notebook namespace; `MFCC` and `sf` are not imported
    in any visible cell — confirm where they come from.

    Args:
        source_path: Folder containing the 'train' and 'valid' sub-folders.
        target: Output folder; created (including parents) when missing.

    Raises:
        RuntimeError: if `ffmpeg-normalize` fails and leaves no output file.
        ValueError: if no clip was found in either split.
    """
    os.makedirs(target, exist_ok=True)

    def make_zeggs_subdataset(source_path, target, all_poses):
        """Process one split folder and append every clip's pose matrix to `all_poses`."""
        target_audio_path = os.path.join(target, 'normalize_audio')
        target_audionpz_path = os.path.join(target, 'normalize_audio_npz')
        target_gesture_path = os.path.join(target, 'gesture_npz')
        target_mfcc_path = os.path.join(target, 'mfcc')
        for directory in (target, target_audio_path, target_mfcc_path,
                          target_audionpz_path, target_gesture_path):
            # makedirs also creates missing parents and tolerates existing folders
            os.makedirs(directory, exist_ok=True)

        wav_files = sorted(glob.glob(os.path.join(source_path, "*.wav")))
        for wav_file in wav_files:
            name = os.path.splitext(os.path.basename(wav_file))[0]
            print(name)
            # audio
            print('normalize audio: ' + name + '.wav')
            normalize_wav_path = os.path.join(target_audio_path, name + '.wav')
            cmd = ['ffmpeg-normalize', wav_file, '-o', normalize_wav_path, '-ar', '16000']
            returncode = subprocess.call(cmd)
            # A non-zero exit with an existing output file is tolerated
            # (e.g. a re-run over files normalised earlier); without an output
            # file the next step could not work, so fail with a clear message.
            if returncode != 0 and not os.path.exists(normalize_wav_path):
                raise RuntimeError(
                    "ffmpeg-normalize failed (exit code %d) for %s" % (returncode, wav_file)
                )
            print('extract MFCC...')
            obj = MFCC(frate=20)
            wav, fs = sf.read(normalize_wav_path)
            mfcc = obj.sig2s2mfc_energy(wav, None)
            print(mfcc[:, :-2].shape)  # -1 -> -2      # (502, 13)
            np.savez_compressed(os.path.join(target_mfcc_path, name + '.npz'), mfcc=mfcc[:, :-2])
            np.savez_compressed(os.path.join(target_audionpz_path, name + '.npz'), wav=wav)
            # bvh: the motion file shares the base name of the audio file
            print('extract gesture...')
            bvh_file = os.path.join(source_path, name + '.bvh')
            pose, parents, dt, order, njoints = preprocess_animation(bvh_file, fps=20)
            print(pose.shape)
            np.savez_compressed(os.path.join(target_gesture_path, name + '.npz'), gesture=pose)
            all_poses.append(pose)

        return all_poses

    all_poses = []
    for split in ('train', 'valid'):
        all_poses = make_zeggs_subdataset(
            os.path.join(source_path, split), os.path.join(target, split), all_poses
        )

    if not all_poses:
        raise ValueError("no .wav clips found in the 'train' or 'valid' folders of %s" % source_path)

    # Normalisation statistics over every frame of both splits
    all_poses = np.vstack(all_poses)
    pose_mean = np.mean(all_poses, axis=0, dtype=np.float64)
    pose_std = np.std(all_poses, axis=0, dtype=np.float64)
    np.savez_compressed(os.path.join(target, 'mean.npz'), mean=pose_mean)
    np.savez_compressed(os.path.join(target, 'std.npz'), std=pose_std)

In [8]:
# Run the full preprocessing: reads the clips below `source_path` and writes
# the processed files below `target`.
make_zeggs_dataset(source_path=source_path, target=target)

NameError: name 'os' is not defined