# everyone 데이터셋 변환

- 데이터 수집 프로젝트 : https://github.com/CSAILVision/GazeCapture

``` python
### 프로파일 아이템으로 등록 가능한 것들 <6 항목>
# frames.json <FrameNames>
# 97개 프레임 파일명 filename.jpg 리스트

# appleFace.json <Face>
# dict_keys(['H', 'W', 'X', 'Y', 'IsValid'])
# 97개 얼굴 자르기 위해 필요한 좌표 및 박스 크기

# faceGrid.json <Grid>
# 97개 아이템 dict_keys(['X', 'Y', 'W', 'H', 'IsValid']) 
# 전체 이미지에서 얼굴이 어느 그리드에 속하는지 정보

# appleRightEye.json, appleLeftEye.json <Eye>
# 97개 아이템 dict_keys(['H', 'W', 'X', 'Y', 'IsValid'])
# 눈 영역 자르기 위한 좌표 및 박스 크기

# dotInfo.json <TargetDot>
# 97개 아이템 dict_keys(['DotNum', 'XPts', 'YPts', 'XCam', 'YCam', 'Time'])
# 타겟점 정보

# screen.json <Screen>
# 97개 아이템  dict_keys(['H', 'W', 'Orientation{1~4}'])
# 단위는 프로젝트 링크 참조

### 그 외 <2 항목>
# motion.json <Motion>
# 765개 아이템 : dict_keys(['GravityX', 'UserAcceleration', 'AttitudeRotationMatrix', 'AttitudePitch', 'Time', 'AttitudeQuaternion', 'AttitudeRoll', 'RotationRate', 'AttitudeYaw', 'DotNum'])
# 필터없는 전체 버전인 듯, 중력 센서 등등

# info.json <ProfileSummary>
# 1개 고유템 dict_keys(['TotalFrames', 'NumFaceDetections', 'NumEyeDetections', 'Dataset', 'DeviceName'])
# 프로파일 정보
```

## 데이터셋 루트 경로 체크

In [1]:
# The cells below use os, json and np; import them once here so the notebook
# runs from a fresh kernel.
import json
import os

import numpy as np

# Root directory of the extracted "everyone" (GazeCapture) dataset. Its
# entries are listed below and treated as profile directories.
DATASET_ROOT = "E:\\everyone\\"

In [None]:
# os.listdir() returns entries in arbitrary order; sort them so positional
# picks such as profile_dirs[5] refer to the same profile on every run.
profile_dirs = sorted(os.listdir(DATASET_ROOT))

In [None]:
# Show the first five entries as a quick check of the dataset root listing.
print(profile_dirs[:5])

## 특정 profile 픽

In [None]:
# Pick the entry at index 5 of the root listing as the sample profile.
profile_path = os.path.join(DATASET_ROOT, profile_dirs[5])
print(profile_path)

## 프로파일 Raw 데이터 로드

In [None]:
def _find_profile_json(profile_path, json_names, keyword):
    """Return the absolute path of the first JSON file whose name contains *keyword*."""
    for name in json_names:
        if keyword in name:
            return os.path.abspath(os.path.join(profile_path, name))
    raise FileNotFoundError(
        "no JSON file matching '{}' in profile directory {}".format(keyword, profile_path))


def _read_json(path):
    """Parse the JSON file at *path* and return the decoded object."""
    with open(path, 'r', encoding='utf-8') as fp:
        return json.load(fp)


def _read_frame(frame_path):
    """Return the raw bytes of one frame file, or an empty bytearray if it cannot be read."""
    try:
        with open(frame_path, 'rb') as fp:
            return fp.read()
    except OSError:
        # Best effort: a missing or unreadable frame is reported and kept as
        # an empty placeholder so frame indices stay aligned with the metadata.
        print(">>> exception when load frame image at ", frame_path)
        return bytearray()


def load_profile_dir(profile_path):
    """Load the raw JSON metadata and encoded frame images of one profile directory.

    The JSON files are located by a keyword in their *file name* (not in the
    full path, so parent directory names cannot cause a wrong match). Frame
    images are read as raw bytes from the ``frames`` sub-directory using the
    file names listed in the frame-names JSON.

    Args:
        profile_path: Path of the profile directory. A trailing path
            separator is tolerated.

    Returns:
        dict with the keys ``profile_id`` (directory name), ``frame_names``,
        ``frames`` (one bytes object per frame name; an empty bytearray for a
        frame that could not be read) and the decoded JSON contents
        ``face_js``, ``grid_js``, ``left_js``, ``right_js``, ``dot_js``,
        ``screen_js``, ``info_js`` and ``motion_js``.

    Raises:
        FileNotFoundError: If one of the expected JSON files is missing.
    """
    # (result key, keyword that must appear in the JSON file name)
    json_keywords = (
        ('frame_names', 'frame'),
        ('face_js', 'appleFace'),
        ('grid_js', 'faceGrid'),
        ('left_js', 'appleLeftEye'),
        ('right_js', 'appleRightEye'),
        ('dot_js', 'dotInfo'),
        ('screen_js', 'screen'),
        ('info_js', 'info'),
        ('motion_js', 'motion'),
    )

    # Sorted so the chosen file is deterministic if several names match.
    json_names = sorted(
        name for name in os.listdir(profile_path)
        if os.path.splitext(name)[1] == '.json')

    # Resolve every path first so a missing file is reported before any load.
    json_paths = {
        key: _find_profile_json(profile_path, json_names, keyword)
        for key, keyword in json_keywords
    }
    loaded = {key: _read_json(path) for key, path in json_paths.items()}

    # Load images (JPEG-encoded binary), one entry per listed frame name.
    frame_names = loaded['frame_names']
    frames = [
        _read_frame(os.path.join(profile_path, 'frames', frame_name))
        for frame_name in frame_names
    ]

    raw_dataset = {
        # normpath drops a trailing separator, which would otherwise yield ''.
        'profile_id': os.path.basename(os.path.normpath(profile_path)),
        'frame_names': frame_names,
        'frames': frames,
        'face_js': loaded['face_js'],
        'grid_js': loaded['grid_js'],
        'left_js': loaded['left_js'],
        'right_js': loaded['right_js'],
        'dot_js': loaded['dot_js'],
        'screen_js': loaded['screen_js'],
        'info_js': loaded['info_js'],
        'motion_js': loaded['motion_js'],
    }

    return raw_dataset

## 프로파일 데이터프레임 모델

In [None]:
def get_profile_summary(ds):
    """Build the per-profile summary record from a raw dataset dict.

    Reads ``ds['info_js']`` and ``ds['profile_id']`` and returns a dict with
    the keys ``profile_id``, ``num_frames``, ``num_face_detect``,
    ``num_eye_detect`` and ``device`` (the device name, lower-cased).
    """
    info_js = ds['info_js']

    summary = {'profile_id': ds['profile_id']}
    summary['num_frames'] = info_js['TotalFrames']
    summary['num_face_detect'] = info_js['NumFaceDetections']
    summary['num_eye_detect'] = info_js['NumEyeDetections']
    summary['device'] = info_js['DeviceName'].lower()
    return summary

def get_of(js, idx, key):
    """Return element *idx* of the sequence stored under *key* in *js*."""
    column = js[key]
    return column[idx]

def get_rect(js, idx):
    """Return the box at position *idx* as a dict with keys ``h``, ``w``, ``x``, ``y``.

    The values are taken from the ``H``, ``W``, ``X`` and ``Y`` sequences of *js*.
    """
    field_map = (('h', 'H'), ('w', 'W'), ('x', 'X'), ('y', 'Y'))
    return {out_key: js[src_key][idx] for out_key, src_key in field_map}

def get_valid(js, idx):
    """Return whether the ``IsValid`` entry at position *idx* of *js* equals 1."""
    flag = js['IsValid'][idx]
    return flag == 1

def get_target_pts(js, idx):
    """Return the ``XPts``/``YPts`` values at position *idx* as ``{'x': ..., 'y': ...}``."""
    x_pts = js['XPts'][idx]
    y_pts = js['YPts'][idx]
    return {'x': x_pts, 'y': y_pts}

def get_target_dist(js, idx):
    """Return the ``XCam``/``YCam`` values at position *idx* as ``{'x': ..., 'y': ...}``."""
    x_cam = js['XCam'][idx]
    y_cam = js['YCam'][idx]
    return {'x': x_cam, 'y': y_cam}

def get_screen_hw(js, idx):
    """Return the ``H``/``W`` values at position *idx* of *js* as ``{'h': ..., 'w': ...}``."""
    height = js['H'][idx]
    width = js['W'][idx]
    return {'h': height, 'w': width}

def get_record_meta(ds, idx):
    """Collect the metadata of the frame at position *idx* into one flat record.

    Only the parts of *ds* that are actually needed are read: ``frame_names``,
    ``face_js``, ``grid_js``, ``left_js``, ``right_js``, ``dot_js`` and
    ``screen_js``.

    Args:
        ds: Raw dataset dict holding the per-frame sequences listed above.
        idx: Position of the frame within those sequences.

    Returns:
        dict with the frame name, the validity flags and rectangles of the
        face, face grid and both eyes, the target dot fields (``target_id``,
        ``target_pts``, ``target_dist``, ``target_time``) and the screen
        fields (``screen_hw``, ``orientation``).
    """
    face_js = ds['face_js']
    grid_js = ds['grid_js']
    left_js = ds['left_js']
    right_js = ds['right_js']
    dot_js = ds['dot_js']
    screen_js = ds['screen_js']

    record_meta = {
        'frame_name': ds['frame_names'][idx],
        'face_valid': get_valid(face_js, idx),
        'face_grid_valid': get_valid(grid_js, idx),
        'left_eye_valid': get_valid(left_js, idx),
        'right_eye_valid': get_valid(right_js, idx),

        'face_rect': get_rect(face_js, idx),
        'face_grid_rect': get_rect(grid_js, idx),
        'left_eye_rect': get_rect(left_js, idx),
        'right_eye_rect': get_rect(right_js, idx),

        'target_id': get_of(dot_js, idx, 'DotNum'),
        'target_pts': get_target_pts(dot_js, idx),
        'target_dist': get_target_dist(dot_js, idx),
        'target_time': get_of(dot_js, idx, 'Time'),

        'screen_hw': get_screen_hw(screen_js, idx),
        'orientation': get_of(screen_js, idx, 'Orientation'),
    }

    return record_meta

def get_profile(ds):
    """Assemble the profile structure for a raw dataset dict.

    Returns a dict with ``summary`` (from ``get_profile_summary``), ``meta``
    (one ``get_record_meta`` record per entry of ``ds['frame_names']``) and
    ``frames`` (the ``ds['frames']`` object itself). Motion data is not
    included.
    """
    summary = get_profile_summary(ds)
    frames = ds['frames']
    records = [get_record_meta(ds, pos) for pos, _ in enumerate(ds['frame_names'])]

    return {"summary": summary, "meta": records, 'frames': frames}

## 프로파일 데이터 프레임 생성

In [None]:
# Load the sample profile from disk and convert it to the profile structure.
raw_data = load_profile_dir(profile_path)
profile = get_profile(raw_data)

# Spot-check the summary and the record/frame at index 96.
# NOTE(review): index 96 assumes this profile has at least 97 frames - confirm.
print(profile['summary'])
print(profile['meta'][96])
print("frame binary size:", len(profile['frames'][96]))

## NPZ archive 변환
- archive npz: 효율적 원본 보관 및 학습용 npz 변환 용도이며 학습용 npz는 이를 기반으로 별도로 생성해야 함

In [None]:
def profile_to_archive_npz(profile, out_dir_path):
    """Write one profile to ``<out_dir_path>/profile-<profile_id>.npz`` (compressed).

    The archive contains three entries: ``summary`` (the summary dict),
    ``meta`` (the list of per-frame records) and ``frames`` (one bytes object
    per frame). All three are stored as Python objects, so the archive must
    be read back with ``np.load(..., allow_pickle=True)``.

    Args:
        profile: dict with the keys ``summary`` (must contain ``profile_id``),
            ``meta`` and ``frames`` (a sequence of bytes-like objects).
        out_dir_path: Output directory; created if it does not exist.
    """
    os.makedirs(out_dir_path, exist_ok=True)

    summary = profile['summary']
    meta = profile['meta']
    raw_frames = profile['frames']

    # Store the encoded frames in an object array. Letting NumPy infer a
    # fixed-width bytes ('S') dtype would pad every frame to the longest one
    # and drop trailing NUL bytes on read-back, corrupting the binary data.
    frames = np.empty(len(raw_frames), dtype=object)
    for idx, frame in enumerate(raw_frames):
        frames[idx] = bytes(frame)

    # savez_compressed appends the '.npz' extension itself.
    out_path = os.path.join(out_dir_path, "profile-{}".format(summary['profile_id']))
    np.savez_compressed(out_path, summary=summary, meta=meta, frames=frames)

In [None]:
# Output directory for the archive files; convert the sample profile into it.
OUT_DIR_PATH = "E:/everyone-npz"
profile_to_archive_npz(profile, OUT_DIR_PATH)

## NPZ archive 복원 무결성 체크

In [None]:
# Re-open the archive of the profile currently held in `profile`. The file
# name is derived from that profile's id instead of being hard-coded, so the
# comparison that follows always checks the same profile.
# allow_pickle=True is required because the archive stores Python objects
# (summary dict, per-frame meta dicts); only load archives you created.
npz = np.load(
    os.path.join(OUT_DIR_PATH, "profile-{}.npz".format(profile['summary']['profile_id'])),
    allow_pickle=True)
print(npz.files)

In [None]:
# Pull the three archive entries back out of the loaded npz file.
meta = npz['meta']
frames = npz['frames']
summary = npz['summary']

In [None]:
# Integrity check: the restored archive must hold the same number of frames
# as the in-memory profile, and every frame must be byte-identical. Comparing
# all frames also avoids depending on a fixed frame index.
assert len(profile['frames']) == len(frames)
assert all(bytes(src) == bytes(dst) for src, dst in zip(profile['frames'], frames))
print(summary)

# 모든 프로파일 npz archive 변환 처리

In [None]:
def to_everyone_archive_npz(everyone_root_path, out_dir_path):
    """Convert every profile directory under the dataset root to an NPZ archive.

    Each sub-directory of *everyone_root_path* is loaded with
    ``load_profile_dir``, converted with ``get_profile`` and written with
    ``profile_to_archive_npz``. Profiles are processed in sorted name order.

    Args:
        everyone_root_path: Dataset root containing one directory per profile.
        out_dir_path: Directory that receives the archive files.
    """
    for profile_dir in sorted(os.listdir(everyone_root_path)):
        profile_path = os.path.join(everyone_root_path, profile_dir)
        if not os.path.isdir(profile_path):
            # Stray files in the root are not profiles; listing them as a
            # directory would fail and abort the whole conversion.
            continue

        raw_data = load_profile_dir(profile_path)
        profile = get_profile(raw_data)
        profile_to_archive_npz(profile, out_dir_path)

In [None]:
# Convert the whole dataset, reusing the root and output paths defined above
# instead of repeating them as separate literals that could drift apart.
to_everyone_archive_npz(DATASET_ROOT, OUT_DIR_PATH)