# 데이터 전처리

- 얼굴 데이터 압축 해제 -> 사진 열기 -> 얼굴 검출 -> (224, 224)로 리사이즈


In [1]:
import os
import warnings
from typing import Tuple

import cv2
from preprocess_faces import process_images_mp
from tqdm import tqdm

# Silence every Python warning for the rest of the session.
warnings.filterwarnings(action="ignore")

In [2]:
!ls ./data/Validation

'[라벨]EMOIMG_당황_VALID.zip'  '[원천]EMOIMG_상처_VALID.zip'
'[라벨]EMOIMG_불안_VALID.zip'  '[원천]EMOIMG_불안_VALID.zip'
'[원천]EMOIMG_슬픔_VALID.zip'  '[라벨]EMOIMG_상처_VALID.zip'
'[원천]EMOIMG_분노_VALID.zip'  '[라벨]EMOIMG_분노_VALID.zip'
'[라벨]EMOIMG_중립_VALID.zip'  '[원천]EMOIMG_당황_VALID.zip'
'[라벨]EMOIMG_슬픔_VALID.zip'  '[원천]EMOIMG_중립_VALID.zip'


In [3]:
# Directory that is walked for .zip archives to extract and preprocess.
DATA_ROOT_DIR = "./data/Validation"

In [4]:
def detect_faces(img_path: str, dsize: Tuple[int, int] = (224, 224)):
    """Detect faces in an image, save a resized crop, and delete the source.

    Every detection is cropped as a square whose side is the larger of the
    detected width and height, resized to ``dsize`` and written next to the
    source image as ``face_<original file name>``. All detections of one
    image share that output name, so when several faces are found the last
    one wins.

    The source image is removed afterwards (also when no face was detected),
    except when it could not be decoded or when writing a crop failed; in
    those cases it is left untouched so no data is lost.

    Args:
        img_path: Path of the image to process. A missing path is ignored.
        dsize: Output size handed to ``cv2.resize``.

    Returns:
        None.
    """
    if not os.path.exists(img_path):
        return

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising when the file cannot be
        # decoded (corrupt image, non-image file such as a label .json).
        return

    # The Haar cascade detector works on grayscale images.
    # NOTE(review): `haar_cascade` is not defined in the visible source; it is
    # presumably a module-level cv2.CascadeClassifier -- confirm.
    faces = haar_cascade.detectMultiScale(
        cv2.cvtColor(img, cv2.COLOR_BGR2GRAY),
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(224, 224),
    )

    # Prefix only the file name, not the whole path, so the crop is written
    # into the same directory as the source image.
    directory, fname = os.path.split(img_path)
    out_path = os.path.join(directory, f"face_{fname}")

    all_written = True
    for x, y, w, h in faces:
        side = max(w, h)
        # Crop straight from the decoded image so that no overlay drawing
        # ends up in the saved face.
        face_img = cv2.resize(img[y: y + side, x: x + side], dsize=dsize)

        # cv2.imwrite reports failure through its boolean return value.
        if not cv2.imwrite(out_path, face_img):
            all_written = False

    if not all_written:
        # Keep the source image when a crop could not be saved.
        return

    try:
        os.remove(img_path)
    except FileNotFoundError:
        # Already removed by someone else; nothing left to clean up.
        pass

In [5]:
def process_images(path):
    """Run face detection on every unprocessed entry directly inside ``path``.

    Entries whose base name (file name without extension) starts with
    ``face_`` are skipped. A progress bar is shown while iterating. Nothing
    happens when ``path`` does not exist.

    Args:
        path: Directory whose entries are handed to ``detect_faces``.

    Returns:
        None.
    """
    if os.path.exists(path):
        entries = os.listdir(path)

        for entry in tqdm(entries, total=len(entries)):
            stem, _ = os.path.splitext(entry)
            if not stem.startswith("face_"):
                detect_faces(os.path.join(path, entry))

In [6]:
import subprocess  # cell-local import: only this cell shells out to `unzip`

# Walk the data root, extract every .zip archive into a sibling directory
# named after the archive (skipped when that directory already exists), then
# preprocess the extracted directory.
for root, dirs, files in os.walk(DATA_ROOT_DIR):
    print(root)

    for fname in files:
        name, ext = os.path.splitext(fname)

        if ext != ".zip":
            continue

        zip_fname = os.path.join(root, fname)
        unzip_dir = os.path.join(root, name)

        if not os.path.exists(unzip_dir):
            print(f"Start unzip {fname}")
            # Pass the arguments as a list (no shell) so brackets, spaces or
            # other shell metacharacters in the file name are taken literally.
            result = subprocess.run(
                ["unzip", "-q", zip_fname, "-d", unzip_dir],
                check=False,
            )
            # unzip exits with 1 for warnings after a completed extraction;
            # anything above that is a real failure.
            if result.returncode > 1:
                print(f"unzip failed for {fname} (exit code {result.returncode})")
                continue

        process_images_mp(unzip_dir)

./data/Validation
Start unzip [라벨]EMOIMG_당황_VALID.zip
['./data/Validation/[라벨]EMOIMG_당황_VALID/img_emotion_validation_data(┤ч╚▓).json']
Start unzip [라벨]EMOIMG_불안_VALID.zip
['./data/Validation/[라벨]EMOIMG_불안_VALID/img_emotion_validation_data(║╥╛╚).json']
Start unzip [원천]EMOIMG_슬픔_VALID.zip
['./data/Validation/[원천]EMOIMG_슬픔_VALID/a08352013c2366227976f20f0733766f59abf24f91a6e726c2a235793f4136ba_│▓_30_╜╜╟─_┐└╢Ї&░°┐м╜├╝│_20201202173622-005-006.jpg', './data/Validation/[원천]EMOIMG_슬픔_VALID/cd9088c78d99ae51902fbd242b6b9c0c857cf34b2c26d92fd4813d0ed184430f_┐й_20_╜╜╟─_╟р╗ч&╗ч╣л░°░г_20201203111812-006-001.jpg', './data/Validation/[원천]EMOIMG_슬픔_VALID/fd6f2ef55a7c2db5728fbf2265a43ece4021ee390614b243ff2ef51838d5f6d1_│▓_20_╜╜╟─_╗є╛ў╜├╝│&┴б╞ў&╜├└х_20210109221836-003-001.jpg']


Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS

Start unzip [원천]EMOIMG_분노_VALID.zip
['./data/Validation/[원천]EMOIMG_분노_VALID/f7cdb98dd987742a4f8eeb9d66387ef02fb817ddda54a4280273625b89639669_┐й_30_║╨│ы_╜║╞ў├ў ░№╢ў ╣╫ ╖╣└·╜├╝│_20201205222929-004-015.jpg', './data/Validation/[원천]EMOIMG_분노_VALID/0d3ace5363638bb6b4b772789353e529d80fb23b7b7f67597f784f45da383048_┐й_20_║╨│ы_▒│┼ы&└╠╡┐╝Ў┤▄(┐д╕о║г└╠┼═ ╞ў╟╘)_20201207001407-002-004.jpg', './data/Validation/[원천]EMOIMG_분노_VALID/18cc8f23b0ccfad00d459a4c42c7af9ed574c3a4edd5e9dd23cb654393154fdc_┐й_40_║╨│ы_╡╡╜╔ ╚п░ц_20210121195541-008-003.jpg']


Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS

Start unzip [라벨]EMOIMG_중립_VALID.zip
['./data/Validation/[라벨]EMOIMG_중립_VALID/img_emotion_validation_data(┴▀╕│).json']
Start unzip [라벨]EMOIMG_슬픔_VALID.zip
['./data/Validation/[라벨]EMOIMG_슬픔_VALID/img_emotion_validation_data(╜╜╟─).json']
Start unzip [원천]EMOIMG_상처_VALID.zip
['./data/Validation/[원천]EMOIMG_상처_VALID/2cb3768fcbb57fc7afd5014a94a6787dc2bce508d223989e0276f03ace2a44a6_┐й_20_╗є├│_╝ў╣┌ ╣╫ ░┼┴╓░°░г_20210129163130-010-003.jpg', './data/Validation/[원천]EMOIMG_상처_VALID/006b56dc2f8cda2361e1b01b2496d6f352dd5b1790f0a9b0bfcbe540b292247d_┐й_20_╗є├│_╗є╛ў╜├╝│&┴б╞ў&╜├└х_20210130214734-003-005.jpg', './data/Validation/[원천]EMOIMG_상처_VALID/ad1db0c0ce0e49bb20de7233efb83efc1abff28be0ac7ccd6b56caddf82fd7a7_┐й_40_╗є├│_╝ў╣┌ ╣╫ ░┼┴╓░°░г_20210202175904-010-017.jpg']


Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Corrupt JPEG data: premature end of data segment
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Inval

Start unzip [원천]EMOIMG_불안_VALID.zip
['./data/Validation/[원천]EMOIMG_불안_VALID/d209cc0204c1524c3d073eabd16c1ede906217384e2c1763c50643e9a3dac6ec_┐й_30_║╥╛╚_╜╟┐▄ └┌┐м╚п░ц_20210217174545-007-016.jpg', './data/Validation/[원천]EMOIMG_불안_VALID/bbfdb80efc193ac68ce947195d119cab166ad8c62a5308fdb7f9b0f3b8d138e3_┐й_20_║╥╛╚_░°░°╜├╝│&┴╛▒│&└╟╖с╜├╝│_20201203105852-001-010.jpg', './data/Validation/[원천]EMOIMG_불안_VALID/07d518465169eb8fe71a74d7464744a954ab29db25b18278c2ed716bed40242b_│▓_20_║╥╛╚_╜╟┐▄ └┌┐м╚п░ц_20210130202120-007-004.jpg']


Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS

Start unzip [라벨]EMOIMG_상처_VALID.zip
['./data/Validation/[라벨]EMOIMG_상처_VALID/img_emotion_validation_data(╗є├│).json']
Start unzip [라벨]EMOIMG_분노_VALID.zip
['./data/Validation/[라벨]EMOIMG_분노_VALID/img_emotion_validation_data(║╨│ы).json']
Start unzip [원천]EMOIMG_당황_VALID.zip
['./data/Validation/[원천]EMOIMG_당황_VALID/ba2bd377a6632cc58e86d52379693aaece19b69ed49851b171c500b42f397ed5_│▓_20_┤ч╚▓_┐└╢Ї&░°┐м╜├╝│_20201207023448-005-002.jpg', './data/Validation/[원천]EMOIMG_당황_VALID/36ab29ac1534a3f76a8fe0c82cd4c2460073451f519205d8c5bd4d40f311f142_│▓_30_┤ч╚▓_╡╡╜╔ ╚п░ц_20210119163908-008-015.jpg', './data/Validation/[원천]EMOIMG_당황_VALID/10d7f0d504f95b647e7731850b1ad6212279fe2008130f53b06e90af9819d733_┐й_40_┤ч╚▓_╝ў╣┌ ╣╫ ░┼┴╓░°░г_20210203124727-010-012.jpg']


Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS

Start unzip [원천]EMOIMG_중립_VALID.zip
['./data/Validation/[원천]EMOIMG_중립_VALID/6eb0f4525d55e848a9c37a246a535b0f1b2dc6aa6bcc48d7aaafdecc18865919_│▓_20_┴▀╕│_╝ў╣┌ ╣╫ ░┼┴╓░°░г_20210122122343-010-001.jpg', './data/Validation/[원천]EMOIMG_중립_VALID/e7d03573a4ef0f455e412779fe03127267f8674ef9e99e851cba2a0d75a134c4_┐й_30_┴▀╕│_░°░°╜├╝│&┴╛▒│&└╟╖с╜├╝│_20201205122732-001-010.jpg', './data/Validation/[원천]EMOIMG_중립_VALID/c69b9768e82cc7df4a9b98bc8d90ac8292db38d57fca2b6728f05eca5bd9a8d6_│▓_20_┴▀╕│_▒│┼ы&└╠╡┐╝Ў┤▄(┐д╕о║г└╠┼═ ╞ў╟╘)_20201203104343-002-016.jpg']


Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS

In [4]:
!df -h

Filesystem      Size  Used Avail Use% Mounted on
tmpfs           1.6G  2.3M  1.6G   1% /run
/dev/nvme0n1p2  468G  229G  216G  52% /
tmpfs           7.8G  7.3M  7.8G   1% /dev/shm
tmpfs           5.0M  4.0K  5.0M   1% /run/lock
efivarfs        128K   51K   73K  41% /sys/firmware/efi/efivars
/dev/nvme0n1p1  511M  6.1M  505M   2% /boot/efi
tmpfs           1.6G  128K  1.6G   1% /run/user/1000
tmpfs           1.6G   88K  1.6G   1% /run/user/128
/dev/nvme1n1p1  469G  312G  134G  71% /media/ssafy/Volume1
/dev/sda1        15G  9.9M   15G   1% /media/ssafy/04BB-C7CE
