In [6]:
import os
import json
import numpy as np
import torch
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image
import cv2

# Use the MPS backend when PyTorch reports it as available; otherwise fall back to the CPU
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using device for FaceNet: {device}")

# The MTCNN face detector is pinned to the CPU, while the InceptionResnetV1
# embedding network is switched to eval mode and moved to `device`
mtcnn = MTCNN(keep_all=True, device="cpu")
resnet = InceptionResnetV1(pretrained='vggface2')
resnet = resnet.eval().to(device)

# Locations of the metadata JSON, the input videos, and the embedding output directory
json_path = '/Users/gible/오픈소스sw/Train_Data/train_data/metadata.json'
video_folder = '/Users/gible/오픈소스sw/Train_Data/train_data/train_sample_videos'
output_folder = '/Users/gible/오픈소스sw/Train_Data/train_data/Embedding_data'

# Load the metadata. The encoding is given explicitly so the result does not
# depend on the platform's default locale encoding (JSON text is UTF-8).
with open(json_path, 'r', encoding='utf-8') as f:
    metadata = json.load(f)

# Create the output directory if it does not exist yet
os.makedirs(output_folder, exist_ok=True)

# Prewhiten 함수 정의
def prewhiten(x):
    """Center an array on its global mean and scale it by its global standard deviation.

    The divisor is the standard deviation over all elements, floored at
    ``1 / sqrt(x.size)`` so that constant or near-constant inputs are not
    divided by a value close to zero.

    Args:
        x: NumPy array to normalize (must expose ``.size``).

    Returns:
        Array of the same shape as ``x`` holding ``(x - mean) / adjusted_std``.
    """
    min_scale = 1.0 / np.sqrt(x.size)
    scale = np.maximum(np.std(x), min_scale)
    centered = x - np.mean(x)
    return centered / scale

def process_video(video_path, label, output_path, max_frames=10):
    """Embed the faces found in the leading frames of a video and save them to disk.

    Reads frames from the start of the video (NOT every N-th frame), detects
    faces in each frame with the module-level ``mtcnn``, crops every detected
    box, resizes it to 160x160, normalizes it with ``prewhiten`` and feeds the
    batch to the module-level ``resnet`` on ``device``. The per-frame embedding
    arrays are stacked row-wise and written to ``output_path`` with ``np.save``.
    Nothing is written when no face was detected in any processed frame.

    Args:
        video_path: Path of the video file opened with ``cv2.VideoCapture``.
        label: Label of the video. Accepted for interface compatibility; it is
            not used inside this function.
        output_path: Destination path passed to ``np.save``.
        max_frames: Maximum number of frames to read from the start of the
            video. Defaults to 10, the limit that was previously hard-coded.
            Pass ``None`` to read until the end of the video.

    Returns:
        None.
    """
    cap = cv2.VideoCapture(video_path)
    embeddings = []
    frames_read = 0

    # try/finally guarantees the capture handle is released even when face
    # detection or the embedding network raises part-way through the video.
    try:
        while max_frames is None or frames_read < max_frames:
            ret, frame = cap.read()
            if not ret:
                # No frame was returned: end of stream or an unreadable video.
                break
            frames_read += 1

            # The frame is converted from BGR to RGB before wrapping it in a PIL image.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(frame_rgb)

            # Detect face bounding boxes; a frame without usable boxes is skipped.
            # The explicit length check avoids calling torch.stack on an empty list.
            boxes, _ = mtcnn.detect(pil_image)
            if boxes is None or len(boxes) == 0:
                continue

            # Crop each box, resize to 160x160 and normalize the pixel values.
            faces = [
                prewhiten(np.array(
                    pil_image.crop((box[0], box[1], box[2], box[3])).resize((160, 160))
                ))
                for box in boxes
            ]

            # Each face array is permuted from (H, W, C) to (C, H, W) and the
            # faces are stacked into one batch before being moved to `device`.
            face_tensors = torch.stack([
                torch.tensor(face, dtype=torch.float32).permute(2, 0, 1)
                for face in faces
            ]).to(device)

            # Inference only: no gradients are needed.
            with torch.no_grad():
                face_embeddings = resnet(face_tensors)
            embeddings.append(face_embeddings.cpu().numpy())
    finally:
        cap.release()

    # Save one row per detected face; skip writing when nothing was detected.
    if embeddings:
        np.save(output_path, np.vstack(embeddings))

# 각 동영상 처리
# Process every video listed in the metadata
for video_name, data in metadata.items():
    label = data['label']
    video_path = os.path.join(video_folder, video_name)
    # splitext removes only the final extension, so a name containing extra
    # dots keeps its full stem and cannot collide with another video's output.
    video_stem = os.path.splitext(video_name)[0]
    output_path = os.path.join(output_folder, f"{video_stem}_embedding.npy")

    if os.path.exists(video_path):
        process_video(video_path, label, output_path)
        print(f"Processed {video_name} with label {label}")
    else:
        print(f"Video file {video_name} not found")

Using device for FaceNet: mps
Processed aagfhgtpmv.mp4 with label FAKE
Processed aapnvogymq.mp4 with label FAKE
Processed abarnvbtwb.mp4 with label REAL
Processed abofeumbvv.mp4 with label FAKE
Processed abqwwspghj.mp4 with label FAKE
Processed acifjvzvpm.mp4 with label FAKE
Processed acqfdwsrhi.mp4 with label FAKE
Processed acxnxvbsxk.mp4 with label FAKE
Processed acxwigylke.mp4 with label FAKE
Processed aczrgyricp.mp4 with label FAKE
Processed adhsbajydo.mp4 with label FAKE
Processed adohikbdaz.mp4 with label FAKE
Processed adylbeequz.mp4 with label FAKE
Processed aelfnikyqj.mp4 with label REAL
Processed aelzhcnwgf.mp4 with label FAKE
Processed aettqgevhz.mp4 with label FAKE
Processed aevrfsexku.mp4 with label FAKE
Processed afoovlsmtx.mp4 with label REAL
Processed agdkmztvby.mp4 with label FAKE
Processed agqphdxmwt.mp4 with label FAKE
Processed agrmhtjdlk.mp4 with label REAL
Processed ahbweevwpv.mp4 with label FAKE
Processed ahdbuwqxit.mp4 with label FAKE
Processed ahfazfbntc.mp4 wi

In [None]:
|