## train.ipynb
1. 기본 설정
2. Xception 모델  
2-1. Xception 구현  
2-2. Pretrained Weight  
3. 훈련(Fine Tuning)  
3-1. 전처리  
3-2. Train/Validate 함수  
3-3. 훈련

In [101]:

!nvidia-smi

Sun Dec 15 14:17:25 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 566.36                 Driver Version: 566.36         CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4070 ...  WDDM  |   00000000:01:00.0  On |                  N/A |
|  0%   48C    P3             52W /  220W |    7375MiB /  12282MiB |     39%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

## 1. 기본 설정

In [102]:

import easydict
import os
import sys
from PIL import Image
import tqdm
import shutil

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision.transforms as transforms
import torch.utils.data as data
import torch.backends.cudnn as cudnn
from torchvision import transforms

In [103]:

import os

# Show the current working directory.
current_directory = os.getcwd()
print(f"현재 작업 디렉토리: {current_directory}")

# Directory to switch into.
target_directory = r"D:\01.deepfake"

# Change directory; a missing path or a permission problem is reported with a
# message instead of stopping the notebook.
try:
    os.chdir(target_directory)
    print(f"디렉토리를 이동했습니다: {os.getcwd()}")
except FileNotFoundError:
    print(f"지정된 경로가 존재하지 않습니다: {target_directory}")
except PermissionError:
    print(f"지정된 경로에 접근할 권한이 없습니다: {target_directory}")

현재 작업 디렉토리: D:\01.deepfake
디렉토리를 이동했습니다: D:\01.deepfake


In [104]:

# Let cuDNN benchmark convolution algorithms.
cudnn.benchmark = True

# Run configuration, exposed as attributes (args.gpu, args.root, ...).
args = easydict.EasyDict(
    # Hardware
    gpu=0,          # index of the GPU to use
    num_workers=4,  # lowered from 32 to 4 for a desktop PC

    # Data locations
    root="D:/01.deepfake/dfdc_train_part_0 - 복사본",
    train_list="D:/01.deepfake/dfdc_train_part_0 - 복사본/train_list_1st.txt",
    valid_list="D:/01.deepfake/dfdc_train_part_0 - 복사본/test_list_1st.txt",

    # Training hyper-parameters
    learning_rate=0.001,
    num_epochs=1,
    batch_size=16,  # lowered from 32 to 16 to fit GPU memory

    # Where the fine-tuned checkpoint is written
    save_fn="D:/01.deepfake/models/deepfake_c0_xception_tuned.pth.tar",
)

# Make sure the model output directory exists.
os.makedirs("D:/01.deepfake/models", exist_ok=True)

# train_list_1st.txt와 test_list_1st.txt 파일이 없다면 생성하는 코드 추가
def create_data_list_files(train_list=None, valid_list=None):
    """Create placeholder list files for any list that does not exist yet.

    Each list file holds one ``"<image path> <label>"`` row per image. The
    image collections below are empty placeholders, so a newly created file
    is empty until real paths are filled in.

    A list file that already exists is left untouched. Previously both files
    were always opened in ``'w'`` mode, which truncated an existing list
    whenever only the other one was missing.

    Args:
        train_list: Path of the training list; defaults to ``args.train_list``.
        valid_list: Path of the validation list; defaults to ``args.valid_list``.
    """
    train_images = []  # training image paths (placeholder)
    test_images = []   # validation image paths (placeholder)

    targets = (
        (args.train_list if train_list is None else train_list, train_images),
        (args.valid_list if valid_list is None else valid_list, test_images),
    )

    for list_path, image_paths in targets:
        if os.path.isfile(list_path):
            # Never overwrite a list that is already on disk.
            continue
        with open(list_path, 'w') as f:
            for img_path in image_paths:
                label = 0  # or 1, depending on the actual label
                f.write(f"{img_path} {label}\n")

# 파일 존재 여부 확인 전에 리스트 파일 생성
# Create the list files when at least one of them is missing.
if not (os.path.isfile(args.train_list) and os.path.isfile(args.valid_list)):
    create_data_list_files()

# Both list files must exist from here on.
assert os.path.isfile(args.train_list), (
    f'Training list not found at: {args.train_list}'
)
assert os.path.isfile(args.valid_list), (
    f'Validation list not found at: {args.valid_list}'
)

## 2. 모델
- 참고문헌\[1]\[2]에 따르면 Xception\[3] 모델이 변조 영상 탐지에 가장 좋은 성능을 보여주어 해당 모델을 기본 모델로 선정

\[1] FaceForensics++: Learning to Detect Manipulated Facial Images, ICCV 2019.  
\[2] Celeb-DF: A Large-scale Challenging Dataset for DeepFake Forensics, CVPR 2020.  
\[3] Xception: Deep Learning with Depthwise Separable Convolutions, CVPR 2017.

### 2-1. Xception 구현


In [105]:
"""
Author: Andreas Rössler,
Implemented in https://github.com/ondyari/FaceForensics under MIT license
"""


class SeparableConv2d(nn.Module):
    """Depthwise convolution followed by a 1x1 pointwise convolution.

    ``conv1`` uses ``groups=in_channels`` (one filter per input channel);
    ``pointwise`` then maps ``in_channels`` to ``out_channels`` with a 1x1
    kernel.
    """

    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1,
                 padding=0, dilation=1, bias=False):
        super().__init__()

        # Per-channel spatial filtering.
        self.conv1 = nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=in_channels,
            bias=bias,
        )
        # 1x1 convolution that mixes channels.
        self.pointwise = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            dilation=1,
            groups=1,
            bias=bias,
        )

    def forward(self, x):
        """Apply the depthwise convolution, then the pointwise convolution."""
        return self.pointwise(self.conv1(x))


class Block(nn.Module):
    """Residual block made of ReLU -> SeparableConv2d -> BatchNorm units.

    Args:
        in_filters: Channels entering the block.
        out_filters: Channels leaving the block.
        reps: Number of ReLU/conv/BN units in the main branch.
        strides: When not 1, a 3x3 max-pool with this stride ends the main
            branch and the shortcut uses the same stride.
        start_with_relu: If False the leading ReLU is dropped; if True it is
            replaced by a non-inplace ReLU.
        grow_first: If True the channel change happens in the first unit,
            otherwise in the last one.
    """

    def __init__(self, in_filters, out_filters, reps, strides=1,
                 start_with_relu=True, grow_first=True):
        super().__init__()

        # The shortcut needs a 1x1 projection whenever the shape changes.
        needs_projection = out_filters != in_filters or strides != 1
        if needs_projection:
            self.skip = nn.Conv2d(in_filters, out_filters, 1, stride=strides, bias=False)
            self.skipbn = nn.BatchNorm2d(out_filters)
        else:
            self.skip = None

        self.relu = nn.ReLU(inplace=True)

        def unit(channels_in, channels_out):
            # One ReLU -> separable 3x3 conv -> BN triple; the ReLU instance is shared.
            return [
                self.relu,
                SeparableConv2d(channels_in, channels_out, 3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(channels_out),
            ]

        layers = []
        width = in_filters
        if grow_first:
            layers.extend(unit(in_filters, out_filters))
            width = out_filters

        for _ in range(reps - 1):
            layers.extend(unit(width, width))

        if not grow_first:
            layers.extend(unit(in_filters, out_filters))

        if start_with_relu:
            # The first activation must not be in-place: the block input is
            # also read by the shortcut branch.
            layers[0] = nn.ReLU(inplace=False)
        else:
            layers = layers[1:]

        if strides != 1:
            layers.append(nn.MaxPool2d(3, strides, 1))
        self.rep = nn.Sequential(*layers)

    def forward(self, inp):
        """Return main-branch output plus the (optionally projected) shortcut."""
        out = self.rep(inp)

        shortcut = inp if self.skip is None else self.skipbn(self.skip(inp))

        out += shortcut
        return out


class Xception(nn.Module):
    """Xception backbone: two stem convolutions, twelve ``Block`` stages,
    two separable convolutions and a linear classifier.

    The classifier is created as ``self.fc``. ``logits`` uses
    ``self.last_linear`` when a wrapper has installed one and otherwise falls
    back to ``self.fc``. Previously ``logits`` read ``self.last_linear``
    unconditionally, so a stand-alone ``Xception()`` failed in ``forward``
    with ``AttributeError``.

    Args:
        num_classes: Output size of the classifier.
    """

    def __init__(self, num_classes=1000):
        super(Xception, self).__init__()
        self.num_classes = num_classes

        # Stem
        self.conv1 = nn.Conv2d(3, 32, 3, 2, 0, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(32, 64, 3, bias=False)
        self.bn2 = nn.BatchNorm2d(64)

        # Strided blocks that grow the channel count
        self.block1 = Block(64, 128, 2, 2, start_with_relu=False, grow_first=True)
        self.block2 = Block(128, 256, 2, 2, start_with_relu=True, grow_first=True)
        self.block3 = Block(256, 728, 2, 2, start_with_relu=True, grow_first=True)

        # Eight identical 728-channel blocks
        self.block4 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
        self.block5 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
        self.block6 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
        self.block7 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)

        self.block8 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
        self.block9 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
        self.block10 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
        self.block11 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)

        # Final strided block and separable convolutions
        self.block12 = Block(728, 1024, 2, 2, start_with_relu=True, grow_first=False)

        self.conv3 = SeparableConv2d(1024, 1536, 3, 1, 1)
        self.bn3 = nn.BatchNorm2d(1536)

        self.conv4 = SeparableConv2d(1536, 2048, 3, 1, 1)
        self.bn4 = nn.BatchNorm2d(2048)

        self.fc = nn.Linear(2048, num_classes)

    def features(self, input):
        """Run the convolutional trunk and return the output of ``bn4``."""
        x = self.conv1(input)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)

        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        x = self.block6(x)
        x = self.block7(x)
        x = self.block8(x)
        x = self.block9(x)
        x = self.block10(x)
        x = self.block11(x)
        x = self.block12(x)

        x = self.conv3(x)
        x = self.bn3(x)
        x = self.relu(x)

        x = self.conv4(x)
        x = self.bn4(x)
        return x

    def logits(self, features):
        """ReLU, global average pool, flatten, then apply the classifier head.

        Note: ``self.relu`` is in-place, so ``features`` is modified.
        """
        x = self.relu(features)

        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(x.size(0), -1)

        # Prefer a head installed by a wrapper; fall back to the built-in one.
        head = getattr(self, 'last_linear', None)
        if head is None:
            head = self.fc
        return head(x)

    def forward(self, input):
        """Return classifier outputs for a batch of images."""
        x = self.features(input)
        x = self.logits(x)
        return x


## 기존 Xception에 Dropout만 추가
class xception(nn.Module):
    """Xception backbone whose classifier head optionally gets Dropout.

    Args:
        num_out_classes: Number of output classes.
        dropout: Dropout probability placed before the final linear layer;
            a falsy value means a plain linear head.
    """

    def __init__(self, num_out_classes=2, dropout=0.5):
        super().__init__()

        backbone = Xception(num_classes=num_out_classes)
        # Expose the classifier as ``last_linear`` and drop the ``fc`` alias.
        backbone.last_linear = backbone.fc
        del backbone.fc

        num_ftrs = backbone.last_linear.in_features
        if dropout:
            backbone.last_linear = nn.Sequential(
                nn.Dropout(p=dropout),
                nn.Linear(num_ftrs, num_out_classes),
            )
        else:
            backbone.last_linear = nn.Linear(num_ftrs, num_out_classes)

        self.model = backbone

    def forward(self, x):
        """Delegate to the wrapped backbone."""
        return self.model(x)

### 2-2. Pretrained Weight
- FaceForensics++ 데이터로 학습된 pre-trained model weight 다운\[4]

\[4]https://github.com/HongguLiu/Deepfake-Detection

In [106]:
!wget -O deepfake_c0_xception.pkl --no-check-certificate 'https://docs.google.com/uc?export=download&id=1eHRN117X0loEff7EBk1mGMJeGbGKsd7m'

'wget'은(는) 내부 또는 외부 명령, 실행할 수 있는 프로그램, 또는
배치 파일이 아닙니다.
'id'은(는) 내부 또는 외부 명령, 실행할 수 있는 프로그램, 또는
배치 파일이 아닙니다.


## 3. 훈련

### 3-1 전처리
- dlib을 이용하여 얼굴만 Cropping하여 학습하였을 때, 0.94점에서 올라가지 못하였음
- 얼굴 추출없이 CenterCrop하여 학습하였을 때, 가장 좋은 점수 0.97점을 얻음
- 그 외 설정은 Pre-train에 사용된 설정과 같음

In [107]:
# Preprocessing pipeline per split: 299x299 center crop, tensor conversion and
# normalisation with mean/std 0.5 per channel. Only the 'train' split adds a
# random horizontal flip (placed between ToTensor and Normalize).
xception_default = {
    split: transforms.Compose(
        [transforms.CenterCrop((299, 299)), transforms.ToTensor()]
        + ([transforms.RandomHorizontalFlip()] if split == 'train' else [])
        + [transforms.Normalize([0.5] * 3, [0.5] * 3)]
    )
    for split in ('train', 'valid', 'test')
}

### 3-2 Train/Validate/Dataset 함수
- 홈페이지 제공 함수를 수정하여 사용

In [108]:
# util

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Serialise ``state`` to ``filename``.

    When ``is_best`` is truthy the saved file is also copied to
    ``model_best.pth.tar`` in the current working directory.
    """
    torch.save(state, filename)
    if not is_best:
        return
    shutil.copyfile(filename, 'model_best.pth.tar')

def adjust_learning_rate(optimizer, epoch, args):
    """Set the learning rate to the initial LR decayed by 10 every 30 epochs.

    Args:
        optimizer: Object exposing ``param_groups`` (a list of dicts); the
            ``'lr'`` entry of every group is overwritten.
        epoch: Zero-based epoch index.
        args: Configuration holding the initial learning rate as ``lr`` or,
            as in this notebook's configuration, as ``learning_rate``.
    """
    # The notebook's args define ``learning_rate`` rather than ``lr``; accept both.
    base_lr = getattr(args, 'lr', None)
    if base_lr is None:
        base_lr = args.learning_rate

    lr = base_lr * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [109]:
# custom dataset

class ImageRecord(object):
    """One parsed row of a list file: ``[path, label]``."""

    def __init__(self, row):
        self._data = row

    @property
    def path(self):
        """Image path exactly as written in the list file."""
        return self._data[0]

    @property
    def label(self):
        """Label converted to ``int``."""
        return int(self._data[1])


class DFDCDatatset(data.Dataset):
    """Image dataset driven by a text list of ``"<path> <label>"`` rows.

    Args:
        root_path: Directory that every listed path is joined onto.
        list_file: Text file with one ``"<path> <label>"`` row per image.
        transform: Optional callable applied to each loaded RGB image.
    """

    def __init__(self, root_path, list_file, transform=None):
        self.root_path = root_path
        self.list_file = list_file
        self.transform = transform

        self._parse_list()

    def _load_image(self, image_path):
        """Open ``image_path`` and return it converted to RGB."""
        # Close the file handle as soon as the pixels have been converted.
        with Image.open(image_path) as img:
            return img.convert('RGB')

    def _parse_list(self):
        """Read ``list_file`` into ``self.image_list``.

        Blank lines are skipped, and each row is split on its last space so
        that a path containing spaces still yields ``[path, label]``.
        """
        records = []
        with open(self.list_file) as list_fp:
            for line in list_fp:
                line = line.strip()
                if not line:
                    continue
                records.append(ImageRecord(line.rsplit(' ', 1)))
        self.image_list = records

    def __getitem__(self, index):
        """Return ``(image, label)`` for the record at ``index``."""
        record = self.image_list[index]
        image_name = os.path.join(self.root_path, record.path)
        image = self._load_image(image_name)

        if self.transform is not None:
            image = self.transform(image)

        return image, record.label

    def __len__(self):
        """Number of records parsed from the list file."""
        return len(self.image_list)

In [110]:
# train / validate

def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training pass over ``train_loader``.

    Shows the running average loss and accuracy on a tqdm progress bar and
    steps the module-level ``scheduler`` once after the pass. Reads the
    module-level ``args`` for the target GPU. ``epoch`` is accepted but not
    used inside the function.
    """
    model.train()

    seen = 0        # samples processed so far
    loss_sum = 0.0  # sum of per-sample losses
    hits = 0        # correct predictions so far

    progress = tqdm.tqdm(train_loader, total=len(train_loader), desc="Train", file=sys.stdout)
    with progress as iterator:
        for images, target in iterator:
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            outputs = model(images)
            pred = torch.max(outputs.data, 1)[1]

            loss = criterion(outputs, target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch = images.size(0)
            seen += batch
            loss_sum += loss.item() * batch
            hits += torch.sum(pred == target.data)

            # Running averages over everything seen in this pass.
            iterator.set_postfix_str(
                f'loss - {loss_sum / float(seen):.4f}, acc - {hits / float(seen):.3f}'
            )

    scheduler.step()


def validate(test_loader, model, criterion):
    """Evaluate ``model`` on ``test_loader`` and return the overall accuracy.

    The loader passed as ``test_loader`` is the one iterated. Previously the
    body ignored this parameter and read the module-level ``valid_loader``.

    Args:
        test_loader: Iterable of ``(images, target)`` batches.
        model: Network to evaluate; switched to eval mode.
        criterion: Loss function called as ``criterion(output, target)``.

    Returns:
        Correct predictions divided by the number of samples.

    Raises:
        ValueError: If the loader yields no samples.
    """
    n = 0
    running_loss = 0.0
    running_corrects = 0

    model.eval()

    with tqdm.tqdm(test_loader, total=len(test_loader), desc="Valid", file=sys.stdout) as iterator:
        for images, target in iterator:
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            # No gradients are needed for either the forward pass or the loss.
            with torch.no_grad():
                output = model(images)
                loss = criterion(output, target)

            _, pred = torch.max(output.data, 1)

            n += images.size(0)
            running_loss += loss.item() * images.size(0)
            running_corrects += torch.sum(pred == target.data)

            epoch_loss = running_loss / float(n)
            epoch_acc = running_corrects / float(n)

            log = 'loss - {:.4f}, acc - {:.3f}'.format(epoch_loss, epoch_acc)
            iterator.set_postfix_str(log)

    if n == 0:
        # Without this guard an empty loader ended in an UnboundLocalError.
        raise ValueError("validate() received an empty loader; no samples were evaluated")

    return running_corrects / float(n)

In [111]:
# import gdown

# # 모델을 저장할 디렉토리 생성
# os.makedirs("D:/01.deepfake/models", exist_ok=True)

# # Google Drive에서 파일 다운로드
# url = "https://docs.google.com/uc?export=download&id=1eHRN117X0loEff7EBk1mGMJeGbGKsd7m"
# output = "D:/01.deepfake/models/deepfake_c0_xception.pkl"
# gdown.download(url, output, quiet=False)

### 3-3. 훈련
- 하이퍼파라미터
  - Batch=32
  - Adam(lr=0.001)
  - Epoch=1, 2회 이상시 오버핏 발생
  - lr scheduler 미사용


In [112]:
# Build the network and move it to the configured GPU.
model = xception(num_out_classes=2, dropout=0.5)
print("=> creating model '{}'".format('xception'))
model = model.cuda(args.gpu)

# Path of the pretrained weight file.
pretrained_path = "D:/01.deepfake/models/deepfake_c0_xception.pkl"

# Fail with an explicit error; an ``assert`` is skipped when Python runs with -O.
if not os.path.isfile(pretrained_path):
    raise FileNotFoundError(f'Pretrained weight not found at: {pretrained_path}')

# SECURITY NOTE: the weight file is a pickle downloaded from the internet.
# ``weights_only=True`` restricts unpickling to tensors and plain containers
# so that loading it cannot execute arbitrary code. The tensors are loaded on
# the CPU; ``load_state_dict`` copies them into the model on the GPU.
state_dict = torch.load(pretrained_path, map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)
print("=> model weight '{}' is loaded".format(pretrained_path))

# Loss, optimizer and LR schedule; device and learning rate come from ``args``
# so they stay consistent with the rest of the configuration.
criterion = nn.CrossEntropyLoss().cuda(args.gpu)
optimizer = optim.Adam(model.parameters(), lr=args.learning_rate, betas=(0.9, 0.999), eps=1e-08)
scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

=> creating model 'xception'
=> model weight 'D:/01.deepfake/models/deepfake_c0_xception.pkl' is loaded


  model.load_state_dict(torch.load(pretrained_path))


In [113]:
# Training split: images listed in args.train_list, with the 'train' transforms.
train_dataset = DFDCDatatset(
    root_path=args.root,
    list_file=args.train_list,
    transform=xception_default["train"],
)

# Validation split: images listed in args.valid_list, with the 'valid' transforms.
valid_dataset = DFDCDatatset(
    root_path=args.root,
    list_file=args.valid_list,
    transform=xception_default["valid"],
)

In [114]:
import json
import cv2
import os
from glob import glob
import random
import shutil

def extract_frames_from_videos(video_path="D:/01.deepfake/dfdc_train_part_0 - 복사본",
                               frames_path="D:/01.deepfake/frames"):
    """Save the middle frame of every video listed in ``metadata.json``.

    Frames are written to ``<frames_path>/<label>/<video stem>.jpg`` where
    ``label`` is the lower-cased ``'label'`` entry of the metadata. Videos
    missing on disk and frames that already exist are skipped.

    Args:
        video_path: Directory holding the videos and ``metadata.json``.
        frames_path: Directory that receives the extracted frames.

    Returns:
        ``frames_path``.
    """
    # Create the output directory.
    os.makedirs(frames_path, exist_ok=True)

    # Read metadata.json.
    with open(os.path.join(video_path, "metadata.json"), "r") as f:
        metadata = json.load(f)

    print(f"Total videos in metadata: {len(metadata)}")

    for video_name, info in metadata.items():
        video_file = os.path.join(video_path, video_name)
        if not os.path.exists(video_file):
            continue

        # One sub-directory per label.
        label = info['label'].lower()
        save_dir = os.path.join(frames_path, label)
        os.makedirs(save_dir, exist_ok=True)

        frame_name = f"{os.path.splitext(video_name)[0]}.jpg"
        save_path = os.path.join(save_dir, frame_name)

        # Skip frames extracted by an earlier run.
        if os.path.exists(save_path):
            print(f"Skipping already existing frame: {save_path}")
            continue

        cap = cv2.VideoCapture(video_file)
        try:
            if not cap.isOpened():
                print(f"Could not open video: {video_file}")
                continue

            # Seek to the middle frame and decode it.
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            cap.set(cv2.CAP_PROP_POS_FRAMES, total_frames // 2)
            ret, frame = cap.read()
        finally:
            # Release the capture even when seeking or decoding raises.
            cap.release()

        # Report read and write failures instead of skipping them silently.
        if ret and cv2.imwrite(save_path, frame):
            print(f"Saved frame: {save_path}")
        else:
            print(f"Failed to extract frame from: {video_file}")

    return frames_path

def _write_image_list(list_path, items, base_path):
    """Write ``"<path relative to base_path> <label>"`` rows to ``list_path``."""
    with open(list_path, "w") as f:
        for img_path, label in items:
            rel_path = os.path.relpath(img_path, base_path)
            f.write(f"{rel_path} {label}\n")


def create_train_valid_lists(frames_path, train_ratio=0.8,
                             base_path="D:/01.deepfake/dfdc_train_part_0 - 복사본"):
    """Split the extracted frames into train/validation list files.

    Images under ``<frames_path>/real`` are labelled ``0`` and images under
    ``<frames_path>/fake`` are labelled ``1``. Each class is shuffled and
    split separately by ``train_ratio``; the result is written to
    ``train_list_1st.txt`` and ``test_list_1st.txt`` inside ``base_path``.

    The label comes from the directory an image was collected from.
    Previously it came from a ``"real" in img_path`` substring test on the
    full path, which mislabelled any fake image whose file name (or any
    parent directory) contained "real".

    Args:
        frames_path: Directory containing the ``real`` and ``fake`` folders.
        train_ratio: Fraction of each class assigned to the training list.
        base_path: Directory that receives the list files; listed paths are
            relative to it.
    """
    # Collect the image paths of each class.
    real_images = glob(os.path.join(frames_path, "real", "*.jpg"))
    fake_images = glob(os.path.join(frames_path, "fake", "*.jpg"))

    # Shuffle each class independently.
    random.shuffle(real_images)
    random.shuffle(fake_images)

    # Per-class split point.
    real_split = int(len(real_images) * train_ratio)
    fake_split = int(len(fake_images) * train_ratio)

    # Attach the label while the source directory is still known.
    labelled_real = [(path, "0") for path in real_images]
    labelled_fake = [(path, "1") for path in fake_images]

    train_items = labelled_real[:real_split] + labelled_fake[:fake_split]
    valid_items = labelled_real[real_split:] + labelled_fake[fake_split:]

    random.shuffle(train_items)
    random.shuffle(valid_items)

    _write_image_list(os.path.join(base_path, "train_list_1st.txt"), train_items, base_path)
    _write_image_list(os.path.join(base_path, "test_list_1st.txt"), valid_items, base_path)

    print(f"Created train list with {len(train_items)} images")
    print(f"Created validation list with {len(valid_items)} images")

# Extract one frame per video, then build the train/validation list files
# from the extracted frames.
print("Extracting frames from videos...")
frames_path = extract_frames_from_videos()

print("\nCreating train/validation lists...")
create_train_valid_lists(frames_path)


Extracting frames from videos...
Total videos in metadata: 5170
Skipping already existing frame: D:/01.deepfake/frames\fake\owxbbpjpch.jpg
Skipping already existing frame: D:/01.deepfake/frames\real\vpmyeepbep.jpg
Skipping already existing frame: D:/01.deepfake/frames\real\fzvpbrzssi.jpg
Skipping already existing frame: D:/01.deepfake/frames\fake\htorvhbcae.jpg
Skipping already existing frame: D:/01.deepfake/frames\fake\fckxaqjbxk.jpg
Skipping already existing frame: D:/01.deepfake/frames\fake\sphirandia.jpg
Skipping already existing frame: D:/01.deepfake/frames\fake\vsmadeuczx.jpg
Skipping already existing frame: D:/01.deepfake/frames\fake\ohaqlzfnuv.jpg
Skipping already existing frame: D:/01.deepfake/frames\fake\komngcqveq.jpg
Skipping already existing frame: D:/01.deepfake/frames\fake\iafvzgpbix.jpg
Skipping already existing frame: D:/01.deepfake/frames\fake\uaukglhmje.jpg
Skipping already existing frame: D:/01.deepfake/frames\real\syxobtuucp.jpg
Skipping already existing frame: D:/

In [115]:
import torch
import torch.utils.data as data
from torchvision import transforms
import os
from PIL import Image

# Rebuild the run configuration.
args = easydict.EasyDict(
    gpu=0,
    num_workers=4,  # adjust to the machine

    # Data locations
    root="D:/01.deepfake/dfdc_train_part_0 - 복사본",
    train_list="D:/01.deepfake/dfdc_train_part_0 - 복사본/train_list_1st.txt",
    valid_list="D:/01.deepfake/dfdc_train_part_0 - 복사본/test_list_1st.txt",

    learning_rate=0.001,
    num_epochs=1,
    batch_size=16,  # adjust to the available GPU memory

    save_fn="D:/01.deepfake/models/deepfake_c0_xception_tuned.pth.tar",
)

# Datasets for the two splits.
train_dataset = DFDCDatatset(
    root_path=args.root,
    list_file=args.train_list,
    transform=xception_default["train"],
)

valid_dataset = DFDCDatatset(
    root_path=args.root,
    list_file=args.valid_list,
    transform=xception_default["valid"],
)

print(f"Training dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(valid_dataset)}")

# Loaders: the training loader shuffles and pins memory, the validation
# loader does neither.
train_loader = data.DataLoader(
    dataset=train_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=args.num_workers,
    pin_memory=True,
)

valid_loader = data.DataLoader(
    dataset=valid_dataset,
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=args.num_workers,
    pin_memory=False,
)

Training dataset size: 4135
Validation dataset size: 1035


In [116]:
# # 1. DataLoader 설정 수정
# train_loader = torch.utils.data.DataLoader(
#     train_dataset,
#     batch_size=args.batch_size,
#     shuffle=True,
#     num_workers=0,  # worker 수를 0으로 설정하여 디버깅
#     pin_memory=True,
# )

# valid_loader = torch.utils.data.DataLoader(
#     valid_dataset,
#     batch_size=args.batch_size,
#     shuffle=False,
#     num_workers=0,  # worker 수를 0으로 설정하여 디버깅
#     pin_memory=False,
# )

# # 2. 학습 함수에 예외 처리 추가
# def train(train_loader, model, criterion, optimizer, epoch):   
#     try:
#         n = 0
#         running_loss = 0.0
#         running_corrects = 0

#         model.train()

#         with tqdm.tqdm(train_loader, total=len(train_loader), desc="Train", file=sys.stdout) as iterator:
#             for images, target in iterator:
#                 try:
#                     if args.gpu is not None:
#                         images = images.cuda(args.gpu, non_blocking=True)
#                         target = target.cuda(args.gpu, non_blocking=True)

#                     outputs = model(images)
#                     _, pred = torch.max(outputs.data, 1)

#                     loss = criterion(outputs, target)

#                     optimizer.zero_grad()
#                     loss.backward()
#                     optimizer.step()

#                     n += images.size(0)
#                     running_loss += loss.item() * images.size(0)
#                     running_corrects += torch.sum(pred == target.data)

#                     epoch_loss = running_loss / float(n)
#                     epoch_acc = running_corrects / float(n)

#                     log = 'loss - {:.4f}, acc - {:.3f}'.format(epoch_loss, epoch_acc)
#                     iterator.set_postfix_str(log)
                
#                 except Exception as e:
#                     print(f"Error in training batch: {str(e)}")
#                     continue

#         scheduler.step()
    
#     except Exception as e:
#         print(f"Error in training: {str(e)}")
#         raise e

# # 3. 실행
# print('-' * 50)
# print('Epoch {}/{}'.format(1, args.num_epochs))

# try:
#     train(train_loader, model, criterion, optimizer, 0)
#     acc = validate(valid_loader, model, criterion)

#     save_checkpoint(state={'epoch': args.num_epochs + 1,
#                           'state_dict': model.state_dict(),
#                           'best_acc1': acc,
#                           'optimizer': optimizer.state_dict(),},
#                    is_best=False,
#                    filename=args.save_fn,
#                    )
# except Exception as e:
#     print(f"Training failed: {str(e)}")

In [117]:
# # 디바이스 설정
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = model.to(device)  # 모델을 디바이스로 이동

# for epoch in range(args.num_epochs):
#     model.train()
#     train_loss = 0
#     for inputs, labels in train_loader:  # 튜플 언패킹
#         # 데이터를 디바이스로 이동
#         inputs = inputs.to(device)
#         labels = labels.to(device)

#         # Forward, backward, optimize
#         optimizer.zero_grad()
#         outputs = model(inputs)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         optimizer.step()
#         train_loss += loss.item()

#     # Validation 루프
#     model.eval()
#     valid_loss = 0
#     correct = 0
#     total = 0
#     with torch.no_grad():
#         for inputs, labels in valid_loader:  # 튜플 언패킹
#             # 데이터를 디바이스로 이동
#             inputs = inputs.to(device)
#             labels = labels.to(device)

#             outputs = model(inputs)
#             loss = criterion(outputs, labels)
#             valid_loss += loss.item()
            
#             # Accuracy 계산
#             _, predicted = torch.max(outputs, 1)
#             correct += (predicted == labels).sum().item()
#             total += labels.size(0)

#     valid_accuracy = correct / total  # Validation accuracy 계산

#     print(f"Epoch {epoch + 1}/{args.num_epochs} - Train Loss: {train_loss:.4f}, "
#           f"Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}")

#     # 가장 좋은 모델만 저장
#     best_metric = save_best_model(model, optimizer, epoch, save_path, best_metric, valid_accuracy)


In [118]:
# # 1. DataLoader 설정
# train_loader = torch.utils.data.DataLoader(
#     train_dataset,
#     batch_size=args.batch_size,
#     shuffle=True,
#     num_workers=0,  # worker 수를 0으로 설정하여 디버깅
#     pin_memory=True,
# )

# valid_loader = torch.utils.data.DataLoader(
#     valid_dataset,
#     batch_size=args.batch_size,
#     shuffle=False,
#     num_workers=0,  # worker 수를 0으로 설정하여 디버깅
#     pin_memory=False,
# )
# args = easydict.EasyDict({
#     "batch_size": 32,
#     "num_epochs": 10,
#     "gpu": 0,
#     "save_dir": "./checkpoints"  # 모델 저장 경로 설정
# })

# # 2. 학습 함수 수정
# def train(train_loader, model, criterion, optimizer, epoch):   
#     try:
#         n = 0
#         running_loss = 0.0
#         running_corrects = 0

#         model.train()

#         with tqdm.tqdm(train_loader, total=len(train_loader), desc=f"Train Epoch {epoch}", file=sys.stdout) as iterator:
#             for images, target in iterator:
#                 try:
#                     if args.gpu is not None:
#                         images = images.cuda(args.gpu, non_blocking=True)
#                         target = target.cuda(args.gpu, non_blocking=True)

#                     outputs = model(images)
#                     _, pred = torch.max(outputs.data, 1)

#                     loss = criterion(outputs, target)

#                     optimizer.zero_grad()
#                     loss.backward()
#                     optimizer.step()

#                     n += images.size(0)
#                     running_loss += loss.item() * images.size(0)
#                     running_corrects += torch.sum(pred == target.data)

#                     epoch_loss = running_loss / float(n)
#                     epoch_acc = running_corrects / float(n)

#                     log = 'loss - {:.4f}, acc - {:.3f}'.format(epoch_loss, epoch_acc)
#                     iterator.set_postfix_str(log)
                
#                 except Exception as e:
#                     print(f"Error in training batch: {str(e)}")
#                     continue

#         scheduler.step()
    
#     except Exception as e:
#         print(f"Error in training: {str(e)}")
#         raise e

# # 3. 검증 함수 (이미 존재한다고 가정, 필요시 추가 작성)
# def validate(valid_loader, model, criterion):
#     model.eval()
#     running_loss = 0.0
#     running_corrects = 0
#     n = 0

#     with torch.no_grad():
#         with tqdm.tqdm(valid_loader, total=len(valid_loader), desc="Validate", file=sys.stdout) as iterator:
#             for images, target in iterator:
#                 if args.gpu is not None:
#                     images = images.cuda(args.gpu, non_blocking=True)
#                     target = target.cuda(args.gpu, non_blocking=True)

#                 outputs = model(images)
#                 _, pred = torch.max(outputs.data, 1)
#                 loss = criterion(outputs, target)

#                 n += images.size(0)
#                 running_loss += loss.item() * images.size(0)
#                 running_corrects += torch.sum(pred == target.data)

#     val_loss = running_loss / float(n)
#     val_acc = running_corrects / float(n)
#     return val_loss, val_acc

# # 4. 실행 (Best 모델 저장 로직 포함)
# best_accuracy = 0.0  # 최고 정확도 초기화
# save_dir = args.save_dir  # 모델 저장 경로

# print('-' * 50)
# for epoch in range(args.num_epochs):
#     print(f"Epoch {epoch + 1}/{args.num_epochs}")
#     try:
#         # 학습
#         train(train_loader, model, criterion, optimizer, epoch)
        
#         # 검증
#         val_loss, val_acc = validate(valid_loader, model, criterion)
#         print(f"Validation Loss: {val_loss:.4f}, Accuracy: {val_acc:.4f}")
        
#         # 베스트 모델 저장
#         if val_acc > best_accuracy:
#             best_accuracy = val_acc
#             save_path = f"{save_dir}/best_model_epoch_{epoch + 1}.pth"
#             torch.save({
#                 'epoch': epoch + 1,
#                 'model_state_dict': model.state_dict(),
#                 'optimizer_state_dict': optimizer.state_dict(),
#                 'accuracy': best_accuracy,
#             }, save_path)
#             print(f"New best model saved at epoch {epoch + 1} with accuracy: {best_accuracy:.4f}")

#     except Exception as e:
#         print(f"Training failed at epoch {epoch + 1}: {str(e)}")

In [None]:
from torch.utils.tensorboard import SummaryWriter
import torch.utils.data
import tqdm
import sys
import easydict

# 1. Data loaders, loading in the main process (num_workers=0).
# NOTE(review): the loaders are built with the batch size of the `args` that
# is in effect at this point; the `args` defined just below (batch_size=32)
# replaces the configuration only afterwards. Confirm which batch size is
# intended.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=0,
    pin_memory=True,
)

valid_loader = torch.utils.data.DataLoader(
    dataset=valid_dataset,
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=0,
    pin_memory=False,
)

# Configuration for the TensorBoard run; this replaces the previous `args`.
args = easydict.EasyDict(
    batch_size=32,
    num_epochs=30,
    gpu=0,
    save_dir="./checkpoints",
    log_dir="./runs",  # TensorBoard log directory
)

# 2. 학습 함수 수정 (TensorBoard 추가)
def train(train_loader, model, criterion, optimizer, epoch, writer):
    """Train for one epoch and log metrics to TensorBoard.

    Per-batch loss/accuracy are logged under ``Training/Batch ...`` and the
    epoch averages plus the current learning rate under ``Training/...``.
    A batch that raises is reported and skipped. The module-level
    ``scheduler`` is stepped once at the end, and the module-level ``args``
    selects the GPU.

    Args:
        train_loader: Iterable of ``(images, target)`` batches.
        model: Network to train; switched to train mode.
        criterion: Loss function called as ``criterion(outputs, target)``.
        optimizer: Optimizer whose ``step`` is called once per batch.
        epoch: Zero-based epoch index, used for the TensorBoard step.
        writer: Object exposing ``add_scalar(tag, value, step)``.

    Raises:
        RuntimeError: If no batch was trained successfully. Previously this
            case ended in an ``UnboundLocalError`` on ``epoch_loss``, which
            hid the real cause.
    """
    try:
        n = 0
        running_loss = 0.0
        running_corrects = 0
        failed_batches = 0

        model.train()

        with tqdm.tqdm(train_loader, total=len(train_loader), desc=f"Train Epoch {epoch}", file=sys.stdout) as iterator:
            for i, (images, target) in enumerate(iterator):
                try:
                    if args.gpu is not None:
                        images = images.cuda(args.gpu, non_blocking=True)
                        target = target.cuda(args.gpu, non_blocking=True)

                    outputs = model(images)
                    _, pred = torch.max(outputs.data, 1)

                    loss = criterion(outputs, target)

                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    # Keep the counters as plain Python numbers.
                    batch_size = images.size(0)
                    batch_loss = loss.item()
                    batch_corrects = torch.sum(pred == target.data).item()

                    n += batch_size
                    running_loss += batch_loss * batch_size
                    running_corrects += batch_corrects

                    epoch_loss = running_loss / float(n)
                    epoch_acc = running_corrects / float(n)

                    # Per-batch loss and accuracy.
                    step = epoch * len(train_loader) + i
                    writer.add_scalar('Training/Batch Loss', batch_loss, step)
                    writer.add_scalar('Training/Batch Accuracy', batch_corrects / float(batch_size), step)

                    log = 'loss - {:.4f}, acc - {:.3f}'.format(epoch_loss, epoch_acc)
                    iterator.set_postfix_str(log)

                except Exception as e:
                    # Best effort: report the failing batch and keep going.
                    failed_batches += 1
                    print(f"Error in training batch: {str(e)}")
                    continue

        if n == 0:
            raise RuntimeError(
                f"No batch was trained successfully in epoch {epoch} "
                f"({failed_batches} batch(es) failed)"
            )

        # Epoch averages and the learning rate used during this epoch.
        writer.add_scalar('Training/Epoch Loss', running_loss / float(n), epoch)
        writer.add_scalar('Training/Epoch Accuracy', running_corrects / float(n), epoch)
        writer.add_scalar('Training/Learning Rate', optimizer.param_groups[0]['lr'], epoch)

        scheduler.step()

    except Exception as e:
        print(f"Error in training: {str(e)}")
        # Bare raise keeps the original traceback.
        raise

# 3. Validation function (with TensorBoard logging)
def validate(valid_loader, model, criterion, epoch, writer):
    """Evaluate ``model`` on ``valid_loader`` and log the result to TensorBoard.

    Reads the module-level ``args.gpu`` to decide whether batches are moved
    to a CUDA device.

    Args:
        valid_loader: Iterable yielding ``(images, target)`` batches; must
            support ``len()`` (used as the progress-bar total).
        model: Network to evaluate; it is switched to eval mode here.
        criterion: Loss callable invoked as ``criterion(outputs, target)``.
        epoch: Value used as the TensorBoard global step.
        writer: Object exposing ``add_scalar(tag, value, step)``.

    Returns:
        ``(val_loss, val_acc)``: the batch-size-weighted mean loss (float) and
        the accuracy (0-dim float tensor).

    Raises:
        ValueError: If the loader yields no samples, since the averages would
            be undefined.
    """
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    n = 0

    with torch.no_grad():
        with tqdm.tqdm(valid_loader, total=len(valid_loader), desc="Validate", file=sys.stdout) as iterator:
            for images, target in iterator:
                if args.gpu is not None:
                    images = images.cuda(args.gpu, non_blocking=True)
                    target = target.cuda(args.gpu, non_blocking=True)

                outputs = model(images)
                # Index of the largest logit per sample is the predicted class.
                _, pred = torch.max(outputs, 1)
                loss = criterion(outputs, target)

                batch_size = images.size(0)
                n += batch_size
                # Weight the batch loss by its size so a smaller final batch
                # does not skew the average.
                running_loss += loss.item() * batch_size
                running_corrects += torch.sum(pred == target)

    if n == 0:
        # Previously this surfaced as an opaque ZeroDivisionError.
        raise ValueError("validate() received no samples: valid_loader is empty")

    val_loss = running_loss / float(n)
    val_acc = running_corrects.float() / float(n)

    # Log the validation results to TensorBoard
    writer.add_scalar('Validation/Loss', val_loss, epoch)
    writer.add_scalar('Validation/Accuracy', val_acc, epoch)

    return val_loss, val_acc

# 4. Run training (with TensorBoard setup)
best_accuracy = 0.0
save_dir = args.save_dir
# torch.save does not create missing directories; without this every
# checkpoint write would fail and be swallowed by the per-epoch handler below.
os.makedirs(save_dir, exist_ok=True)

# Initialise the TensorBoard writer
writer = SummaryWriter(args.log_dir)

try:
    # Record the model graph in TensorBoard (optional)
    dummy_input = torch.rand(1, 3, 224, 224).cuda(args.gpu)  # adjust to the real input size
    writer.add_graph(model, dummy_input)

    print('-' * 50)
    for epoch in range(args.num_epochs):
        print(f"Epoch {epoch + 1}/{args.num_epochs}")
        try:
            # Train
            train(train_loader, model, criterion, optimizer, epoch, writer)

            # Validate
            val_loss, val_acc = validate(valid_loader, model, criterion, epoch, writer)
            print(f"Validation Loss: {val_loss:.4f}, Accuracy: {val_acc:.4f}")

            # Save the model whenever validation accuracy improves
            if val_acc > best_accuracy:
                # Keep a plain float so the checkpoint does not embed a
                # device tensor in its metadata.
                best_accuracy = float(val_acc)
                save_path = f"{save_dir}/best_model_epoch_{epoch + 1}.pth"
                torch.save({
                    'epoch': epoch + 1,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'accuracy': best_accuracy,
                }, save_path)
                print(f"New best model saved at epoch {epoch + 1} with accuracy: {best_accuracy:.4f}")

        except Exception as e:
            # Best effort: report the failure and move on to the next epoch.
            print(f"Training failed at epoch {epoch + 1}: {str(e)}")
finally:
    # Close the writer even if the run is interrupted, so buffered events
    # are flushed to disk.
    writer.close()

--------------------------------------------------
Epoch 1/30
Train Epoch 0: 100%|██████████| 259/259 [01:59<00:00,  2.17it/s, loss - 0.6566, acc - 0.641]
Validate: 100%|██████████| 65/65 [00:22<00:00,  2.90it/s]
Validation Loss: 0.6456, Accuracy: 0.6406
New best model saved at epoch 1 with accuracy: 0.6406
Epoch 2/30
Train Epoch 1: 100%|██████████| 259/259 [01:59<00:00,  2.18it/s, loss - 0.5048, acc - 0.783]
Validate: 100%|██████████| 65/65 [00:22<00:00,  2.91it/s]
Validation Loss: 0.4934, Accuracy: 0.8386
New best model saved at epoch 2 with accuracy: 0.8386
Epoch 3/30
Train Epoch 2: 100%|██████████| 259/259 [02:00<00:00,  2.14it/s, loss - 0.3853, acc - 0.842]
Validate: 100%|██████████| 65/65 [00:23<00:00,  2.82it/s]
Validation Loss: 0.3425, Accuracy: 0.8937
New best model saved at epoch 3 with accuracy: 0.8937
Epoch 4/30
Train Epoch 3: 100%|██████████| 259/259 [02:06<00:00,  2.04it/s, loss - 0.3249, acc - 0.882]
Validate: 100%|██████████| 65/65 [00:24<00:00,  2.66it/s]
Validation Lo

## infer.ipynb
1. 기본 설정
2. Xception 모델  
3. Inference 및 제출 양식 생성

## 1. 기본 설정

In [84]:
import os
import glob
import easydict
import tqdm
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision import transforms

In [85]:
# Inference settings, filled in one attribute at a time.
args = easydict.EasyDict()
args.gpu = 0
# Directory holding the test videos
args.root_test = "D:/01.deepfake/test_videos"
# Where the submission file is written
args.source_file = "D:/01.deepfake/test_submission.csv"
# Trained model checkpoint
args.save_fn = "D:/01.deepfake/models/deepfake_c0_xception_tuned.pth.tar"

# Stop early if the model file is missing
assert os.path.isfile(args.save_fn), f'Model file not found at: {args.save_fn}'

# Stop early if the test data directory is missing
assert os.path.isdir(args.root_test), f'Test directory not found at: {args.root_test}'

# Make sure the directory for the submission file exists
os.makedirs(
    os.path.dirname(args.source_file),
    exist_ok=True,
)

## 2. 모델

In [86]:
"""
Author: Andreas Rössler,
Implemented in https://github.com/ondyari/FaceForensics under MIT license
"""

class SeparableConv2d(nn.Module):
    """Per-channel (grouped) convolution followed by a 1x1 pointwise convolution."""

    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False):
        super().__init__()

        # groups == in_channels: every input channel is filtered on its own.
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=in_channels,
            bias=bias,
        )
        # 1x1 convolution that mixes the channels and sets the output width.
        self.pointwise = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            dilation=1,
            groups=1,
            bias=bias,
        )

    def forward(self, x):
        """Apply the per-channel convolution, then the pointwise convolution."""
        return self.pointwise(self.conv1(x))


class Block(nn.Module):
    """Residual block built from ReLU -> SeparableConv2d -> BatchNorm2d units.

    The main path is an ``nn.Sequential`` of ``reps`` such units, optionally
    followed by a max-pool when ``strides != 1``. The skip path is the input
    itself, or a strided 1x1 convolution plus batch norm when the channel
    count or the stride changes. ``forward`` returns the sum of both paths.
    """
    def __init__(self,in_filters,out_filters,reps,strides=1,start_with_relu=True,grow_first=True):
        """Assemble the main path and, if needed, the projection skip path.

        Args:
            in_filters: Channels of the block input.
            out_filters: Channels of the block output.
            reps: Number of separable-conv units in the main path.
            strides: Stride of the skip convolution and of the trailing
                max-pool; 1 disables the pooling.
            start_with_relu: If False the leading ReLU is removed; if True it
                is replaced by a non-in-place ReLU.
            grow_first: If True the channel change happens in the first unit,
                otherwise in the last one.
        """
        super(Block, self).__init__()

        # A projection is only needed when the shapes of input and output differ.
        if out_filters != in_filters or strides!=1:
            self.skip = nn.Conv2d(in_filters,out_filters,1,stride=strides, bias=False)
            self.skipbn = nn.BatchNorm2d(out_filters)
        else:
            self.skip=None

        # One in-place ReLU instance is reused at every position in `rep`.
        self.relu = nn.ReLU(inplace=True)
        rep=[]

        filters=in_filters
        if grow_first:
            # Change the channel count in the first unit.
            rep.append(self.relu)
            rep.append(SeparableConv2d(in_filters,out_filters,3,stride=1,padding=1,bias=False))
            rep.append(nn.BatchNorm2d(out_filters))
            filters = out_filters

        # Units that keep the channel count unchanged.
        for i in range(reps-1):
            rep.append(self.relu)
            rep.append(SeparableConv2d(filters,filters,3,stride=1,padding=1,bias=False))
            rep.append(nn.BatchNorm2d(filters))

        if not grow_first:
            # Change the channel count in the last unit instead.
            rep.append(self.relu)
            rep.append(SeparableConv2d(in_filters,out_filters,3,stride=1,padding=1,bias=False))
            rep.append(nn.BatchNorm2d(out_filters))

        if not start_with_relu:
            # Drop the leading ReLU.
            rep = rep[1:]
        else:
            # NOTE(review): presumably non-in-place so the block input, which
            # forward() also feeds to the skip path, is not overwritten - confirm.
            rep[0] = nn.ReLU(inplace=False)

        if strides != 1:
            rep.append(nn.MaxPool2d(3,strides,1))
        # The position of each layer in this Sequential determines its
        # state_dict key (rep.<index>...), so the order must not change.
        self.rep = nn.Sequential(*rep)

    def forward(self,inp):
        """Return main path output plus the (optionally projected) input."""
        x = self.rep(inp)

        if self.skip is not None:
            skip = self.skip(inp)
            skip = self.skipbn(skip)
        else:
            skip = inp

        # In-place residual addition.
        x+=skip
        return x


class Xception(nn.Module):
    """Xception backbone: two stem convolutions, twelve ``Block`` stages,
    two separable convolutions and a linear classifier.

    The classifier is created as ``fc``. ``logits`` uses ``last_linear`` when
    a wrapper has installed one and falls back to ``fc`` otherwise, so the
    class is also usable on its own.
    """

    def __init__(self, num_classes=1000):
        """Build all layers.

        Args:
            num_classes: Output size of the final linear layer.
        """
        super(Xception, self).__init__()
        self.num_classes = num_classes

        # Stem
        self.conv1 = nn.Conv2d(3,32,3,2,0,bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(32,64,3,bias=False)
        self.bn2 = nn.BatchNorm2d(64)

        # Strided blocks that widen the channels: 64 -> 128 -> 256 -> 728
        self.block1=Block(64,128,2,2,start_with_relu=False,grow_first=True)
        self.block2=Block(128,256,2,2,start_with_relu=True,grow_first=True)
        self.block3=Block(256,728,2,2,start_with_relu=True,grow_first=True)

        # Eight stride-1 blocks at 728 channels
        self.block4=Block(728,728,3,1,start_with_relu=True,grow_first=True)
        self.block5=Block(728,728,3,1,start_with_relu=True,grow_first=True)
        self.block6=Block(728,728,3,1,start_with_relu=True,grow_first=True)
        self.block7=Block(728,728,3,1,start_with_relu=True,grow_first=True)

        self.block8=Block(728,728,3,1,start_with_relu=True,grow_first=True)
        self.block9=Block(728,728,3,1,start_with_relu=True,grow_first=True)
        self.block10=Block(728,728,3,1,start_with_relu=True,grow_first=True)
        self.block11=Block(728,728,3,1,start_with_relu=True,grow_first=True)

        # Final strided block, widening to 1024 in its last unit
        self.block12=Block(728,1024,2,2,start_with_relu=True,grow_first=False)

        self.conv3 = SeparableConv2d(1024,1536,3,1,1)
        self.bn3 = nn.BatchNorm2d(1536)

        self.conv4 = SeparableConv2d(1536,2048,3,1,1)
        self.bn4 = nn.BatchNorm2d(2048)

        self.fc = nn.Linear(2048, num_classes)

    def features(self, input):
        """Run the convolutional part and return the 2048-channel feature map
        (before the final ReLU, which ``logits`` applies)."""
        x = self.conv1(input)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)

        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        x = self.block6(x)
        x = self.block7(x)
        x = self.block8(x)
        x = self.block9(x)
        x = self.block10(x)
        x = self.block11(x)
        x = self.block12(x)

        x = self.conv3(x)
        x = self.bn3(x)
        x = self.relu(x)

        x = self.conv4(x)
        x = self.bn4(x)
        return x

    def logits(self, features):
        """Apply ReLU, global average pooling and the classifier head.

        Note that the ReLU is in-place, so ``features`` itself is modified.
        """
        x = self.relu(features)

        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(x.size(0), -1)
        # `last_linear` only exists once a wrapper has installed it; a bare
        # Xception still has its head under `fc`. Without this fallback a
        # standalone forward pass raised AttributeError.
        classifier = getattr(self, 'last_linear', None)
        if classifier is None:
            classifier = self.fc
        x = classifier(x)
        return x

    def forward(self, input):
        """Return class logits for a batch of images."""
        x = self.features(input)
        x = self.logits(x)
        return x

## Stock Xception with only a Dropout layer added in front of the classifier
class xception(nn.Module):
    """Wrap ``Xception`` and install a fresh classifier as ``last_linear``."""

    def __init__(self, num_out_classes=2, dropout=0.5):
        """Create the backbone and replace its head.

        Args:
            num_out_classes: Number of output classes.
            dropout: Dropout probability before the linear layer; a falsy
                value installs the linear layer alone.
        """
        super().__init__()

        backbone = Xception(num_classes=num_out_classes)
        self.model = backbone

        # The stock `fc` head is discarded; only its input width is reused.
        feature_dim = backbone.fc.in_features
        del backbone.fc

        head = nn.Linear(feature_dim, num_out_classes)
        if dropout:
            head = nn.Sequential(nn.Dropout(p=dropout), head)
        backbone.last_linear = head

    def forward(self, x):
        """Return the wrapped model's logits for ``x``."""
        return self.model(x)

In [87]:
# Preprocessing pipelines per split. All of them centre-crop to 299x299,
# convert to a tensor and normalise with mean/std 0.5 per channel; only the
# training pipeline adds a random horizontal flip before normalisation.
xception_default = {
    split: transforms.Compose(
        [transforms.CenterCrop((299, 299)), transforms.ToTensor()]
        + ([transforms.RandomHorizontalFlip()] if split == 'train' else [])
        + [transforms.Normalize([0.5] * 3, [0.5] * 3)]
    )
    for split in ('train', 'valid', 'test')
}

In [88]:
# Module-level preprocessing used by the inference cells below: the 'test'
# pipeline (centre crop, tensor conversion, normalisation; no augmentation).
transform = xception_default['test']

## 3. Inference 및 제출 양식 생성

In [89]:
import os
import glob
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image
import cv2
from tqdm import tqdm
import numpy as np
from easydict import EasyDict

def get_default_args():
    """Return the default inference settings as an ``EasyDict``."""
    return EasyDict({
        # GPU index
        "gpu": 0,
        # Paths
        "root_test": "D:/01.deepfake/test_videos",
        "source_file": "D:/01.deepfake/test_submission.csv",
        "save_dir": 'D:/01.deepfake/checkpoints',
        # Model settings
        "num_classes": 2,
        "dropout": 0.5,
    })

def extract_frames(video_path, frames_dir):
    """Save the middle frame of a video as ``<video stem>.jpg`` in ``frames_dir``.

    Args:
        video_path: Path of the video file to read.
        frames_dir: Existing directory that receives the JPEG.

    Returns:
        The path of the written frame, or ``None`` when the video cannot be
        opened, the frame cannot be read, or the image cannot be written.
    """
    video_name = os.path.basename(video_path)
    frame_name = os.path.splitext(video_name)[0] + ".jpg"
    frame_path = os.path.join(frames_dir, frame_name)

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return None

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        middle_frame = total_frames // 2

        cap.set(cv2.CAP_PROP_POS_FRAMES, middle_frame)
        ret, frame = cap.read()
    finally:
        # Release the capture on every path, including exceptions.
        cap.release()

    if not ret:
        return None

    # cv2.imwrite signals failure through its return value rather than an
    # exception; do not report a path for a file that was never written.
    if not cv2.imwrite(frame_path, frame):
        return None

    return frame_path

def load_best_model(model, checkpoint_dir):
    """Load the most recently modified ``best_model_epoch_*.pth`` into ``model``.

    Args:
        model: Module whose ``load_state_dict`` receives the stored weights.
        checkpoint_dir: Directory searched for checkpoint files.

    Returns:
        The same ``model`` object, with the checkpoint weights loaded.

    Raises:
        FileNotFoundError: If no matching checkpoint exists in the directory.
    """
    # Find every best-model checkpoint
    model_files = glob.glob(os.path.join(checkpoint_dir, 'best_model_epoch_*.pth'))

    if not model_files:
        raise FileNotFoundError(f"No best model found in {checkpoint_dir}")

    # Pick the newest file by modification time. (getctime is the creation
    # time on Windows but the metadata-change time on Unix, so it does not
    # reliably mean "last written".)
    latest_model_path = max(model_files, key=os.path.getmtime)

    # Load onto the CPU first so checkpoints written on a GPU machine can be
    # read without CUDA; load_state_dict copies the values into the model's
    # own parameters, wherever they live.
    checkpoint = torch.load(latest_model_path, map_location="cpu")
    model.load_state_dict(checkpoint['model_state_dict'])

    print(f"Loaded latest best model from {os.path.basename(latest_model_path)}")
    # float(): the stored accuracy may be a plain number or a 0-dim tensor.
    print(f"Epoch: {checkpoint['epoch']}, Accuracy: {float(checkpoint['accuracy']):.4f}")

    return model

def inference(args, model, transform):
    """Classify the middle frame of every test video and write a submission CSV.

    Args:
        args: Settings object providing ``gpu``, ``root_test`` (directory with
            ``*.mp4`` files) and ``source_file`` (CSV path to write).
        model: Classifier returning two logits per image.
        transform: Callable turning a PIL image into a model input tensor.

    Side effects:
        Writes extracted frames to ``<root_test>/frames``, writes the CSV with
        header ``path,y`` and prints progress plus the label distribution.
        Videos that could not be processed are left out of the CSV and are
        reported in a warning line.
    """
    # Select the device
    device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()

    # Softmax over the class dimension
    softmax = nn.Softmax(dim=1)

    # Collect the test videos; sorted because glob returns them in arbitrary
    # order and the CSV rows should be reproducible.
    test_videos = sorted(glob.glob(os.path.join(args.root_test, "*.mp4")))
    print(f"Found {len(test_videos)} test videos")

    # Directory for the extracted frames
    frames_dir = os.path.join(args.root_test, "frames")
    os.makedirs(frames_dir, exist_ok=True)

    # Directory for the result file. dirname is '' for a bare file name, and
    # os.makedirs('') raises, so only create it when there is one.
    output_dir = os.path.dirname(args.source_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    predictions = []
    skipped = []
    with torch.no_grad():
        for video_path in tqdm(test_videos, desc="Processing videos"):
            try:
                # Extract the frame
                frame_path = extract_frames(video_path, frames_dir)
                if frame_path is None:
                    print(f"Failed to extract frame from {video_path}")
                    skipped.append(video_path)
                    continue

                # Preprocess the image
                with Image.open(frame_path) as image:
                    image_tensor = transform(image)
                image_tensor = image_tensor.unsqueeze(0).to(device)

                # Predict
                output = model(image_tensor)
                probabilities = softmax(output)[0]

                # Record the result (a tie goes to label 1)
                video_name = os.path.basename(video_path)
                pred_label = 0 if probabilities[0] > probabilities[1] else 1
                predictions.append((video_name, pred_label))

            except Exception as e:
                # Best effort: report and continue with the next video.
                print(f"Error processing {video_path}: {str(e)}")
                skipped.append(video_path)

    # Write the results
    with open(args.source_file, 'w') as f:
        f.write('path,y\n')
        for video_name, label in predictions:
            f.write(f"{video_name},{label}\n")

    if skipped:
        # Make the gap between inputs and CSV rows visible.
        print(f"Warning: {len(skipped)} of {len(test_videos)} videos have no prediction in {args.source_file}")

    # Print the prediction distribution
    labels = [p[1] for p in predictions]
    unique, counts = np.unique(labels, return_counts=True)
    print("\nPrediction distribution:")
    for label, count in zip(unique, counts):
        print(f"Label {label}: {count} ({count/len(labels)*100:.2f}%)")

def main():
    """Build the settings, preprocessing and model, then run inference."""
    config = get_default_args()

    # Same steps as the 'test' pipeline: crop, tensor conversion, normalise.
    preprocess = transforms.Compose([
        transforms.CenterCrop((299, 299)),
        transforms.ToTensor(),
        transforms.Normalize([0.5] * 3, [0.5] * 3),
    ])

    # Create the network and fill it with the stored best weights.
    network = load_best_model(
        xception(num_out_classes=config.num_classes, dropout=config.dropout),
        config.save_dir,
    )

    inference(config, network, preprocess)


if __name__ == "__main__":
    main()

  checkpoint = torch.load(latest_model_path)


Loaded latest best model from best_model_epoch_1.pth
Epoch: 1, Accuracy: 0.8696
Found 400 test videos


Processing videos: 100%|██████████| 400/400 [01:30<00:00,  4.44it/s]


Prediction distribution:
Label 0: 158 (39.50%)
Label 1: 242 (60.50%)





In [90]:
import cv2
import os
import glob
import cv2
from tqdm import tqdm
from PIL import Image
import torch
import torch.nn as nn

def extract_frames_from_test_videos():
    """Extract the middle frame of every ``*.mp4`` under ``args.root_test``.

    Frames are stored as ``<root_test>/frames/<video stem>.jpg``. A video whose
    frame file already exists is not decoded again.

    Returns:
        List of frame paths that exist after the call (reused or newly
        written). Videos whose frame could not be read or written are left
        out and reported in a summary line.
    """
    # Find every mp4 file in the test directory
    test_videos = glob.glob(os.path.join(args.root_test, "*.mp4"))
    print(f"Found {len(test_videos)} test videos")

    # Directory for the extracted frames
    frames_dir = os.path.join(args.root_test, "frames")
    os.makedirs(frames_dir, exist_ok=True)

    extracted_frames = []
    failed_videos = []
    for video_path in tqdm(test_videos, desc="Extracting frames"):
        video_name = os.path.basename(video_path)
        frame_name = os.path.splitext(video_name)[0] + ".jpg"
        frame_path = os.path.join(frames_dir, frame_name)

        # Reuse a frame that was extracted earlier
        if os.path.exists(frame_path):
            print(f"Frame already exists for {video_name}, skipping...")
            extracted_frames.append(frame_path)
            continue

        # Read the middle frame; release the capture even if decoding raises.
        cap = cv2.VideoCapture(video_path)
        try:
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            middle_frame = total_frames // 2
            cap.set(cv2.CAP_PROP_POS_FRAMES, middle_frame)
            ret, frame = cap.read()
        finally:
            cap.release()

        # cv2.imwrite reports failure via its return value; only list frames
        # that were really written, otherwise the later Image.open would fail.
        if ret and cv2.imwrite(frame_path, frame):
            extracted_frames.append(frame_path)
        else:
            failed_videos.append(video_name)

    if failed_videos:
        print(f"Could not extract a frame from {len(failed_videos)} video(s): {failed_videos}")

    return extracted_frames

# Extract frames from the test videos
print("Extracting frames from test videos...")
extracted_frames = extract_frames_from_test_videos()

# Run inference
print("Performing inference...")
m = nn.Softmax(dim=1)  # explicit dim avoids the implicit-dimension warning
# Put the model in eval mode, as inference() does, so dropout and batch norm
# behave deterministically.
model.eval()

# Create the result file; the context manager closes it even if the loop is
# interrupted.
save_csv = args.source_file
with open(save_csv, 'w') as sc:
    sc.write('path,y\n')

    with torch.no_grad():
        for image_path in tqdm(extracted_frames, desc="Inference"):
            try:
                # Load and preprocess the image
                image = Image.open(image_path)
                image = transform(image)
                image = torch.unsqueeze(image, dim=0)
                image = image.cuda(args.gpu, non_blocking=True)

                # Predict
                output = model(image)
                output = m(output)[0]

                # Rebuild the video file name from the frame name
                video_name = os.path.splitext(os.path.basename(image_path))[0] + ".mp4"

                # Record the result (a tie goes to label "1")
                label = "0" if output[0] > output[1] else "1"
                sc.write(f"{video_name},{label}\n")

            except Exception as e:
                # Best effort: report and continue with the next frame.
                print(f"Error processing {image_path}: {str(e)}")

# Check the result
print("\nChecking submission file...")
with open(save_csv, 'r') as f:
    lines = f.readlines()
    print(f"Total predictions: {len(lines)-1}")  # minus the header line
    print("\nFirst few lines:")
    for line in lines[:5]:
        print(line.strip())


Extracting frames from test videos...
Found 400 test videos


Extracting frames: 100%|██████████| 400/400 [00:00<00:00, 34714.59it/s]


Frame already exists for aassnaulhq.mp4, skipping...
Frame already exists for aayfryxljh.mp4, skipping...
Frame already exists for acazlolrpz.mp4, skipping...
Frame already exists for adohdulfwb.mp4, skipping...
Frame already exists for ahjnxtiamx.mp4, skipping...
Frame already exists for ajiyrjfyzp.mp4, skipping...
Frame already exists for aktnlyqpah.mp4, skipping...
Frame already exists for alrtntfxtd.mp4, skipping...
Frame already exists for aomqqjipcp.mp4, skipping...
Frame already exists for apedduehoy.mp4, skipping...
Frame already exists for apvzjkvnwn.mp4, skipping...
Frame already exists for aqrsylrzgi.mp4, skipping...
Frame already exists for axfhbpkdlc.mp4, skipping...
Frame already exists for ayipraspbn.mp4, skipping...
Frame already exists for bcbqxhziqz.mp4, skipping...
Frame already exists for bcvheslzrq.mp4, skipping...
Frame already exists for bdshuoldwx.mp4, skipping...
Frame already exists for bfdopzvxbi.mp4, skipping...
Frame already exists for bfjsthfhbd.mp4, skipp

Inference: 100%|██████████| 400/400 [00:09<00:00, 42.96it/s]


Checking submission file...
Total predictions: 400

First few lines:
path,y
aassnaulhq.mp4,0
aayfryxljh.mp4,1
acazlolrpz.mp4,1
adohdulfwb.mp4,0





In [None]:
import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from tqdm import tqdm
import os
from glob import glob

print("Starting inference...")
m = nn.Softmax(dim=1)
# Put the model in eval mode, as inference() does, so dropout and batch norm
# behave deterministically.
model.eval()


def _predict_frame(image_path, idx=None):
    """Classify one frame and return ``(video_name, pred_label)``.

    When ``idx`` is given, every intermediate step is printed for inspection.
    """
    verbose = idx is not None

    # Load and preprocess the image
    image = Image.open(image_path)
    if verbose:
        print(f"\nProcessing image {idx}: {image_path}")
        print(f"Original image size: {image.size}")

    image_tensor = transform(image)
    if verbose:
        print(f"Transformed tensor shape: {image_tensor.shape}")
        print(f"Tensor range: [{image_tensor.min():.2f}, {image_tensor.max():.2f}]")

    # Add the batch dimension and move to the GPU
    image_tensor = torch.unsqueeze(image_tensor, dim=0)
    image_tensor = image_tensor.cuda(args.gpu, non_blocking=True)

    # Predict
    output = model(image_tensor)
    if verbose:
        print(f"Raw model output: {output[0].cpu().numpy()}")

    # Apply softmax
    probabilities = m(output)[0]
    if verbose:
        print(f"Probabilities: {probabilities.cpu().numpy()}")

    # A tie goes to label 1
    pred_label = 0 if probabilities[0] > probabilities[1] else 1
    if verbose:
        confidence = float(max(probabilities[0], probabilities[1]))
        print(f"Predicted label: {pred_label} (confidence: {confidence:.4f})")

    video_name = os.path.splitext(os.path.basename(image_path))[0] + ".mp4"
    return video_name, pred_label


# Open the result file
with open(save_csv, 'w') as sc:
    # Write the header
    sc.write('path,y\n')

    with torch.no_grad():
        # Inspect the first five images in detail
        for idx, image_path in enumerate(extracted_frames[:5]):
            try:
                video_name, pred_label = _predict_frame(image_path, idx=idx)
                sc.write(f"{video_name},{pred_label}\n")
            except Exception as e:
                print(f"Error processing {image_path}: {str(e)}")

        # Process the remaining images without the detailed output
        for image_path in tqdm(extracted_frames[5:], desc="Processing remaining"):
            try:
                video_name, pred_label = _predict_frame(image_path)
                sc.write(f"{video_name},{pred_label}\n")
            except Exception as e:
                print(f"Error processing {image_path}: {str(e)}")

print("\nChecking predictions distribution...")
# Check the distribution of predicted labels
with open(save_csv, 'r') as f:
    lines = f.readlines()[1:]  # skip the header
    predictions = [int(line.strip().split(',')[1]) for line in lines]
    unique, counts = np.unique(predictions, return_counts=True)
    for label, count in zip(unique, counts):
        print(f"Label {label}: {count} ({count/len(predictions)*100:.2f}%)")

Starting inference...

Processing image 0: D:/01.deepfake/test_videos\frames\aassnaulhq.jpg
Original image size: (1920, 1080)
Transformed tensor shape: torch.Size([3, 299, 299])
Tensor range: [-1.00, 0.15]
Raw model output: [ 0.5769574 -0.611828 ]
Probabilities: [0.7665238  0.23347625]
Predicted label: 0 (confidence: 0.7665)

Processing image 1: D:/01.deepfake/test_videos\frames\aayfryxljh.jpg
Original image size: (1920, 1080)
Transformed tensor shape: torch.Size([3, 299, 299])
Tensor range: [-1.00, 1.00]
Raw model output: [-0.6790594  0.6696099]
Probabilities: [0.206088 0.793912]
Predicted label: 1 (confidence: 0.7939)

Processing image 2: D:/01.deepfake/test_videos\frames\acazlolrpz.jpg
Original image size: (1920, 1080)
Transformed tensor shape: torch.Size([3, 299, 299])
Tensor range: [-1.00, 1.00]
Raw model output: [-1.3310783  1.4114515]
Probabilities: [0.06050992 0.93949   ]
Predicted label: 1 (confidence: 0.9395)

Processing image 3: D:/01.deepfake/test_videos\frames\adohdulfwb.j

Processing remaining: 100%|██████████| 395/395 [00:09<00:00, 42.43it/s]


Checking predictions distribution...
Label 0: 158 (39.50%)
Label 1: 242 (60.50%)





In [92]:
# Training settings, passed to EasyDict as keyword arguments.
args = easydict.EasyDict(
    gpu=0,
    num_workers=4,
    root="D:/01.deepfake/dfdc_train_part_0 - 복사본",
    train_list="D:/01.deepfake/dfdc_train_part_0 - 복사본/train_list_1st.txt",
    valid_list="D:/01.deepfake/dfdc_train_part_0 - 복사본/test_list_1st.txt",
    learning_rate=0.001,
    num_epochs=1,
    batch_size=16,
    save_fn="D:/01.deepfake/models/deepfake_c0_xception_tuned.pth.tar",
)


In [93]:
def create_train_valid_lists(frames_path, train_ratio=0.8):
    """Split the frame images under ``frames_path`` into train and validation sets.

    ``<frames_path>/real/*.jpg`` and ``<frames_path>/fake/*.jpg`` are shuffled
    and split separately, so both sets keep the real/fake proportion.

    Args:
        frames_path: Directory containing the ``real`` and ``fake`` folders.
        train_ratio: Fraction of each class assigned to the training set.

    Returns:
        ``(train_images, valid_images)``: two lists of file paths, each with
        the real images first and the fake images after them.
    """
    # Imported locally because the notebook cells bind `glob` both as the
    # module and as the function, and never import `random` at module level.
    import random
    from glob import glob as find_files

    real_images = find_files(os.path.join(frames_path, "real", "*.jpg"))
    fake_images = find_files(os.path.join(frames_path, "fake", "*.jpg"))

    random.shuffle(real_images)
    random.shuffle(fake_images)

    real_split = int(len(real_images) * train_ratio)
    fake_split = int(len(fake_images) * train_ratio)

    train_images = real_images[:real_split] + fake_images[:fake_split]
    valid_images = real_images[real_split:] + fake_images[fake_split:]

    # The split used to be computed and then dropped; hand it to the caller.
    return train_images, valid_images


In [94]:
# Build the training and validation datasets (in that order) from their list
# files, each with the preprocessing pipeline of its split.
train_dataset, valid_dataset = (
    DFDCDatatset(args.root, list_file, xception_default[split])
    for list_file, split in (
        (args.train_list, "train"),
        (args.valid_list, "valid"),
    )
)


In [95]:
# Create the datasets
train_dataset = DFDCDatatset(
    args.root,
    args.train_list,
    xception_default["train"],
)
valid_dataset = DFDCDatatset(
    args.root,
    args.valid_list,
    xception_default["valid"],
)

for title, dataset in (("Training", train_dataset), ("Validation", valid_dataset)):
    print(f"{title} dataset size: {len(dataset)}")

# Create the data loaders; only the training data is shuffled
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=args.batch_size,
    shuffle=True,
)
valid_loader = torch.utils.data.DataLoader(
    dataset=valid_dataset,
    batch_size=args.batch_size,
    shuffle=False,
)


Training dataset size: 4135
Validation dataset size: 1035


In [96]:
# Re-create both datasets from the current settings.
train_dataset, valid_dataset = (
    DFDCDatatset(args.root, args.train_list, xception_default["train"]),
    DFDCDatatset(args.root, args.valid_list, xception_default["valid"]),
)


In [97]:
import json
import os

def create_train_valid_lists(metadata_path, video_dir, output_dir, train_ratio=0.8, seed=None):
    """Write label-stratified train/validation video lists from a metadata JSON file.

    The metadata file must be a JSON object mapping each video file name to a
    dict that has a ``"label"`` key. Names labelled ``"REAL"`` and ``"FAKE"``
    are shuffled separately and each group is cut at ``train_ratio``, so both
    lists keep roughly the class balance of the metadata. Entries with any
    other label are ignored.

    Two files are written into ``output_dir`` (created if missing):
    ``train_list.txt`` and ``valid_list.txt``. Every line has the form
    ``<os.path.join(video_dir, name)> <label>`` where the label is ``0`` for
    REAL and ``1`` for FAKE. Within each file all REAL entries come first.

    NOTE(review): path and label are separated by a single space, and the
    paths used in this notebook contain spaces themselves; confirm that the
    dataset reader splits on the last space only.

    Args:
        metadata_path: Path of the metadata JSON file.
        video_dir: Directory prefix joined in front of every video name.
        output_dir: Directory that receives the two list files.
        train_ratio: Fraction of each class assigned to the training list.
            The cut index is truncated with ``int()``.
        seed: Optional seed for a private ``random.Random`` instance, making
            the split reproducible. ``None`` (the default) shuffles with the
            module-level ``random`` state, exactly as before.

    Returns:
        None. The lists are written to disk and a summary is printed.

    Raises:
        KeyError: If a metadata entry has no ``"label"`` key.
    """
    import random

    # Load the metadata file.
    with open(metadata_path, "r") as f:
        metadata = json.load(f)

    # Separate REAL and FAKE videos.
    real_videos = [video for video, info in metadata.items() if info["label"] == "REAL"]
    fake_videos = [video for video, info in metadata.items() if info["label"] == "FAKE"]

    # Shuffle each class on its own so the cut below picks a random subset.
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(real_videos)
    rng.shuffle(fake_videos)

    # Train/validation split, performed per class.
    real_split = int(len(real_videos) * train_ratio)
    fake_split = int(len(fake_videos) * train_ratio)

    train_real, valid_real = real_videos[:real_split], real_videos[real_split:]
    train_fake, valid_fake = fake_videos[:fake_split], fake_videos[fake_split:]

    train_videos = train_real + train_fake
    valid_videos = valid_real + valid_fake

    os.makedirs(output_dir, exist_ok=True)

    # A set gives O(1) membership tests; looking each name up in the list
    # made writing the files quadratic in the number of videos.
    real_names = set(real_videos)

    def save_list(video_list, filename):
        """Write one ``<path> <label>`` line per video into output_dir/filename."""
        with open(os.path.join(output_dir, filename), "w") as f:
            for video in video_list:
                label = "0" if video in real_names else "1"  # 0: REAL, 1: FAKE
                f.write(f"{os.path.join(video_dir, video)} {label}\n")

    save_list(train_videos, "train_list.txt")
    save_list(valid_videos, "valid_list.txt")

    print(f"Train videos: {len(train_videos)} (REAL: {len(train_real)}, FAKE: {len(train_fake)})")
    print(f"Validation videos: {len(valid_videos)} (REAL: {len(valid_real)}, FAKE: {len(valid_fake)})")

# Locations used for the split: where the list files go, the metadata JSON
# describing each video, and the directory prefix written in front of every
# video name.
output_dir = "D:/01.deepfake/dfdc_train_part_0 - 복사본"
metadata_path = "D:/01.deepfake/dfdc_train_part_0 - 복사본/metadata.json"
video_dir = "D:/01.deepfake/dfdc_train_part_0 - 복사본/videos"

# Generate train_list.txt / valid_list.txt inside output_dir.
# NOTE(review): args.train_list / args.valid_list point at
# train_list_1st.txt / test_list_1st.txt, not at the files written here;
# confirm which lists training is supposed to read.
create_train_valid_lists(
    metadata_path=metadata_path,
    video_dir=video_dir,
    output_dir=output_dir,
)


Train videos: 4135 (REAL: 1923, FAKE: 2212)
Validation videos: 1035 (REAL: 481, FAKE: 554)


In [98]:
# import os
# import json

# # 폴더 경로 설정
# base_path = r"D:\임시\새 폴더"
# folders = [f"dfdc_train_part_{i}" for i in range(2, 10)]  # dfdc_train_part_2 to dfdc_train_part_9

# # REAL 데이터 갯수 계산
# real_counts = {}

# for folder in folders:
#     folder_path = os.path.join(base_path, folder)
#     metadata_path = os.path.join(folder_path, "metadata.json")
    
#     if os.path.exists(metadata_path):  # Check if metadata.json exists
#         with open(metadata_path, "r") as f:
#             metadata = json.load(f)
        
#         # Count REAL labels
#         real_count = sum(1 for item in metadata.values() if item["label"] == "REAL")
#         real_counts[folder] = real_count
#     else:
#         print(f"metadata.json not found in {folder_path}")

# # 결과 출력
# for folder, count in real_counts.items():
#     print(f"{folder}: {count} REAL videos")


In [99]:
# import os
# import json
# import shutil

# # 원본 폴더 및 대상 폴더 경로 설정
# source_base_path = r"D:\임시\새 폴더"
# destination_path = r"D:\01.deepfake\dfdc_train_part_0 - 복사본"
# folders = [f"dfdc_train_part_{i}" for i in range(2, 10)]  # dfdc_train_part_2 to dfdc_train_part_9

# # 대상 폴더에 metadata.json 파일 초기화
# metadata_file = os.path.join(destination_path, "metadata.json")
# if not os.path.exists(destination_path):
#     os.makedirs(destination_path)
# if not os.path.exists(metadata_file):
#     with open(metadata_file, "w") as f:
#         json.dump({}, f)

# # 기존 metadata.json 읽기
# with open(metadata_file, "r") as f:
#     combined_metadata = json.load(f)

# # REAL 데이터 복사 및 메타데이터 업데이트
# for folder in folders:
#     folder_path = os.path.join(source_base_path, folder)
#     metadata_path = os.path.join(folder_path, "metadata.json")
    
#     if os.path.exists(metadata_path):  # Check if metadata.json exists
#         with open(metadata_path, "r") as f:
#             metadata = json.load(f)
        
#         for file_name, data in metadata.items():
#             if data["label"] == "REAL":
#                 source_file = os.path.join(folder_path, file_name)
#                 destination_file = os.path.join(destination_path, file_name)
                
#                 if os.path.exists(source_file):  # Check if source file exists
#                     # 파일 복사
#                     shutil.copy2(source_file, destination_file)
                    
#                     # 메타데이터 추가
#                     combined_metadata[file_name] = data
#                 else:
#                     print(f"File not found: {source_file}")
#     else:
#         print(f"metadata.json not found in {folder_path}")

# # 업데이트된 metadata.json 저장
# with open(metadata_file, "w") as f:
#     json.dump(combined_metadata, f, indent=4)

# print("REAL 데이터 복사 및 메타데이터 업데이트 완료!")


In [100]:
# import os
# import json
# import shutil

# # 경로 설정
# source_folder = r"D:\임시\새 폴더\dfdc_train_part_2"
# destination_folder = r"D:\01.deepfake\dfdc_train_part_0 - 복사본"
# source_metadata_file = os.path.join(source_folder, "metadata.json")
# destination_metadata_file = os.path.join(destination_folder, "metadata.json")

# # 대상 폴더와 metadata.json 초기화
# if not os.path.exists(destination_folder):
#     os.makedirs(destination_folder)
# if not os.path.exists(destination_metadata_file):
#     with open(destination_metadata_file, "w") as f:
#         json.dump({}, f)

# # 기존 metadata.json 읽기
# with open(destination_metadata_file, "r") as f:
#     destination_metadata = json.load(f)

# # 원본 metadata.json 읽기
# if not os.path.exists(source_metadata_file):
#     print(f"metadata.json not found in {source_folder}")
# else:
#     with open(source_metadata_file, "r") as f:
#         source_metadata = json.load(f)

#     # FAKE 라벨의 동영상 파일 복사 및 메타데이터 업데이트
#     for file_name, data in source_metadata.items():
#         if data["label"] == "FAKE":
#             source_file = os.path.join(source_folder, file_name)
#             destination_file = os.path.join(destination_folder, file_name)
            
#             if os.path.exists(source_file):  # 파일이 존재하는 경우에만 처리
#                 # 파일 복사
#                 shutil.copy2(source_file, destination_file)
                
#                 # 메타데이터 업데이트
#                 destination_metadata[file_name] = data
#             else:
#                 print(f"File not found: {source_file}")

#     # 업데이트된 metadata.json 저장
#     with open(destination_metadata_file, "w") as f:
#         json.dump(destination_metadata, f, indent=4)

# print("FAKE 데이터 복사 및 메타데이터 업데이트 완료!")
