In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T

import os, cv2, time
import tqdm
import numpy as np
import math
from itertools import product
import matplotlib.pyplot as plt
from PIL import Image
import io
import pickle

PROJECT_PATH = '.'
DATA_PATH = os.path.join(PROJECT_PATH, 'datas')
MODEL_PATH = os.path.join(PROJECT_PATH, 'checkpoints')
TRAIN_PT_PATH = os.path.join(PROJECT_PATH, 'datas', 'train_data.pt')
VALID_PT_PATH = os.path.join(PROJECT_PATH, 'datas', 'val_data.pt')
CHECKPOINT_PATH = os.path.join(PROJECT_PATH, 'checkpoints')

DATASET_LEN = 12880
BATCH_SIZE = 8
IMAGE_WIDTH = 320
IMAGE_HEIGHT = 256
IMAGE_LABELS = ['background', 'face']

print(torch.__version__)

2.7.0+cu128


In [2]:
def parse_box(data):
    x0 = int(data[0])
    y0 = int(data[1])
    w = int(data[2])
    h = int(data[3])
    return x0, y0, w, h

print('슝=3')

슝=3


In [3]:
def parse_widerface(file):
    infos = []
    with open(file) as fp:
        line = fp.readline()
        while line:
            n_object = int(fp.readline())
            boxes = []
            for i in range(n_object):
                box = fp.readline().split(' ')
                x0, y0, w, h = parse_box(box)
                if (w == 0) or (h == 0):
                    continue
                boxes.append([x0, y0, w, h])
            if n_object == 0:
                box = fp.readline().split(' ')
                x0, y0, w, h = parse_box(box)
                boxes.append([x0, y0, w, h])
            infos.append((line.strip(), boxes))
            line = fp.readline()
    return infos

print('슝=3')

슝=3


In [4]:
def process_image(image_file):
    try:
        with open(image_file, 'rb') as f:
            image_string = f.read()
            image_data = Image.open(io.BytesIO(image_string)).convert('RGB')
            image_data = torch.from_numpy(np.array(image_data)).permute(2, 0, 1)  # HWC to CHW
            return 0, image_string, image_data
    except Exception as e:
        return 1, image_string, None

print('슝=3')

슝=3


In [5]:
def xywh_to_voc(file_name, boxes, image_data):
    shape = image_data.shape
    image_info = {}
    image_info['filename'] = file_name
    image_info['width'] = shape[1]
    image_info['height'] = shape[0]
    image_info['depth'] = 3

    difficult = []
    classes = []
    xmin, ymin, xmax, ymax = [], [], [], []

    for box in boxes:
        classes.append(1)
        difficult.append(0)
        xmin.append(box[0])
        ymin.append(box[1])
        xmax.append(box[0] + box[2])
        ymax.append(box[1] + box[3])
    image_info['class'] = classes
    image_info['xmin'] = xmin
    image_info['ymin'] = ymin
    image_info['xmax'] = xmax
    image_info['ymax'] = ymax
    image_info['difficult'] = difficult

    return image_info

print('슝=3')

슝=3


In [6]:
file_path = os.path.join(DATA_PATH, 'wider_face_split', 'wider_face_train_bbx_gt.txt')
for i, info in enumerate(parse_widerface(file_path)):
    print('--------------------')
    image_file = os.path.join(DATA_PATH, 'WIDER_train', 'images', info[0])
    _, image_string, image_data = process_image(image_file)
    boxes = xywh_to_voc(image_file, info[1], image_data)
    print(boxes)
    if i > 3:
        break

--------------------
{'filename': '.\\datas\\WIDER_train\\images\\0--Parade/0_Parade_marchingband_1_849.jpg', 'width': 1385, 'height': 3, 'depth': 3, 'class': [1], 'xmin': [449], 'ymin': [330], 'xmax': [571], 'ymax': [479], 'difficult': [0]}
--------------------
{'filename': '.\\datas\\WIDER_train\\images\\0--Parade/0_Parade_Parade_0_904.jpg', 'width': 1432, 'height': 3, 'depth': 3, 'class': [1], 'xmin': [361], 'ymin': [98], 'xmax': [624], 'ymax': [437], 'difficult': [0]}
--------------------
{'filename': '.\\datas\\WIDER_train\\images\\0--Parade/0_Parade_marchingband_1_799.jpg', 'width': 768, 'height': 3, 'depth': 3, 'class': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'xmin': [78, 78, 113, 134, 163, 201, 182, 245, 304, 328, 389, 406, 436, 522, 643, 653, 793, 535, 29, 3, 20], 'ymin': [221, 238, 212, 260, 250, 218, 266, 279, 265, 295, 281, 293, 290, 328, 320, 224, 337, 311, 220, 232, 215], 'xmax': [85, 92, 124, 149, 177, 211, 197, 263, 320, 344, 406, 427, 458, 543,

In [7]:
import io

def make_example(image_string, image_infos):
    for info in image_infos:
        filename = info['filename']
        width = info['width']
        height = info['height']
        depth = info['depth']
        classes = info['class']
        xmin = info['xmin']
        ymin = info['ymin']
        xmax = info['xmax']
        ymax = info['ymax']

    # 이미지 데이터를 numpy 배열로 변환
    image_data = np.frombuffer(image_string, dtype=np.uint8)
    image_data = Image.open(io.BytesIO(image_data)).convert('RGB')
    image_data = np.array(image_data)

    # 데이터를 dict 형태로 저장
    example = {
        'filename': filename,
        'height': height,
        'width': width,
        'classes': classes,
        'xmin': xmin,
        'ymin': ymin,
        'xmax': xmax,
        'ymax': ymax,
        'image_raw': image_data
    }

    return example

In [9]:
import torch
import os
import tqdm
from PIL import Image

for split in ['train']:
    if split == 'train':
        output_file = TRAIN_PT_PATH
        anno_txt = 'wider_face_train_bbx_gt.txt'
        file_path = 'WIDER_train'
    else:
        output_file = VALID_PT_PATH
        anno_txt = 'wider_face_val_bbx_gt.txt'
        file_path = 'WIDER_val'

    dataset = []  # 데이터를 저장할 리스트

    for info in tqdm.tqdm(parse_widerface(os.path.join(DATA_PATH, 'wider_face_split', anno_txt))):
        image_file = os.path.join(DATA_PATH, file_path, 'images', info[0])
        error, image_string, image_data = process_image(image_file)
        boxes = xywh_to_voc(image_file, info[1], image_data)

        if not error:
            example = make_example(image_string, [boxes])
            dataset.append(example)

    # dataset을 .pt 파일로 저장
    # torch.save(dataset, output_file)
    print("done")

print('슝=3')

 83%|████████▎ | 10681/12880 [02:02<00:25, 86.88it/s]


MemoryError: Unable to allocate 2.84 MiB for an array with shape (968, 1024, 3) and data type uint8