In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.models import resnet18
from torchmetrics.classification import BinaryAccuracy
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
import pytorch_lightning as pl
from models import FaceID_CNN, Ready_faceID_CNN
import os
from torchvision import datasets
from PIL import Image
import pandas as pd
from torchvision.datasets import ImageFolder

base_path : str = os.path.dirname(os.getcwd())
CSV_PATH  : str = base_path + '\\csv'
src_path  : str = base_path + '\\src'
json_path : str = base_path + '\\json'

traits = ['Male']


class CelebADataset(torch.utils.data.Dataset):
    def __init__(self, img_folder, labels, transform=None):
        self.img_folder = img_folder
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        img_name = os.path.join(self.img_folder, self.labels.iloc[idx]["image_id"])
        label = self.labels.iloc[idx][1:].values.astype("float32")
        image = Image.open(img_name).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(label)

In [2]:
transform = transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

dataset_path     = os.path.join(CSV_PATH, "celeba")
img_folder_path  = os.path.join(dataset_path, "img_align_celeba")
attr_path        = os.path.join(dataset_path, "list_attr_celeba.txt")
partition_path   = os.path.join(dataset_path, "list_eval_partition.txt")

if not os.path.exists(img_folder_path) or not os.path.exists(attr_path) or not os.path.exists(partition_path):
    raise FileNotFoundError("The dataset folder or required files are missing.")

attr_df = pd.read_csv(attr_path, sep=r'\s+', skiprows=1)
attr_df = attr_df.reset_index().rename(columns={"index": "image_id"})
attr_df["image_id"] = attr_df["image_id"].astype(str)

attr_df         = attr_df[["image_id"] + traits]

attr_df[traits] = (attr_df[traits] + 1) // 2

filtered_df     = attr_df[(attr_df[traits] > 0).any(axis=1)]

partition_df = pd.read_csv(partition_path, sep=' ', header=None, names=["image_id", "partition"])
partition_df["image_id"] = partition_df["image_id"].astype(str)

filtered_df = filtered_df.merge(partition_df, on="image_id")
non_filtered_df = attr_df.merge(partition_df, on="image_id")

train_df = filtered_df[filtered_df["partition"] == 0].drop(columns=["partition"])
val_df   = non_filtered_df[non_filtered_df["partition"] == 1].drop(columns=["partition"])
test_df  = non_filtered_df[non_filtered_df["partition"] == 2].drop(columns=["partition"])

print(f"Train size: {len(train_df)}, Validation size: {len(val_df)}, Test size: {len(test_df)}")

if len(train_df) == 0 or len(val_df) == 0 or len(test_df) == 0:
    raise ValueError("One of the dataset splits is empty. Check your data files and partition file.")

train_data = CelebADataset(img_folder_path, train_df, transform)
val_data   = CelebADataset(img_folder_path, val_df, transform)
test_data  = CelebADataset(img_folder_path, test_df, transform)

Train size: 68261, Validation size: 19867, Test size: 19962


In [23]:
attr_df[traits] = (attr_df[traits] + 1) // 2
filtered_df = attr_df[(attr_df[traits] > 0).any(axis=1)]

In [6]:
print(len(train_df["Male"] == 1))

68261


In [25]:
attr_df

Unnamed: 0,image_id,Male
0,000001.jpg,0
1,000002.jpg,0
2,000003.jpg,1
3,000004.jpg,0
4,000005.jpg,0
...,...,...
202594,202595.jpg,0
202595,202596.jpg,1
202596,202597.jpg,1
202597,202598.jpg,0


In [6]:
import os
import torch
base_path : str = os.path.dirname(os.getcwd())
model_path : str = base_path + "\\models"
checkpoint = torch.load(model_path+"\\torch.ckpt")

for state in checkpoint["state_dict"]:
    print(f"State: {state}") 

# Remove "criterion.pos_weight" from state_dict
if "criterion.pos_weight" in checkpoint["state_dict"]:
    del checkpoint["state_dict"]["criterion.pos_weight"]

# Save the cleaned checkpoint
torch.save(checkpoint, model_path+"\\torch.ckpt")

State: model.model.conv1.weight
State: model.model.bn1.weight
State: model.model.bn1.bias
State: model.model.bn1.running_mean
State: model.model.bn1.running_var
State: model.model.bn1.num_batches_tracked
State: model.model.layer1.0.conv1.weight
State: model.model.layer1.0.bn1.weight
State: model.model.layer1.0.bn1.bias
State: model.model.layer1.0.bn1.running_mean
State: model.model.layer1.0.bn1.running_var
State: model.model.layer1.0.bn1.num_batches_tracked
State: model.model.layer1.0.conv2.weight
State: model.model.layer1.0.bn2.weight
State: model.model.layer1.0.bn2.bias
State: model.model.layer1.0.bn2.running_mean
State: model.model.layer1.0.bn2.running_var
State: model.model.layer1.0.bn2.num_batches_tracked
State: model.model.layer1.1.conv1.weight
State: model.model.layer1.1.bn1.weight
State: model.model.layer1.1.bn1.bias
State: model.model.layer1.1.bn1.running_mean
State: model.model.layer1.1.bn1.running_var
State: model.model.layer1.1.bn1.num_batches_tracked
State: model.model.lay

In [8]:
import os
import pandas as pd
base_path : str = os.path.dirname(os.getcwd())
annotations_path : str = base_path + "\\csv\\processed_wider_faces"
test = pd.read_json(annotations_path + "\\annotations_DONE.json")
test["attributes"][0]["Smiling"]

True

In [8]:
import os
import pandas as pd
from PIL import Image
base_path : str = os.path.dirname(os.getcwd())
DATA_PATH = os.path.join(base_path, "csv")
images_folder = os.path.join(DATA_PATH, f"widerface\\WIDER_train\\WIDER_train\\images")
annotations = os.path.join(DATA_PATH, f"widerface\\wider_face_split\\wider_face_train_bbx_gt.txt")


imgs = []
bounding_boxes = []
with open(annotations, 'r') as file:
    lines = file.readlines()

for root, dirs, files in os.walk(images_folder):
    for filename in files:
        if filename.endswith('.jpg'):
            image_bboxes = []
            idx = 0
            while idx < len(lines):
                    relative_image_path = os.path.relpath(os.path.join(root, filename), images_folder)
                    annotation_image_path = lines[idx].strip()

                    if annotation_image_path == relative_image_path:
                        num_faces = int(lines[idx + 1].strip())
                        idx += 2
                        for _ in range(num_faces):
                            bbox = list(map(int, lines[idx].strip().split()[:4]))
                            x, y, w, h = bbox
                            image_bboxes.append([x, y, x + w, y + h])
                            idx += 1

                        bounding_boxes.append(image_bboxes)
                    else:
                        idx += 1
bounding_boxes

KeyboardInterrupt: 