In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as f
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from model import Net

net = Net()
print(net)

Net(
  (conv1): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
  (conv5): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1))
  (fc1): Linear(in_features=18432, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=8, bias=True)
  (drop1): Dropout(p=0.25, inplace=False)
)


In [2]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

In [3]:
annotations = pd.read_csv('../data/AFLW/annotations.csv')

In [4]:
annotations.shape

(16418, 12)

In [5]:
annotations.rename(columns={"Unnamed: 0": "index"}, inplace=True)
annotations.head()

Unnamed: 0,index,face_id,LeftEye_x,LeftEye_y,RightEye_x,RightEye_y,Nose_x,Nose_y,Mouth_x,Mouth_y,file_id,sex
0,0,39341,209.0,150.0,302.0,186.0,277.0,268.0,232.0,308.0,image00035.jpg,m
1,1,39343,148.0,203.0,234.0,207.0,219.053909,272.376251,203.0,308.0,image00168.jpg,f
2,2,39348,143.0,123.0,223.0,173.0,159.983505,209.756149,134.0,257.0,image00102.jpg,f
3,3,39349,229.0,196.0,291.0,198.0,274.0,218.0,270.0,254.0,image00104.jpg,f
4,4,39352,334.0,229.0,526.0,249.0,486.200958,406.574982,433.417664,518.424988,image00122.jpg,f


In [6]:
from dotenv import load_dotenv
import os

load_dotenv()

dataset_path = os.getenv("DATA_PATH")

In [7]:
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

class FaceKeypointDataset(Dataset):
    def __init__(self, annotations, root_dir, transform=None):
        
        self.annotations = annotations
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        try:
            img_name = os.path.join(self.root_dir, self.annotations["file_id"][idx])
            image = plt.imread(img_name)
            
            if len(image.shape) == 2:
                image = np.stack([image] * 3, axis=-1)
            elif image.shape[-1] == 4:
                image = image[:, :, :3]
            elif image.shape[-1] != 3:
                raise ValueError(f"Unexpected number of channels: {image.shape[-1]} in {img_name}")

            landmarks = self.annotations.iloc[idx, 2:-2].values
            landmarks = landmarks.astype('float').reshape(-1, 2)

            if self.transform:
                image = self.transform(image)

            h, w = image.shape[1:]
            landmarks[:, 0] *= (224 / w)
            landmarks[:, 1] *= (224 / h)

            return image, torch.tensor(landmarks, dtype=torch.float32)

        except Exception as e:
            print(f"Skipping {idx} due to error: {e}")
            return None

In [8]:
annotations.iloc[0, 2:10].values

array([np.float64(209.0), np.float64(150.0), np.float64(302.0),
       np.float64(186.0), np.float64(277.0), np.float64(268.0),
       np.float64(232.0), np.float64(308.0)], dtype=object)

In [9]:
os.path.join(dataset_path + "/annotated_imgs/", annotations["file_id"][5])

'//OMVICECAVE/nas/ml_data/AFLW//annotated_imgs/image00144.jpg'

In [10]:
# os.mkdir(dataset_path + "/annotaded_imgs/")
import shutil

# for dir in ["0", "2", "3"]:
#     for img in os.listdir(os.path.join(dataset_path, dir)):
#         if img in annotations['file_id'].values:
#             img_name = os.path.join(dataset_path, dir, img)
"""
# image = plt.imread(img_name)
# landmarks = annotations[annotations['file_id'] == img].values[0][1:-2].astype('float').reshape(-1, 2)
# plt.imshow(image)
# plt.scatter(landmarks[:, 0], landmarks[:, 1], s=10, marker='.', c='r')
# plt.savefig(dataset_path + "/annotaded_imgs/" + img)
# plt.close()
"""
            # shutil.copy(img_name, dataset_path + "/annotaded_imgs/" + img)
    # print(dir)

'\n# image = plt.imread(img_name)\n# landmarks = annotations[annotations[\'file_id\'] == img].values[0][1:-2].astype(\'float\').reshape(-1, 2)\n# plt.imshow(image)\n# plt.scatter(landmarks[:, 0], landmarks[:, 1], s=10, marker=\'.\', c=\'r\')\n# plt.savefig(dataset_path + "/annotaded_imgs/" + img)\n# plt.close()\n'

In [11]:
len(os.listdir(dataset_path + "/annotaded_imgs/"))

14280

In [12]:
annotations["file_id"].unique().shape

(14287,)

In [13]:
annotations["face_id"].unique().shape

(16418,)

In [14]:
annotations = annotations[annotations["file_id"].isin(os.listdir(dataset_path + "/annotaded_imgs/"))]

In [15]:
dataset = FaceKeypointDataset(annotations, dataset_path + "/annotaded_imgs/", transform=transform)

In [16]:
from torch.utils.data import DataLoader

dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [17]:
for images, landmarks in dataloader:
    print(images.shape)
    print(landmarks.shape)
    break

torch.Size([32, 3, 224, 224])
torch.Size([32, 4, 2])


In [18]:
import torch.optim as optim

criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

net.to(device)

for epoch in range(10):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(dataloader, 0):
        if inputs is None:
            continue
        
        inputs, labels = inputs.to(device), labels.to(device)

        labels = labels.view(labels.size(0), -1)

        optimizer.zero_grad()

        outputs = net(inputs)

        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()
        
        # print(f"outputs shape: {outputs.shape}")
        # print(f"labels shape: {labels.shape}")

        running_loss += loss.item()
        if i % 10 == 9:
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 10}")
            running_loss = 0.0


[1, 10] loss: 205591.71796875
[1, 20] loss: 156475.6822265625
[1, 30] loss: 218189.89140625
[1, 40] loss: 138735.87109375
[1, 50] loss: 124636.437109375
[1, 60] loss: 151327.63203125
[1, 70] loss: 167743.8359375
[1, 80] loss: 161673.781640625
[1, 90] loss: 126939.130078125
[1, 100] loss: 153179.70625
[1, 110] loss: 215793.9154296875
[1, 120] loss: 136402.19296875
[1, 130] loss: 199366.34453125
[1, 140] loss: 195384.9890625
[1, 150] loss: 142296.821875
[1, 160] loss: 122308.2859375
[1, 170] loss: 109473.73359375
[1, 180] loss: 158681.258984375
[1, 190] loss: 142967.703125
[1, 200] loss: 170791.027734375
[1, 210] loss: 138124.683203125
[1, 220] loss: 169906.5671875


KeyboardInterrupt: 

In [20]:
print(inputs.device, labels.device)

cuda:0 cuda:0


In [None]:
from ..utils import train_test_split

train_loader, val_loader, test_loader = train_test_split(dataloader, test_size=0.2)