In [1]:
import data_loader

import torch
import torch.nn as nn

import random
import normalizer
from PIL import Image, ImageDraw, ImageFont

In [2]:
def show_tensor(tensor, landmarks):
    # Make sure tensors are on CPU and detached from grad
    image = tensor.cpu().detach()
    landmarks = landmarks.cpu().detach()
    
    # Scale to 0-255 range
    image = (image * 255).clamp(0, 255)
    # Convert to numpy and correct data type
    image = image.numpy().astype('uint8')
    # If tensor is [C,H,W], convert to [H,W,C]
    if len(image.shape) == 3:
        image = image.transpose(1, 2, 0)
    
    # Convert to PIL Image
    pil_image = Image.fromarray(image)
    draw = ImageDraw.Draw(pil_image)
    
    # Get image dimensions
    width, height = pil_image.size
    
    # Draw each landmark
    for i in range(68):
        # Get coordinates (scale from 0-1 to image dimensions)
        x = int(landmarks[i, 0].item() * width)
        y = int(landmarks[i, 1].item() * height)
        z = landmarks[i, 2].item()
        
        # Draw point (red circle)
        radius = 2
        draw.ellipse([x-radius, y-radius, x+radius, y+radius], fill='white')
        
        # Draw value next to point
        draw.text((x+5, y-5), f'{i:.0f}', fill='white')
    
    return pil_image

In [3]:



class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
        
    def forward(self, yhat, y):
        return torch.sqrt(self.mse(yhat, y))




train = data_loader.load("./data/train/coords", "./data/train/images", 
                        firstn = 7000, batchSize = 16, shuffle = True)

test = data_loader.load("./data/test/coords", "./data/test/images", 
                        firstn = 2000, batchSize = 16, shuffle = True)

scaler = normalizer.MinMaxNormalizer()
scaler.fit([y for _, y in train])

print("Number of batches:", len(train))
for x, y in train:
    print(x.shape, y.shape)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("devise is: ", device)



condcords03894_3d.txt dataimg03894.jpeg batch 1 from 437.5
condcords01867_3d.txt dataimg01867.jpeg batch 1 from 437.5
condcords12935_3d.txt dataimg12935.jpeg batch 1 from 437.5
condcords19559_3d.txt dataimg19559.jpeg batch 1 from 437.5
condcords17237_3d.txt dataimg17237.jpeg batch 1 from 437.5
condcords20962_3d.txt dataimg20962.jpeg batch 1 from 437.5
condcords10454_3d.txt dataimg10454.jpeg batch 1 from 437.5
condcords07775_3d.txt dataimg07775.jpeg batch 1 from 437.5
condcords09630_3d.txt dataimg09630.jpeg batch 1 from 437.5
condcords26199_3d.txt dataimg26199.jpeg batch 1 from 437.5
condcords25466_3d.txt dataimg25466.jpeg batch 1 from 437.5
condcords19192_3d.txt dataimg19192.jpeg batch 1 from 437.5
condcords26401_3d.txt dataimg26401.jpeg batch 1 from 437.5
condcords03143_3d.txt dataimg03143.jpeg batch 1 from 437.5
condcords02883_3d.txt dataimg02883.jpeg batch 1 from 437.5
condcords21374_3d.txt dataimg21374.jpeg batch 1 from 437.5
condcords26332_3d.txt dataimg26332.jpeg batch 2 from 437

In [4]:
class Net(nn.Module):
    def __init__(self, device):
        super(Net, self).__init__()
        self.last_detector_size = 128 
        self.adpool = nn.AdaptiveAvgPool2d((256, 256)).to(device) 
        self.conv1 = nn.Conv2d(3, 6, 3, padding = 1).to(device)
        self.pool = nn.MaxPool2d(2, 2).to(device)
        self.conv2 = nn.Conv2d(6, 9, 3, padding = 1).to(device)
        self.conv3 = nn.Conv2d(9, 20, 3, padding = 1).to(device)
        self.conv4 = nn.Conv2d(20, 32, 3, padding = 1).to(device)
        self.conv5 = nn.Conv2d(32, 64, 3, padding = 1).to(device)
        self.conv6 = nn.Conv2d(64, self.last_detector_size, 3, padding = 1).to(device)
        fcsize = 256
        self.fc1 = nn.Linear(self.last_detector_size*32*32, fcsize).to(device)
        self.fc_list = []
        for i in range(6):
            self.fc_list.append(nn.Linear(fcsize, fcsize).to(device))
        self.prelast = nn.Linear(fcsize, 512).to(device)
        self.fc_last = nn.Linear(512, 3 * 68).to(device)
        self.act = nn.ReLU().to(device)
        self.sigm = nn.Sigmoid().to(device)
    
    def forward(self, x):
        # Input: [batch, 3, H, W]
        x = self.adpool(x)
        x = self.pool(self.act(self.conv1(x)))
        x = self.pool(self.act(self.conv2(x)))
        x = self.act(self.conv3(x))  
        x = self.pool(self.act(self.conv4(x)))
        x = self.act(self.conv5(x))
        x = self.act(self.conv6(x))  
        x = x.view(-1, self.last_detector_size*32*32)       
        x = self.act(self.fc1(x))
        for i in range(len(self.fc_list)):
            x = self.act(self.fc_list[i](x))
        x = self.act(self.prelast(x))
        x = self.fc_last(x)
        x = x.view(-1, 68, 3)
        return x


In [5]:
class CustomLoss(nn.Module):
    def __init__(self, vertlist, reverse):
        super().__init__()
        self.weight = weight
    
    def forward(self, predictions, targets):
        # You can implement any custom loss calculation here
        element_wise_loss = torch.abs(predictions - targets)
        # You can add weights, combine losses, or add regularization terms
        weighted_loss = element_wise_loss * self.weight
        return torch.mean(weighted_loss)

In [6]:
class SelectiveRMSELoss(nn.Module):
    def __init__(self, pointlist, reverse, device):
        super().__init__()
        self.reverse = reverse
        self.pointlist = pointlist
        self.device = device
        
    def forward(self, x, y):
       ls = (x-y)**2
       losslist = []
       if(self.reverse):
           self.pointlist = list(set(range(68)) - set(self.pointlist))
       sm = torch.tensor(0.0).to(device)
       k = torch.tensor(1.0).to(device)
       for i in range(x.shape[0]):
           for j in self.pointlist:
               sm += (ls[i][j].mean())
               k += 1
       return torch.sqrt(sm / float(k))

In [7]:
mouth_pointlist = [44, 7, 33, 14, 2, 31, 49, 15, 42, 32, 9, 51, 38, 61,
    18, 23, 12, 47, 67, 1, 2]

In [13]:
model = Net(device)
#criterion = SelectiveRMSELoss(mouth_pointlist, False, device)
criterion = nn.MSELoss()
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# learning loop
epoch_loss = 0
for epoch in range(4):
    epoch_loss = 0
    step_loss = 0
    random.shuffle(train)
    for batch_idx, (inputs, answers) in enumerate(train):
        inputs = inputs.to(device)
        answers = answers.to(device)
        answers = scaler.transform(answers)
        outputs = model(inputs)
        outputs = scaler.inverse_transform(outputs)
        loss = criterion(outputs, answers)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item() 
        
        if batch_idx % 5 == 0:
            print(f'Batch {batch_idx}, Loss: {loss.item():.5f}')

        if batch_idx == 160:
            break
    
    learning_rate /= 50.0
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    print(f'shape {inputs.shape}, Epoch {epoch + 1}, Loss: {epoch_loss/len(train):.5f}')


Batch 0, Loss: 0.27079
Batch 5, Loss: 0.18138
Batch 10, Loss: 0.06431
Batch 15, Loss: 0.01832
Batch 20, Loss: 0.00871
Batch 25, Loss: 0.00345
Batch 30, Loss: 0.00258
Batch 35, Loss: 0.00173
Batch 40, Loss: 0.00080
Batch 45, Loss: 0.00081
Batch 50, Loss: 0.00088
Batch 55, Loss: 0.00044
Batch 60, Loss: 0.00052
Batch 65, Loss: 0.00058
Batch 70, Loss: 0.00069
Batch 75, Loss: 0.00048
Batch 80, Loss: 0.00082
Batch 85, Loss: 0.00048
Batch 90, Loss: 0.00071
Batch 95, Loss: 0.00067
Batch 100, Loss: 0.00062
Batch 105, Loss: 0.00037
Batch 110, Loss: 0.00056
Batch 115, Loss: 0.00041
Batch 120, Loss: 0.00069
Batch 125, Loss: 0.00080
Batch 130, Loss: 0.00079
Batch 135, Loss: 0.00050
Batch 140, Loss: 0.00049
Batch 145, Loss: 0.00058
Batch 150, Loss: 0.00086
Batch 155, Loss: 0.00050
Batch 160, Loss: 0.00049
shape torch.Size([16, 3, 512, 512]), Epoch 1, Loss: 0.00511
Batch 0, Loss: 0.00077
Batch 5, Loss: 0.00049
Batch 10, Loss: 0.00073
Batch 15, Loss: 0.00052
Batch 20, Loss: 0.00061
Batch 25, Loss: 0.0

In [18]:

# testloop
criterion = nn.MSELoss()
epoch_loss = 0
test = data_loader.load("./data/test/coords", "./data/test/images", 
                        firstn = 2000, batchSize = 16, shuffle = True)
with torch.no_grad():
    for batch_idx, (inputs, answers) in enumerate(test):
        inputs = inputs.to(device)
        answers = answers.to(device)
        answers = scaler.transform(answers)
        outputs = model(inputs)
        outputs = scaler.inverse_transform(outputs)
        loss = criterion(outputs, answers)
        epoch_loss += loss.item()

    print(f'Test Loss: {epoch_loss/len(test):.4f}')

    inputs, answers = test[1]
    inputs = inputs.to(device)
    answers = answers.to(device)
    outputs = model(inputs)
    outputs = scaler.inverse_transform(outputs)
    print(outputs.shape, answers.shape)
    img = show_tensor(inputs[0], outputs[0])
    img.show()
    imgdlib = show_tensor(inputs[0], answers[0])
    imgdlib.show()



# Save the model after training
# torch.save({
#     'epoch': epoch,
#     'model_state_dict': model.state_dict(),
#     'optimizer_state_dict': optimizer.state_dict(),
#     'loss': epoch_loss,
#     'scaler_min': scaler.min,
#     'scaler_max': scaler.max
# }, 'landmark_model.pth')

condcords27699_3d.txt dataimg27699.jpeg batch 1 from 125.0
condcords27568_3d.txt dataimg27568.jpeg batch 1 from 125.0
condcords29240_3d.txt dataimg29240.jpeg batch 1 from 125.0
condcords28448_3d.txt dataimg28448.jpeg batch 1 from 125.0
condcords27237_3d.txt dataimg27237.jpeg batch 1 from 125.0
condcords27899_3d.txt dataimg27899.jpeg batch 1 from 125.0
condcords27475_3d.txt dataimg27475.jpeg batch 1 from 125.0
condcords28205_3d.txt dataimg28205.jpeg batch 1 from 125.0
condcords27081_3d.txt dataimg27081.jpeg batch 1 from 125.0
condcords28039_3d.txt dataimg28039.jpeg batch 1 from 125.0
condcords29053_3d.txt dataimg29053.jpeg batch 1 from 125.0
condcords27583_3d.txt dataimg27583.jpeg batch 1 from 125.0
condcords28231_3d.txt dataimg28231.jpeg batch 1 from 125.0
condcords28032_3d.txt dataimg28032.jpeg batch 1 from 125.0
condcords28770_3d.txt dataimg28770.jpeg batch 1 from 125.0
condcords27374_3d.txt dataimg27374.jpeg batch 1 from 125.0
condcords27489_3d.txt dataimg27489.jpeg batch 2 from 125

In [10]:
with torch.no_grad():
    
    inputs, answers = test[1]
    inputs = inputs.to(device)
    answers = answers.to(device)
    outputs = model(inputs)
    outputs = scaler.inverse_transform(outputs)
    print(outputs.shape, answers.shape)
    img = show_tensor(inputs[0], outputs[0])
    img.show()
    imgdlib = show_tensor(inputs[0], answers[0])
    imgdlib.show()

NameError: name 'model' is not defined