<a href="https://colab.research.google.com/github/bernaprioste1704/6-7-edition/blob/main/heatmaptraining.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torchvision.transforms as transforms

In [None]:
# Colab-only step: attach Google Drive so the dataset file can be read from it.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


## Load the dataset and cast column dtypes (null removal is currently disabled)

In [None]:
# Load the augmented pose dataframe from the mounted Drive folder.
# NOTE(review): unpickling can execute arbitrary code - only load files you trust.
DATASET_PATH = 'drive/MyDrive/dataset22462_augmented.pkl'
df = pd.read_pickle(DATASET_PATH)

# Disabled cleanup step, kept for reference:
# df = df.dropna().drop(['NAME','Scale','Activity','Category','ID'], axis=1)

# Store the upper-neck and head-top coordinates as 64-bit integers.
int64_columns = ['upper neck_X', 'upper neck_Y', 'head top_X', 'head top_Y']
df = df.astype(dict.fromkeys(int64_columns, 'int64'))

In [None]:
# Keep only the image and the right-wrist coordinates; every other column is dropped.
selected_columns = ["image", "r wrist_X", "r wrist_Y"]
df = df[selected_columns]

In [None]:
# Print the column names, non-null counts and dtypes of the reduced frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22346 entries, 0 to 22345
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   image      22346 non-null  object 
 1   r wrist_X  22346 non-null  float64
 2   r wrist_Y  22346 non-null  float64
dtypes: float64(2), object(1)
memory usage: 523.9+ KB


## Construct the Dataset Object

In [None]:
# Every column other than 'image' is treated as a keypoint coordinate (all columns).
keypoint_columns = list(filter(lambda name: name != 'image', df.columns))

# Coordinates as float32, then divided by 224 to normalise the points to 0-1.
# NOTE(review): assumes the images are 224x224 pixels - confirm against the dataset.
keypoints = df[keypoint_columns].values.astype(np.float32)
keypoints = keypoints / 224.0

# Image preprocessing: convert to a tensor, then shift/scale each of the
# three channels with mean 0.5 and std 0.5.
to_tensor_step = transforms.ToTensor()
normalize_step = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
transform = transforms.Compose([to_tensor_step, normalize_step])

class PoseDataset(data.Dataset):
    """Dataset pairing each image with its normalised keypoint coordinates.

    The keypoints are read from the frame passed to the constructor (every
    column except ``'image'``), so the images and their targets always stay
    aligned, even when a filtered or re-ordered frame is supplied.

    Args:
        df: DataFrame with an ``'image'`` column and one or more numeric
            keypoint columns.
        transform: Optional callable applied to each image. When omitted,
            the notebook-level ``transform`` pipeline is used.
        img_size: Value the keypoint coordinates are divided by to bring
            them into the 0-1 range (default 224).
    """

    def __init__(self, df, transform=None, img_size=224):
        self.df = df
        self.images = df['image'].tolist()
        # Derive the targets from *this* frame rather than from a global array.
        keypoint_columns = [col for col in df.columns if col != 'image']
        raw_keypoints = df[keypoint_columns].to_numpy(dtype=np.float32)
        self.keypoints = raw_keypoints / np.float32(img_size)
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image_tensor, keypoint_tensor)`` for sample ``idx``."""
        # Fall back to the module-level pipeline when no transform was injected.
        image_transform = self.transform if self.transform is not None else transform
        img_tensor = image_transform(self.images[idx])  # convert to a PyTorch tensor
        kp_tensor = torch.tensor(self.keypoints[idx])
        return img_tensor, kp_tensor

## Splitting into training and validation sets

In [None]:
# Build the full dataset, then split it into training and validation subsets.
dataset = PoseDataset(df)

TRAIN_FRACTION = 0.8  # share of samples used for training; the remainder is validation
SPLIT_SEED = 42       # fixed seed so the split is identical on every run
BATCH_SIZE = 64

train_size = int(TRAIN_FRACTION * len(dataset))
val_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size], generator=split_generator
)

# Shuffle only the training batches; validation order does not matter.
train_loader = data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = data.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

## Defining the model

In [None]:
class KeypointCNN(nn.Module):
    """Small encoder-decoder CNN that predicts a single-channel heatmap.

    Four 3x3 convolutions (the last three each followed by 2x2 max-pooling)
    reduce the spatial size by a factor of 8; three stride-2 transposed
    convolutions bring it back, and a 1x1 convolution plus sigmoid produces
    the heatmap.
    """

    def __init__(self):
        super().__init__()

        same_conv = dict(kernel_size=3, padding=1)
        double_up = dict(kernel_size=3, stride=2, padding=1, output_padding=1)

        # Encoder: channel width doubles at every stage.
        self.conv1 = nn.Conv2d(3, 32, **same_conv)
        self.conv2 = nn.Conv2d(32, 64, **same_conv)
        self.conv3 = nn.Conv2d(64, 128, **same_conv)
        self.conv4 = nn.Conv2d(128, 256, **same_conv)

        self.pool = nn.MaxPool2d(2, 2)

        # Decoder: each transposed convolution doubles height and width.
        self.upconv1 = nn.ConvTranspose2d(256, 128, **double_up)
        self.upconv2 = nn.ConvTranspose2d(128, 64, **double_up)
        self.upconv3 = nn.ConvTranspose2d(64, 32, **double_up)

        # 1x1 projection to one channel (single joint).
        self.heatmap = nn.Conv2d(32, 1, kernel_size=1)

    def forward(self, x):
        """Map an image batch (B, 3, H, W) to heatmaps (B, 1, H, W) in [0, 1]."""
        # The first convolution keeps the input resolution (no pooling).
        features = torch.relu(self.conv1(x))

        # Remaining encoder stages: conv -> ReLU -> 2x2 max-pool.
        for encoder_conv in (self.conv2, self.conv3, self.conv4):
            features = self.pool(torch.relu(encoder_conv(features)))

        # Decoder stages: transposed conv -> ReLU, restoring the resolution.
        for decoder_conv in (self.upconv1, self.upconv2, self.upconv3):
            features = torch.relu(decoder_conv(features))

        # Sigmoid squashes the final response into a 0-1 heatmap.
        return torch.sigmoid(self.heatmap(features))

In [None]:
import torch
import torch.nn.functional as F

def generate_heatmap(keypoints, img_size=224, sigma=3):
    """
    Convert keypoint pixel coordinates into Gaussian heatmaps.

    Each (x, y) pair produces a Gaussian bump centred on the pixel at
    column ``x`` and row ``y``, matching the (B, C, H, W) layout of image
    tensors. All joints of one sample are merged into a single channel by
    taking the element-wise maximum.

    Args:
        keypoints: Tensor (or array-like) of shape (batch_size, 2 * J) laid
            out as (x1, y1, x2, y2, ...), in pixel units. A 1-D input is
            treated as a single sample. Pairs with a negative or NaN
            coordinate are treated as missing and skipped.
        img_size: Height and width of the square output heatmap.
        sigma: Standard deviation of the Gaussian, in pixels.

    Returns:
        Float tensor of shape (batch_size, 1, img_size, img_size) on the
        same device as ``keypoints``.

    Raises:
        ValueError: If the number of coordinate columns is odd.
    """
    keypoints = torch.as_tensor(keypoints)
    if keypoints.dim() == 1:
        keypoints = keypoints.unsqueeze(0)

    batch_size, num_coords = keypoints.shape[0], keypoints.shape[1]
    if num_coords % 2 != 0:
        raise ValueError(
            f"keypoints must hold (x, y) pairs, got {num_coords} columns"
        )

    device = keypoints.device
    heatmaps = torch.zeros((batch_size, 1, img_size, img_size), device=device)

    num_joints = num_coords // 2
    if batch_size == 0 or num_joints == 0:
        return heatmaps

    pairs = keypoints.reshape(batch_size, num_joints, 2).to(torch.float32)
    xs, ys = pairs[..., 0], pairs[..., 1]

    # NaN compares False here, so NaN coordinates are flagged invalid as well.
    valid = (xs >= 0) & (ys >= 0)

    # Truncate to whole pixels, as int(x) / int(y) would.
    centre_x = torch.trunc(xs)[..., None, None]  # (B, J, 1, 1)
    centre_y = torch.trunc(ys)[..., None, None]

    # One shared pixel grid, broadcast over batch and joints instead of
    # being rebuilt for every keypoint.
    pixel = torch.arange(img_size, device=device, dtype=torch.float32)
    cols = pixel.view(1, 1, 1, img_size)  # x varies along the width axis
    rows = pixel.view(1, 1, img_size, 1)  # y varies along the height axis

    sq_dist = (cols - centre_x) ** 2 + (rows - centre_y) ** 2  # (B, J, H, W)
    gaussians = torch.exp(-sq_dist / (2 * sigma ** 2))

    # Missing joints contribute nothing (this also clears NaN values).
    gaussians = gaussians.masked_fill(~valid[..., None, None], 0.0)

    # Max over joints merges every joint of a sample into the single channel.
    heatmaps[:, 0] = gaussians.amax(dim=1)
    return heatmaps


In [None]:
import torch.nn.functional as F

def heatmap_loss(pred, target):
    """
    Mean squared error between predicted and ground-truth heatmaps.

    Args:
        pred: Predicted heatmaps.
        target: Ground-truth heatmaps with exactly the same shape as ``pred``.

    Returns:
        Scalar tensor holding the mean squared error.

    Raises:
        ValueError: If ``pred`` and ``target`` differ in shape. Without this
            check ``F.mse_loss`` only warns and broadcasts the operands,
            which either fails with an obscure size error or yields a loss
            computed against the wrong targets.
    """
    if pred.shape != target.shape:
        raise ValueError(
            "heatmap_loss expects tensors of identical shape, got "
            f"pred {tuple(pred.shape)} and target {tuple(target.shape)}"
        )
    return F.mse_loss(pred, target)  # Smooth heatmap regression


In [None]:

# ---- Training configuration -------------------------------------------------
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The loaders yield keypoints divided by 224, while heatmaps are built in pixel
# space, so the same value is used to scale the coordinates back.
IMG_SIZE = 224

model = KeypointCNN().to(device)
criterion = nn.SmoothL1Loss()  # not used by the heatmap loop below
optimizer = torch.optim.Adam(model.parameters(), lr=0.0012, weight_decay=1e-5)

# optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
epochs = 20


def _keypoints_to_target_heatmaps(batch_keypoints):
    """Turn a batch of normalised (x, y) keypoints into target heatmaps."""
    # Rounding undoes the small float32 error introduced by the /224 -> *224
    # round trip, so the Gaussian is centred on the intended pixel.
    pixel_keypoints = torch.round(batch_keypoints.to(device) * IMG_SIZE)
    return generate_heatmap(pixel_keypoints, img_size=IMG_SIZE)


for epoch in range(epochs):
    # ---- Training phase ----
    model.train()
    train_loss = 0.0

    for images, batch_keypoints in train_loader:
        images = images.to(device)
        target_heatmaps = _keypoints_to_target_heatmaps(batch_keypoints)

        optimizer.zero_grad()

        pred_heatmap = model(images)
        loss = heatmap_loss(pred_heatmap, target_heatmaps)

        loss.backward()
        optimizer.step()

        train_loss += loss.item()  # Accumulate training loss

    # Average over batches (guarded so an empty loader does not divide by zero).
    train_loss /= max(len(train_loader), 1)

    # ---- Validation phase ----
    model.eval()
    val_loss = 0.0

    with torch.no_grad():
        for images, batch_keypoints in val_loader:
            images = images.to(device)
            # Targets come from this batch's keypoints, not the whole dataset.
            target_heatmaps = _keypoints_to_target_heatmaps(batch_keypoints)

            pred_heatmap = model(images)
            loss = heatmap_loss(pred_heatmap, target_heatmaps)

            val_loss += loss.item()  # Accumulate validation loss

    val_loss /= max(len(val_loader), 1)

    print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f} - Val Loss: {val_loss:.4f}")

  return F.mse_loss(pred, target)  # Smooth heatmap regression


RuntimeError: The size of tensor a (224) must match the size of tensor b (2) at non-singleton dimension 3

## Training the model

In [None]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# MSEcriterion = nn.MSELoss()
# epochs = 200
# for epoch in range(epochs):
#     model.train()
#     train_smooth_loss, train_mse_loss = 0.0, 0.0

#     for images, keypoints in train_loader:
#         if device.type == 'cuda':
#             images, keypoints = images.to("cuda"), keypoints.to("cuda")

#         optimizer.zero_grad()
#         outputs = model(images)

#         smooth_loss = criterion(outputs, keypoints)
#         mse_loss = MSEcriterion(outputs, keypoints)  # Just for monitoring

#         mse_loss.backward()
#         optimizer.step()

#         train_smooth_loss += smooth_loss.item()
#         train_mse_loss += mse_loss.item()

#     # scheduler.step(smooth_loss)
#     avg_train_smooth_loss = train_smooth_loss / len(train_loader)
#     avg_train_mse_loss = train_mse_loss / len(train_loader)
#     print(f"Epoch [{epoch+1}/{epochs}], Train Loss (SmoothL1): {avg_train_smooth_loss:.4f}, Train Loss (MSE): {avg_train_mse_loss:.4f}")

#     # Validation
#     if(epoch+1) % 10 == 0:
#         model.eval()
#         val_smooth_loss, val_mse_loss = 0.0, 0.0
#         correct, total = 0, 0  # Track total and correct keypoints

#         with torch.no_grad():
#             for images, keypoints in val_loader:
#                 if device.type == 'cuda':
#                     images, keypoints = images.to("cuda"), keypoints.to("cuda")
#                 outputs = model(images)
#                 smooth_loss = criterion(outputs, keypoints)
#                 mse_loss = MSEcriterion(outputs, keypoints)  # Just for monitoring

#                 resized_keypoints = keypoints * 224
#                 resized_outputs = outputs * 224

#                 distances = torch.norm(resized_outputs.view(-1, 16, 2) - resized_keypoints.view(-1, 16, 2), dim=2)

#                 # Count correct keypoints (within threshold)
#                 correct += (distances < (0.10 * 224)).sum().item()
#                 total += keypoints.numel() // 2  # Number of keypoints in batch
#                 # print(f"Total: {total}")
#                 val_smooth_loss += smooth_loss.item()
#                 val_mse_loss += mse_loss.item()

#         mAP = correct / total
#         print(f"Val mAP: {correct} / {total} = {mAP:.5f}")
#         avg_val_smooth_loss = val_smooth_loss / len(val_loader)
#         avg_val_mse_loss = val_mse_loss / len(val_loader)
#         print(f"Val Loss (SmoothL1): {avg_val_smooth_loss:.5f}, Val Loss (MSE): {avg_val_mse_loss:.5f}")
#         if epoch > 50:
#             if epoch % 25:
#                 torch.save(model.state_dict(), f"posev{avg_val_mse_loss:.5f}.pth")



Epoch [1/200], Train Loss (SmoothL1): 0.0231, Train Loss (MSE): 0.0501
Epoch [2/200], Train Loss (SmoothL1): 0.0107, Train Loss (MSE): 0.0254
Epoch [3/200], Train Loss (SmoothL1): 0.0101, Train Loss (MSE): 0.0241
Epoch [4/200], Train Loss (SmoothL1): 0.0097, Train Loss (MSE): 0.0233
Epoch [5/200], Train Loss (SmoothL1): 0.0094, Train Loss (MSE): 0.0227
Epoch [6/200], Train Loss (SmoothL1): 0.0093, Train Loss (MSE): 0.0225
Epoch [7/200], Train Loss (SmoothL1): 0.0092, Train Loss (MSE): 0.0222
Epoch [8/200], Train Loss (SmoothL1): 0.0090, Train Loss (MSE): 0.0218
Epoch [9/200], Train Loss (SmoothL1): 0.0089, Train Loss (MSE): 0.0215
Epoch [10/200], Train Loss (SmoothL1): 0.0088, Train Loss (MSE): 0.0212
Val mAP: 34162 / 71520 = 0.47766
Val Loss (SmoothL1): 0.00781, Val Loss (MSE): 0.01605
Epoch [11/200], Train Loss (SmoothL1): 0.0086, Train Loss (MSE): 0.0209
Epoch [12/200], Train Loss (SmoothL1): 0.0085, Train Loss (MSE): 0.0206
Epoch [13/200], Train Loss (SmoothL1): 0.0086, Train Loss 

## Training logs
lr=0.01; epoch: 10; Loss: 0.0014; Val Loss: 0.0441

In [None]:
# Write the model's weights (its state dict) to a checkpoint file.
checkpoint_path = "posev001255.pth"
torch.save(model.state_dict(), checkpoint_path)
