In [3]:
import torch
import torchvision
from torchvision.models.detection import KeypointRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

def get_keypoint_rcnn_model(num_keypoints):
    # Load a pre-trained ResNet-FPN backbone
    backbone = resnet_fpn_backbone('resnet50', pretrained=True)

    # Define the anchor generator
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))

    # Define the ROI pooling feature extractor
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
                                                    output_size=7,
                                                    sampling_ratio=2)

    # Define the keypoint ROI pooling feature extractor
    keypoint_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
                                                             output_size=14,
                                                             sampling_ratio=2)

    # Define the Keypoint RCNN model
    model = KeypointRCNN(backbone=backbone,
                          num_classes=2,  # Including background class
                          rpn_anchor_generator=anchor_generator,
                          box_roi_pool=roi_pooler,
                          keypoint_roi_pool=keypoint_roi_pooler,
                          keypoint_head=torchvision.models.detection.keypointrcnn_resnet50_fpn(num_keypoints))
    return model

# Initialize the Keypoint RCNN model
num_keypoints = 4  # Number of keypoints to predict
model = get_keypoint_rcnn_model(num_keypoints)




Downloading: "https://download.pytorch.org/models/keypointrcnn_resnet50_fpn_coco-fc266e95.pth" to C:\Users\19722/.cache\torch\hub\checkpoints\keypointrcnn_resnet50_fpn_coco-fc266e95.pth
100%|███████████████████████████████████████████████████████████████████████████████| 226M/226M [00:07<00:00, 33.6MB/s]


In [None]:
import torch
import torch.nn as nn

class v_0(nn.Module):
    def __init__(self,  out_channels):
        super(v_0, self).__init__()
        
        # Load EfficientNet-B7 as the backbone
        self.backbone = EfficientNet.from_pretrained('efficientnet-b7')
        self.backbone._fc = nn.Identity()
        backbone_out_features = 2560
        # Define additional layers
        self.regression_head_1 = nn.Linear(backbone_out_features, out_channels)
      
        
        for param in self.backbone.parameters():
            param.requires_grad = False
        
    def forward(self, x):
        # Backbone feature extraction
        features = self.backbone(x)
        
        # Apply regression heads
        regression_output_1 = self.regression_head_1(features)

        
        return regression_output_1

In [19]:


'''
import torch
import torch.nn as nn

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += residual  # Add residual connection
        out = self.relu(out)
        return out

class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(DoubleConv, self).__init__()
        self.residual_block = ResidualBlock(in_channels, out_channels)  # Check the number of input channels here
        self.conv = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        residual = x
        out = self.residual_block(x)
        out = self.conv(out)
        out = self.bn(out)
        out += residual  # Add residual connection
        out = self.relu(out)
        return out

class UNet(nn.Module):
    def __init__(self, in_channels, num_regressors):
        super(UNet, self).__init__()
        self.down1 = DoubleConv(in_channels, 64)
        self.down2 = DoubleConv(64, 128)
        self.down3 = DoubleConv(128, 256)
        self.down4 = DoubleConv(256, 512)

        self.maxpool = nn.MaxPool2d(2)
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        self.up3 = DoubleConv(256 + 512, 256)
        self.up2 = DoubleConv(128 + 256, 128)
        self.up1 = DoubleConv(128 + 64, 64)

        self.fc = nn.Linear(256 * 28 * 28, num_regressors)

    def forward(self, x):
        x1 = self.down1(x)
        x2 = self.maxpool(x1)
        x3 = self.down2(x2)
        x4 = self.maxpool(x3)
        x5 = self.down3(x4)
        x6 = self.maxpool(x5)
        x7 = self.down4(x6)

        x = self.upsample(x7)
        x = torch.cat([x, x5], dim=1)
        x = self.up3(x)

        x = self.upsample(x)
        x = torch.cat([x, x3], dim=1)
        x = self.up2(x)

        x = self.upsample(x)
        x = torch.cat([x, x1], dim=1)
        x = self.up1(x)
        
        x = x.view(x.size(0), -1)  # Reshape to (batch_size, num_regressors)
        x = self.fc(x)  # Reshape to (batch_size, num_regressors)
        return x



#V4 learning_rate = 0.001
class UNet(nn.Module):
    def __init__(self,  out_channels):
        super(UNet, self).__init__()
        
        # Load EfficientNet-B7 as the backbone
        self.backbone = EfficientNet.from_pretrained('efficientnet-b7')
        self.backbone._fc = nn.Identity()
        backbone_out_features = 2560
        # Define additional layers
        self.regression_head_1 = nn.Linear(backbone_out_features, out_channels)
      
        
        for param in self.backbone.parameters():
            param.requires_grad = False
        
    def forward(self, x):
        # Backbone feature extraction
        features = self.backbone(x)
        
        # Apply regression heads
        regression_output_1 = self.regression_head_1(features)

        
        return regression_output_1




# V3
import torch
import torch.nn as nn

class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(DoubleConv, self).__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)

class UNet(nn.Module):
    def __init__(self, in_channels, num_regressors):
        super(UNet, self).__init__()
        self.down1 = DoubleConv(in_channels, 64)
        self.down2 = DoubleConv(64, 128)
        self.down3 = DoubleConv(128, 256)
        self.down4 = DoubleConv(256, 512)
        self.down5 = DoubleConv(512, 1024)
        self.down6 = DoubleConv(1024, 2048)  # Increase channels further

        self.maxpool = nn.MaxPool2d(2)
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        self.up5 = DoubleConv(2048 + 1024, 1024)  # Adjust channels accordingly
        self.up4 = DoubleConv(1024 + 512, 512)
        self.up3 = DoubleConv(512 + 256, 256)
        self.up2 = DoubleConv(256 + 128, 128)
        self.up1 = DoubleConv(128 + 64, 64)

        self.final_conv = nn.Conv2d(64, num_regressors, kernel_size=1)

        # Adjust the input size of the linear layer based on the size of the feature maps produced by the final convolutional layer
        self.fc = nn.Linear(401408, 8)  # Assuming input image size is 256x256

    def forward(self, x):
        x1 = self.down1(x)
        x2 = self.maxpool(x1)
        x3 = self.down2(x2)
        x4 = self.maxpool(x3)
        x5 = self.down3(x4)
        x6 = self.maxpool(x5)
        x7 = self.down4(x6)
        x8 = self.maxpool(x7)
        x9 = self.down5(x8)
        x10 = self.maxpool(x9)

        x = self.down6(x10)

        x = self.upsample(x)
        x = torch.cat([x, x9], dim=1)
        x = self.up5(x)

        x = self.upsample(x)
        x = torch.cat([x, x7], dim=1)
        x = self.up4(x)

        x = self.upsample(x)
        x = torch.cat([x, x5], dim=1)
        x = self.up3(x)

        x = self.upsample(x)
        x = torch.cat([x, x3], dim=1)
        x = self.up2(x)

        x = self.upsample(x)
        x = torch.cat([x, x1], dim=1)
        x = self.up1(x)
        
        x = self.final_conv(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc(x)
        return x


# V2
import torch
import torch.nn as nn

class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(DoubleConv, self).__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)

class UNet(nn.Module):
    def __init__(self, in_channels, num_regressors):
        super(UNet, self).__init__()
        self.down1 = DoubleConv(in_channels, 64)
        self.down2 = DoubleConv(64, 128)
        self.down3 = DoubleConv(128, 256)
        self.down4 = DoubleConv(256, 512)
        self.down5 = DoubleConv(512, 1024)

        self.maxpool = nn.MaxPool2d(2)
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        self.up4 = DoubleConv(1024 + 512, 512)
        self.up3 = DoubleConv(512 + 256, 256)
        self.up2 = DoubleConv(256 + 128, 128)
        self.up1 = DoubleConv(128 + 64, 64)

        self.final_conv = nn.Conv2d(64, num_regressors, kernel_size=1)

        self.fc = nn.Linear(401408, 8)  # Assuming input image size is 256x256

    def forward(self, x):
        x1 = self.down1(x)
        x2 = self.maxpool(x1)
        x3 = self.down2(x2)
        x4 = self.maxpool(x3)
        x5 = self.down3(x4)
        x6 = self.maxpool(x5)
        x7 = self.down4(x6)
        x8 = self.maxpool(x7)
        x9 = self.down5(x8)

        x = self.upsample(x9)
        x = torch.cat([x, x7], dim=1)
        x = self.up4(x)

        x = self.upsample(x)
        x = torch.cat([x, x5], dim=1)
        x = self.up3(x)

        x = self.upsample(x)
        x = torch.cat([x, x3], dim=1)
        x = self.up2(x)

        x = self.upsample(x)
        x = torch.cat([x, x1], dim=1)
        x = self.up1(x)
        
        x = self.final_conv(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc(x)
        return x



# Example usage:
num_channels = 3  # Input channels
num_regressors = 8  # Number of regressors
model = UNet(num_channels, num_regressors)

# Dummy input tensor
x = torch.randn(32, 3, 256, 256)  # Batch size of 32, 3 channels, 256x256 resolution
output = model(x)
print(output.size())  # Should output torch.Size([32, 8])
'''

'''
class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(DoubleConv, self).__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)

class UNet(nn.Module):
    def __init__(self, in_channels, num_regressors):
        super(UNet, self).__init__()
        self.down1 = DoubleConv(in_channels, 64)
        self.down2 = DoubleConv(64, 128)
        self.down3 = DoubleConv(128, 256)
        self.down4 = DoubleConv(256, 512)

        self.maxpool = nn.MaxPool2d(2)
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        self.up3 = DoubleConv(256 + 512, 256)
        self.up2 = DoubleConv(128 + 256, 128)
        self.up1 = DoubleConv(128 + 64, 64)

        
        self.fc=nn.Linear(3211264,8)

    def forward(self, x):
        x1 = self.down1(x)
        x2 = self.maxpool(x1)
        x3 = self.down2(x2)
        x4 = self.maxpool(x3)
        x5 = self.down3(x4)
        x6 = self.maxpool(x5)
        x7 = self.down4(x6)

        x = self.upsample(x7)
        x = torch.cat([x, x5], dim=1)
        x = self.up3(x)

        x = self.upsample(x)
        x = torch.cat([x, x3], dim=1)
        x = self.up2(x)

        x = self.upsample(x)
        x = torch.cat([x, x1], dim=1)
        x = self.up1(x)
        
        x = x.view(x.size(0), -1)  # Reshape to (batch_size, num_regressors)
        #print(x.shape)
        x = self.fc(x)  # Reshape to (batch_size, num_regressors)
        return x
'''

torch.Size([32, 4194304])
torch.Size([32, 8])


In [None]:

# V3

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Adjust the number of channels in residual connection if needed
        self.adjust_residual = in_channels != out_channels

    def forward(self, x):
        residual = x.clone().detach()  # Clone and detach to ensure the same device as input
        #print(residual.shape)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)

        # Adjust the residual connection if needed
        if self.adjust_residual:
            residual = nn.Conv2d(residual.shape[1], out.shape[1], kernel_size=1).to(x.device)(residual)

        #print(out.shape)
        out += residual  # Add residual connection
        out = self.relu(out)
        return out


class v_3(nn.Module):
    def __init__(self):
        super(v_3, self).__init__()
        
        # Load EfficientNet-B7 as the backbone
        self.backbone = EfficientNet.from_pretrained('efficientnet-b7')
        for param in self.backbone.parameters():
            param.requires_grad = False
            
        # Replace the last three layers with nn.Identity
        self.identity_layers()
        
        # Define additional layers
        self.residual_1 = ResidualBlock(2560, 1024)
        self.residual_2 = ResidualBlock(1024, 512)
        self.residual_3 = ResidualBlock(512, 256)
        self.regression_head_1 = nn.Linear(256, 8)
        self.regression_head_2 = nn.Linear(256, 2)
        self.regression_head_3 = nn.Linear(256, 2)
        #print(self.backbone)
    
    def identity_layers(self):
        # Replace last three layers with nn.Identity
        
        #self.backbone._avg_pooling = nn.Identity()
        #self.backbone._dropout = nn.Identity()
        self.backbone._fc = nn.Identity()

    def forward(self, x):
        # Backbone feature extraction
       
        x = self.backbone(x)
        x = x.unsqueeze(2).unsqueeze(3)
        #print(x.shape)
        
        # Pass through additional residual blocks
        x = self.residual_1(x)
        x = self.residual_2(x)
        x = self.residual_3(x)
        x = x.view(x.size(0), -1)  # Flatten
        x_a = self.regression_head_1(x)
        x_b = self.regression_head_2(x)
        x_c = self.regression_head_3(x)
        
        return x_a, x_b, x_c


# V2
import torch
import torch.nn as nn

class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(DoubleConv, self).__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)

class v_2(nn.Module):
    def __init__(self):
        super(v_2, self).__init__()
        self.down1 = DoubleConv(3, 64)
        self.down2 = DoubleConv(64, 128)
        self.down3 = DoubleConv(128, 256)
        self.down4 = DoubleConv(256, 512)
        self.down5 = DoubleConv(512, 1024)

        self.maxpool = nn.MaxPool2d(2)
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        self.up4 = DoubleConv(1024 + 512, 512)
        self.up3 = DoubleConv(512 + 256, 256)
        self.up2 = DoubleConv(256 + 128, 128)
        self.up1 = DoubleConv(128 + 64, 64)

        self.final_conv = nn.Conv2d(64, 8, kernel_size=1)

        #self.fc = nn.Linear(401408, 8)  # Assuming input image size is 256x256
        self.fc1=nn.Linear(401408,8)
        self.fc2=nn.Linear(401408,2)
        self.fc3=nn.Linear(401408,2)
        
    def forward(self, x):
        x1 = self.down1(x)
        x2 = self.maxpool(x1)
        x3 = self.down2(x2)
        x4 = self.maxpool(x3)
        x5 = self.down3(x4)
        x6 = self.maxpool(x5)
        x7 = self.down4(x6)
        x8 = self.maxpool(x7)
        x9 = self.down5(x8)

        x = self.upsample(x9)
        x = torch.cat([x, x7], dim=1)
        x = self.up4(x)

        x = self.upsample(x)
        x = torch.cat([x, x5], dim=1)
        x = self.up3(x)

        x = self.upsample(x)
        x = torch.cat([x, x3], dim=1)
        x = self.up2(x)

        x = self.upsample(x)
        x = torch.cat([x, x1], dim=1)
        x = self.up1(x)
        
        x = self.final_conv(x)
        x = x.view(x.size(0), -1)  # Flatten
        x_a = self.fc1(x)  # Reshape to (batch_size, num_regressors)
        x_b = self.fc2(x)
        x_c = self.fc3(x)
        return x_a, x_b, x_c

class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(DoubleConv, self).__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)
class v_1(nn.Module):
    def __init__(self):
        super(v_1, self).__init__()
        self.down1 = DoubleConv(3, 64)
        self.down2 = DoubleConv(64, 128)
        self.down3 = DoubleConv(128, 256)
        self.down4 = DoubleConv(256, 512)

        self.maxpool = nn.MaxPool2d(2)
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        self.up3 = DoubleConv(256 + 512, 256)
        self.up2 = DoubleConv(128 + 256, 128)
        self.up1 = DoubleConv(128 + 64, 64)

        
        self.fc1=nn.Linear(3211264,8)
        self.fc2=nn.Linear(3211264,2)
        self.fc3=nn.Linear(3211264,2)

    def forward(self, x):
        x1 = self.down1(x)
        x2 = self.maxpool(x1)
        x3 = self.down2(x2)
        x4 = self.maxpool(x3)
        x5 = self.down3(x4)
        x6 = self.maxpool(x5)
        x7 = self.down4(x6)

        x = self.upsample(x7)
        x = torch.cat([x, x5], dim=1)
        x = self.up3(x)

        x = self.upsample(x)
        x = torch.cat([x, x3], dim=1)
        x = self.up2(x)

        x = self.upsample(x)
        x = torch.cat([x, x1], dim=1)
        x = self.up1(x)
        
        x = x.view(x.size(0), -1)  # Reshape to (batch_size, num_regressors)
        #print(x.shape)
        x1 = self.fc1(x)  # Reshape to (batch_size, num_regressors)
        x2 = self.fc2(x)
        x3 = self.fc3(x)
        return x1, x2, x3

In [None]:
class Dog_Hip_old(nn.Module):
    def __init__(self):
        super(Dog_Hip_old, self).__init__()
        
        if pre_trained_model == 'ResNet-50':
            self.base_model = models.resnet50(pretrained=True)
            in_features = self.base_model.fc.in_features
            self.base_model = nn.Sequential(*list(self.base_model.children())[:-1])
            print('Used model: ResNet-50')
            
        elif pre_trained_model == 'ResNet-152':
            self.base_model = models.resnet152(pretrained=True)
            in_features = self.base_model.fc.in_features
            self.base_model = nn.Sequential(*list(self.base_model.children())[:-1])
            print('Used model: ResNet-152')
            
        elif pre_trained_model == 'inception_v3':
            self.base_model = models.inception_v3(pretrained=True)
            in_features = self.base_model.fc.in_features
            self.base_model.fc = nn.Identity()
            print('Used model: inception_v3')
        
        elif pre_trained_model == 'vgg-16':
            self.base_model = models.vgg16(pretrained=True)
            in_features = self.base_model.classifier[6].in_features
            self.base_model.classifier = nn.Sequential(*list(self.base_model.classifier.children())[:-1])
            print('Used model: VGG16')
         
        elif pre_trained_model == 'vgg-19':
            self.base_model = models.vgg19(pretrained=True)
            in_features = self.base_model.classifier[6].in_features
            self.base_model.classifier = nn.Sequential(*list(self.base_model.classifier.children())[:-1])
            print('Used model: VGG19')
            
        elif pre_trained_model == 'EfficientNet':
            self.base_model = EfficientNet.from_pretrained('efficientnet-b7')          
            self.base_model._fc = nn.Identity()
            in_features = 2560
            print('Used model: EfficientNet') 
            
        elif pre_trained_model == 'ShuffleNet':
            self.base_model = models.shufflenet_v2_x1_0(pretrained=True)
            in_features = self.base_model.fc.in_features
            self.base_model.fc = nn.Identity()
            #in_features = 1024
            print('Used model: ShuffleNet') 
            
        elif pre_trained_model == 'Vit':
            self.base_model = create_model('vit_base_patch16_224', pretrained=True)
            self.base_model.head = nn.Identity()
            in_features = self.base_model.num_features
            print('Used model: Vit')
            
        elif pre_trained_model == 'SqueezeNet':
            self.base_model = models.squeezenet1_0(pretrained=True)
            self.base_model.classifier = nn.Identity()
            in_features = 86528
            print('Used model: SqueezeNet')
        
        elif pre_trained_model == 'AlexNet':
            self.base_model = models.alexnet(pretrained=True)
            self.base_model.classifier = nn.Identity()
            in_features = 9216            
            print('Used model: AlexNet')
            
        elif pre_trained_model == 'GoogLeNet':
            self.base_model = models.googlenet(pretrained=True)
            self.base_model.fc = nn.Identity()  # Remove the classification layer
            in_features = 1024
            print('Used model: GoogLeNet')
        
        elif pre_trained_model == 'MobileNetv2':
            self.base_model = models.mobilenet_v2(pretrained=True)
            self.base_model.classifier = nn.Identity()
            in_features = self.base_model.last_channel
            print('Used model: MobileNetv2')
            
        elif pre_trained_model == 'DenseNet161':
            self.base_model = models.densenet161(pretrained=True)
            self.base_model.classifier = nn.Identity()
            in_features = 2208
            print('Used model: DenseNet161')
        
        elif pre_trained_model == 'DenseNet201':
            self.base_model = models.densenet201(pretrained=True)
            self.base_model.classifier = nn.Identity()
            in_features = 1920
            print('Used model: DenseNet201')
        
        elif pre_trained_model == 'Xception':
            self.base_model = create_model('xception', pretrained=True)        
            self.base_model.last_linear = nn.Identity()
            in_features = 1000
            print('Used model: Xception')
            
        elif pre_trained_model == 'Vit_1':
            self.base_model = create_model('vit_srelpos_medium_patch16_224', pretrained=True)
            self.base_model.head = nn.Identity()
            in_features = self.base_model.num_features
            print('Used model: Vit_1')
            
        elif pre_trained_model == 'Vit_2':
            self.base_model = create_model('vit_large_patch16_224', pretrained=True)
            self.base_model.head = nn.Identity()
            in_features = self.base_model.num_features
            print('Used model: Vit_2')
            
        elif pre_trained_model == 'Vit_3':
            self.base_model = create_model('vit_gigantic_patch14_clip_224', pretrained=True)
            self.base_model.head = nn.Identity()
            in_features = self.base_model.num_features
            print('Used model: Vit_3')
            
        elif pre_trained_model == 'Vit_4':
            self.base_model = create_model('vit_large_patch32_224.orig_in21k', pretrained=True, num_classes=0)
            self.base_model.head = nn.Identity()
            in_features = self.base_model.num_features
            print('Used model: vit_large_patch32_224')   
                     
        if pre_trained_model not in ['ShuffleNet', 'inception_v3', 'EfficientNet']:
            for param in self.base_model.parameters():
                param.requires_grad = False
        
        self.fc1 = nn.Linear(in_features, 8)  # (x, y) for 4 points
        self.fc2 = nn.Linear(in_features, 2)  # radius
        self.fc3 = nn.Linear(in_features, 2)  # angles
        self.dropout = nn.Dropout(p=0.5)
    def forward(self, x):
        if pre_trained_model == 'inception_v3':
            x = self.base_model(x)
            if isinstance(x, torchvision.models.inception.InceptionOutputs):
            # Access the logits attribute to get the tensor from the final fully connected layer
                x = x.logits
            else:
            # If the output is already a tensor, no need to change it
                pass
            #print('Used model: inception_v3 transform logit')
            
        
        else:
            x = self.base_model(x)
            
        x = x.view(x.size(0), -1) 
        #x = self.dropout(x)
        
        #print(x.shape)
        four_points = self.fc1(x)
        radius = self.fc2(x)
        angles = self.fc3(x)
        
        return four_points, radius, angles

In [None]:
class Importdata(Dataset):
    def __init__(self, x_path, y_path, transform=True, centercrop=False, random_rotation=False):
        self.x_path = x_path
        self.y_path = y_path
        self.transform = transform
        self.centercrop = centercrop
        self.random_rotation = random_rotation
        
        self.allx = sorted(os.listdir(self.x_path))
        self.ally = sorted(os.listdir(self.y_path))
        
    def __len__(self):
        return len(self.allx)
    
    def __getitem__(self, idx):
        img_path = os.path.join(self.x_path, self.allx[idx])
        img = Image.open(img_path).convert('RGB')
        w, h = img.size        
        
        lab_path = os.path.join(self.y_path, self.ally[idx])
        label = scipy.io.loadmat(lab_path)['Four_points'].astype(float)
        label = torch.as_tensor(label, dtype=torch.float32)
        
        angle = scipy.io.loadmat(lab_path)['Angles'].astype(float)
        angle = torch.as_tensor(angle, dtype=torch.float32)

        if self.transform: 
            if self.centercrop:
                img_transform = transforms.Compose([
                    transforms.Resize((img_size + centercrop_width, img_size + centercrop_width)),
                    transforms.CenterCrop((img_size, img_size)),
                    transforms.ToTensor(),
                    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
                ])
                
                img = img_transform(img)
                h_new, w_new = img.shape[1]+centercrop_width, img.shape[2]+centercrop_width
            
                # Rescale the locations of four points
                label[:-1,0] = w_new/w*label[:-1,0]-(centercrop_width/2)
                label[:-1,1] = h_new/h*label[:-1,1]-(centercrop_width/2)
            
            else:
                img_transform = transforms.Compose([
                    transforms.Resize((img_size, img_size)),
                    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
                    transforms.ToTensor(),
                    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
                ])
                
                # Apply the transformation
                img = img_transform(img)
                h_new, w_new = img.shape[1], img.shape[2]
            
                # Rescale the locations of four points
                label[:-1,0] = w_new/w*label[:-1,0]
                label[:-1,1] = h_new/h*label[:-1,1]            
            
            # Apply random rotation
            if self.random_rotation:
                roangle = torch.randint(-20, 20, (1,)).item()  # Random angle between -20 and 20 degrees
                img = torchvision.transforms.functional.rotate(img, roangle)
                label[:-1,:] = self.rotate_points(label[:-1,:] , (img_size, img_size), roangle)
            
            # Radius used the mean of two re-scaled ratio
            h_new, w_new = img.shape[1], img.shape[2]
            label[-1,:] = torch.tensor(np.dot(label[-1,:].view(2, 1).numpy(),np.array([[w_new/w, h_new/h]])).sum(axis=1)*1/2)
                
        return img, label, angle, w, h, img_path

    def rotate_points(self, points, image_size, angle):
        angle = -angle  # Negative angle because torchvision rotates images clockwise
        angle_rad = math.radians(angle)
        center = (image_size[0] / 2, image_size[1] / 2)
        rotated_points = []
        for point in points:
            x, y = point[0], point[1]
            x -= center[0]
            y -= center[1]
            new_x = x * math.cos(angle_rad) - y * math.sin(angle_rad)
            new_y = x * math.sin(angle_rad) + y * math.cos(angle_rad)
            new_x += center[0]
            new_y += center[1]
            rotated_points.append([new_x, new_y])
        return torch.tensor(rotated_points)




In [None]:
class v_4(nn.Module):
    def __init__(self):
        super(v_4, self).__init__()

        self.base_model1 = EfficientNet.from_pretrained('efficientnet-b7')          
        self.base_model1._fc = nn.Identity()
        in_features1 = 2560
        print('Used model: EfficientNet')
        
        self.fc11 = nn.Linear(in_features1, 8)  # (x, y) for 4 points
        self.fc12 = nn.Linear(in_features1, 2)  # radius
        self.fc13 = nn.Linear(in_features1, 2)  # angles


        
        self.base_model2 = create_model('vit_gigantic_patch14_clip_224', pretrained=True)
        self.base_model2.head = nn.Identity()
        in_features2 = self.base_model2.num_features

        for param in self.base_model2.parameters():
                param.requires_grad = False
        print('Used model: vit_gigantic_patch14_clip_224')
           
               
        self.fc21 = nn.Linear(in_features2, 8)  # (x, y) for 4 points
        self.fc22 = nn.Linear(in_features2, 2)  # radius
        self.fc23 = nn.Linear(in_features2, 2)  # angles
        
    def forward(self, x):
        
        x1 = self.base_model1(x)
        x1 = x1.view(x1.size(0), -1) 

        four_points1 = self.fc11(x1)
        radius1 = self.fc12(x1)
        angles1 = self.fc13(x1)

        x2 = self.base_model2(x)
        x2 = x2.view(x2.size(0), -1) 

        four_points2 = self.fc21(x2)
        radius2 = self.fc22(x2)
        angles2 = self.fc23(x2)

        four_points = 1/2*(four_points1+four_points2)
        
        radius=1/2*(radius1+radius2)
        
        angles=1/2*(angles1+angles2)
        
        return four_points, radius, angles


In [None]:
criterion = nn.MSELoss()

In [None]:
class self_def_pred_new():
    
    def __init__(self, loaded_model, test_loader):
        self.model= loaded_model
        self.test= test_loader
        

    
    def extracted(self, all_labels):
        p1_all, p2_all,p3_all,p4_all=[],[],[],[]
        for i in all_labels:
            p1, p2, p3, p4, r = i
            p1_all.append(p1.numpy())
            p2_all.append(p2.numpy())
            p3_all.append(p3.numpy())
            p4_all.append(p4.numpy())
            #r_all.append(r.numpy())
        return p1_all, p2_all,p3_all,p4_all
        
    def res(self, plot_result=None, report_loss=None):
        c=0.
        p_loss, a_loss, r_loss=0.,0.,0.
        ma_p, ma_a, ma_r=0., 0., 0.
        ma_p_ori, ma_a_ori, ma_r_ori = 0., 0., 0.
        all_labels, all_pre_labels, all_angles, all_pre_angles, all_cal_angles = [], [], [], [],[]
        all_files=[]

        self.model.eval()
        with torch.no_grad():
            for _, data in enumerate(self.test):
                imgs, labels, angles, w_news, h_news, img_path = data
                #angles = angles.to('cuda')
                points, radius = labels[:,:-1,:].to('cuda'), labels[:,-1,:].to('cuda')
                points = points.view(points.size(0), -1)
                print(points.shape)
                imgs = imgs.to('cuda')
                pre_points = self.model(imgs)
                print(pre_points.shape)
                points_loss = criterion(pre_points, points.squeeze())

                #angle_loss = criterion(pre_angles, angles.squeeze())
                #radius_loss = criterion(pre_radius, radius.squeeze())
        
                ma_p+= MAPE(pre_points, points.squeeze()).item()
                #ma_a+= MAPE(pre_angles, angles.squeeze()).item()
                #ma_r+= MAPE(pre_radius, radius.squeeze()).item()
        
                p_loss+=points_loss.item()
                #a_loss+=angle_loss.item()
                #r_loss+=radius_loss.item()
        
                for i in range(imgs.shape[0]):
                    #print(c)
                    #print(img_path[i])
                    ori_from_folder = Image.open(img_path[i]).convert('RGB')
            
                    img, label, angle, w_new, h_new = imgs[i].to('cpu'), labels[i], angles[i], w_news[i], h_news[i]
                    pre_point= pre_points[i].reshape(4,2)
            
                    temp_r=torch.tensor([0,0]).reshape(1,2).to('cuda')
                    pre_label = torch.cat([pre_point, temp_r], dim=0)
           
                    ori_img, ori_label, ori_angle = reverse_image_label(img, label, angle, w_new, h_new)
            
                    ori_img, orisize_pre_label, orisize_pre_angle = reverse_image_label(img, pre_label.cpu(), angle, w_new, h_new)

                    #print(ori_label, orisize_pre_label,  ori_angle, orisize_pre_angle)
            
                    ma_p_ori+=MAPE(orisize_pre_label[:-1,:],ori_label[:-1,:]).item()
                    #ma_r_ori+=MAPE(orisize_pre_label[-1,:],ori_label[-1,:]).item()
                    #print(orisize_pre_angle, ori_angle)
                    #ma_a_ori+=MAPE(orisize_pre_angle, ori_angle).item()

                    angle = [round(i,2) for i in angle.tolist()[0]]
                    #pre_angle=[round(i,2) for i in pre_angle.tolist()]
                    #orisize_pre_angle=[round(i,2) for i in orisize_pre_angle.tolist()]
            
                    all_labels.append(ori_label)
                    all_pre_labels.append(orisize_pre_label.cpu())
                    #all_angles.append(angle)
                    #all_pre_angles.append(pre_angle)
                    all_cal_angles.append(orisize_pre_angle)
                    all_files.append(img_path[i])
                    c+=1
                    
                    if plot_result:    
                        plot_images(ori_from_folder,orisize_pre_label,label_size=10)
                        plt.title(f'{c},{angle} | cal:{orisize_pre_angle}')
                        
            if report_loss:
                print(f'ave_point_loss:{round(p_loss/len(self.test),2)}')
                print(f'mape_point_loss:{round(ma_p/len(self.test)*100,2)}')
            
            
            p1_all, p2_all,p3_all,p4_all=self.extracted(all_labels)
            print(all_pre_labels)
            p1_all_pre, p2_all_pre,p3_all_pre,p4_all_pre= self.extracted(all_pre_labels)
            
            resdf = pd.DataFrame(
                zip(all_files, p1_all, p2_all, p3_all, p4_all, all_angles, r_all,
                    p1_all_pre, p2_all_pre, p3_all_pre, p4_all_pre, all_cal_angles, r_all_pre),
                columns=['file','p1_all', 'p2_all', 'p3_all', 'p4_all','angle_all',
                         'p1_all_pre', 'p2_all_pre', 'p3_all_pre', 'p4_all_pre', 'angle_all_cal']
            )
            resdf = resdf.explode(resdf.columns.tolist()[1:])
        return resdf


In [1]:
class self_def_pred():
    
    def __init__(self, loaded_model, test_loader):
        self.model= loaded_model
        self.test= test_loader
        

    
    def extracted(self, all_labels):
        p1_all, p2_all,p3_all,p4_all,r_all=[],[],[],[],[]
        for i in all_labels:
            p1, p2, p3, p4, r = i
            p1_all.append(p1.numpy())
            p2_all.append(p2.numpy())
            p3_all.append(p3.numpy())
            p4_all.append(p4.numpy())
            r_all.append(r.numpy())
        return p1_all, p2_all,p3_all,p4_all,r_all
        
    def res(self, plot_result=None, report_loss=None):
        c=0.
        p_loss, a_loss, r_loss=0.,0.,0.
        ma_p, ma_a, ma_r=0., 0., 0.
        ma_p_ori, ma_a_ori, ma_r_ori = 0., 0., 0.
        all_labels, all_pre_labels, all_angles, all_pre_angles, all_cal_angles = [], [], [], [],[]
        all_files=[]

        self.model.eval()
        with torch.no_grad():
            for _, data in enumerate(self.test):
                imgs, labels, angles, w_news, h_news, img_path = data
                angles = angles.to('cuda')
                points, radius = labels[:,:-1,:].to('cuda'), labels[:,-1,:].to('cuda')
                points = points.view(points.size(0), -1)
                imgs = imgs.to('cuda')
                pre_points, pre_radius, pre_angles = self.model(imgs)
                points_loss = criterion(pre_points, points.squeeze())

                angle_loss = criterion(pre_angles, angles.squeeze())
                radius_loss = criterion(pre_radius, radius.squeeze())
        
                ma_p+= MAPE(pre_points, points.squeeze()).item()
                ma_a+= MAPE(pre_angles, angles.squeeze()).item()
                ma_r+= MAPE(pre_radius, radius.squeeze()).item()
        
                p_loss+=points_loss.item()
                a_loss+=angle_loss.item()
                r_loss+=radius_loss.item()
        
                for i in range(imgs.shape[0]):
                    #print(c)
                    #print(img_path[i])
                    ori_from_folder = Image.open(img_path[i]).convert('RGB')
            
                    img, label, angle, w_new, h_new = imgs[i].to('cpu'), labels[i], angles[i], w_news[i], h_news[i]
                    pre_point, pre_radiu, pre_angle = pre_points[i].reshape(4,2), pre_radius[i].reshape(1,2), pre_angles[i]
            
                    pre_label = torch.cat([pre_point, pre_radiu], dim=0)
           
                    ori_img, ori_label, ori_angle = reverse_image_label(img, label, angle, w_new, h_new)
            
                    ori_img, orisize_pre_label, orisize_pre_angle = reverse_image_label(img, pre_label.cpu(), pre_angle.cpu(), w_new, h_new)

                    #print(ori_label, orisize_pre_label,  ori_angle, orisize_pre_angle)
            
                    ma_p_ori+=MAPE(orisize_pre_label[:-1,:],ori_label[:-1,:]).item()
                    ma_r_ori+=MAPE(orisize_pre_label[-1,:],ori_label[-1,:]).item()
                    #print(orisize_pre_angle, ori_angle)
                    ma_a_ori+=MAPE(orisize_pre_angle, ori_angle).item()

                    angle = [round(i,2) for i in angle.tolist()[0]]
                    pre_angle=[round(i,2) for i in pre_angle.tolist()]
                    orisize_pre_angle=[round(i,2) for i in orisize_pre_angle.tolist()]
            
                    all_labels.append(ori_label)
                    all_pre_labels.append(orisize_pre_label.cpu())
                    all_angles.append(angle)
                    all_pre_angles.append(pre_angle)
                    all_cal_angles.append(orisize_pre_angle)
                    all_files.append(img_path[i])
                    c+=1
                    
                    if plot_result:    
                        plot_images(ori_from_folder,orisize_pre_label,label_size=10)
                        plt.title(f'{c},{angle} | pre:{pre_angle}, cal:{orisize_pre_angle}')
                        
            if report_loss:
                print(f'ave_point_loss:{round(p_loss/len(self.test),2)}, ave_angle_loss:{round(a_loss/len(self.test),2)},ave_radi_loss:{round(r_loss/len(self.test),2)}')
                print(f'mape_point_loss:{round(ma_p/len(self.test)*100,2)}, mape_angle_loss:{round(ma_a/len(self.test)*100,2)},mape_radi_loss:{round(ma_r/len(self.test)*100,2)}')
            
            
            p1_all, p2_all,p3_all,p4_all,r_all=self.extracted(all_labels)
            p1_all_pre, p2_all_pre,p3_all_pre,p4_all_pre,r_all_pre = self.extracted(all_pre_labels)
            
            resdf = pd.DataFrame(
                zip(all_files, p1_all, p2_all, p3_all, p4_all, r_all, all_angles,
                    p1_all_pre, p2_all_pre, p3_all_pre, p4_all_pre, r_all_pre, all_pre_angles, all_cal_angles),
                columns=['file','p1_all', 'p2_all', 'p3_all', 'p4_all', 'r_all', 'angle_all',
                         'p1_all_pre', 'p2_all_pre', 'p3_all_pre', 'p4_all_pre', 'r_all_pre', 'angle_all_pre', 'angle_all_cal']
            )
            resdf = resdf.explode(resdf.columns.tolist()[1:])
        return resdf


In [2]:
def reverse_image_label(img, label, angles, w_new, h_new): # w_new, h_new are the original size of image
    _, w, h = img.shape
    
    label = label.clone()
    mean = torch.tensor([0.485, 0.456, 0.406])
    std = torch.tensor([0.229, 0.224, 0.225])
    # Undo the normalization
    img = img * std[:, None, None] + mean[:, None, None]
    img_pil = transforms.ToPILImage()(img)
    
    if centercrop_p:
        left_padding = int(centercrop_width/2)
        top_padding = int(centercrop_width/2)
        right_padding = int(centercrop_width/2)
        bottom_padding = int(centercrop_width/2)
        img_pil = ImageOps.expand(img_pil, (left_padding, top_padding, right_padding, bottom_padding), fill=(255, 255, 255))
        # Rescale the locations of four points
        w, h =w+centercrop_width, h+centercrop_width
        label[:-1,0] = w_new/w*(label[:-1,0]+(centercrop_width/2))
        label[:-1,1] = h_new/h*(label[:-1,1]+(centercrop_width/2))
    else:
        # Rescale the locations of four points
        label[:-1,0] = w_new/w*label[:-1,0]
        label[:-1,1] = h_new/h*label[:-1,1]
    
    resize_transform = transforms.Resize((h_new, w_new))  # Use the original image size here
    img_original_size = resize_transform(img_pil)
    

    
    # Radius reverse it back to original radius
    label[-1,:] = label[-1,:]*2/(w/w_new + h/h_new)
    
    # Rescale the angle (used the four points to calculate angles)
    p1,p2,p3,p4,r = label
    p_12 = p2-p1
    p_13 = p3-p1
    p_24 = p4-p2
    p_21 = p1-p2
    cosine_angle_1 = np.dot(p_12, p_13) / (np.linalg.norm(p_12) * np.linalg.norm(p_13))
    cosine_angle_2 = np.dot(p_24, p_21) / (np.linalg.norm(p_24) * np.linalg.norm(p_21))
    angle_1 = np.arccos(cosine_angle_1)
    angle_2 = np.arccos(cosine_angle_2)
    angle_1 = np.degrees(angle_1)
    angle_2 = np.degrees(angle_2)
    angle = torch.as_tensor(np.array([angle_1,angle_2]), dtype=torch.float32) 
    
    return img_original_size, label, angle

In [3]:
def R_square(all_angles,all_cal_angles):
    '''
    all_angles,all_cal_angles are series
    
    '''
    y = np.array(all_angles.tolist())
    mean_y = y.mean(axis=0)
    y_hat =  np.array(all_cal_angles.tolist())
    TSS = np.square(y-mean_y).sum(axis=0)
    RSS = np.square(y-y_hat).sum(axis=0)
    #print('TSS',TSS)
    #print('RSS',RSS)
    return(round(1-(RSS/TSS),3))

In [4]:
def MAPE(pre_y, y):
    #print(y-pre_y,'..........')
    #print(y)
    #print(pre_y)
    
    temp= torch.abs(y-pre_y)/torch.abs(y)
    return torch.mean(temp)

In [None]:
def plot_images(img,label, label_size=1):

    p1, p2, p3, p4, rs = label
    
    circle1 = plt.Circle((p1[0], p1[1]), rs[0], color='r', fill=False)
    circle2 = plt.Circle((p2[0], p2[1]), rs[1], color='r', fill=False)
    p_1 = plt.Circle((p1[0], p1[1]), label_size, color='g', fill=False)
    p_2 = plt.Circle((p2[0], p2[1]), label_size, color='r', fill=False)
    p_3 = plt.Circle((p3[0], p3[1]), label_size, color='b', fill=True)
    p_4 = plt.Circle((p4[0], p4[1]), label_size, color='y', fill=True)
    
    if isinstance(img, torch.Tensor):
        image = np.transpose(img, (1, 2, 0))
    else: image = img

    fig,ax = plt.subplots(figsize=(10,10))
    ax.set_aspect('equal')
    plt.plot([p1[0], p2[0]], [p1[1], p2[1]], color="g", linewidth=1)
    plt.plot([p1[0], p3[0]], [p1[1], p3[1]], color="g", linewidth=1)
    plt.plot([p2[0], p4[0]], [p2[1], p4[1]], color="g", linewidth=1)
    

    # Show the image
    ax.imshow(image)
    ax.add_patch(circle1)
    ax.add_patch(circle2)
    ax.add_patch(p_1)
    ax.add_patch(p_2)
    ax.add_patch(p_3)
    ax.add_patch(p_4)