In [None]:
!pip install torch torchvision opencv-python numpy

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# p-norm squeeze-and-excitation (SE) module
class PNormSE(nn.Module):
    """Channel gate driven by average- and max-pooled channel statistics.

    Every channel of the input is summarised twice -- by its spatial mean and
    by its spatial maximum (the original comments describe these as the
    1-norm and infinity-norm squeeze).  The two summaries are concatenated
    and passed through a two-layer 1x1-conv bottleneck ending in a sigmoid,
    producing one weight in (0, 1) per channel that rescales the input.

    Args:
        channels: Number of channels of the input (and output) feature map.
        reduction: Bottleneck ratio.  The hidden width is
            ``channels // reduction``, clamped to at least 1.
    """

    def __init__(self, channels, reduction=4):
        super().__init__()
        # Without the clamp, channels < reduction would create a zero-width
        # hidden layer and the gate would carry no information.
        hidden = max(1, channels // reduction)
        # Excitation step: bottleneck over the concatenated (avg, max) vector.
        self.fc = nn.Sequential(
            nn.Conv2d(channels * 2, hidden, kernel_size=1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` scaled channel-wise by the learned gate.

        Args:
            x: Feature map of shape (N, channels, H, W).

        Returns:
            Tensor with the same shape as ``x``.
        """
        # Squeeze step: global average and global max, each (N, channels, 1, 1).
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        # Concatenate along channels -> (N, 2 * channels, 1, 1), then excite.
        gate = self.fc(torch.cat([avg_pool, max_pool], dim=1))

        # The (N, channels, 1, 1) gate broadcasts over the spatial dimensions.
        return x * gate

# Integration Block (IB): residual block gated by the p-norm SE module
class IntegrationBlock(nn.Module):
    """Two-conv residual block whose main branch is re-weighted by PNormSE.

    The main branch is conv3x3 -> BN -> ReLU -> conv3x3 -> BN followed by the
    PNormSE channel gate.  The skip path is an empty ``nn.Sequential`` (a
    pass-through) unless the block changes resolution or width, in which case
    it is a strided 1x1 conv + BN projection.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by the block.
        stride: Stride of the first 3x3 conv (and of the projection, if any).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        main_layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
        ]
        self.residual_branch = nn.Sequential(*main_layers)
        self.p_norm_se = PNormSE(out_channels)

        shape_preserved = stride == 1 and in_channels == out_channels
        if shape_preserved:
            # Nothing to match up: an empty Sequential returns its input.
            self.shortcut = nn.Sequential()
        else:
            # Project the input so it can be added to the main branch.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        """Return ``relu(gated_main_branch(x) + shortcut(x))``."""
        gated = self.p_norm_se(self.residual_branch(x))
        skip = self.shortcut(x)
        return F.relu(gated + skip)

# VertexNet Model
class VertexNet(nn.Module):
    """Anchor-based detector predicting class scores, box and vertex offsets.

    Structure, as built below:

    * Backbone: a 7x7 stride-2 stem followed by five stages of two
      ``IntegrationBlock`` s each; every stage halves the resolution, all at
      128 channels.
    * Fusion: a top-down path that upsamples the coarser map, concatenates it
      with the lateral backbone map and mixes the pair with a 1x1 conv.
    * Head: two 3x3 conv heads; ``shared_head_A`` is applied to the two finer
      levels (P3, P4) and ``shared_head_B`` to the two coarser ones (P5, C6).

    Args:
        num_anchors: Anchors predicted per spatial location.
        num_classes: Number of class scores per anchor.
    """

    def __init__(self, num_anchors=9, num_classes=2):
        super().__init__()

        # Backbone network
        self.stage1 = nn.Sequential(
            nn.Conv2d(3, 128, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True)
        )
        self.stage2 = self._make_stage(128, 128, num_blocks=2, stride=2)
        self.stage3 = self._make_stage(128, 128, num_blocks=2, stride=2)
        self.stage4 = self._make_stage(128, 128, num_blocks=2, stride=2)
        self.stage5 = self._make_stage(128, 128, num_blocks=2, stride=2)
        self.stage6 = self._make_stage(128, 128, num_blocks=2, stride=2)

        # Fusion network (FPN-like)
        self.p5_fusion = self._make_fusion_layer(128, 128)
        self.p4_fusion = self._make_fusion_layer(128, 128)
        self.p3_fusion = self._make_fusion_layer(128, 128)

        # Head network: per anchor, `num_classes` scores, 4 box offsets and
        # 8 vertex offsets (x, y for each of the 4 corners).
        output_channels = num_anchors * (num_classes + 4 + 8)
        self.shared_head_A = nn.Conv2d(128, output_channels, kernel_size=3, padding=1)
        self.shared_head_B = nn.Conv2d(128, output_channels, kernel_size=3, padding=1)

    def _make_stage(self, in_channels, out_channels, num_blocks, stride):
        """Stack ``num_blocks`` IntegrationBlocks; only the first one strides."""
        layers = [IntegrationBlock(in_channels, out_channels, stride)]
        for _ in range(1, num_blocks):
            layers.append(IntegrationBlock(out_channels, out_channels))
        return nn.Sequential(*layers)

    def _make_fusion_layer(self, ch1, ch2):
        """1x1 conv + BN + ReLU mixing a ``ch1 + ch2`` concat down to 128 channels."""
        return nn.Sequential(
            nn.Conv2d(ch1 + ch2, 128, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True)
        )

    @staticmethod
    def _upsample_like(src, ref):
        """Bilinearly resize ``src`` to the spatial size of ``ref``.

        Resizing to the lateral map's exact size (instead of a fixed x2)
        keeps the concat valid when a stride-2 stage rounded an odd size up,
        i.e. for inputs whose sides are not multiples of 64.  When the sizes
        are exact doubles this is the same operation as ``scale_factor=2``.
        """
        return F.interpolate(src, size=ref.shape[-2:], mode='bilinear', align_corners=False)

    def forward(self, x):
        """Run detection on a batch of images.

        Args:
            x: Image batch of shape (N, 3, H, W).

        Returns:
            ``[pred_p3, pred_p4, pred_p5, pred_c6]`` -- four prediction maps
            ordered fine to coarse, each with
            ``num_anchors * (num_classes + 12)`` channels.
        """
        # Backbone
        c1 = self.stage1(x)
        c2 = self.stage2(c1)
        c3 = self.stage3(c2)  # lateral input for P3
        c4 = self.stage4(c3)  # lateral input for P4
        c5 = self.stage5(c4)  # lateral input for P5
        c6 = self.stage6(c5)  # coarsest level, fed to the head directly

        # Top-down fusion
        p5 = self.p5_fusion(torch.cat([c5, self._upsample_like(c6, c5)], dim=1))
        p4 = self.p4_fusion(torch.cat([c4, self._upsample_like(p5, c4)], dim=1))
        p3 = self.p3_fusion(torch.cat([c3, self._upsample_like(p4, c3)], dim=1))

        # Head
        pred_p3 = self.shared_head_A(p3)
        pred_p4 = self.shared_head_A(p4)
        pred_p5 = self.shared_head_B(p5)
        pred_c6 = self.shared_head_B(c6)

        return [pred_p3, pred_p4, pred_p5, pred_c6]

In [None]:
# A simplified residual block for SCR-Net
class SCRResBlock(nn.Module):
    """Residual block built from two 1x3 convolutions.

    The kernels span three columns and a single row, with matching padding,
    so the block preserves both the channel count and the spatial size of
    its input.

    Args:
        channels: Channel count of the input and output feature map.
    """

    def __init__(self, channels):
        super().__init__()
        # Shared settings for both convolutions: 1 row x 3 columns, padded so
        # the width is preserved, no bias because BatchNorm follows.
        row_conv = dict(kernel_size=(1, 3), padding=(0, 1), bias=False)
        self.conv = nn.Sequential(
            nn.Conv2d(channels, channels, **row_conv),
            nn.BatchNorm2d(channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels, channels, **row_conv),
            nn.BatchNorm2d(channels),
        )

    def forward(self, x):
        """Return ``relu(x + conv(x))``."""
        update = self.conv(x)
        return F.relu(x + update)

class SCRNet(nn.Module):
    """Plate-recognition network: 2-D feature stages, a height squeeze, then
    1-D (row) residual processing and a single linear classifier.

    Shapes noted in the comments assume a 64x256 (H x W) input, which is the
    size the feature-map annotations in this file are written for.

    Args:
        num_outputs: Size of the classifier output.  How that vector is split
            into per-character predictions is up to the caller.
    """

    def __init__(self, num_outputs):
        super().__init__()

        self.s_conv1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True)
        )
        self.s_stage1 = self._make_scr_stage(16, 93, num_blocks=4)
        self.s_stage2 = self._make_scr_stage(93, 176, num_blocks=4, stride=2)
        self.s_stage3 = self._make_scr_stage(176, 256, num_blocks=4, stride=2)

        # Horizontal encoding
        self.s_conv2 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            # Squeeze the feature map from 8x32 to 1x32: the kernel covers
            # the full height of 8 and a single column.
            nn.Conv2d(256, 256, kernel_size=(8, 1), stride=(1, 1), padding=0, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True)
        )

        self.s_stage4 = nn.Sequential(
            SCRResBlock(256), SCRResBlock(256),
            nn.Conv2d(256, 256, kernel_size=(1, 3), padding=(0, 1)),
            nn.BatchNorm2d(256), nn.ReLU(inplace=True),
            SCRResBlock(256), SCRResBlock(256),
            nn.Conv2d(256, 256, kernel_size=(1, 3), padding=(0, 1)),
            nn.BatchNorm2d(256), nn.ReLU(inplace=True)
        )

        # Simplification kept from the original notebook: a single linear
        # layer over the flattened 256 x 1 x 32 map instead of several
        # weight-sharing classifiers.  The output size depends on the
        # dataset's character set and plate length.
        self.classifier = nn.Linear(256 * 32, num_outputs)

    def _make_scr_stage(self, in_channels, out_channels, num_blocks, stride=1):
        """Build one simplified feature stage.

        The stage is ``num_blocks`` 3x3 convs at ``in_channels`` followed by a
        transition conv that changes the width to ``out_channels`` and applies
        ``stride``.  The transition is added whenever the stride OR the channel
        count changes; previously it was skipped for ``stride == 1``, leaving a
        stage such as 16 -> 93 stuck at 16 channels.

        NOTE(review): the convs are stacked without normalisation or
        activation in between, exactly as in the original simplified stage.
        """
        layers = [
            nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1)
            for _ in range(num_blocks)
        ]

        if stride != 1 or in_channels != out_channels:
            layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Map a batch of rectified plates to classifier outputs.

        Args:
            x: Image batch of shape (N, 3, H, W); nominally H=64, W=256.

        Returns:
            Tensor of shape (N, num_outputs).
        """
        x = self.s_conv1(x)   # 16 ch, 32x128
        x = self.s_stage1(x)  # 93 ch, 32x128
        x = self.s_stage2(x)  # 176 ch, 16x64
        x = self.s_stage3(x)  # 256 ch, 8x32

        # s_conv2 needs height 8 and the classifier needs width 32.  For the
        # nominal input this pooling is an identity; for other input sizes it
        # resamples the map to the required grid instead of failing later.
        x = F.adaptive_avg_pool2d(x, (8, 32))

        x = self.s_conv2(x)   # 256 ch, 1x32
        x = self.s_stage4(x)  # 256 ch, 1x32

        x = x.view(x.size(0), -1)  # Flatten for classifier
        x = self.classifier(x)

        return x

In [None]:
import cv2
import numpy as np

def rectify_plate(image, vertices, output_size=(256, 64)):
    """Warp a quadrilateral plate region to an axis-aligned rectangle.

    Args:
        image: The original (ideally full-resolution) image to sample from.
        vertices: The four plate corners as (x, y) pairs in ``image``
            coordinates -- any array-like holding exactly 8 numbers, e.g. a
            4x2 array.  The corners MUST be ordered top-left, top-right,
            bottom-right, bottom-left; this function does not reorder them,
            so a different order yields a flipped or twisted result.
        output_size: ``(width, height)`` of the rectified image.

    Returns:
        The rectified plate image of size ``output_size`` (256x64 by default).

    Raises:
        ValueError: If ``vertices`` does not contain exactly four 2-D points.
    """
    # reshape() rejects anything that is not four (x, y) pairs up front,
    # instead of surfacing later as an opaque OpenCV assertion.
    src_pts = np.ascontiguousarray(vertices, dtype=np.float32).reshape(4, 2)

    out_w, out_h = int(output_size[0]), int(output_size[1])

    # Destination corners, in the same TL, TR, BR, BL order as the source.
    dst_pts = np.array(
        [[0, 0], [out_w - 1, 0], [out_w - 1, out_h - 1], [0, out_h - 1]],
        dtype=np.float32,
    )

    # Get the perspective transformation matrix and apply it.
    transform_matrix = cv2.getPerspectiveTransform(src_pts, dst_pts)
    return cv2.warpPerspective(image, transform_matrix, (out_w, out_h))

def run_inference(image_path, vertexnet_model, scrnet_model, show=True):
    """Run the detect -> rectify -> recognise pipeline on a single image.

    Two steps are still placeholders: the VertexNet output is not decoded
    (fixed example vertices are used instead) and the SCR-Net output is not
    decoded (a fixed placeholder string is reported).

    Both models are switched to eval mode as a side effect.

    Args:
        image_path: Path of the image to process.
        vertexnet_model: Detection model, called on a (1, 3, 256, 256) tensor.
        scrnet_model: Recognition model, called on the rectified plate tensor.
        show: If true (the default), display the original image and the
            rectified plate in OpenCV windows and wait for a key press.  Pass
            ``False`` in headless or notebook sessions where no GUI window
            can be opened.

    Returns:
        The plate text, or ``None`` if the image could not be loaded.
    """
    original_image = cv2.imread(image_path)
    if original_image is None:
        print("Error: Could not load image.")
        return None

    # 1. Resize for VertexNet
    h, w = original_image.shape[:2]
    resized_image = cv2.resize(original_image, (256, 256))
    # NOTE(review): cv2.imread returns BGR channel order -- confirm the
    # models were trained on BGR input, otherwise convert to RGB here.
    input_tensor = torch.from_numpy(resized_image.transpose(2, 0, 1)).float().unsqueeze(0) / 255.0

    # 2. VertexNet prediction
    vertexnet_model.eval()
    with torch.no_grad():
        predictions = vertexnet_model(input_tensor)
        # TODO: decode `predictions` (anchor decoding + NMS) and pick the
        # best box and vertices.  Until then, use example vertices given in
        # the 256x256 network-input coordinate frame.
        norm_vertices = np.array([[50, 100], [150, 100], [150, 130], [50, 130]])

    # 3. Resample and rectify: scale the vertices back to the original image
    # so the plate is cropped at full resolution.
    original_vertices = norm_vertices * np.array([w / 256.0, h / 256.0])
    rectified_lp = rectify_plate(original_image, original_vertices)

    # Prepare for SCR-Net
    lp_tensor = torch.from_numpy(rectified_lp.transpose(2, 0, 1)).float().unsqueeze(0) / 255.0

    # 4. SCR-Net recognition
    scrnet_model.eval()
    with torch.no_grad():
        char_predictions = scrnet_model(lp_tensor)
        # TODO: decode `char_predictions` into a string, e.g. by taking the
        # argmax for each character position.
        plate_text = "DECODED_PLATE"  # Placeholder

    print(f"Detected License Plate: {plate_text}")

    if show:
        cv2.imshow("Original Image", original_image)
        cv2.imshow("Rectified LP", rectified_lp)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return plate_text

# Example usage (requires trained model weights)
# vertexnet = VertexNet()
# vertexnet.load_state_dict(torch.load('vertexnet.pth'))
# scrnet = SCRNet(num_outputs=234) # 234 for CCPD dataset [cite: 364]
# scrnet.load_state_dict(torch.load('scrnet.pth'))
# run_inference('path/to/your/car_image.jpg', vertexnet, scrnet)