In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchviz import make_dot

class OCRNet(nn.Module):
    def __init__(self, num_classes):
        super(OCRNet, self).__init__()
        
        # CNN Feature Extractor
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d((2, 1)),
            
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d((2, 1)),
            
            nn.Conv2d(512, 512, kernel_size=2, stride=1, padding=0),
            nn.ReLU()
        )
        
        # Recurrent Layer (LSTM)
        self.lstm = nn.LSTM(512, 256, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.cnn(x)
        x = x.squeeze(2)  # Remove height dimension
        x = x.permute(0, 2, 1)  # Reshape for LSTM [batch, width, features]
        x, _ = self.lstm(x)
        x = self.fc(x)  # Map to character classes
        return x

# Example Usage
num_classes = 128  # Adjust based on character set (ASCII, Unicode, etc.)
model = OCRNet(num_classes)
dummy_input = torch.randn(1, 1, 32, 128)  # Batch size 1, grayscale image (1 channel), height 32, width 128
output = model(dummy_input)

# Visualize the model structure with high-quality SVG
make_dot(output, params=dict(model.named_parameters())).render("OCRNet", format="svg")

'OCRNet.svg'

In [7]:
import torch
from torchviz import make_dot
from torch import nn
import torchvision.models as models

class CRNN(nn.Module):
    def __init__(self, num_classes=37):  # 26 letters + 10 digits + blank
        super(CRNN, self).__init__()

        # CNN Feature Extractor (ResNet18 without the last FC layer)
        resnet = models.resnet18(pretrained=True)
        self.cnn = nn.Sequential(*list(resnet.children())[:-2])  # Remove last pooling & FC

        # LSTM for Sequence Modeling
        self.rnn = nn.LSTM(input_size=512, hidden_size=256, num_layers=2, 
                           bidirectional=True, batch_first=True)

        # Fully Connected Layer (Mapping to Character Classes)
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        batch_size = x.size(0)

        # Extract Features from CNN
        x = self.cnn(x)  # Shape: (B, C, H, W)

        # Reshape for LSTM: (batch, time_steps, features)
        x = x.permute(0, 2, 3, 1).contiguous()  # (B, H, W, C) → (B, W, H, C)
        x = x.view(batch_size, x.size(1), -1)  # Flatten height and channels
        
        # LSTM Sequence Modeling
        x, _ = self.rnn(x)

        # Output Predictions
        x = self.fc(x)  # Shape: (B, W, num_classes)
        return x

# Example usage
num_classes = 128
model = CRNN(num_classes)
dummy_input = torch.randn(1, 1, 32, 128)  # Batch size 1, grayscale image (1 channel), height 32, width 128
output = model(dummy_input)
make_dot(output, params=dict(model.named_parameters())).render("CRNN", format="svg")



RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[1, 1, 32, 128] to have 3 channels, but got 1 channels instead

In [4]:
import torch
from models.nn_CHAR_RECOG import OCRNet
from torchviz import make_dot
from torchsummary import summary

def debug_model():
    # Initialize model
    num_classes = 128
    model = OCRNet(num_classes)
    
    # Create dummy input (batch_size, channels, height, width)
    x = torch.randn(2, 1, 32, 128)
    
    # Track shapes through the network
    print("\n=== Shape Analysis ===")
    with torch.no_grad():
        # CNN forward pass
        cnn_out = model.cnn(x)
        print(f"Input shape: {x.shape}")
        print(f"After CNN shape: {cnn_out.shape}")
        
        # Squeeze operation
        squeezed = cnn_out.squeeze(2)
        print(f"After squeeze shape: {squeezed.shape}")
        
        # Permute operation
        permuted = squeezed.permute(0, 2, 1)
        print(f"After permute shape: {permuted.shape}")
        
        # LSTM forward pass
        lstm_out, _ = model.lstm(permuted)
        print(f"After LSTM shape: {lstm_out.shape}")
        
        # Final linear layer
        output = model.fc(lstm_out)
        print(f"Final output shape: {output.shape}")
    
    # Visualize model architecture
    dot = make_dot(output, params=dict(model.named_parameters()))
    dot.render("model_architecture", format="png")
    
    # Print model summary
    print("\n=== Model Summary ===")
    summary(model, input_size=(1, 32, 128))

if __name__ == "__main__":
    debug_model()

ModuleNotFoundError: No module named 'models'