In [1]:
from torch import optim
import torch as th
from torch import nn
from dataclasses import dataclass
import os
from src.constants import *
from src.utils.common import *
os.chdir("../")


In [23]:
@dataclass
class VanillaModelConfig:
    image_height: int
    image_width: int
    layer_1: dict
    layer_2: dict

class ConfigurationManager:
    def __init__(self):
        self.config = read_yaml(CONFIG_FILE_PATH)
        self.params=read_yaml(PARAMS_FILE_PATH)

        # Architecture params
        self.vanilla=read_yaml(VANILLA_FILE_PATH) # get vanilla params

    def get_vanilla_architecture_params(self) -> VanillaModelConfig:
        params=VanillaModelConfig(
            # Getting params from params.yaml
            image_height=self.params.image_params.height,
            image_width=self.params.image_params.width,

            # Getting vanilla params from vanilla_params.yaml
            layer_1=self.vanilla.layers.first, # access vanilla architecture layer 1 params
            layer_2=self.vanilla.layers.second # access vanilla architecture layer 2 params

        )
        return params

class Model(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config=config

        # conv layer
        self.conv_layer = nn.Sequential(
            nn.Conv2d(**self.config.layer_1), # passing layer 1 params as dict
            nn.ReLU(),
            nn.BatchNorm2d(self.config.layer_1.out_channels), # passing output channels
            nn.Conv2d(**self.config.layer_2),
            nn.ReLU(),
            nn.BatchNorm2d(self.config.layer_2.out_channels)
        )

        self.flatten = nn.Flatten()
        self.f1 = nn.Linear(self.get_flattened_size(self.config.image_height, 
                                                    self.config.image_width), 128)
        self.f2 = nn.Linear(128, 8)
        
    def get_flattened_size(self, image_height, image_width):
        # Create a dummy input with batch size 1 and the correct number of input channels
        dummy_input = th.randn(1, self.config.layer_1.in_channels, 
                               image_height, image_width)
        
        # Pass the dummy input through the conv layers
        conv_output = self.conv_layer(dummy_input)
        # Flatten the conv output and calculate its size
        # flattened_size = len(conv_output.view(conv_output.shape[0], -1)[0])
        flattened_size = conv_output.view(1, -1).numel()
        return flattened_size

    def forward(self, x):
        x = self.conv_layer(x)
        x = self.flatten(x)
        x = self.f1(x)
        x = nn.ReLU()(x)
        x = self.f2(x)
        x = nn.ReLU()(x)
        return x
    


In [24]:
if __name__ == "__main__":
    config_manager = ConfigurationManager()
    config_params = config_manager.get_vanilla_architecture_params()

    model = Model(config_params)
    

2025-01-14 20:39:03,825 - root - INFO - Yaml read successfully from config/config.yaml
2025-01-14 20:39:03,829 - root - INFO - Yaml read successfully from params.yaml
2025-01-14 20:39:03,831 - root - INFO - Yaml read successfully from src/models/vanilla/vanilla_params.yaml


In [25]:
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
print(model)

Model(
  (conv_layer): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (f1): Linear(in_features=614400, out_features=128, bias=True)
  (f2): Linear(in_features=128, out_features=8, bias=True)
)


In [26]:
from torchvision import transforms as transforms

transform = transforms.Compose([
    # transforms.Grayscale(num_output_channels=model_params.num_output_channels),
    transforms.Resize([100,96]),  # Resize images to match model input size
    transforms.RandomHorizontalFlip(),  # Data augmentation
    transforms.RandomRotation(10),  # Randomly rotate images
    transforms.ToTensor(),  # Convert image to tensor
    transforms.Normalize(mean=0, std=1)  # Normalize with mean and std for grayscale images
])

In [27]:
from torchvision.datasets import ImageFolder
train_path = "data/processed/train/"

train_dataset = ImageFolder(train_path, transform=transform)

In [28]:
from torch.utils.data import DataLoader
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=4
)

In [29]:

# Training loop
for epoch in range(2):
    model.train()
    
    for images, labels in train_loader:
        # Move images and labels to device (GPU/CPU)
        images, labels = images.to('cpu'), labels.to('cpu')

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{2}], Loss: {loss.item():.4f}')

TypeError: linear(): argument 'input' (position 1) must be Tensor, not ReLU