In [59]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.multiprocessing as mp

In [78]:
class CNN(nn.Module):
    def __init__(self, num_filters=[32, 64, 128, 256, 512], kernel_size=3, num_dense=256, activation=F.relu):
        super(CNN, self).__init__()
        self.activation = activation  # Set activation function

        self.pool = nn.MaxPool2d(2, 2)

        self.conv1 = nn.Conv2d(3, num_filters[0], kernel_size)
        self.conv2 = nn.Conv2d(num_filters[0], num_filters[1], kernel_size)
        self.conv3 = nn.Conv2d(num_filters[1], num_filters[2], kernel_size)
        self.conv4 = nn.Conv2d(num_filters[2], num_filters[3], kernel_size)
        self.conv5 = nn.Conv2d(num_filters[3], num_filters[4], kernel_size)

        self.fc1 = nn.Linear(512*5*5 ,512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256,10)

    def forward(self, x):
        x = self.pool(self.activation(self.conv1(x)))
        x = self.pool(self.activation(self.conv2(x)))
        x = self.pool(self.activation(self.conv3(x)))
        x = self.pool(self.activation(self.conv4(x)))
        x = self.pool(self.activation(self.conv5(x)))

        x = self.activation(self.fc1(torch.flatten(x, 1)))
        x = self.activation(self.fc2(x))
        x = self.fc3(x)

        return x
        
model = CNN()

In [80]:
test_input = torch.randn(32,3,224,224)

y = model.forward(test_input)
print(y.shape)

torch.Size([32, 10])


In [81]:
transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize images to 224x224 (change if needed)
        transforms.ToTensor(),          # Convert images to PyTorch tensors
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize
    ])

# Define dataset paths
train_dir = "../inaturalist_12k/train"
val_dir = "../inaturalist_12k/val"

# Load datasets
train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
val_dataset = datasets.ImageFolder(root=val_dir, transform=transform)

# Check class mapping (optional)
# print("Class names:", train_dataset.classes)  # List of class names
# print("Class indices:", train_dataset.class_to_idx)  # Mapping class → index

# device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
device = torch.device('cpu')
print(f"Using device: {device}")

Using device: cpu


In [82]:
learning_rate = 1e-3
batch_size = 64
epochs = 5

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=1)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=1)

In [91]:
loss_fn = nn.CrossEntropyLoss() # Loss function
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # Optimizer

In [92]:
losses = []
for images, labels in train_loader:
    optimizer.zero_grad()
    pred_labels = model.forward(images)
    loss = loss_fn(labels, pred_labels)
    loss.backward()
    optimizer.step()
    losses.append(loss)
    break

RuntimeError: "log_softmax_lastdim_kernel_impl" not implemented for 'Long'

torch.float32
torch.Size([32, 3, 224, 224])
