In [128]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.multiprocessing as mp
from tqdm import tqdm

In [129]:
class CNN(nn.Module):
    """Five-stage convolutional image classifier.

    Each stage applies an unpadded convolution, the activation function and a
    2x2 max-pool. The pooled feature map is flattened and passed through one
    hidden dense layer followed by the output layer, which returns raw logits
    (no softmax is applied).

    Args:
        num_filters: Output channel counts of the five convolutions; the first
            five entries are used.
        kernel_size: Convolution kernel size, either an int or an
            ``(height, width)`` pair.
        num_dense: Width of the hidden dense layer.
        activation: Callable applied after every convolution and after the
            hidden dense layer.
        input_size: Spatial size of the input images, either an int (square
            images) or a ``(height, width)`` pair. It is only used to size the
            first dense layer.
        num_classes: Number of output logits.

    Raises:
        ValueError: If the input is too small to survive five conv/pool stages.
    """

    def __init__(self, num_filters=(32, 64, 128, 256, 512), kernel_size=3, num_dense=256,
                 activation=F.relu, input_size=224, num_classes=10):
        super(CNN, self).__init__()
        self.activation = activation

        self.pool = nn.MaxPool2d(2, 2)

        # Attribute names are kept stable so existing state_dict keys still match.
        self.conv1 = nn.Conv2d(3, num_filters[0], kernel_size)
        self.conv2 = nn.Conv2d(num_filters[0], num_filters[1], kernel_size)
        self.conv3 = nn.Conv2d(num_filters[1], num_filters[2], kernel_size)
        self.conv4 = nn.Conv2d(num_filters[2], num_filters[3], kernel_size)
        self.conv5 = nn.Conv2d(num_filters[3], num_filters[4], kernel_size)

        # Derive the flattened feature size instead of hard-coding 512*5*5, so
        # that non-default filter counts, kernel sizes and image sizes work.
        height, width = self._as_pair(input_size)
        kernel_h, kernel_w = self._as_pair(kernel_size)
        for _ in range(5):
            # Unpadded stride-1 convolution shrinks each side by (kernel - 1);
            # the 2x2 stride-2 max-pool then halves it, rounding down.
            height = (height - kernel_h + 1) // 2
            width = (width - kernel_w + 1) // 2
        if height < 1 or width < 1:
            raise ValueError(
                f"input_size={input_size} is too small for five conv/pool stages "
                f"with kernel_size={kernel_size}"
            )

        self.fc1 = nn.Linear(num_filters[4] * height * width, num_dense)
        self.fc3 = nn.Linear(num_dense, num_classes)

    @staticmethod
    def _as_pair(value):
        """Return ``value`` as a 2-tuple, duplicating it when it is a single int."""
        return (value, value) if isinstance(value, int) else tuple(value)

    def forward(self, x):
        """Return the class logits for a batch of images shaped (N, 3, H, W)."""
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = self.pool(self.activation(conv(x)))

        # Keep the batch dimension and flatten everything else for the dense layers.
        x = self.activation(self.fc1(torch.flatten(x, 1)))
        return self.fc3(x)


model = CNN()

In [130]:
# Preprocessing shared by the training and validation splits: resize to a
# fixed 224x224 resolution, convert to a tensor, then shift and scale every
# channel with mean 0.5 and std 0.5.
_preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
]
transform = transforms.Compose(_preprocessing_steps)

# Dataset locations, relative to the notebook's working directory.
train_dir = "../inaturalist_12k/train"
val_dir = "../inaturalist_12k/val"

# Build both splits with the same preprocessing pipeline.
train_dataset, val_dataset = (
    datasets.ImageFolder(root=split_dir, transform=transform)
    for split_dir in (train_dir, val_dir)
)

# Check class mapping (optional)
# print("Class names:", train_dataset.classes)  # List of class names
# print("Class indices:", train_dataset.class_to_idx)  # Mapping class → index

# Choose the compute device in order of preference: CUDA, then Apple MPS,
# then CPU. To force CPU, reassign `device = torch.device("cpu")` afterwards.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: mps


In [131]:
# Training hyperparameters.
learning_rate = 1e-4
batch_size = 64
max_epochs = 5

# Pinned (page-locked) host memory only speeds up host-to-CUDA copies. On MPS
# or CPU it brings no benefit, and recent PyTorch versions emit a warning, so
# it is enabled only when the selected device is CUDA.
use_pinned_memory = device.type == "cuda"

# The training split is reshuffled every epoch; validation order stays fixed.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    pin_memory=use_pinned_memory,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
    pin_memory=use_pinned_memory,
)

In [132]:
# Cross-entropy objective, applied to the model's raw output logits.
loss_fn = nn.CrossEntropyLoss()

# Adam optimizer over every model parameter, using the configured step size.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Move the parameters to the selected device before the first batch.
model.to(device)

for epoch in tqdm(range(max_epochs)):
    # Setting training mode once per epoch is enough; it does not change
    # between batches.
    model.train()
    running_loss = 0.0
    for step, (images, labels) in enumerate(train_loader, start=1):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        logits = model(images)
        loss = loss_fn(logits, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Report the mean loss of the last 10 batches, then start a new window.
        if step % 10 == 0:
            print(f'[{epoch + 1}, {step:5d}] loss: {running_loss / 10:.3f}')
            running_loss = 0.0

  0%|                                                            | 0/5 [00:00<?, ?it/s]

[1,    10] loss: 2.301
[1,    20] loss: 2.302
[1,    30] loss: 2.299
[1,    40] loss: 2.294
[1,    50] loss: 2.275
[1,    60] loss: 2.289
[1,    70] loss: 2.256
[1,    80] loss: 2.237
[1,    90] loss: 2.224
[1,   100] loss: 2.197
[1,   110] loss: 2.218
[1,   120] loss: 2.198
[1,   130] loss: 2.179
[1,   140] loss: 2.165
[1,   150] loss: 2.208


 20%|██████████▍                                         | 1/5 [00:51<03:26, 51.71s/it]

In [None]:
# Measure top-1 accuracy on the validation split.
# Switch to inference mode so that layers such as dropout or batch norm, if
# any are added to the model later, behave deterministically.
model.eval()

correct = 0
total = 0
# No gradients are needed for evaluation, which saves memory and time.
with torch.no_grad():
    for images, labels in val_loader:
        # Batches must live on the same device as the model; otherwise the
        # forward pass fails with a device-mismatch error.
        images, labels = images.to(device), labels.to(device)
        logits = model(images)
        # The predicted class is the index of the largest logit.
        predicted = logits.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

if total == 0:
    # Guard against dividing by zero when the loader yields nothing.
    print('Validation loader produced no samples; accuracy is undefined.')
else:
    print(f'Accuracy of the network on the {total} validation images: {100 * correct / total:.2f} %')