In [1]:
import pandas as pd 
def count_breeds(csv_file):
    df = pd.read_csv(csv_file)
    df = df.drop(columns = ['filepath'])
    df = df.drop(columns = ['class'])
    total = df.sum().astype(int)
    return total

csv_file = "/home/phucdz/Python_project/basic_CNN/data/train/_classes.csv"
print(count_breeds(csv_file))

antelope      48
badger        48
bat           48
bear          48
bee           48
              ..
whale         48
wolf          48
wombat        48
woodpecker    48
zebra         48
Length: 92, dtype: int64


In [2]:
import torch.nn as nn
import torch
class CNN(nn.Module):
    def __init__(self, numclasses):
        super().__init__()
        self.conv1 = self.block(1, 16)
        self.conv2 = self.block(16, 32)
        self.conv3 = self.block(32, 64)
        self.conv4 = self.block(64,128)
        self.conv5 = self.block(128, 128)

        self.fcl1 = nn.Sequential(
            # nn.dropout(p=0.5) p is the probability of an element to be zeroed
            nn.Linear(in_features= 6272, out_features=512),
            nn.ReLU()
        )
        self.fcl2 = nn.Sequential(
            nn.Linear(in_features= 512, out_features=256),
            nn.ReLU()
        )

        self.fcl3 = nn.Linear(in_features= 256, out_features=numclasses)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)

        b, c, h, w = x.shape    # batch, channel, height, width
        x= x.view(b, -1)        # flatten the tensor     

        x = self.fcl1(x)
        x = self.fcl2(x)
        x = self.fcl3(x)

        return x
        
    def block ( self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding),
            nn.BatchNorm2d(num_features=out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size, stride, padding),
            nn.BatchNorm2d(num_features=out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2) # 2x2 max pooling and stride = kernel_size
        )


Check torch version and GPU

In [9]:
import torch
print(torch.__version__)  # Check PyTorch version
print(torch.cuda.is_available())  # Should return True
print(torch.cuda.get_device_name(0))  # Should display your GPU name


2.6.0+cu118
True
NVIDIA GeForce RTX 4050 Laptop GPU


Split data

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN(10).to(device)
# check if the model is on the device
print(next(model.parameters()).is_cuda)

True


In [4]:
# Define your dataset first
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)

# Then create a DataLoader using the defined dataset
from torch.utils.data import DataLoader

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)


In [5]:
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)

In [6]:
import torch.optim as optim 
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [7]:
def train(model, device, train_loader, optimizer, criterion, epoch):
    """
    Train the model for one epoch.

    Args:
        model (torch.nn.Module): The CNN model.
        device (torch.device): Device to run the model on (e.g., "cuda" or "cpu").
        train_loader (DataLoader): DataLoader for the training data.
        optimizer (torch.optim.Optimizer): Optimizer for updating model weights.
        criterion (torch.nn.Module): Loss function.
        epoch (int): Current epoch number (for logging purposes).

    Returns:
        tuple: (epoch_loss, epoch_accuracy)
    """
    model.train()  # Set model to training mode
    running_loss = 0.0
    correct = 0
    total = 0

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        # Zero the gradients
        optimizer.zero_grad()
        # Forward pass
        outputs = model(data)
        # Compute loss
        loss = criterion(outputs, target)
        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Update running loss and accuracy
        running_loss += loss.item() * data.size(0)
        _, preds = torch.max(outputs, 1)
        total += target.size(0)
        correct += (preds == target).sum().item()

    epoch_loss = running_loss / total
    epoch_acc = correct / total

    print(f"Train Epoch {epoch}: Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")
    return epoch_loss, epoch_acc


In [8]:
def evaluate(model, device, test_loader, criterion):
    model.eval()  # Set model to evaluation mode
    running_loss = 0.0
    correct = 0
    total = 0

    # Disable gradient calculation for evaluation
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            outputs = model(data)
            loss = criterion(outputs, target)

            running_loss += loss.item() * data.size(0)
            _, preds = torch.max(outputs, 1)
            total += target.size(0)
            correct += (preds == target).sum().item()

    test_loss = running_loss / total
    test_acc = correct / total

    print(f"Test: Loss: {test_loss:.4f}, Accuracy: {test_acc:.4f}")
    return test_loss, test_acc

In [35]:
if __name__ == "__main__":
    model = CNN(10)
    model = model.to(device)
    num_epochs = 20
    for epoch in range(num_epochs):
        train_loss, train_acc = train(model, device, train_loader, optimizer, criterion, epoch)
        test_loss, test_acc = evaluate(model, device, test_loader, criterion)
        print()  # Add an empty line for better output readability
    

RuntimeError: Given input size: (128x1x1). Calculated output size: (128x0x0). Output size is too small