In [34]:
!git clone https://github.com/jgbeni/BirdPics.git
!mv BirdPics/utils .
!rm -r BirdPics
!ls -l

Cloning into 'BirdPics'...
remote: Enumerating objects: 62, done.[K
remote: Counting objects: 100% (62/62), done.[K
remote: Compressing objects: 100% (51/51), done.[K
remote: Total 62 (delta 20), reused 38 (delta 7), pack-reused 0 (from 0)[K
Receiving objects: 100% (62/62), 6.79 MiB | 13.43 MiB/s, done.
Resolving deltas: 100% (20/20), done.
mv: cannot move 'BirdPics/utils' to './utils': Directory not empty
total 12
drwx------ 5 root root 4096 Nov  6 11:28 drive
drwxr-xr-x 1 root root 4096 Nov  4 14:36 sample_data
drwxr-xr-x 3 root root 4096 Nov  6 11:27 utils


In [35]:
import utils.data_preprocessing as dp
import numpy as np
import h5py
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
from tqdm import tqdm
from google.colab import drive

# Mount Google Drive under /content/drive (forcing a fresh mount if one exists).
drive.mount('/content/drive', force_remount=True)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

Mounted at /content/drive


In [36]:
# Root folder of the project on Google Drive.
# NOTE(review): `dir` shadows the Python builtin of the same name; it is kept
# because the following cells refer to it.
dir = '/content/drive/MyDrive/BirdPics'

# Make sure the folder that receives checkpoints and training curves exists.
os.makedirs(os.path.join(dir, 'models'), exist_ok=True)

In [37]:
# Open the dataset file read-only. The handle is left open on purpose:
# later cells keep references to datasets stored inside it.
f = h5py.File(os.path.join(dir, 'data/bird_data.hdf5'), mode="r")

In [38]:
# Inputs (X) stay as handles into the HDF5 file; labels (Y) are copied into
# in-memory NumPy arrays.
train_group = f['train']
X_train = train_group['X']
Y_train = np.copy(train_group['Y'])

val_group = f['val']
X_val = val_group['X']
Y_val = np.copy(val_group['Y'])

In [39]:
# Convert the raw labels with the project helper.
# NOTE(review): the results overwrite Y_train / Y_val, so re-running this cell
# applies `prepare_labels` to already-prepared labels.
Y_train, Y_val = dp.prepare_labels(Y_train), dp.prepare_labels(Y_val)

In [40]:
# Wrap each split in the project's HDF5Dataset so it can be fed to a DataLoader.
# Only the training split is built with train=True
# (presumably this switches on training-only transforms — TODO confirm in utils).
train_dataset = dp.HDF5Dataset(X_train,Y_train,train=True)
val_dataset = dp.HDF5Dataset(X_val,Y_val)

In [41]:
batch_size = 64

# Never request more worker processes than the runtime has CPU cores:
# oversubscribing slows loading down and makes PyTorch emit a warning.
# NOTE(review): the datasets wrap open h5py handles — confirm HDF5Dataset is
# safe to use from several worker processes.
num_workers = min(8, os.cpu_count() or 1)

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                          num_workers=num_workers, pin_memory=True)

# Validation metrics do not depend on sample order, so keep the order fixed.
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                        num_workers=num_workers, pin_memory=True)



## Building the model

In [50]:
class CNN_Net(nn.Module):
    """Small convolutional image classifier.

    The network stacks four stages of (3x3 unpadded convolution -> ReLU ->
    2x2 max pooling), flattens the resulting feature map, applies dropout and
    finishes with two fully connected layers that output one raw score
    (logit) per class.
    """

    def __init__(self, in_channels, num_classes, input_size=224):
        """
        Build the layers of the network.

        Parameters:
            * in_channels: Number of channels in the input image
              (1 for grayscale images, 3 for RGB images).
            * num_classes: Number of classes to predict, i.e. the size of the output.
            * input_size: Spatial size of the input images, either a single int
              for square images or a (height, width) pair. The default of 224
              yields the 128*12*12 features the first dense layer was
              originally hard-coded for.

        Raises:
            ValueError: If the input is too small to survive the four
                convolution/pooling stages.
        """
        super().__init__()

        # 1st convolutional layer
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=3)
        # Max pooling layer (shared by all four stages; it has no parameters)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # 2nd convolutional layer
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
        # 3rd convolutional layer
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        # 4th convolutional layer
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)
        # Dropout
        self.dropout = nn.Dropout(0.2)

        # Work out the size of the feature map that reaches the dense layers.
        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size
        for _ in range(4):
            # An unpadded 3x3 convolution removes 2 pixels per dimension and
            # the 2x2 pooling halves (rounding down) what is left.
            height = (height - 2) // 2
            width = (width - 2) // 2
        if height < 1 or width < 1:
            raise ValueError(
                "input_size %r is too small for four conv/pool stages" % (input_size,)
            )

        # Fully connected layers
        self.fc1 = nn.Linear(128 * height * width, 500)
        self.fc2 = nn.Linear(500, num_classes)

    def forward(self, x):
        """
        Define the forward pass of the neural network.

        Parameters:
            x: Input tensor of shape (batch, in_channels, height, width), with
               the spatial size the network was built for.

        Returns:
            torch.Tensor
                Raw class scores (logits) of shape (batch, num_classes).
        """
        x = self.pool(F.relu(self.conv1(x)))  # Stage 1: convolution, ReLU, max pooling
        x = self.pool(F.relu(self.conv2(x)))  # Stage 2
        x = self.pool(F.relu(self.conv3(x)))  # Stage 3
        x = self.pool(F.relu(self.conv4(x)))  # Stage 4
        x = x.reshape(x.shape[0], -1)         # Flatten everything except the batch dimension
        x = self.dropout(x)                   # Dropout (only active in training mode)
        x = F.relu(self.fc1(x))               # Fully connected layer 1 + ReLU
        x = self.fc2(x)                       # Fully connected layer 2 (logits, no activation)
        return x
# Instantiate the network for 3-channel images and 3 output classes,
# move it to the selected device and show its layer summary.
model = CNN_Net(in_channels=3, num_classes=3)
model = model.to(device)
print(model)

CNN_Net(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (dropout): Dropout(p=0.2, inplace=False)
  (fc1): Linear(in_features=18432, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=3, bias=True)
)


## Training

In [51]:
# Loss function: cross-entropy between the model's logits and the class indices.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

# Optimizer: AdamW over all model parameters, with a small weight decay.
optimizer = optim.AdamW(params=model.parameters(), lr=1e-3, weight_decay=1e-5)

In [52]:
n_total_steps = len(train_loader)
num_epochs = 30

# Checkpoint threshold: weights are written only when the validation accuracy
# (in %) exceeds this value, which is then raised to the new best.
# NOTE(review): 62. is presumably the best score of an earlier run — confirm,
# or lower it when training from scratch.
max_val = 62.
PATH = os.path.join(dir,'models/cnn_v1.pth')

# Per-epoch history of the mean batch loss and of the accuracy (in %).
train_loss,val_loss = np.zeros(num_epochs,dtype=np.float32),np.zeros(num_epochs,dtype=np.float32)
train_acc,val_acc = np.zeros(num_epochs,dtype=np.float32),np.zeros(num_epochs,dtype=np.float32)

for epoch in range(num_epochs):
    train_correct,train_samples = 0,0
    val_correct,val_samples = 0,0

    # ---------------- training pass ----------------
    model.train()  # dropout must be active while fitting
    for images, labels in tqdm(train_loader):
        images = images.to(device)
        # CrossEntropyLoss expects the targets as int64 class indices
        labels = labels.to(device, dtype=torch.long)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        train_loss[epoch] += loss.item()/len(train_loader)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The predicted class is the index of the largest logit
        predicted = outputs.argmax(dim=1)
        train_samples += labels.size(0)
        train_correct += (predicted == labels).sum().item()

    train_acc[epoch] = 100.0 * train_correct / train_samples
    print('train loss %.3f - train acc. %.2f' %(train_loss[epoch],train_acc[epoch]))

    # ---------------- validation pass ----------------
    model.eval()  # switch dropout off so the metrics are deterministic
    with torch.no_grad():  # no gradients needed: saves memory and time
        for val_images, val_labels in val_loader:
            val_images = val_images.to(device)
            val_labels = val_labels.to(device, dtype=torch.long)
            outputs = model(val_images)

            predicted = outputs.argmax(dim=1)
            val_samples += val_labels.size(0)
            val_correct += (predicted == val_labels).sum().item()

            val_loss[epoch] += criterion(outputs, val_labels).item()/len(val_loader)
    val_acc[epoch] = 100.0 * val_correct / val_samples

    # Keep only the weights of the best epoch seen so far
    if val_acc[epoch] > max_val:
        max_val = val_acc[epoch]
        torch.save(model.state_dict(), PATH)
    print('val loss %.3f - val acc. %.2f' %(val_loss[epoch],val_acc[epoch]))

print('Finished Training')

  0%|          | 0/312 [00:00<?, ?it/s]Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7a83489109a0><function _MultiProcessingDataLoaderIter.__del__ at 0x7a83489109a0>

Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
        Exception ignored in: self._shutdown_workers()self._shutdown_workers()
<function _MultiProcessingDataLoaderIter.__del__ at 0x7a83489109a0>

  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
Exception ignored in: Traceback (most recent call last):
    <function _MultiProcessingDataLoaderIter.__del__ at 0x7a83489109a0>  

train loss 2.394 - train acc. 34.28





val loss 1.099 - val acc. 33.58


 16%|█▌        | 50/312 [00:26<02:18,  1.89it/s]


KeyboardInterrupt: 

In [None]:
# Store the per-epoch loss/accuracy curves in the models folder.
history_path = os.path.join(dir, 'models/cnn_v1.npz')
history = dict(train_loss=train_loss, val_loss=val_loss,
               train_acc=train_acc, val_acc=val_acc)
np.savez(history_path, **history)