## Copy the data to the local file system

In [None]:
!cp ../data/train_data /tmp -R && find /tmp/train_data -type f | wc -l

In [None]:
!cp ../data/test_data /tmp -R && find /tmp/test_data -type f | wc -l

In [None]:
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt
import torch
import torchvision
from torchvision import transforms
from torchvision.datasets import ImageFolder

from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split

import torch.nn as nn
import torch.nn.functional as F

from torchvision.utils import make_grid

import time
from tqdm import tqdm

In [None]:
#training data directory
data_dir = "/tmp/train_data"

In [None]:
#load the training data
# Each image is resized to 200x200 and converted to a tensor when it is read.
train_transform = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor(),
])
dataset = ImageFolder(data_dir, transform=train_transform)

In [None]:
batch_size = 128
val_size = 3600
# Everything that is not held out for validation is used for training.
train_size = len(dataset) - val_size

split_lengths = [train_size, val_size]
train_data, val_data = random_split(dataset, split_lengths)

n_train = len(train_data)
n_val = len(val_data)
print(f"Length of Train Data : {n_train}")
print(f"Length of Validation Data : {n_val}")

In [None]:
#load the train and validation into batches.
# Options shared by both loaders.
loader_options = {"num_workers": 6, "pin_memory": True}

# Only the training loader shuffles; the validation loader uses twice the batch size.
train_dl = DataLoader(train_data, batch_size=batch_size, shuffle=True, **loader_options)
val_dl = DataLoader(val_data, batch_size=batch_size * 2, **loader_options)

In [None]:
print("Follwing classes are there : \n",dataset.classes)

In [None]:
def display_img(img, label):
    """Print the class name for ``label`` and draw ``img`` in a 5x5 inch figure.

    ``label`` is used as an index into ``dataset.classes`` (the training
    ImageFolder defined in this notebook). ``img`` is permuted with
    ``(1, 2, 0)`` before plotting, i.e. it is expected to be channel-first.
    """
    class_name = dataset.classes[label]
    print(f"Label : {class_name}")

    figure = plt.figure()
    figure.set_figheight(5)
    figure.set_figwidth(5)

    # imshow expects the channel axis last.
    channels_last = img.permute(1, 2, 0)
    plt.imshow(channels_last)

## We need to distinguish scenes with an animal from scenes without an animal

In [None]:
# Show the training sample at index 589; the item is unpacked into (img, label).
display_img(*dataset[589])

In [None]:
# Show the training sample at index 14000; the item is unpacked into (img, label).
display_img(*dataset[14000])

## Show one batch of the data

In [None]:
def show_batch(dl):
    """Plot the first batch yielded by ``dl`` as one image grid (16 per row).

    Nothing is drawn when ``dl`` yields no batches.
    """
    first_batch = next(iter(dl), None)
    if first_batch is None:
        return

    images, _ = first_batch
    grid = make_grid(images, nrow=16)

    _, axes = plt.subplots(figsize=(16, 16))
    axes.set_xticks([])
    axes.set_yticks([])
    # imshow expects the channel axis last.
    axes.imshow(grid.permute(1, 2, 0))

show_batch(train_dl)

In [None]:
class ImageClassificationBase(nn.Module):
    """Shared training/validation helpers for image classifiers.

    Subclasses only have to implement ``forward``. Batches are
    ``(images, labels)`` pairs and the loss is cross-entropy over the
    model's outputs.
    """

    def _to_model_device(self, batch):
        """Unpack ``batch`` and move both tensors to the model's device.

        The device is taken from the first parameter of the model, so the
        steps below work on CPU as well as on GPU. A model without
        parameters leaves the tensors where they are.
        """
        images, labels = batch
        first_param = next(self.parameters(), None)
        if first_param is None:
            return images, labels
        target = first_param.device
        return images.to(target), labels.to(target)

    def training_step(self, batch):
        """Return the cross-entropy loss (with graph attached) for one batch."""
        images, labels = self._to_model_device(batch)
        out = self(images)                  # Generate predictions
        loss = F.cross_entropy(out, labels) # Calculate loss
        return loss

    def validation_step(self, batch):
        """Return ``{'loss': ..., 'acc': ...}`` for one validation batch.

        The loss is detached so that no autograd history is retained.
        """
        images, labels = self._to_model_device(batch)
        out = self(images)                    # Generate predictions
        loss = F.cross_entropy(out, labels)   # Calculate loss
        acc = accuracy(out, labels)           # Calculate accuracy
        return {'loss': loss.detach(), 'acc': acc}

    def validation_epoch_end(self, outputs):
        """Combine per-batch results into plain-float epoch metrics.

        ``outputs`` is a list of dicts as returned by ``validation_step``.
        The result is the unweighted mean over batches, so a smaller final
        batch counts as much as a full one.
        """
        batch_losses = [x['loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
        batch_accs = [x['acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()      # Combine accuracies
        return {'loss': epoch_loss.item(), 'acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        """Print a one-line summary; ``result`` needs 'train_loss', 'loss' and 'acc'."""
        print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch, result['train_loss'], result['loss'], result['acc']))
        
class FishingNet(ImageClassificationBase):
    """CNN with three convolutional stages and a three-layer head with 2 outputs.

    Every convolution is 3x3 with stride 1 and padding 1 and is followed by
    a ReLU; each stage ends with a 2x2 max-pool. The first linear layer
    expects 80000 features = 128 channels * 25 * 25, which corresponds to
    200x200 inputs (200 -> 100 -> 50 -> 25 after the three poolings).

    The layers are created in a fixed order inside one ``nn.Sequential``
    stored as ``self.network``, so state-dict keys are ``network.<index>.*``.
    """

    def __init__(self):
        super().__init__()

        def conv_stage(widths):
            """Conv+ReLU for each consecutive pair in ``widths``, then a 2x2 max-pool."""
            stage = []
            for c_in, c_out in zip(widths, widths[1:]):
                stage.append(nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1))
                stage.append(nn.ReLU())
            stage.append(nn.MaxPool2d(2, 2))
            return stage

        layers = [
            *conv_stage([3, 256, 256, 256, 64]),
            *conv_stage([64, 128, 128, 128, 128]),
            *conv_stage([128, 128, 128, 128]),
            nn.Flatten(),
            nn.Linear(80000, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 2),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, xb):
        """Run the batch ``xb`` through the sequential network."""
        return self.network(xb)

In [None]:
def accuracy(outputs, labels):
    """Return the fraction of rows whose arg-max over dim 1 equals the label.

    The result is a new scalar tensor built from a Python float.
    """
    predicted = outputs.argmax(dim=1)
    n_correct = (predicted == labels).sum().item()
    return torch.tensor(n_correct / len(predicted))

@torch.no_grad()
def evaluate(model, val_loader):
    """Switch ``model`` to eval mode and aggregate its validation metrics.

    Calls ``model.validation_step`` on every batch of ``val_loader`` with
    gradient tracking disabled and returns whatever
    ``model.validation_epoch_end`` makes of the collected step results.
    """
    model.eval()
    step_results = list(map(model.validation_step, val_loader))
    return model.validation_epoch_end(step_results)

  
def fit(epochs, lr, model, train_loader, val_loader, opt_func = torch.optim.Adam):
    """Train ``model`` for ``epochs`` epochs and return the per-epoch metrics.

    Args:
        epochs: number of passes over ``train_loader``.
        lr: learning rate handed to ``opt_func``.
        model: object providing ``training_step``, ``epoch_end`` and the
            validation hooks used by ``evaluate``.
        train_loader: iterable of training batches (must support ``len``).
        val_loader: iterable of validation batches.
        opt_func: optimizer factory called as ``opt_func(params, lr)``.

    Returns:
        A list with one dict per epoch: the result of ``evaluate`` plus the
        mean training loss under the key ``'train_loss'``.
    """
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):

        # evaluate() leaves the model in eval mode, so switch back every epoch.
        model.train()
        train_losses = []

        pbar = tqdm(enumerate(train_loader), total = len(train_loader))

        for it, batch in pbar:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            # Keep only the detached value: storing the live loss tensor would
            # hold on to autograd history for every batch of the epoch.
            train_losses.append(loss.detach())
            pbar.set_description(f"epoch {epoch+1} iter {it}: train loss {loss.item():.5f}.")

        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        model.epoch_end(epoch, result)
        history.append(result)

    return history

In [None]:
#if a GPU is available, use it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = FishingNet()
#load a pre-trained model
# map_location remaps the stored tensors onto `device`, so weights saved on a
# GPU can also be loaded on a CPU-only machine.
# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
state_dict = torch.load("/beegfs/ws/1/s4122485-jh_ws/model/pretrained_model", map_location=device)
model.load_state_dict(state_dict)

#move model to device
model.to(device)

# Train the model
The model is already pre-trained, so no further training is necessary; run the next cell only if you want to improve it further.

In [None]:
#train another 5 epochs - if you want :)
# Metrics of the loaded weights before any further training, printed so the
# effect of the additional epochs can be compared against them.
baseline = evaluate(model,val_dl)
print(f"Before fine-tuning: {baseline}")
num_epochs = 5
lr = 0.0006
#fitting the model on training data and record the result after each epoch
history = fit(num_epochs, lr, model, train_dl, val_dl)

# Test the model

In [None]:
test_data_dir = "/tmp/test_data"
# Same preprocessing as for the training data: resize to 200x200, convert to tensor.
dataset_test = ImageFolder(test_data_dir,transform = transforms.Compose([
    transforms.Resize((200,200)),transforms.ToTensor()
]))
# No shuffling, so predictions stay aligned with dataset_test.targets.
test_loader = DataLoader(dataset_test, batch_size)
model.eval()
predictions = []
# Inference only: skip autograd bookkeeping and run on whatever device the
# model was moved to (labels are not needed on the device).
with torch.no_grad():
    for images, _ in test_loader:
        logits = model(images.to(device))
        predictions.extend(logits.argmax(dim=1).tolist())# Generate predictions

# compute the acc manually here
correct_predictions = sum(
    int(predicted == target)
    for predicted, target in zip(predictions, dataset_test.targets)
)

print("accuracy: " + str(correct_predictions/len(predictions)))

In [None]:
idx=123
# The printed label is the model's predicted class for this test image, not its ground-truth label.
display_img((dataset_test[idx])[0],predictions[idx])

In [None]:
idx=9
# The printed label is the model's predicted class for this test image, not its ground-truth label.
display_img((dataset_test[idx])[0],predictions[idx])

# Remarks
## License data
We use the NOAA Puget Sound Nearshore Fish 2017-2018 dataset for this example notebook.
If you use these data in a publication or report, please use the following citation to refer to the data collection:

Ferriss B, Veggerby K, Bogeberg M, Conway-Cranos L, Hoberecht L, Kiffney P, Litle K, Toft J, Sanderson B. Characterizing the habitat function of bivalve aquaculture using underwater video. Aquaculture Environment Interactions. 2021 Nov 18;13:439-54.

…and/or the following citation to refer to the annotations and public data set:

Farrell DM, Ferriss B, Trivedi A, Pathak S, Muppalla S, Dodhia R, Wang J, Veggerby K, Morris D, Sanderson B, Scheuerell M. 2022. Using a computer vision model to locate fish in underwater video: a case study in shellfish aquaculture. 4th ICES PICES Early Career Scientist Conference, St. John’s, Newfoundland, Canada, 9–12 May 2022.

## This notebook is inspired by the natural scene notebook by Pranjal Soni
Find more information: https://www.kaggle.com/code/pranjalsoni17/natural-scene-classification