In [68]:
import pandas as pd
import numpy as np
import cv2
from tqdm import tqdm

from pathlib import Path
import re

YOLO_TRAIN_DIR = Path("/home/lex/data/Spatial_Monitoring_and_Insect_Behavioural_Analysis_Dataset/YOLOv4_Training_and_Test_Dataset/training")
YOLO_TEST_DIR = Path("/home/lex/data/Spatial_Monitoring_and_Insect_Behavioural_Analysis_Dataset/YOLOv4_Training_and_Test_Dataset/testing")
CLASSIFIER_TRAIN_DIR = Path("out/classification/training")
CLASSIFIER_TEST_DIR = Path("out/classification/testing")

CLASSIFIER_TRAIN_DIR.mkdir(parents=True, exist_ok=True)
CLASSIFIER_TEST_DIR.mkdir(parents=True, exist_ok=True)

INSECT_TXT_REGEX = re.compile(r"insect_(\d+).txt")

TRAIN = "train"
TEST = "test"

In [98]:
# Get list of target classes (e.g. bee/wasp, flower, etc.) and corresponding zero-based index for that class
TARGET_TO_NAME = pd.Series(
    {
        0: "honeybee_vespidae", 
        1: "flower", 
        2: "syrphidae", 
        3: "lepidoptera"
    }, 
    name="target_name"
)
NAME_TO_TARGET = {v: k for k, v in TARGET_TO_NAME.items()}
TARGET_TO_NAME

0    honeybee_vespidae
1               flower
2            syrphidae
3          lepidoptera
Name: target_name, dtype: object

In [70]:
# Get DataFrame where each row corresponds to a unique image+bounding box for every
# instance of an insect that occurs in the (full-resolution) training images
csv_columns = ["target", "centre_x", "centre_y", "box_width", "box_height"]
extra_columns = ["image"]

dataframes = {
    "train": pd.DataFrame(columns=[*extra_columns, *csv_columns]),
    "test": pd.DataFrame(columns=[*extra_columns, *csv_columns])
}

for directory, phase in zip([YOLO_TRAIN_DIR, YOLO_TEST_DIR], [TRAIN, TEST]):
    for file in directory.iterdir():
        match = INSECT_TXT_REGEX.match(file.name)
        if match:
            row = pd.read_csv(file, sep=" ", names=csv_columns)
            
            # Get corresponding image for this insect 
            image_fp = file.with_suffix(".png")
            row["image"] = str(image_fp)

            dataframes[phase] = pd.concat([dataframes[phase], row], ignore_index=True)

dataframes["train"].head()

Unnamed: 0,image,target,centre_x,centre_y,box_width,box_height
0,/home/lex/data/Spatial_Monitoring_and_Insect_B...,0,0.438305,0.51225,0.016172,0.033593
1,/home/lex/data/Spatial_Monitoring_and_Insect_B...,0,0.131612,0.41275,0.011568,0.03513
2,/home/lex/data/Spatial_Monitoring_and_Insect_B...,0,0.394792,0.306481,0.017708,0.025926
3,/home/lex/data/Spatial_Monitoring_and_Insect_B...,1,0.294792,0.488889,0.030208,0.055556
4,/home/lex/data/Spatial_Monitoring_and_Insect_B...,1,0.514062,0.241204,0.038542,0.065741


In [71]:
# Add insect type as human-friendly string to dataframe just because
for phase in [TRAIN, TEST]:
    dataframes[phase] = dataframes[phase].join(TARGET_TO_NAME, on="target")

dataframes[TRAIN].head()

Unnamed: 0,image,target,centre_x,centre_y,box_width,box_height,target_name
0,/home/lex/data/Spatial_Monitoring_and_Insect_B...,0,0.438305,0.51225,0.016172,0.033593,honeybee_vespidae
1,/home/lex/data/Spatial_Monitoring_and_Insect_B...,0,0.131612,0.41275,0.011568,0.03513,honeybee_vespidae
2,/home/lex/data/Spatial_Monitoring_and_Insect_B...,0,0.394792,0.306481,0.017708,0.025926,honeybee_vespidae
3,/home/lex/data/Spatial_Monitoring_and_Insect_B...,1,0.294792,0.488889,0.030208,0.055556,flower
4,/home/lex/data/Spatial_Monitoring_and_Insect_B...,1,0.514062,0.241204,0.038542,0.065741,flower


In [72]:
def crop_to_insect(image: np.ndarray, centre_x: int, centre_y: int, height_width:int=128):
    """Convert a row from our insect bounding boxes DataFrame into a cropped image."""
    # Note we're not actually using the original height/width of bounding boxes, 
    # we want something a little less precise and more consistent
    top_y = int(centre_y - 0.5 * height_width)
    bottom_y = int(centre_y + 0.5 * height_width)
    left_x = int(centre_x - 0.5 * height_width)
    right_x = int(centre_x + 0.5 * height_width)

    height, width, channels = image.shape

    # print(f"Before, top_y={top_y}, bottom_y={bottom_y}, left_x={left_x}, right_x={right_x}")
    # Check if our crop goes out of bounds
    pad_width = [[0, 0], [0, 0], [0, 0]] # padding width for (before, after) for each axis of image arrays
    if top_y < 0:
        pad_width[0][0] = abs(top_y)
        bottom_y += abs(top_y)
        top_y = 0
    if bottom_y > height:
        pad_width[0][1] = abs(bottom_y - height) + 1
    if left_x < 0:
        pad_width[1][0] = abs(left_x)
        right_x += abs(left_x)
        left_x = 0
    if right_x > width:
        pad_width[1][1] = abs(right_x - width) + 1
    # print(f"After, top_y={top_y}, bottom_y={bottom_y}, left_x={left_x}, right_x={right_x}")

    image_padded = np.pad(image, pad_width=pad_width, mode="symmetric")
    image_cropped = image_padded[top_y:bottom_y, left_x:right_x]

    return image_cropped

In [73]:
# Make sure our output directories exist before we put files into them
for target in TARGET_TO_NAME:
    print(f"Creating training/ and testing/ directories for '{target}' in '{CLASSIFIER_TRAIN_DIR}' and '{CLASSIFIER_TEST_DIR}', respectively")
    (CLASSIFIER_TRAIN_DIR / target).mkdir(parents=True, exist_ok=True)
    (CLASSIFIER_TEST_DIR / target).mkdir(parents=True, exist_ok=True)

Creating training/ and testing/ directories for 'honeybee_vespidae' in 'out/classification/training' and 'out/classification/testing', respectively
Creating training/ and testing/ directories for 'flower' in 'out/classification/training' and 'out/classification/testing', respectively
Creating training/ and testing/ directories for 'syrphidae' in 'out/classification/training' and 'out/classification/testing', respectively
Creating training/ and testing/ directories for 'lepidoptera' in 'out/classification/training' and 'out/classification/testing', respectively


In [74]:
# Now convert those fractional positions above to absolute pixel values (e.g. 
# centre_x=0.44 --> 0.44*1920~=845)
for phase, input_dir, output_dir in zip([TRAIN, TEST], [YOLO_TRAIN_DIR, YOLO_TEST_DIR], [CLASSIFIER_TRAIN_DIR, CLASSIFIER_TEST_DIR]):
    df = dataframes[phase]

    # We'll store these back into the dataframe to store the integer, not fractional,
    # measurements. All images are probably the same resolution, but do it this
    # way just in case there are differing resolutions
    centre_ys = pd.Series(index=df.index, name="centre_y", dtype=int)
    centre_xs = pd.Series(index=df.index, name="centre_x", dtype=int)
    box_heights = pd.Series(index=df.index, name="box_height", dtype=int)
    box_widths = pd.Series(index=df.index, name="box_width", dtype=int)
    image_heights = pd.Series(index=df.index, name="image_height", dtype=int)
    image_widths = pd.Series(index=df.index, name="image_width", dtype=int)
    
    for index, row in tqdm(df.iterrows(), total=len(df), desc=phase):
        row = row.squeeze()

        input_fp = input_dir / row["image"]
        image = cv2.imread(str(input_fp))
        image_height, image_width, num_channels = image.shape

        # Store conversions from fractional to absolute values for later reuse
        centre_y = int(row["centre_y"] * image_height)
        centre_x = int(row["centre_x"] * image_width)
        box_height = int(row["box_height"] * image_height)
        box_width = int(row["box_width"] * image_width)

        # Crop image and save to output directory
        image_cropped = crop_to_insect(image=image, centre_x=centre_x, centre_y=centre_y)

        input_stem = input_fp.stem
        input_suffix = input_fp.suffix
        target_name = row["target_name"]
        
        output_name = f"{input_stem}_{row['target']}_{centre_x}_{centre_y}{input_suffix}"
        output_fp = output_dir / target_name / output_name
        # print(output_fp)
        
        cv2.imwrite(str(output_fp), image_cropped)

        # Save these as integers, maybe for later
        centre_ys[index] = centre_y
        centre_xs[index] = centre_x
        box_heights[index] = box_height
        box_widths[index] = box_width
        image_heights[index] = image_height
        image_widths[index] = image_width
    
    dataframes[phase] = df.assign(
        centre_y=centre_ys,
        centre_x=centre_xs,
        box_height=box_heights,
        box_width=box_widths,
        image_height=image_heights,
        image_width=image_widths
    )
    dataframes[phase] = dataframes[phase].astype({
        "centre_y": int,
        "centre_x": int,
        "box_height": int,
        "box_width": int,
        "image_height": int,
        "image_width": int
    })
    
    
dataframes[TRAIN].head()

train: 100%|██████████| 16550/16550 [13:11<00:00, 20.91it/s]
test: 100%|██████████| 3246/3246 [02:32<00:00, 21.28it/s]


Unnamed: 0,image,target,centre_x,centre_y,box_width,box_height,target_name,image_height,image_width
0,/home/lex/data/Spatial_Monitoring_and_Insect_B...,0,841,553,31,36,honeybee_vespidae,1080,1920
1,/home/lex/data/Spatial_Monitoring_and_Insect_B...,0,252,445,22,37,honeybee_vespidae,1080,1920
2,/home/lex/data/Spatial_Monitoring_and_Insect_B...,0,758,330,33,28,honeybee_vespidae,1080,1920
3,/home/lex/data/Spatial_Monitoring_and_Insect_B...,1,566,528,57,60,flower,1080,1920
4,/home/lex/data/Spatial_Monitoring_and_Insect_B...,1,986,260,74,71,flower,1080,1920


In [209]:
from torchvision import transforms
from torchvision.datasets import ImageFolder, DatasetFolder
from torchvision.datasets.folder import default_loader
from torch.utils.data import SubsetRandomSampler, Dataset, DataLoader
from torch import Tensor
import matplotlib.pyplot as plt
from IPython import display
%matplotlib inline

import time
from typing import Tuple, List, Dict, Optional, Callable, Any, Union
import os


class InsectDataset(Dataset):
    def __init__(self, directory: Union[Path, str]) -> None:
        super().__init__()

        if not isinstance(directory, Path):
            directory = Path(directory)
        self.directory = directory

        self.class_names = self.find_class_names()
        self.images = self.find_images()

        self.transform = transform = transforms.Compose(
            [
                transforms.CenterCrop(32),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ]
        )

    def find_class_names(self):
        class_names = [fp.name for fp in self.directory.iterdir() if fp.is_dir()]
        return class_names

    def find_images(self) -> List[str]:
        images = []
        for class_name in self.class_names:
            class_index = NAME_TO_TARGET[class_name]
            class_dir = self.directory / class_name
            images.extend([(video_file, class_index) for video_file in class_dir.iterdir()])
        
        return images

    def __len__(self) -> int:
        return len(self.images)

    def __getitem__(self, index):
        image_fp, target = self.images[index]
        image = default_loader(image_fp)
        image = self.transform(image)
        # print(f"In dataset, image shape is {image.shape}")

        return image, target

    def __str__(self) -> str:
        return f"InsectDataset (directory={self.directory}, {len(self)} images)"

    def __repr__(self) -> str:
        return str(self)


train_dataset = InsectDataset(CLASSIFIER_TRAIN_DIR)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataset = InsectDataset(CLASSIFIER_TEST_DIR)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [170]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, len(TARGET_TO_NAME))

    def forward(self, x):
        y = self.conv1(x)
        y = F.relu(y)
        y = self.pool(y)

        y = self.conv2(y)
        y = F.relu(y)
        y = self.pool(y)

        y = torch.flatten(y, 1) # flatten all dimensions except batch
        
        y = self.fc1(y)
        y = F.relu(y)

        y = self.fc2(y)
        y = F.relu(y)

        y = self.fc3(y)
        return y


net = Net()

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(10):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(train_dataloader):
        # get the inputs; data is a list of [inputs, labels]
        images, labels = data
        # print(f"Shape here is {images.shape}")

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if (i+1) % 100 == 0:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')

[1,   100] loss: 0.036
[1,   200] loss: 0.020
[1,   300] loss: 0.020
[1,   400] loss: 0.021
[1,   500] loss: 0.018
[2,   100] loss: 0.016
[2,   200] loss: 0.016
[2,   300] loss: 0.014
[2,   400] loss: 0.012
[2,   500] loss: 0.013
[3,   100] loss: 0.011
[3,   200] loss: 0.012
[3,   300] loss: 0.009
[3,   400] loss: 0.011
[3,   500] loss: 0.009
[4,   100] loss: 0.009
[4,   200] loss: 0.008
[4,   300] loss: 0.008
[4,   400] loss: 0.009
[4,   500] loss: 0.007
[5,   100] loss: 0.007
[5,   200] loss: 0.008
[5,   300] loss: 0.008
[5,   400] loss: 0.006
[5,   500] loss: 0.007
[6,   100] loss: 0.006
[6,   200] loss: 0.005
[6,   300] loss: 0.007
[6,   400] loss: 0.006
[6,   500] loss: 0.006
[7,   100] loss: 0.006
[7,   200] loss: 0.005
[7,   300] loss: 0.006
[7,   400] loss: 0.005
[7,   500] loss: 0.005
[8,   100] loss: 0.006
[8,   200] loss: 0.005
[8,   300] loss: 0.006
[8,   400] loss: 0.005
[8,   500] loss: 0.005
[9,   100] loss: 0.004
[9,   200] loss: 0.005
[9,   300] loss: 0.006
[9,   400] 

In [210]:
# prepare to count predictions for each class
correct_pred = {classname: 0 for classname in NAME_TO_TARGET}
total_pred = {classname: 0 for classname in NAME_TO_TARGET}
max_vals_list = np.array([])

# again no gradients needed
with torch.no_grad():
    for data in test_dataloader:
        images, labels = data
        outputs = net(images)
        max_vals, predictions = torch.max(outputs, 1)
        max_vals_list = np.concatenate([max_vals_list, max_vals.numpy()])

        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[TARGET_TO_NAME[int(label)]] += 1
            total_pred[TARGET_TO_NAME[int(label)]] += 1

# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

Accuracy for class: honeybee_vespidae is 98.7 %
Accuracy for class: flower is 98.5 %
Accuracy for class: syrphidae is 0.0 %
Accuracy for class: lepidoptera is 13.3 %


In [180]:
net.eval()
fake_image = torch.rand((1, 3, 32, 32)) * 2 - 1
net(fake_image)

tensor([[ 0.3119,  0.0328, -1.2583,  0.0039]], grad_fn=<AddmmBackward0>)

In [208]:
print(f"Probabilities used in predictions were: {max_vals_list.mean()} +- {max_vals_list.std()} (range: {max_vals_list.min()} to {max_vals_list.max()})")


Probabilities used in predictions were: 6.029759963918122 +- 2.039279442263269 (range: 0.15661728382110596 to 8.838119506835938)


In [211]:
max_vals_list.argmin()

1

In [212]:
max_vals_list

array([0.57413983, 0.15661728, 0.32433099, ..., 2.63999248, 1.05817676,
       1.6402936 ])

In [213]:

test_dataset[1]

(tensor([[[ 0.8353,  0.8039,  0.8980,  ...,  0.7255,  0.7961,  0.8118],
          [ 0.7569,  0.7725,  0.9294,  ...,  0.5922,  0.6392,  0.6941],
          [ 0.7412,  0.7569,  0.9137,  ...,  0.5765,  0.5922,  0.6314],
          ...,
          [ 0.2235,  0.1686,  0.1216,  ...,  0.4588,  0.5059,  0.5451],
          [ 0.2863,  0.1922,  0.1529,  ...,  0.4980,  0.5294,  0.5529],
          [ 0.3804,  0.2549,  0.2078,  ...,  0.5608,  0.5529,  0.5529]],
 
         [[ 0.8510,  0.8196,  0.8510,  ...,  0.3176,  0.3804,  0.3961],
          [ 0.7725,  0.7882,  0.8745,  ...,  0.1765,  0.2235,  0.2784],
          [ 0.7569,  0.7725,  0.8588,  ...,  0.1608,  0.1765,  0.2157],
          ...,
          [-0.3176, -0.3725, -0.4039,  ...,  0.0039,  0.0510,  0.0902],
          [-0.2549, -0.3490, -0.3725,  ...,  0.0431,  0.0745,  0.0980],
          [-0.1686, -0.2941, -0.3098,  ...,  0.0902,  0.0980,  0.0980]],
 
         [[ 0.2941,  0.2627,  0.3333,  ..., -0.2392, -0.1529, -0.1373],
          [ 0.2157,  0.2314,