In [1]:
# Loading in functions
from non_max_surpression import NonMaxSurpression
from model_dog_detection import DogDetectorModel
from image_to_tensor import to_tensor

# Loading libraries
import torch 
import os
import math
import pandas as pd

## Model Loading

In [2]:
# Instantiating the dog detector; its weights are filled in from the
# checkpoint's 'model_state_dict' further down
model = DogDetectorModel()

In [3]:
# Initializing non-max suppression with its default parameters; it is applied
# to the model outputs in the cleaning loop
nms = NonMaxSurpression()

In [4]:
# Picking the device the model runs on: the first GPU when CUDA is available,
# otherwise the CPU
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")

# Explicit CPU handle, used to bring tensors back off the GPU
CPU_DEVICE = torch.device("cpu")

In [5]:
# Loading the checkpoint onto the CPU first, so that a checkpoint saved from a
# GPU run can still be read on a CPU-only machine
# NOTE(review): torch.load unpickles the file - only load checkpoints from a
# trusted source
checkpoint = torch.load('dog-detection-model.pt', map_location=CPU_DEVICE)

# Loading the model weights
model.load_state_dict(checkpoint['model_state_dict'])

# Setting the model to evaluation mode
model = model.eval()

# Moving the model to the device used for inference
model = model.to(DEVICE)

## Dog Loading

In [6]:
# Reading the per-dog attribute table, using the first CSV column as the index.
# The cleaning loop below reads its 'path' and 'downloadedImages' columns.
dogInfo = pd.read_csv('attributes.csv', index_col=0)

In [None]:
# Debugging aid: uncomment to restrict the run to a single dog
# dogInfo = dogInfo[dogInfo['path'] == 'dog_20']

## Dog Cleaning

In [7]:
def split(a, n):
    """Split a sequence into n consecutive chunks of near-equal length.

    The first ``len(a) % n`` chunks get one extra element, so chunk lengths
    differ by at most one and the original order is preserved.

    Args:
        a: A sequence supporting ``len()`` and slicing (e.g. a list or range).
        n: Number of chunks to produce.

    Returns:
        An iterator over the n slices of ``a``. When ``n`` is zero or negative
        the iterator is empty (no chunks were requested), instead of failing
        with a ZeroDivisionError for ``n == 0``.
    """
    # CITATION: https://stackoverflow.com/questions/2130016/splitting-a-list-into-n-parts-of-approximately-equal-length
    if n <= 0:
        # Zero chunks requested (e.g. nothing to batch): yield nothing
        return iter(())
    k, m = divmod(len(a), n)
    return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n))

In [8]:
# NOTE: this cell is destructive and not idempotent - it deletes and renames
# files under dogs/<path>/ in place, so it should only be run once per download.

# Number of images sent through the model at a time
BATCH_SIZE = 2

numCleanedImagesAll = []
boxes = []

# Progress is reported roughly once per percent of the dogs processed
totalDogs = len(dogInfo)
progressStep = max(1, totalDogs // 100)

# Iterating through each dog
for position, (dogIndex, dog) in enumerate(dogInfo.iterrows()):
    if position % progressStep == 0:
        print(f"{round((position / totalDogs) * 100, 3)} %")

    dogDir = f"dogs/{dog['path']}"
    numImages = int(dog['downloadedImages'])

    # Splitting the image indices into batches of at most BATCH_SIZE images;
    # a dog without any downloaded images simply gets no batches
    numBatches = math.ceil(numImages / BATCH_SIZE)
    if numBatches > 0:
        batchRanges = list(split(range(numImages), numBatches))
    else:
        batchRanges = []

    # Defining counter of the number of cleaned images
    numCleanedImages = 0

    for batchRange in batchRanges:
        # Building the path to each image of the batch
        imagePaths = [f"{dogDir}/img_{k}.jpg" for k in batchRange]

        # Loading and transforming images
        images = to_tensor(imagePaths)

        # Moving images to DEVICE
        images = [image.to(DEVICE) for image in images]

        # Running the images through the model; gradients are not needed for
        # inference, so autograd bookkeeping is switched off
        with torch.no_grad():
            outputs = model(images, '')

        # Moving images back to the CPU
        images = [image.to(CPU_DEVICE) for image in images]

        # Moving the model outputs back to the CPU
        outputs = [{k: v.to(CPU_DEVICE) for k, v in t.items()} for t in outputs]

        # Executing non-max suppression on the model output
        outputs = nms(outputs)

        # Iterating through the results and removing the bad pictures
        for j, imagePath in enumerate(imagePaths):
            output = outputs[j]

            # If the image is not clean (i.e. no dog or more than 1 dog), we delete it
            if len(output['labels']) != 1:
                os.remove(imagePath)
                continue

            # The image is clean: rename it so the kept images are numbered
            # consecutively from 0. The target index never exceeds the source
            # index, so the rename cannot hit an image that is still pending.
            cleanName = f"img_{numCleanedImages}.jpg"
            os.rename(imagePath, f"{dogDir}/{cleanName}")

            # Extracting the bounding box as plain floats, so the recorded
            # values are numbers rather than array/tensor elements
            box = output['boxes'][0]
            xmin, ymin, xmax, ymax = (float(box[c]) for c in range(4))

            # Recording the bounding box
            boxes.append(
                {
                    'path': dog['path'],
                    'image': cleanName,
                    'xmin': xmin,
                    'ymin': ymin,
                    'xmax': xmax,
                    'ymax': ymax
                }
            )

            # Incrementing the number of cleaned images
            numCleanedImages += 1

    # Appending the cleaned image count of this dog to the list
    numCleanedImagesAll.append(numCleanedImages)


# Appending the cleaned image counts to the df
dogInfo['cleanImages'] = numCleanedImagesAll

0.0 %


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


1.0 %
2.0 %
3.0 %
4.0 %
5.0 %
6.0 %
7.0 %
8.0 %
9.0 %
10.0 %
11.0 %
12.0 %
13.0 %
14.0 %
15.0 %
16.0 %
17.0 %
18.0 %
19.0 %
20.0 %
21.0 %
22.0 %
23.0 %
24.0 %
25.0 %
26.0 %
27.0 %
28.0 %
29.0 %
30.0 %
31.0 %
32.0 %
33.0 %
34.0 %
35.0 %
36.0 %
37.0 %
38.0 %
39.0 %
40.0 %
41.0 %
42.0 %
43.0 %
44.0 %
45.0 %
46.0 %
47.0 %
48.0 %
49.0 %
50.0 %
51.0 %
52.0 %
53.0 %
54.0 %
55.0 %
56.0 %
57.0 %
58.0 %
59.0 %
60.0 %
61.0 %
62.0 %
63.0 %
64.0 %
65.0 %
66.0 %
67.0 %
68.0 %
69.0 %
70.0 %
71.0 %
72.0 %
73.0 %
74.0 %
75.0 %
76.0 %
77.0 %
78.0 %
79.0 %
80.0 %
81.0 %
82.0 %
83.0 %
84.0 %
85.0 %
86.0 %
87.0 %
88.0 %
89.0 %
90.0 %
91.0 %
92.0 %
93.0 %
94.0 %
95.0 %
96.0 %
97.0 %
98.0 %
99.0 %


In [9]:
# Turning the recorded bounding boxes (one dict per kept image) into a table
boxesDF = pd.DataFrame(data=boxes)

# Persisting the bounding boxes next to the notebook
boxesDF.to_csv(path_or_buf='boxesDF.csv')

In [15]:
# Keeping only the dogs with at least two clean images, renumbering the rows
# from zero, and writing the result back to the attribute table on disk
# NOTE(review): this overwrites the file read at the start of the notebook, and
# the 'downloadedImages' column is not updated here
enoughCleanImages = dogInfo['cleanImages'] >= 2
dogInfo[enoughCleanImages].reset_index(drop=True).to_csv('attributes.csv')

In [None]:
# Optional: this cell is not needed for the cleaning itself, it is just for fun

from plot_tensor import plot_tensor
import numpy as np

# Plotting each image of the last processed batch together with its boxes
for idx, image in enumerate(images):
    plot_tensor(image, np.array(outputs[idx]['boxes']))