In [3]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
import matplotlib.pyplot as plt
import numpy as np
import cv2
from PIL import Image
import os

In [5]:
# Device and dataset locations used by the rest of the notebook.
device = torch.device("cpu")
trainimagePath = 'yolo_model/dataset/images/train'
trainlabelPath = 'yolo_model/dataset/labels/train/FASTER-RCNN'
valimagePath = 'yolo_model/dataset/images/val'
vallabelPath = 'yolo_model/dataset/labels/val/FASTER-RCNN'

# Sanity check: report every training image that has no annotation file
# sharing its base name.
train_image_files = os.listdir(trainimagePath)
train_label_files = os.listdir(trainlabelPath)
print("Train Image:" + str(len(train_image_files)))
print("Train Label:" + str(len(train_label_files)))

# os.path.splitext strips the real extension; slicing off the last four
# characters only works for three-letter extensions (".jpeg" would never match).
label_txt = [os.path.splitext(filename)[0] for filename in train_label_files]
label_stems = set(label_txt)  # set -> constant-time membership test per image

# Images without a matching annotation (candidates for removal).
delete = [file for file in train_image_files
          if os.path.splitext(file)[0] not in label_stems]
print(delete)
print(len(delete))

Train Image:171
Train Label:171
[]
0


In [6]:
# Sanity check: report every validation image that has no annotation file
# sharing its base name.
val_image_files = os.listdir(valimagePath)
val_label_files = os.listdir(vallabelPath)
print("Val Image:" + str(len(val_image_files)))
print("Val Label:" + str(len(val_label_files)))

# os.path.splitext strips the real extension; slicing off the last four
# characters only works for three-letter extensions (".jpeg" would never match).
label_txt = [os.path.splitext(filename)[0] for filename in val_label_files]
label_stems = set(label_txt)  # set -> constant-time membership test per image

# Images without a matching annotation (candidates for removal).
delete = [file for file in val_image_files
          if os.path.splitext(file)[0] not in label_stems]
print(delete)
print(len(delete))

Val Image:168
Val Label:168
[]
0


In [1]:
import torch
import numpy as np
import math
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torchvision.transforms as transforms
import torchvision.datasets.voc as VOC
import matplotlib.pyplot as plt
import matplotlib.patches as patches
# xml library for parsing xml files
from xml.etree import ElementTree as et

In [5]:
# Dataset locations (relative paths, resolved against the working directory).
# Held-out images come from the "val" split.
test_dir = 'yolo_model/dataset/images/val/'

# Training images and their XML annotation directory.
train_dir = 'yolo_model/dataset/images/train/'
trainlabelPath = 'yolo_model/dataset/labels/train/FASTER-RCNN'

In [3]:
import torch
import random
import cv2
import os

class MaizeDataset(torch.utils.data.Dataset):
    """Object-detection dataset pairing ``jpg`` images with XML annotations.

    Each item is an ``(image, target)`` pair. The image is read with OpenCV,
    converted to RGB float32, resized to ``(height, width)`` and divided by 255.
    The target is a dict holding ``boxes``, ``labels``, ``area``, ``iscrowd``
    and ``image_id`` tensors; box coordinates are rescaled from the original
    image size to the resized image.

    Args:
        img_dir: Directory containing the images.
        label_dir: Directory containing one ``<image base name>.xml`` per image,
            with ``object`` elements that carry ``name`` and ``bndbox`` children.
        width: Width of the returned image, in pixels.
        height: Height of the returned image, in pixels.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` and expected to
            return a mapping with ``'image'`` and ``'bboxes'`` entries.
    """

    def __init__(self, img_dir,label_dir, width, height, transforms=None):
        self.transforms = transforms
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.height = height
        self.width = width

        # List the directory passed to this instance rather than a module-level
        # global, so a validation/test dataset enumerates its own images.
        # Sorted so that an index always maps to the same file.
        self.imgs = [name for name in sorted(os.listdir(self.img_dir)) if name.endswith('jpg')]
        # Index 0 is a placeholder so that 'Maize' maps to label 1.
        self.classes = ['_', 'Maize']

    def _read_annotation(self, annot_file_path, orig_width, orig_height):
        """Parse one XML file into (boxes, labels) scaled to the output size."""
        boxes = []
        labels = []
        root = et.parse(annot_file_path).getroot()

        for member in root.findall('object'):
            # Raises ValueError for a class name that is not in self.classes.
            labels.append(self.classes.index(member.find('name').text))

            bndbox = member.find('bndbox')
            # float() accepts both integer and decimal coordinate strings.
            xmin = float(bndbox.find('xmin').text)
            xmax = float(bndbox.find('xmax').text)
            ymin = float(bndbox.find('ymin').text)
            ymax = float(bndbox.find('ymax').text)

            # Rescale from the original image size to the resized image.
            xmin_corr = (xmin / orig_width) * self.width
            xmax_corr = (xmax / orig_width) * self.width
            ymin_corr = (ymin / orig_height) * self.height
            ymax_corr = (ymax / orig_height) * self.height

            boxes.append([xmin_corr, ymin_corr, xmax_corr, ymax_corr])

        return boxes, labels

    def __getitem__(self, index):
        """Return ``(image, target)`` for the image at position ``index``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        img_name = self.imgs[index]
        image_path = os.path.join(self.img_dir, img_name)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError("could not read image: " + image_path)

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        # The interpolation flag must be passed by keyword: the third
        # positional parameter of cv2.resize is `dst`, not `interpolation`.
        img_res = cv2.resize(img_rgb, (self.width, self.height), interpolation=cv2.INTER_AREA)

        # Scale pixel values from [0, 255] to [0, 1].
        img_res /= 255.0

        # Annotation file shares the image's base name.
        annot_filename = os.path.splitext(img_name)[0] + '.xml'
        annot_file_path = os.path.join(self.label_dir, annot_filename)

        # Original image size (rows = height, columns = width).
        ht, wt = img.shape[:2]
        boxes, labels = self._read_annotation(annot_file_path, wt, ht)

        # reshape(-1, 4) keeps the tensor 2-D even when the image has no
        # objects, so the column indexing below does not fail.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)

        # Box areas: (ymax - ymin) * (xmax - xmin).
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["area"] = area
        target["iscrowd"] = iscrowd
        target["image_id"] = torch.tensor([index])

        if self.transforms:
            sample = self.transforms(image=img_res, bboxes=target['boxes'], labels=labels)
            img_res = sample['image']
            # NOTE(review): labels/area are not refreshed here; if the transform
            # can drop boxes they may fall out of sync -- confirm with the
            # transform pipeline in use.
            target['boxes'] = torch.Tensor(sample['bboxes']).reshape(-1, 4)

        return img_res, target

    def __len__(self):
        """Number of images found in ``img_dir``."""
        return len(self.imgs)

In [6]:
# Build the training dataset; every image is resized to 224x224.
train_data = MaizeDataset(
    img_dir=train_dir,
    label_dir=trainlabelPath,
    width=224,
    height=224,
)
print("here's dataset length: ", len(train_data))

here's dataset length:  171


In [7]:
# Fetch one sample (index 24) and print the image shape and the target dict
# as a quick check of what MaizeDataset.__getitem__ returns.
my_img, my_target = train_data[24]
print("shape of the image: ", my_img.shape)
print("target: ", my_target)

shape of the image:  (224, 224, 3)
target:  {'boxes': tensor([[ 32.3596, 132.3860,  38.5000, 139.5088]]), 'labels': tensor([1]), 'area': tensor([43.7365]), 'iscrowd': tensor([0]), 'image_id': tensor([24])}
