-
Notifications
You must be signed in to change notification settings - Fork 4
/
windows_utils.py
executable file
·124 lines (96 loc) · 4.98 KB
/
windows_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
from torch.utils.data import Dataset
import torch
import torchvision
torchvision.disable_beta_transforms_warning()
from torchvision.tv_tensors import BoundingBoxes, Mask
import torchvision.transforms.v2 as transforms
from PIL import Image, ImageDraw
import numpy as np
class StudentIDDataset(Dataset):
    """
    PyTorch Dataset for images annotated with labeled polygons.

    Each item is an RGB image paired with a target dict holding boolean
    segmentation masks (one per polygon), xyxy bounding boxes derived from
    those masks, and integer class labels.
    """
    def __init__(self, img_keys, annotation_df, img_dict, class_to_idx, transforms=None):
        """
        Constructor for the StudentIDDataset class.

        Parameters:
            img_keys (list): List of unique identifiers for images.
            annotation_df (DataFrame): DataFrame containing the image annotations,
                indexed by image key; each row must provide a 'shapes' entry whose
                elements carry a 'label' and a 'points' polygon.
            img_dict (dict): Dictionary mapping image identifiers to image file paths.
            class_to_idx (dict): Dictionary mapping class labels to indices.
            transforms (callable, optional): Optional transform applied to
                (image, target) pairs in __getitem__.
        """
        # Fix: the original used super(Dataset, self).__init__(), which skips
        # Dataset in the MRO and dispatches straight to object.__init__;
        # zero-argument super() initializes the actual base class chain.
        super().__init__()
        self._img_keys = img_keys              # List of image keys
        self._annotation_df = annotation_df    # DataFrame containing annotations
        self._img_dict = img_dict              # Dictionary mapping image keys to image paths
        self._class_to_idx = class_to_idx      # Dictionary mapping class names to class indices
        self._transforms = transforms          # Image transforms to be applied

    def __len__(self):
        """
        Returns the length of the dataset.

        Returns:
            int: The number of items in the dataset.
        """
        return len(self._img_keys)

    def __getitem__(self, index):
        """
        Fetch an item from the dataset at the specified index.

        Parameters:
            index (int): Index of the item to fetch from the dataset.

        Returns:
            tuple: A tuple containing the image and its associated target (annotations).
        """
        # Retrieve the key for the image at the specified index
        img_key = self._img_keys[index]
        # Get the annotations for this image
        annotation = self._annotation_df.loc[img_key]
        # Load the image and its target (segmentation masks, bounding boxes and labels)
        image, target = self._load_image_and_target(annotation)
        # Apply the transformations, if any
        if self._transforms:
            image, target = self._transforms(image, target)
        return image, target

    def _load_image_and_target(self, annotation):
        """
        Load an image and its target (masks, bounding boxes and labels).

        Parameters:
            annotation (pandas.Series): The annotations for one image; its name
                is the image key and its 'shapes' entry lists the polygons.

        Returns:
            tuple: (PIL.Image, dict) where the dict has 'masks', 'boxes'
            and 'labels' keys.
        """
        # Retrieve the file path of the image
        filepath = self._img_dict[annotation.name]
        # Open the image file and convert it to RGB
        image = Image.open(filepath).convert('RGB')
        # Convert the class labels to indices.
        # Fix: build the int64 tensor directly instead of creating a float
        # torch.Tensor and converting it afterwards.
        labels = [shape['label'] for shape in annotation['shapes']]
        labels = torch.tensor([self._class_to_idx[label] for label in labels],
                              dtype=torch.int64)
        # Convert polygons to mask images (one grayscale image per shape)
        shape_points = [shape['points'] for shape in annotation['shapes']]
        xy_coords = [[tuple(p) for p in points] for points in shape_points]
        mask_imgs = [create_polygon_mask(image.size, xy) for xy in xy_coords]
        # Stack the per-shape masks into a single boolean Mask tensor.
        # NOTE(review): assumes 'shapes' is non-empty — torch.concat and
        # masks_to_boxes both fail on zero masks; verify upstream filtering.
        masks = Mask(torch.concat([Mask(transforms.PILToTensor()(mask_img), dtype=torch.bool) for mask_img in mask_imgs]))
        # Generate bounding box annotations from the segmentation masks.
        # PIL size is (W, H) while canvas_size expects (H, W), hence [::-1].
        bboxes = BoundingBoxes(data=torchvision.ops.masks_to_boxes(masks), format='xyxy', canvas_size=image.size[::-1])
        return image, {'masks': masks, 'boxes': bboxes, 'labels': labels}
def tuple_batch(batch):
    """
    Collate function: transpose a batch of (image, target) pairs into a
    (images, targets) pair of tuples, leaving each sample untouched.
    """
    transposed = zip(*batch)
    return tuple(transposed)
def create_polygon_mask(image_size, vertices):
    """
    Render a binary polygon mask as a grayscale PIL image.

    The returned image is black (0) everywhere except the filled polygon,
    which is white (255).

    Parameters:
    - image_size (tuple): The (width, height) dimensions of the mask image.
    - vertices (list): Tuples of (x, y) vertex coordinates, in clockwise or
      counter-clockwise order.

    Returns:
    - PIL.Image.Image: The grayscale ('L' mode) mask image.
    """
    # Start from an all-black canvas of the requested size
    canvas = Image.new('L', image_size, 0)
    # Fill the polygon region with white on the canvas
    drawer = ImageDraw.Draw(canvas, 'L')
    drawer.polygon(vertices, fill=(255))
    return canvas