# Pre-processing YOLO labels
<sub>Author: [faridjn</sub>](https://github.com/faridjn)

## Libraries

In [1]:
#!pip install shapely

### Import necessary libraries

In [2]:
#fundamental
import pandas as pd
import json

#OS and sys
import os

#labels geometry
from shapely.geometry import box
from PIL import Image
from shapely.geometry import Polygon

## Utility Functions

In [3]:
def get_files_with_extension(folder, extension):    
    '''a function that takes a folder path and a file extension as inputs, 
    and returns a list of all files in the folder with the specified extension,
    case-insensitive. It can be used for quickly obtaining a list of files
    with a specific file type from a directory'''
    # specify the desired file extension
    extension = extension.lower()
    # get a list of all files in the folder
    files = os.listdir(folder)
    # filter out only the files with the desired extension
    files_with_extention = [f for f in files if f.lower().endswith(extension)]
    # print the list of desired files
    return files_with_extention

def get_image_read_path_and_size(image_file):
    imgpil = Image.open(image_file)
    wd, ht = imgpil.size
    return wd, ht


def resize_image(image_file, OUT_IMGSZ):
    imgpil = Image.open(image_file)
    wd, ht = imgpil.size
    scale_factor = OUT_IMGSZ / max(wd, ht)
    img_resized = imgpil.resize((int(wd * scale_factor), int(ht * scale_factor)), Image.LANCZOS)
    return img_resized, scale_factor


def save_resized_image(img_resized, image_write_path, img):
    img_resized.save(image_write_path + '/' + img)


def get_image_object(annotations, img):
    filt = annotations.index.str.lower() == img.lower()
    if filt.sum() == 1:
        return annotations.loc[filt]
    return None


def process_label_data(img_obj):
    lbl = img_obj['label'].iloc[0]
    lbl = lbl.replace("\'", "\"")
    return json.loads(lbl)


def create_label_file(label_write_path, label_file_name):
    return open(label_write_path + label_file_name, 'w')


# polygon = Polygon([(0, 0), (1, 1), (1, 0)])
def polygon_to_bbox(polygon_cords):
    '''Args: List of Polygon_cords '''
    polygon = Polygon(polygon_cords)
    return polygon.bounds


def process_objects(lbl, wd, ht, f1, scale_factor):
    for obj in lbl['objects']:
        polygon_coords_list = []

        polygon_coords = obj.get('polygon')
        if polygon_coords is not None:
            for pnt in polygon_coords:
                polygon_coords_list.append(tuple(pnt.values()))

            polygon_coords_list = [(int(x * scale_factor), int(y * scale_factor)) for x, y in polygon_coords_list]

            bbox = polygon_to_bbox(polygon_coords_list)
            minx, miny, maxx, maxy = bbox

            bbox_s = box(minx, miny, maxx, maxy)

            object_annos_line = str(CLASS_DICT[obj['value']]) + ' ' \
                                + str(bbox_s.centroid.x / (wd * scale_factor)) + ' ' \
                                + str(bbox_s.centroid.y / (ht * scale_factor)) + ' ' \
                                + str((maxx - minx) / (wd * scale_factor)) + ' ' \
                                + str((maxy - miny) / (ht * scale_factor)) + '\n'

            f1.write(object_annos_line)


def process_images(all_images, annotations, image_read_path, label_write_path, image_write_path, OUT_IMGSZ):

    for img in all_images:

        print(img)
        image_file = image_read_path + '/' + img
        wd, ht = get_image_read_path_and_size(image_file)

        img_resized, scale_factor = resize_image(image_file, OUT_IMGSZ)
        save_resized_image(img_resized, image_write_path, img)

        img_obj = get_image_object(annotations, img)
        if img_obj is not None:
            lbl = process_label_data(img_obj)

            label_file_name = img.split('.')[0] + '.txt'
            with create_label_file(label_write_path, label_file_name) as f1:
                process_objects(lbl, wd, ht, f1, scale_factor)

## Settings

In [4]:
# Label dictionary
CLASS_DICT = {'crossarm':0,
              'cutouts':1,
              'insulator':2,
              'pole':3,
              'transformers':4,
              'background_structure':5}

In [5]:
#image size
OUT_IMGSZ = 768

In [6]:
# Define root directory
ROOT_PATH = os.path.normpath(os.getcwd() + os.sep + os.pardir)
print(ROOT_PATH)

/home/farid_javadnejad/power_line_inspection_yolov8


## Read Annotation

In [7]:
# Annotation file
annotation_csv = ROOT_PATH + '/data/raw/annotation/' + 'Overhead-Distribution-Labels.csv'

In [8]:
#Read annotations
annotations = pd.read_csv(annotation_csv)

In [9]:
#prepare annotation dataframe
annotations.columns = ['label', 'image']
annotations['image'] = annotations['image'].str.lower()
annotations.set_index(['image'], inplace=True)
annotations.head()

Unnamed: 0_level_0,label
image,Unnamed: 1_level_1
1 (1).jpg,"{'objects': [{'value': 'insulator', 'polygon':..."
1 (10).jpg,"{'objects': [{'value': 'other_wire', 'line': [..."
1 (100).jpg,"{'objects': [{'value': 'insulator', 'polygon':..."
1 (1000).jpg,"{'objects': [{'value': 'conductor', 'line': [{..."
1 (1001).jpg,"{'objects': [{'value': 'insulator', 'polygon':..."


## Process labels

In [10]:
# Choose dataset "test|train|val"
DATASETS = ['test', 'train', 'val']

print('Processing labels...')

for DATASET in DATASETS:
    
    print('###################### ' + DATASET + ' #############################')
        
    ############## READ IMAGES #############
    # Define image and label read folders
    image_read_path= ROOT_PATH + '/data/intermdiate/' + DATASET + '/images/'
    # Display reading images
    all_images = get_files_with_extension(image_read_path, 'jpg')
    
    ############## WRITE FODLER ##############
    # Define image and label write folders
    image_write_path = ROOT_PATH + '/data/processed/' + DATASET + '/images/'
    label_write_path = ROOT_PATH + '/data/processed/' + DATASET + '/labels/'
    
    
    ############## CREATE FODLERS ##############
    #Create the folder if it doesn't exist
    if not os.path.exists(image_write_path):
        os.makedirs(image_write_path, exist_ok=True)        
    if not os.path.exists(label_write_path):
        os.makedirs(label_write_path, exist_ok=True)

    ############## PROCESS LABELS ##############
    process_images(all_images, annotations, image_read_path, label_write_path, image_write_path, OUT_IMGSZ)

Processing labels...
###################### test #############################
1 (413).JPG
1 (1874).JPG
1 (570).JPG
1 (397).JPG
1 (128).JPG
1 (169).JPG
1 (937).JPG
1 (1571).JPG
1 (703).JPG
1 (696).JPG
1 (336).JPG
1 (7).JPG
1 (1530).JPG
1 (436).JPG
1 (999).JPG
###################### train #############################
1 (1280).JPG
1 (668).JPG
1 (709).JPG
1 (477).JPG
1 (807).JPG
1 (542).JPG
1 (985).JPG
1 (1387).JPG
1 (186).JPG
1 (458).JPG
1 (746).JPG
1 (981).JPG
1 (1678).JPG
1 (816).JPG
1 (884).JPG
1 (1096).JPG
1 (175).JPG
1 (266).JPG
1 (125).JPG
1 (1538).JPG
1 (313).JPG
1 (1322).JPG
1 (1364).JPG
1 (1188).JPG
1 (993).JPG
1 (612).JPG
1 (1314).JPG
1 (479).JPG
1 (188).JPG
1 (123).JPG
1 (1406).JPG
1 (559).JPG
1 (727).JPG
1 (1893).JPG
1 (1633).JPG
1 (45).JPG
1 (750).JPG
1 (1193).JPG
1 (1674).JPG
1 (1494).JPG
1 (30).JPG
1 (1130).JPG
1 (718).JPG
1 (1204).JPG
1 (629).JPG
1 (323).JPG
1 (1296).JPG
1 (80).JPG
1 (1407).JPG
1 (1262).JPG
1 (541).JPG
1 (16).JPG
1 (1577).JPG
1 (1331).JPG
1 (109).JPG
1 (

In [11]:
print('Done')

Done
