# Generate YOLO labels
<sub>Author: [faridjn](https://github.com/faridjn)</sub>

## Import necessary libraries

In [1]:
#fundamental
import pandas as pd
import numpy as np
import json

#OS and sys
import os
import sys

#labels geometry
from shapely.geometry import box
from PIL import Image
from shapely.geometry import Polygon



import os
import random
import shutil


## Define label dictionary

In [2]:
# Map each annotated class name to its integer YOLO class id.
# Ids are assigned in the order the names are listed, starting at 0.
CLASS_DICT = {
    class_name: class_id
    for class_id, class_name in enumerate([
        'crossarm',
        'cutouts',
        'insulator',
        'pole',
        'transformers',
        'background_structure',
    ])
}

## Read annotations

In [3]:
# Project root: the parent of the current working directory, normalised so
# the trailing ".." component is collapsed.
root_path = os.path.normpath(os.sep.join((os.getcwd(), os.pardir)))
print(root_path)

c:\Farid\gitProjects\power_line_inspection_yolov8


In [4]:
# Read annotations
# The path is assembled with os.path.join so the separator matches the host
# OS instead of being hard-coded to the Windows backslash.
annotation_csv = os.path.join(root_path, 'data', 'annotation',
                              'Overhead-Distribution-Labels.csv')
annotations = pd.read_csv(annotation_csv)

In [5]:
# Prepare the annotation dataframe:
#   1. name the two CSV columns,
#   2. lower-case the image file names,
#   3. index the rows by image file name.
annotations.columns = ['label', 'image']
annotations['image'] = annotations['image'].str.lower()
annotations.set_index(keys='image', inplace=True)
annotations.head()

Unnamed: 0_level_0,label
image,Unnamed: 1_level_1
1 (1).jpg,"{'objects': [{'value': 'insulator', 'polygon':..."
1 (10).jpg,"{'objects': [{'value': 'other_wire', 'line': [..."
1 (100).jpg,"{'objects': [{'value': 'insulator', 'polygon':..."
1 (1000).jpg,"{'objects': [{'value': 'conductor', 'line': [{..."
1 (1001).jpg,"{'objects': [{'value': 'insulator', 'polygon':..."


## Read Images

In [6]:
# Dataset splits that can be processed; this run works on the last one.
DATASETS = ['test', 'train', 'val']
DATASET = DATASETS[-1]  # 'val'

In [7]:
# Folder layout for the selected split: <root>/data/<split>/{images,labels}/
# os.path.join picks the separator of the host OS. The trailing '' component
# keeps a trailing separator on image_path / label_path, which the plain
# string concatenation in create_label_file relies on.
data_path = os.path.join(root_path, 'data', DATASET)
image_path = os.path.join(data_path, 'images', '')
label_path = os.path.join(data_path, 'labels', '')

In [8]:
# Make sure the label folder exists. makedirs also creates missing parent
# folders, and exist_ok=True makes the call a no-op when the folder is
# already there (no separate exists() check needed).
os.makedirs(label_path, exist_ok=True)

In [9]:
def get_files_with_extension(folder, extension):
    '''Return the names of the files in folder that have the given extension.

    The comparison is case-insensitive, so 'jpg' matches both 'a.jpg' and
    'b.JPG'.

    Args:
        folder: Path of the directory to scan (not recursive).
        extension: File extension, with or without the leading dot
            ('jpg' and '.jpg' are equivalent). An empty string matches
            every file.

    Returns:
        A sorted list of file names (not full paths). Sub-directories are
        never included, even if their name ends with the extension.
    '''
    suffix = extension.lower()
    # Anchor the match on the dot so that 'jpg' does not match 'notjpg'.
    if suffix and not suffix.startswith('.'):
        suffix = '.' + suffix

    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    return sorted(
        name for name in os.listdir(folder)
        if name.lower().endswith(suffix)
        and os.path.isfile(os.path.join(folder, name))
    )

In [10]:
# Collect every JPEG file name in the image folder of the selected split,
# then echo the list as the cell output.
all_images = get_files_with_extension(folder=image_path, extension='jpg')
all_images

['1 (1082).JPG',
 '1 (1126).JPG',
 '1 (1145).JPG',
 '1 (1352).JPG',
 '1 (1418).JPG',
 '1 (1531).JPG',
 '1 (1665).JPG',
 '1 (344).JPG',
 '1 (5).JPG',
 '1 (579).JPG',
 '1 (705).JPG',
 '1 (857).JPG',
 '1 (869).JPG',
 '1 (878).JPG',
 '1 (912).JPG']

## Preprocess labels

In [11]:
def polygon_to_bbox(polygon_cords):
    '''Return the axis-aligned bounds of a polygon.

    Args:
        polygon_cords: List of polygon vertex coordinates,
            e.g. [(0, 0), (1, 1), (1, 0)].

    Returns:
        The `bounds` of the shapely Polygon built from polygon_cords,
        i.e. (minx, miny, maxx, maxy).
    '''
    return Polygon(polygon_cords).bounds

In [12]:
# Show the image-name index of the annotation table
display(annotations.index)

Index(['1 (1).jpg', '1 (10).jpg', '1 (100).jpg', '1 (1000).jpg',
       '1 (1001).jpg', '1 (1002).jpg', '1 (1003).jpg', '1 (1004).jpg',
       '1 (1005).jpg', '1 (1006).jpg',
       ...
       '9 (990).jpg', '9 (991).jpg', '9 (992).jpg', '9 (993).jpg',
       '9 (994).jpg', '9 (995).jpg', '9 (996).jpg', '9 (997).jpg',
       '9 (998).jpg', '9 (999).jpg'],
      dtype='object', name='image', length=29620)

In [13]:
# Target length, in pixels, of the longest image side after resizing
# (resize_image scales by out_imgsz / max(width, height)).
out_imgsz = 640
# Resized images are saved into the folder they are read from, under the
# same file name, so each source image is overwritten by its resized copy.
output_path = image_path

def get_image_path_and_size(image_file):
    '''Return the size of the image stored at image_file.

    Despite its name, the function returns only the size, not a path.

    Args:
        image_file: Path of the image file to inspect.

    Returns:
        A (width, height) tuple in pixels.
    '''
    # The context manager closes the underlying file once the size is read,
    # so no file handle is leaked for every processed image.
    with Image.open(image_file) as imgpil:
        wd, ht = imgpil.size
    return wd, ht

def resize_image(image_file, out_imgsz):
    '''Resize an image so that its longest side becomes out_imgsz pixels.

    The aspect ratio is preserved and LANCZOS resampling is used.

    Args:
        image_file: Path of the image file to resize.
        out_imgsz: Target length, in pixels, of the longest side.

    Returns:
        A (img_resized, scale_factor) tuple: the resized PIL image and the
        factor out_imgsz / max(width, height) that was applied.
    '''
    # The resize happens inside the `with` block so the pixel data is read
    # before the source file is closed; the file handle is then released.
    with Image.open(image_file) as imgpil:
        wd, ht = imgpil.size
        scale_factor = out_imgsz / max(wd, ht)
        # int() truncates, so a very elongated image could end up with a
        # 0-pixel side; clamp each side to at least 1 pixel.
        new_size = (max(1, int(wd * scale_factor)),
                    max(1, int(ht * scale_factor)))
        img_resized = imgpil.resize(new_size, Image.LANCZOS)
    return img_resized, scale_factor

def save_resized_image(img_resized, output_path, img):
    '''Save img_resized as the file named img inside output_path.

    Args:
        img_resized: Image object to save (its `save` method is called).
        output_path: Destination folder.
        img: File name, including extension, to save under.
    '''
    # os.path.join uses the separator of the host OS and does not double it
    # when output_path already ends with one.
    img_resized.save(os.path.join(output_path, img))

def get_image_object(annotations, img):
    '''Look up the annotation row of an image, ignoring letter case.

    Args:
        annotations: DataFrame whose index holds image file names.
        img: Image file name to look up.

    Returns:
        A one-row DataFrame when exactly one index entry matches img
        case-insensitively; None when there is no match or more than one.
    '''
    wanted = img.lower()
    matches = annotations.index.str.lower() == wanted
    return annotations.loc[matches] if matches.sum() == 1 else None

def process_label_data(img_obj):
    '''Parse the label string of an annotation row into Python objects.

    Args:
        img_obj: DataFrame whose first row has a 'label' column holding the
            annotation as text, either as JSON or as a Python-literal dict
            (single-quoted keys and strings).

    Returns:
        The parsed annotation (a dict for the label strings shown in this
        notebook).

    Raises:
        json.JSONDecodeError: If the text cannot be parsed by any strategy.
    '''
    import ast

    raw = img_obj['label'].iloc[0]

    # 1. Already valid JSON: parse as is.
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        pass

    # 2. Python-literal text. literal_eval understands single quotes,
    #    apostrophes inside double-quoted values and None/True/False, all of
    #    which a blanket quote replacement would corrupt or reject.
    try:
        return ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        # 3. Legacy behaviour: swap every single quote for a double quote
        #    and parse the result as JSON.
        return json.loads(raw.replace("'", '"'))

def create_label_file(label_path, label_file_name):
    '''Open a label file for writing and return the file object.

    Args:
        label_path: Folder the label file is created in, with or without a
            trailing path separator.
        label_file_name: Name of the label file, e.g. '1 (5).txt'.

    Returns:
        A text file object opened in 'w' mode (an existing file is
        truncated). The caller is responsible for closing it, typically
        with a `with` statement.
    '''
    # os.path.join inserts a separator only when label_path lacks one, so the
    # file always lands inside label_path.
    return open(os.path.join(label_path, label_file_name), 'w')

def process_objects(lbl, wd, ht, f1, scale_factor):
    '''Write one YOLO label line per polygon object of an annotation.

    Each line has the form
    "<class_id> <x_center> <y_center> <width> <height>", describing the
    axis-aligned bounding box of the object's polygon with all four numbers
    divided by the image width / height.

    Args:
        lbl: Parsed annotation; lbl['objects'] is a list of dicts, each with
            a 'value' (class name) and, for polygon objects, a 'polygon'
            list of point dicts.
        wd: Width of the original image, in pixels.
        ht: Height of the original image, in pixels.
        f1: Writable text file object receiving the label lines.
        scale_factor: Resize factor applied to the image. Kept for interface
            compatibility only: scaling the box and the image by the same
            factor cancels out in the normalised values, so the labels are
            computed directly from the original coordinates. This also avoids
            the rounding error of truncating scaled coordinates to integers.

    Raises:
        KeyError: If a polygon object's class name is not in CLASS_DICT.
    '''
    for obj in lbl['objects']:
        polygon_coords = obj.get('polygon')
        # Objects without a polygon (e.g. those described by a 'line') or
        # with an empty polygon have no bounding box to write.
        if not polygon_coords:
            continue

        # Each point dict is read in insertion order, assumed to be x then y
        # (TODO confirm against the annotation schema).
        points = [(x, y) for x, y in (pnt.values() for pnt in polygon_coords)]
        minx, miny, maxx, maxy = polygon_to_bbox(points)

        # Centre and size of the bounding box, normalised by the image size.
        fields = (
            CLASS_DICT[obj['value']],
            (minx + maxx) / 2 / wd,
            (miny + maxy) / 2 / ht,
            (maxx - minx) / wd,
            (maxy - miny) / ht,
        )
        f1.write(' '.join(str(field) for field in fields) + '\n')

def process_images(all_images, annotations, image_path, label_path, output_path, out_imgsz):
    '''Resize every image and write its YOLO label file.

    For each image name the image is resized and saved into output_path;
    if the annotation table has exactly one matching row, a label file with
    the same base name and a '.txt' extension is written into label_path.
    Images without a matching row are resized but get no label file.

    Args:
        all_images: Iterable of image file names located in image_path.
        annotations: DataFrame of annotations indexed by image file name.
        image_path: Folder containing the source images.
        label_path: Folder the label files are written to.
        output_path: Folder the resized images are saved to. When it equals
            image_path the source images are overwritten.
        out_imgsz: Target length, in pixels, of the longest image side.
    '''
    print('Processing labels...')

    for img in all_images:
        print(img)
        image_file = os.path.join(image_path, img)

        # Read the original size before saving: when output_path equals
        # image_path the save below replaces the source file.
        wd, ht = get_image_path_and_size(image_file)

        img_resized, scale_factor = resize_image(image_file, out_imgsz)
        save_resized_image(img_resized, output_path, img)

        img_obj = get_image_object(annotations, img)
        if img_obj is None:
            continue

        lbl = process_label_data(img_obj)

        # splitext removes only the final extension, so a name such as
        # 'a.b.jpg' yields 'a.b.txt' and still matches its image.
        label_file_name = os.path.splitext(img)[0] + '.txt'
        with create_label_file(label_path, label_file_name) as f1:
            process_objects(lbl, wd, ht, f1, scale_factor)

# Run the full resize + label-generation pass for the selected split.
process_images(
    all_images=all_images,
    annotations=annotations,
    image_path=image_path,
    label_path=label_path,
    output_path=output_path,
    out_imgsz=out_imgsz,
)


Processing labels...
1 (1082).JPG
1 (1126).JPG
1 (1145).JPG
1 (1352).JPG
1 (1418).JPG
1 (1531).JPG
1 (1665).JPG
1 (344).JPG
1 (5).JPG
1 (579).JPG
1 (705).JPG
1 (857).JPG
1 (869).JPG
1 (878).JPG
1 (912).JPG
