# Generate YOLO labels
<sub>Author: [faridjn](https://github.com/faridjn)</sub>

## Import necessary libraries

In [15]:
#fundamental
import pandas as pd
import numpy as np
import json

#OS and sys
import os
import sys

#labels geometry
from shapely.geometry import box
from PIL import Image
from shapely.geometry import Polygon

## Define label dictionary

In [16]:
# Lookup table from the annotated class name to the integer class id
# written as the first field of every YOLO label line.
CLASS_DICT = dict(
    crossarm=0,
    cutouts=1,
    insulator=2,
    pole=3,
    transformers=4,
    background_structure=5,
)

## Read annotations

In [17]:
# Define root directory: the parent of the current working directory.
# os.path.join is used instead of string concatenation so the separator is
# not doubled when the working directory already ends with one.
root_path = os.path.normpath(os.path.join(os.getcwd(), os.pardir))
print(root_path)

c:\Farid\gitProjects\power_line_inspection_yolov8


In [18]:
#feature_path = root_path + '\\src\\features\\'
#sys.path.append(feature_path)

In [19]:
# Read annotations
# The path is assembled with os.path.join so it is not tied to the
# Windows-only '\\' separator.
annotation_csv = os.path.join(root_path, 'data', 'raw', 'annotation',
                              'Overhead-Distribution-Labels.csv')
annotations = pd.read_csv(annotation_csv)

In [21]:
# Prepare the annotation dataframe:
#   1. name the two columns,
#   2. lowercase the image file names so later lookups can ignore case,
#   3. use the image file name as the index.
annotations.columns = ['label', 'image']
lowercase_names = annotations['image'].str.lower()
annotations = annotations.assign(image=lowercase_names).set_index('image')
annotations.head()

Unnamed: 0_level_0,label
image,Unnamed: 1_level_1
1 (1).jpg,"{'objects': [{'value': 'insulator', 'polygon':..."
1 (10).jpg,"{'objects': [{'value': 'other_wire', 'line': [..."
1 (100).jpg,"{'objects': [{'value': 'insulator', 'polygon':..."
1 (1000).jpg,"{'objects': [{'value': 'conductor', 'line': [{..."
1 (1001).jpg,"{'objects': [{'value': 'insulator', 'polygon':..."


## Read Images

In [28]:
# Dataset folders, built with os.path.join instead of hard-coded '\\'.
# The trailing '' component keeps a trailing separator on every path, so code
# that appends a file name directly to these strings keeps working.
data_path = os.path.join(root_path, 'data', 'raw', 'train', '')
image_path = os.path.join(data_path, 'images', '')
label_path = os.path.join(data_path, 'labels', '')

In [29]:
# Create the label folder if it is missing. makedirs with exist_ok=True avoids
# the check-then-create race and also creates missing parent folders.
os.makedirs(label_path, exist_ok=True)

In [30]:
def get_files_with_extension(folder, extension):
    '''Return the names of the entries in a folder that have a given extension.

    The comparison is case-insensitive, and the extension may be given with
    or without its leading dot ('jpg' and '.jpg' are equivalent).

    Args:
        folder: path of the directory to list (sub-folders are not searched).
        extension: file extension to keep, e.g. 'jpg' or '.JPG'. An empty
            string keeps every entry.

    Returns:
        List of entry names (not full paths), in the order given by os.listdir.

    Raises:
        OSError: propagated from os.listdir, e.g. when the folder does not exist.
    '''
    suffix = extension.lower()

    # Anchor the match on the dot so that 'jpg' does not also match names that
    # merely end with those letters (e.g. 'notes_jpg').
    if suffix and not suffix.startswith('.'):
        suffix = '.' + suffix

    return [name for name in os.listdir(folder) if name.lower().endswith(suffix)]

In [31]:
# Collect the names of every JPG image in the image folder, then show the list.
all_images = get_files_with_extension(folder=image_path, extension='jpg')
all_images

['1 (1).JPG',
 '1 (10).JPG',
 '1 (100).JPG',
 '1 (1000).JPG',
 '1 (1001).JPG',
 '1 (1002).JPG',
 '1 (1003).JPG',
 '1 (1004).JPG',
 '1 (1005).JPG',
 '1 (1006).JPG',
 '1 (1007).JPG',
 '1 (1008).JPG',
 '1 (1009).JPG']

## Preprocess labels

In [32]:
# polygon = Polygon([(0, 0), (1, 1), (1, 0)])
def polygon_to_bbox(polygon_cords):
    '''Return the bounding box of a polygon.

    Args:
        polygon_cords: list of (x, y) vertex coordinates,
            e.g. [(0, 0), (1, 1), (1, 0)].

    Returns:
        The shapely bounds tuple of the polygon: (minx, miny, maxx, maxy).
    '''
    return Polygon(polygon_cords).bounds

In [33]:
# Show the image names used as index of the annotation dataframe
# (the dataframe is named `annotations`; `annotation` is not defined).
display(annotations.index)

Index(['1 (1).jpg', '1 (10).jpg', '1 (100).jpg', '1 (1000).jpg',
       '1 (1001).jpg', '1 (1002).jpg', '1 (1003).jpg', '1 (1004).jpg',
       '1 (1005).jpg', '1 (1006).jpg',
       ...
       '9 (990).jpg', '9 (991).jpg', '9 (992).jpg', '9 (993).jpg',
       '9 (994).jpg', '9 (995).jpg', '9 (996).jpg', '9 (997).jpg',
       '9 (998).jpg', '9 (999).jpg'],
      dtype='object', name='image', length=29620)

In [34]:
print('Processing ...')

# Lowercase view of the annotation index, used for case-insensitive matching.
# It does not depend on the image, so it is built once instead of per iteration.
annotation_keys = annotations.index.str.lower()

for img in all_images:

    ######################## IMAGE ########################

    print(img + ' ...')

    # Get the shape of the image for further calculations. The context manager
    # releases the file handle as soon as the size has been read.
    with Image.open(os.path.join(image_path, img)) as imgpil:
        wd, ht = imgpil.size

    # Get the row with index (case-insensitive)
    img_obj = annotations.loc[annotation_keys == img.lower()]

    if img_obj.empty:
        # No annotation row for this image: report it and move on instead of
        # failing with an IndexError on .iloc[0] below.
        print(img + ' has no annotation, skipped')
        continue

    ######################## LABELS ########################

    # Extract the label data
    lbl = img_obj['label'].iloc[0]

    # The label text uses single quotation marks; json needs double quotation
    # marks, so swap them before loading it into a dictionary.
    lbl = json.loads(lbl.replace("'", '"'))

    ######################## LABEL GEOMETRY ########################

    # All lines are built first and written afterwards, so a failure while
    # converting one object does not leave a half-written label file behind.
    label_lines = []

    # Iterate over each object/asset in the image and convert polygon into bounding boxes
    for obj in lbl['objects']:

        # Only polygon annotations are converted; objects without a 'polygon'
        # entry (e.g. polyline annotations) are ignored.
        polygon_coords = obj.get('polygon')
        if polygon_coords is None:
            continue

        # Typecast to int.
        # NOTE(review): relies on every point dict holding exactly two values,
        # x first and then y - confirm against the annotation export.
        polygon_coords_list = [(int(x), int(y))
                               for x, y in (pnt.values() for pnt in polygon_coords)]

        # Bounding box of the polygon
        minx, miny, maxx, maxy = polygon_to_bbox(polygon_coords_list)
        bbox_s = box(minx, miny, maxx, maxy)

        # One label line: class id, box centre x, box centre y, box width,
        # box height; centre and size are divided by the image width/height.
        fields = (
            CLASS_DICT[obj['value']],
            bbox_s.centroid.x / wd,
            bbox_s.centroid.y / ht,
            (maxx - minx) / wd,
            (maxy - miny) / ht,
        )
        label_lines.append(' '.join(str(value) for value in fields) + '\n')

    ######################## LABEL FILE ########################

    # Create a txt annotation file name. splitext removes only the final
    # extension, so dots inside the image name are preserved.
    label_file_name = os.path.splitext(img)[0] + '.txt'

    # Create a new txt file and put annotations into it (overwrite).
    # 'with' closes the file even if writing fails.
    with open(os.path.join(label_path, label_file_name), 'w') as f1:
        f1.writelines(label_lines)

Processing ...
1 (1).JPG ...
1 (10).JPG ...
1 (100).JPG ...
1 (1000).JPG ...
1 (1001).JPG ...
1 (1002).JPG ...
1 (1003).JPG ...
1 (1004).JPG ...
1 (1005).JPG ...
1 (1006).JPG ...
1 (1007).JPG ...
1 (1008).JPG ...
1 (1009).JPG ...
