In [None]:
!pip install opencv-python
!pip install labelImg

import os
import cv2
import math
import numpy as np
import pandas as pd
from PIL import Image
from collections import Counter
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
from PIL import Image, ImageDraw, ImageFont

# Define directory paths (all relative to the notebook's working directory)
image_directory = 'New_billets/New_billets' # Path to the input .png images
annotations_file = 'annotations.csv' # CSV with columns 'Image_Name' and 'Label'; written by the labeling loop
annotation_directory = 'New_billets/New_Billets_coords' # Path to XML files (one per image, same base name as the .png)
# Previous output location, kept for reference:
#output_directory = 'New_billets/New_billets_marked' # Path to save images with boxes
output_directory = 'New_billets/New_billets_marked_new' # Path to save the annotated images



# Fill annotations file

In [89]:
# Load the existing annotations, or create an empty DataFrame with the same
# columns if the file doesn't exist yet.
#
# Both columns are read as plain strings:
#   * dtype=str stops pandas from turning an all-digit Label column into
#     integers, which would break len(label) and per-character iteration.
#   * keep_default_na=False keeps an empty label as '' instead of a float NaN.
if os.path.exists(annotations_file):
    data = pd.read_csv(annotations_file, dtype=str, keep_default_na=False)
else:
    data = pd.DataFrame(columns=['Image_Name', 'Label'])

In [None]:
# Iterate over images in sorted order so that an interrupted labeling session
# resumes at a predictable place (os.listdir returns names in arbitrary order).
for image_filename in sorted(os.listdir(image_directory)):
    if not image_filename.endswith('.png'):
        continue

    # skip if image already has a label
    if (data['Image_Name'] == image_filename).any():
        print(f"Skipping {image_filename}, already labeled.")
        continue

    # cv2.imread does not raise on an unreadable file; it returns None
    img = cv2.imread(os.path.join(image_directory, image_filename))
    if img is None:
        print(f"Skipping {image_filename}, image could not be read.")
        continue

    # Display the image. OpenCV stores pixels as BGR while matplotlib expects
    # RGB, so convert first; otherwise red and blue are swapped on screen.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.show()

    # Ask for label; strip surrounding whitespace so an accidental space does
    # not change the label's length
    label = input(f"Enter the label for {image_filename}: ").strip()

    # Create new row of the DataFrame
    new_row = pd.DataFrame({'Image_Name': [image_filename], 'Label': [label]})
    data = pd.concat([data, new_row], ignore_index=True)

    # Save the updated DataFrame after each input so no work is lost if the
    # session is interrupted
    data.to_csv(annotations_file, index=False)
    print(f"Annotation for {image_filename} saved.")

print(f"All annotations are saved to {annotations_file}.")

# Find coordinates of images

In this task, we use the labelImg package. When launched from the terminal, it opens an interface where we can open a directory of images. For each image, we use the “Add RectBox” feature to create a bounding box around each character. The boxes must be drawn in the same order as the characters appear in the label, because the code below pairs the i-th box in the XML file with the i-th character of the label. Once completed, an XML file is generated for each image, containing the coordinates (`xmin`, `ymin`, `xmax`, `ymax`) of each character’s bounding box.

# Creating the final images displaying their labels and the coordinates of each of their characters

First method (not used):

In [33]:
# Draw one rectangle per character box and list the box coordinates on the image
def draw_boxes_with_labels(image_path, annotation_path, label, output_path):
    """Outline each annotated character box and print its coordinates on the image.

    The i-th ``<object>`` in the XML annotation is matched with the i-th
    character of ``label``; boxes beyond the end of ``label`` are ignored.
    Every matched box is outlined in green, and a text line of the form
    ``(char , xmin,ymin), (xmax,ymax)`` is added to a list that starts at
    pixel (100, 100) and advances 30 pixels per entry.

    Args:
        image_path: Path of the image to load with ``cv2.imread``.
        annotation_path: Path of the XML file with ``object/bndbox`` entries.
        label: Sequence of characters, in the same order as the boxes.
        output_path: Destination passed to ``cv2.imwrite``.
    """
    canvas = cv2.imread(image_path)
    annotation_root = ET.parse(annotation_path).getroot()

    green = (0, 255, 0)
    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')

    for row, obj in enumerate(annotation_root.findall('object')):
        # More boxes than characters: nothing left to name, so stop here
        if row >= len(label):
            break

        box = obj.find('bndbox')
        x_min, y_min, x_max, y_max = (int(box.find(tag).text) for tag in corner_tags)

        # Outline the character
        cv2.rectangle(canvas, (x_min, y_min), (x_max, y_max), green, 2)

        # One caption line per character, stacked 30 px apart below (100, 100)
        char_label = label[row]
        caption = f"({char_label} , {x_min},{y_min}), ({x_max},{y_max})"
        cv2.putText(canvas, caption, (100, 100 + 30 * row), cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 2)

    # Write the result to disk
    cv2.imwrite(output_path, canvas)
    print(f"Saved annotated image: {output_path}")

Second method (used):

In [None]:
def draw_points_with_labels(image_path, annotation_path, label, output_path):
    """Mark each character's box centre on an image and write its character beside it.

    The i-th ``<object>`` of the XML annotation is paired with the i-th
    character of ``label``; boxes beyond the end of ``label`` are ignored.
    For every pair, a filled green dot is drawn at the centre of the bounding
    box and a text line ``(char , xmin,ymin), (xmax,ymax)`` is added to a list
    that starts at pixel (100, 100) and advances 30 pixels per entry. Each
    character is then drawn near its dot, rotated to follow the straight line
    joining the first and the last box centre.

    Args:
        image_path: Path of the image to load with ``cv2.imread``.
        annotation_path: Path of the XML file holding ``object/bndbox`` entries
            with integer ``xmin``, ``ymin``, ``xmax`` and ``ymax`` values.
        label: String whose characters name the boxes, in annotation order.
        output_path: Destination passed to ``cv2.imwrite``.

    Returns:
        None. If the image cannot be read, or there is no box/character pair
        to draw, a message is printed and no file is written.
    """
    # Load image. cv2.imread signals failure by returning None, not by raising.
    image = cv2.imread(image_path)
    if image is None:
        print(f"Skipping {image_path}: image could not be read.")
        return

    # Parse the XML file to get bounding box coordinates
    root = ET.parse(annotation_path).getroot()

    centers = []

    # Iterate over each object and calculate the center points
    for char_index, obj in enumerate(root.findall('object')):
        # Stop if there are more boxes than characters
        if char_index >= len(label):
            break

        bbox = obj.find('bndbox')
        x_min = int(bbox.find('xmin').text)
        y_min = int(bbox.find('ymin').text)
        x_max = int(bbox.find('xmax').text)
        y_max = int(bbox.find('ymax').text)

        # Calculate center point of the bounding box
        center_x = (x_min + x_max) // 2
        center_y = (y_min + y_max) // 2
        centers.append((center_x, center_y))

        # Draw center point on the image
        cv2.circle(image, (center_x, center_y), 5, (0, 255, 0), -1)

        # Add this character's coordinates to the list printed on the image
        # (one line every 30 px, starting at y = 100)
        char_label = label[char_index]
        label_coord_text = f"({char_label} , {x_min},{y_min}), ({x_max},{y_max})"
        cv2.putText(image, label_coord_text, (100, 100 + 30 * char_index), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Without at least one centre there is no orientation to measure and
    # nothing to label (previously this raised IndexError on centers[0]).
    if not centers:
        print(f"Skipping {image_path}: no character boxes to annotate in {annotation_path}.")
        return

    # Detect the rotation of the text line from the segment joining the centre
    # of the first box to the centre of the last box
    (x1, y1) = centers[0]
    (x2, y2) = centers[-1]
    angle = math.atan2((y2 - y1), (x2 - x1)) * 180.0 / math.pi

    # The text offset and font settings depend only on the angle, so compute
    # them once instead of once per character
    offset_distance = 15
    offset_x = int(offset_distance * math.cos(math.radians(angle)))
    offset_y = int(offset_distance * math.sin(math.radians(angle)))
    font_scale = 1
    thickness = 2

    # Write labels on each character following the detected orientation
    for i, (center_x, center_y) in enumerate(centers):
        char_label = label[i]

        # Position for the label text
        text_x = center_x + offset_x + 60
        text_y = center_y + offset_y + 10

        # Rotation about the text anchor. Image y grows downwards, so the
        # measured angle is negated to turn the text the same way as the line.
        M = cv2.getRotationMatrix2D((text_x, text_y), -angle, 1.0)

        # Draw the character on a black overlay of the same size as the image
        overlay = np.zeros_like(image, dtype=np.uint8)
        cv2.putText(overlay, char_label, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 255, 0), thickness, lineType=cv2.LINE_AA)

        # Rotate the overlay and copy only its non-black pixels onto the image
        overlay_rotated = cv2.warpAffine(overlay, M, (image.shape[1], image.shape[0]))
        mask = overlay_rotated > 0
        image[mask] = overlay_rotated[mask]

    # Save image with annotated points and labels; imwrite returns False when
    # the file could not be written, so only report success when it is true
    if cv2.imwrite(output_path, image):
        print(f"Saved annotated image: {output_path}")
    else:
        print(f"Failed to save annotated image: {output_path}")

In [None]:
# Create output directory if it does not exist
os.makedirs(output_directory, exist_ok=True)

# Loop through XML files in sorted order so every run processes (and logs)
# the images in the same sequence
for xml_filename in sorted(os.listdir(annotation_directory)):
    if not xml_filename.endswith('.xml'):
        continue

    # Image has same base name as XML. splitext only swaps the extension,
    # whereas str.replace would also rewrite a '.xml' occurring inside the name.
    image_name = os.path.splitext(xml_filename)[0] + '.png'
    image_path = os.path.join(image_directory, image_name)
    annotation_path = os.path.join(annotation_directory, xml_filename)
    output_path = os.path.join(output_directory, image_name)

    if not os.path.exists(image_path):
        # Previously skipped silently, which hid annotation files without an image
        print(f"No image found for {xml_filename} (expected {image_path}).")
        continue

    # Find corresponding label from the annotations CSV
    row = data[data['Image_Name'] == image_name]
    if row.empty:
        print(f"No label found in CSV for {image_name}.")
        continue

    # Access first row of the filtered DataFrame because it will contain only one row.
    # Coerce to str so a label that pandas parsed as a number (or NaN) is
    # reported by the length check below instead of crashing len().
    label = str(row.iloc[0]['Label']).strip()

    # Ensure label has exactly 11 characters
    if len(label) == 11:
        draw_points_with_labels(image_path, annotation_path, label, output_path)
    else:
        print(f"Skipping {image_name}: label does not have 11 characters (got {len(label)}: {label!r}).")

Saved annotated image: New_billets/New_billets_marked_new/img_0000.png
Saved annotated image: New_billets/New_billets_marked_new/img_2398.png
Saved annotated image: New_billets/New_billets_marked_new/img_1518.png
Saved annotated image: New_billets/New_billets_marked_new/img_1243.png
Saved annotated image: New_billets/New_billets_marked_new/img_1914.png
Saved annotated image: New_billets/New_billets_marked_new/img_0363.png
Saved annotated image: New_billets/New_billets_marked_new/img_2574.png
Saved annotated image: New_billets/New_billets_marked_new/img_1727.png
Saved annotated image: New_billets/New_billets_marked_new/img_1848.png
Saved annotated image: New_billets/New_billets_marked_new/img_0968.png
Saved annotated image: New_billets/New_billets_marked_new/img_0759.png
Saved annotated image: New_billets/New_billets_marked_new/img_1309.png
Saved annotated image: New_billets/New_billets_marked_new/img_0407.png
Saved annotated image: New_billets/New_billets_marked_new/img_0605.png
Saved 

# Display overall character frequency and frequency of characters by position

In [27]:
# Extract all labels from the annotations file. Missing values are dropped and
# everything is coerced to str so that iterating over a label never hits a NaN
# or a number.
labels = data['Label'].dropna().astype(str)

# Analyze character frequency
char_counter = Counter()
# Each label has 11 characters
position_counter = [Counter() for _ in range(11)]

# Iterate over labels to collect frequency information
for label in labels:
    char_counter.update(label)
    # zip stops after the last tracked position, so characters beyond it only
    # contribute to the overall counts
    for counter, char in zip(position_counter, label):
        counter[char] += 1

# Display overall character frequency
print("Overall Character Frequency:")
for char, count in char_counter.items():
    print(f"'{char}': {count}")

# Display frequency of characters by position
print("\nCharacter Frequency by Position:")
for i, counter in enumerate(position_counter):
    print(f"Position {i + 1}:")
    for char, count in counter.items():
        print(f"'{char}': {count}")

# Saving results to CSV files
char_frequency_df = pd.DataFrame.from_dict(char_counter, orient='index', columns=['Frequency'])
char_frequency_df.to_csv("char_frequency.csv", index_label="Character")

# One row per position, one column per character.
#   * The index is 1-based so the CSV uses the same position numbers as the
#     report printed above.
#   * A character never seen at a position gets an explicit 0 rather than a
#     blank, which also keeps the counts as integers instead of floats.
position_frequency_df = (
    pd.DataFrame(
        [dict(counter) for counter in position_counter],
        index=range(1, len(position_counter) + 1),
    )
    .fillna(0)
    .astype(int)
)
position_frequency_df.to_csv("position_frequency.csv", index_label="Position")

Overall Character Frequency:
'5': 662
'8': 344
'9': 112
'0': 371
'7': 98
'6': 337
'1': 193
'3': 177
'2': 167
'C': 36
'X': 39
'4': 139
'E': 31

Character Frequency by Position:
Position 1:
'5': 246
Position 2:
'5': 246
Position 3:
'8': 246
Position 4:
'5': 28
'6': 218
Position 5:
'9': 29
'0': 53
'1': 48
'2': 53
'3': 50
'4': 13
Position 6:
'5': 26
'0': 28
'1': 23
'7': 27
'8': 25
'2': 26
'9': 24
'6': 26
'3': 20
'4': 21
Position 7:
'5': 59
'1': 39
'3': 45
'4': 41
'6': 26
'2': 33
'X': 2
'9': 1
Position 8:
'0': 221
'1': 23
'X': 2
Position 9:
'7': 29
'1': 25
'6': 27
'2': 25
'0': 15
'3': 26
'8': 22
'5': 24
'9': 25
'4': 25
'X': 2
'C': 1
Position 10:
'6': 23
'3': 12
'7': 18
'X': 20
'C': 16
'0': 33
'E': 15
'8': 32
'9': 16
'5': 19
'1': 11
'4': 15
'2': 16
Position 11:
'6': 17
'7': 24
'9': 17
'C': 19
'5': 14
'3': 24
'X': 13
'1': 24
'4': 24
'0': 21
'E': 16
'2': 14
'8': 19
