In [1]:
import os
import pandas as pd
from PIL import Image
import ast
import sys

In [2]:
## Paths

In [66]:
image_folder = "images/images"  # Folder containing your images
label_folder = "first10k/kaggle/working/yolov5_train/detect_test/labels"  # Folder containing your bounding box text files
csv_path = "train.csv"    # Path to your CSV file

# Load the CSV file
df = pd.read_csv(csv_path)

In [59]:
counter = 0
for _, row in df.iterrows():
    filename = row['filename']  # e.g. 'chart_0.png'
    image_path = os.path.join(image_folder, filename)
    label_path = os.path.join(label_folder, filename.replace('.png', '.txt'))
    # Read the bounding box from the corresponding text file
    try:
        with open(label_path, 'r') as f:
            bbox_data = f.readline().strip()
    except FileNotFoundError:
        # If the label file does not exist, skip this image
        print(f"Label file not found for {filename}. Skipping.")
        cropped_image = img
        new_image_path = os.path.join("cropped_images", filename)
        cropped_image.save(new_image_path)
        continue
    bbox = list(map(float, bbox_data.split()[1:]))  # [x_center, y_center, width, height]

    if len(bbox) != 4:
        print(f"Label file for {filename} has more than one box or incorrect format. Skipping.")
        cropped_image = img
        new_image_path = os.path.join("cropped_images", filename)
        cropped_image.save(new_image_path)
        continue
    # Calculate the box coordinates for cropping
    x_center, y_center, width, height = bbox
    img = Image.open(image_path)
    img_width, img_height = img.size

    # Convert YOLO coordinates to PIL coordinates
    x_min = int((x_center - width / 2) * img_width)
    x_max = int((x_center + width / 2) * img_width)
    y_min = int((y_center - height / 2) * img_height)
    y_max = int((y_center + height / 2) * img_height)

    # Crop the image
    cropped_image = img.crop((x_min, y_min, x_max, y_max))
    new_image_path = os.path.join("cropped_images", filename)
    cropped_image.save(new_image_path)
    # Get the new offset due to cropping
    x_offset = x_min
    y_offset = y_min

    # Adjust the box coordinates in the CSV file
    boxes_str = row['boxes']
    boxes = ast.literal_eval(boxes_str)

    # Update the coordinates considering the offset due to cropping
    new_boxes = []
    for box in boxes:
        new_box = [
            box[i] - (x_offset if i % 2 == 0 else y_offset)
            for i in range(len(box))
        ]
        new_boxes.append(new_box)
    # Update the CSV with the new box coordinates
    df.loc[_, 'boxes'] = str(new_boxes)

# Save the updated CSV
df.to_csv("updated_data.csv", index=False)

## Validation Cropping

In [60]:
image_folder = "images/images"  # Folder containing your images
label_folder = "val10k/kaggle/working/yolov5_train/detect_test/labels"  # Folder containing your bounding box text files
csv_path = "updated_data.csv"    # Path to your CSV file

In [62]:
for i in range(10000, 20000):
    filename = "chart_" + str(i) + ".png"
    image_path = os.path.join(image_folder, filename)
    label_path = os.path.join(label_folder, filename.replace('.png', '.txt'))
    # Read the bounding box from the corresponding text file

    try:
        with open(label_path, 'r') as f:
            bbox_data = f.readline().strip()
    except FileNotFoundError:
        # If the label file does not exist, skip this image
        print(f"Label file not found for {filename}. Skipping.")
        cropped_image = img
        new_image_path = os.path.join("cropped_images", filename)
        cropped_image.save(new_image_path)
        continue

    bbox = list(map(float, bbox_data.split()[1:]))  # [x_center, y_center, width, height]

    if len(bbox) != 4:
        print(f"Label file for {filename} has more than one box or incorrect format. Skipping.")
        cropped_image = img
        new_image_path = os.path.join("cropped_images", filename)
        cropped_image.save(new_image_path)
        continue
    # Calculate the box coordinates for cropping
    x_center, y_center, width, height = bbox
    img = Image.open(image_path)
    img_width, img_height = img.size

    # Convert YOLO coordinates to PIL coordinates
    x_min = int((x_center - width / 2) * img_width)
    x_max = int((x_center + width / 2) * img_width)
    y_min = int((y_center - height / 2) * img_height)
    y_max = int((y_center + height / 2) * img_height)

    # Crop the image
    cropped_image = img.crop((x_min, y_min, x_max, y_max))
    new_image_path = os.path.join("cropped_images", filename)
    cropped_image.save(new_image_path)
    # Get the new offset due to cropping
    '''
    x_offset = x_min
    y_offset = y_min

    # Adjust the box coordinates in the CSV file
    boxes_str = row['boxes']
    boxes = ast.literal_eval(boxes_str)

    # Update the coordinates considering the offset due to cropping
    new_boxes = []
    for box in boxes:
        new_box = [
            box[i] - (x_offset if i % 2 == 0 else y_offset)
            for i in range(len(box))
        ]
        new_boxes.append(new_box)
    # Update the CSV with the new box coordinates
    df.loc[_, 'boxes'] = str(new_boxes)
    '''

# Save the updated CSV
#df.to_csv("updated_data.csv", index=False)

Label file not found for chart_10423.png. Skipping.
Label file not found for chart_11893.png. Skipping.
Label file not found for chart_11894.png. Skipping.
Label file not found for chart_11895.png. Skipping.
Label file not found for chart_13326.png. Skipping.
Label file not found for chart_14351.png. Skipping.
Label file not found for chart_14496.png. Skipping.
Label file not found for chart_18136.png. Skipping.
Label file not found for chart_18138.png. Skipping.
Label file not found for chart_18194.png. Skipping.
Label file not found for chart_18253.png. Skipping.
Label file not found for chart_18486.png. Skipping.
Label file not found for chart_18487.png. Skipping.
Label file not found for chart_18488.png. Skipping.
Label file not found for chart_18489.png. Skipping.
Label file not found for chart_18614.png. Skipping.
Label file not found for chart_18617.png. Skipping.
Label file not found for chart_18618.png. Skipping.
Label file not found for chart_18692.png. Skipping.


## Test cropping

In [11]:
image_folder = "images/images"  # Folder containing your images
label_folder = "test10k/kaggle/working/yolov5_train/detect_test2/labels"  # Folder containing your bounding box text files
csv_path = "updated_data.csv"    # Path to your CSV file

In [12]:
for i in range(20000, 30000):
    filename = "chart_" + str(i) + ".png"
    image_path = os.path.join(image_folder, filename)
    label_path = os.path.join(label_folder, filename.replace('.png', '.txt'))
    # Read the bounding box from the corresponding text file
    print(label_path)
    try:
        with open(label_path, 'r') as f:
            bbox_data = f.readline().strip()
    except FileNotFoundError:
        # If the label file does not exist, skip this image
        print(f"Label file not found for {filename}. Skipping.")
        cropped_image = img
        new_image_path = os.path.join("cropped_images_test", filename)
        cropped_image.save(new_image_path)
        continue

    bbox = list(map(float, bbox_data.split()[1:]))  # [x_center, y_center, width, height]

    if len(bbox) != 4:
        print(f"Label file for {filename} has more than one box or incorrect format. Skipping.")
        cropped_image = img
        new_image_path = os.path.join("cropped_images_test", filename)
        cropped_image.save(new_image_path)
        continue
    # Calculate the box coordinates for cropping
    x_center, y_center, width, height = bbox
    img = Image.open(image_path)
    img_width, img_height = img.size

    # Convert YOLO coordinates to PIL coordinates
    x_min = int((x_center - width / 2) * img_width)
    x_max = int((x_center + width / 2) * img_width)
    y_min = int((y_center - height / 2) * img_height)
    y_max = int((y_center + height / 2) * img_height)

    # Crop the image
    cropped_image = img.crop((x_min, y_min, x_max, y_max))
    new_image_path = os.path.join("cropped_images_test", filename)
    cropped_image.save(new_image_path)
    # Get the new offset due to cropping
    '''
    x_offset = x_min
    y_offset = y_min

    # Adjust the box coordinates in the CSV file
    boxes_str = row['boxes']
    boxes = ast.literal_eval(boxes_str)

    # Update the coordinates considering the offset due to cropping
    new_boxes = []
    for box in boxes:
        new_box = [
            box[i] - (x_offset if i % 2 == 0 else y_offset)
            for i in range(len(box))
        ]
        new_boxes.append(new_box)
    # Update the CSV with the new box coordinates
    df.loc[_, 'boxes'] = str(new_boxes)
    '''

# Save the updated CSV
#df.to_csv("updated_data.csv", index=False)

test10k/kaggle/working/yolov5_train/detect_test2/labels\chart_20000.txt
test10k/kaggle/working/yolov5_train/detect_test2/labels\chart_20001.txt
test10k/kaggle/working/yolov5_train/detect_test2/labels\chart_20002.txt
test10k/kaggle/working/yolov5_train/detect_test2/labels\chart_20003.txt
test10k/kaggle/working/yolov5_train/detect_test2/labels\chart_20004.txt
test10k/kaggle/working/yolov5_train/detect_test2/labels\chart_20005.txt
test10k/kaggle/working/yolov5_train/detect_test2/labels\chart_20006.txt
test10k/kaggle/working/yolov5_train/detect_test2/labels\chart_20007.txt
test10k/kaggle/working/yolov5_train/detect_test2/labels\chart_20008.txt
test10k/kaggle/working/yolov5_train/detect_test2/labels\chart_20009.txt
test10k/kaggle/working/yolov5_train/detect_test2/labels\chart_20010.txt
test10k/kaggle/working/yolov5_train/detect_test2/labels\chart_20011.txt
test10k/kaggle/working/yolov5_train/detect_test2/labels\chart_20012.txt
test10k/kaggle/working/yolov5_train/detect_test2/labels\chart_20

## Vizualizing results

In [68]:
import ast
import matplotlib.pyplot as plt
from PIL import Image

# Correct image path
image_path = 'cropped_images_train/chart_8.png'  # Change this to a valid path
image = Image.open(image_path)  # Opening the image file

# Proper list parsing
first_row_list = [
    [40, 7, 76, 45, 40, 43], [76, 45, 30, 78, 40, 43], [30, 78, 27, 77, 40, 43], [27, 77, 9, 24, 40, 43], [9, 24, 31, 8, 40, 43], [31, 8, 39, 7, 40, 43], [39, 7, 40, 7, 40, 43]
]  # This can be directly initialized without ast.literal_eval

# Extract x and y coordinates
x_coords = []
y_coords = []
for sub_list in first_row_list:
    for i in range(0, len(sub_list), 2):  # Step by 2 to get pairs of coordinates
        x_coords.append(sub_list[i])  # Append x coordinate
        y_coords.append(sub_list[i + 1])  # Append y coordinate

# Plotting
plt.imshow(image)  # Display the image
plt.scatter(x_coords, y_coords, color='red', marker='x', s=100)  # Plot the points with red 'x' markers
plt.title('Points on Image')
plt.show()  # Display the plot


FileNotFoundError: [Errno 2] No such file or directory: 'cropped_images_train/chart_8.png'