In [None]:
!unzip archive.zip -d .

In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd
from matplotlib import patches

from coco_to_yolo import convert_annotations_to_yolo
from coco_to_yolo import create_empty_annotations
from coco_to_yolo import read_coco_annotations

from utilities import clean_data
from utilities import create_yaml_from_class_map
from utilities import filter_annotations
from utilities import reduce_background_images
from utilities import remove_small_classes
from utilities import split_data
from utilities import update_yaml_classes

In [None]:
# Seed handed to split_data further down, so the train/valid split can be repeated.
random_seed = 42

In [None]:
# Image folder passed to the converter (presumably where the extracted frames live).
image_folder = "rtsd-frames/"
# Load the train and validation annotation files.
train_anno = read_coco_annotations("train_anno.json")
valid_anno = read_coco_annotations("val_anno.json")
# Convert both subsets to YOLO labels.
# NOTE(review): where the labels are written is decided inside convert_annotations_to_yolo;
# later cells read labels from "rtsd-frames2/rtsd-frames/", not from image_folder — confirm they match.
convert_annotations_to_yolo(image_folder, train_anno)
convert_annotations_to_yolo(image_folder, valid_anno)

In [None]:
def analyze_class_distribution_pandas(annotation_folder):
    """Count how many annotation lines each class ID has in a label folder.

    Every ``.txt`` file directly inside ``annotation_folder`` is read line by
    line; the first whitespace-separated field of each line is taken as the
    integer class ID. Blank or whitespace-only lines are ignored.

    Args:
        annotation_folder: Directory containing the ``.txt`` label files.

    Returns:
        pandas.DataFrame with columns ``"Class"`` and ``"Count"``, ordered by
        descending count.

    Raises:
        ValueError: If the first field of a non-blank line is not an integer.
    """
    class_ids = []

    # Sorted so the traversal order does not depend on the filesystem.
    for filename in sorted(os.listdir(annotation_folder)):
        if not filename.endswith(".txt"):
            continue
        file_path = os.path.join(annotation_folder, filename)
        with open(file_path, "r") as file:
            for annotation in file:
                fields = annotation.split()
                if not fields:
                    # Blank line (e.g. a trailing newline): no class ID to read.
                    continue
                class_ids.append(int(fields[0]))

    df = pd.DataFrame(class_ids, columns=["Class"])
    df = df["Class"].value_counts().reset_index()
    # reset_index names its columns differently across pandas versions; set them explicitly.
    df.columns = ["Class", "Count"]
    return df


def plot_distribution_pandas(df):
    """Show a bar chart of annotation counts per class.

    Args:
        df: DataFrame with a ``"Class"`` column (x axis) and a ``"Count"``
            column (bar heights).
    """
    axes = df.plot.bar(x="Class", y="Count")
    axes.set_xlabel("Class ID")
    axes.set_ylabel("Count")
    axes.set_title("Class Distribution in Dataset")
    plt.show()


# Folder scanned for .txt label files; the cells below reuse this variable.
annotation_folder = "rtsd-frames2/rtsd-frames/"
# Count annotation lines per class ID and show the counts as a bar chart.
class_counts = analyze_class_distribution_pandas(annotation_folder)
plot_distribution_pandas(class_counts)

In [None]:
def plot_images_with_annotations(folder_path, num_images=5):
    """Display images from a folder with their bounding boxes drawn on top.

    The annotation file for an image is the file in the same folder with the
    same base name and a ``.txt`` extension. Each non-blank annotation line is
    read as ``class_id x_center y_center width height``; the four box values
    are multiplied by the image width/height, i.e. they are treated as
    fractions of the image size. Images without an annotation file are shown
    without boxes.

    Args:
        folder_path: Directory holding the ``.jpg``/``.png`` images and their
            ``.txt`` annotation files.
        num_images: Maximum number of images to display, taken in sorted
            filename order.

    Returns:
        None. One matplotlib figure is shown per image.

    Raises:
        ValueError: If a non-blank annotation line does not consist of exactly
            five numeric fields.
    """
    # Sorted so the same images are previewed on every run.
    image_files = sorted(
        name for name in os.listdir(folder_path) if name.endswith((".jpg", ".png"))
    )

    for image_file in image_files[:num_images]:
        image_path = os.path.join(folder_path, image_file)
        # Swap only the extension; str.replace would also rewrite a ".jpg"/".png"
        # that happens to occur earlier in the file name.
        annotation_path = os.path.join(
            folder_path, os.path.splitext(image_file)[0] + ".txt"
        )

        # plt.imread returns an array shaped (height, width[, channels]).
        image = plt.imread(image_path)
        image_height, image_width = image.shape[:2]

        fig, ax = plt.subplots(1)
        ax.imshow(image)

        if os.path.exists(annotation_path):
            with open(annotation_path, "r") as file:
                for line in file:
                    fields = line.split()
                    if not fields:
                        # Blank line: nothing to draw.
                        continue

                    _class_id, x_center, y_center, width, height = map(float, fields)

                    # Centre/size fractions -> top-left corner and size in pixels.
                    x = (x_center - width / 2) * image_width
                    y = (y_center - height / 2) * image_height
                    width *= image_width
                    height *= image_height

                    rect = patches.Rectangle(
                        (x, y),
                        width,
                        height,
                        linewidth=1,
                        edgecolor="r",
                        facecolor="none",
                    )
                    ax.add_patch(rect)

        plt.show()
        # Release the figure so previewing many images does not accumulate memory.
        plt.close(fig)


# Visual sanity check: show up to five images from the label folder with their boxes.
plot_images_with_annotations(annotation_folder, 5)

In [None]:
# Presumably creates empty .txt label files for images that have no annotations
# (background images) — TODO confirm against coco_to_yolo.create_empty_annotations.
create_empty_annotations(annotation_folder)

In [None]:
def count_empty_txt_files(folder_path):
    """Return how many ``.txt`` files directly inside ``folder_path`` are zero bytes long.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        int: Number of ``.txt`` entries whose size on disk is 0.
    """
    return sum(
        1
        for entry in os.listdir(folder_path)
        if entry.endswith(".txt")
        and os.path.getsize(os.path.join(folder_path, entry)) == 0
    )


# Alias of annotation_folder; a later cell passes folder_path to count_empty_txt_files again.
folder_path = annotation_folder
# The printed label is Russian for "Number of empty .txt files:".
print("Количество пустых .txt файлов:", count_empty_txt_files(folder_path))

In [None]:
# Presumably drops classes that have fewer than 700 annotations — TODO confirm
# the meaning of the threshold against utilities.remove_small_classes.
remove_small_classes(annotation_folder, 700)

In [None]:
# NOTE(review): the meaning of 500 and 3000 is not visible in this notebook
# (defined by utilities.clean_data) — confirm and consider naming them as constants.
clean_data(annotation_folder, 500, 3000)

In [None]:
# Recount the annotations now that the cells above have processed the label folder,
# and collect the class IDs that still occur in it.
class_counts = analyze_class_distribution_pandas(annotation_folder)
classes = class_counts["Class"].tolist()

In [None]:
# The returned mapping is later passed to update_yaml_classes as class_mapping.
# NOTE(review): the meaning of 1500 is defined by utilities.filter_annotations — confirm.
mapping = filter_annotations(annotation_folder, classes, 1500)

In [None]:
# Re-count the zero-byte label files after the filtering cells above
# (the printed label is Russian for "Number of empty .txt files:").
print("Количество пустых .txt файлов:", count_empty_txt_files(folder_path))

In [None]:
# Destination folders handed to split_data for the two subsets.
train_folder = "datasets/train"
valid_folder = "datasets/valid"

# random_seed is passed so the split can be repeated.
# NOTE(review): train_size=0.8 presumably means an 80/20 train/valid split — confirm in utilities.split_data.
split_data(annotation_folder, train_folder, valid_folder, random_seed, train_size=0.8)

In [None]:
# Build data.yaml from the class map in label_map.json ...
create_yaml_from_class_map(
    class_map_path="label_map.json", yaml_output_path="data.yaml"
)
# ... then update its classes with the mapping returned by filter_annotations.
update_yaml_classes("data.yaml", class_mapping=mapping)