In [1]:
#!curl -L "https://public.roboflow.com/ds/f4QCbpT1wT?key=kumLfI87kN" > roboflow.zip; unzip roboflow.zip; rm roboflow.zip

In [4]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt

In [5]:
# Locations of the raw dataset, relative to the notebook's working directory.
ANNOTATION_FILE_URI = "../data/raw/_annotations.csv"
IMAGE_FILE_URI = "../data/raw/images"

# Read the annotation table into memory.
df_annotations = pd.read_csv(ANNOTATION_FILE_URI)

# Summary figures, printed below as a sanity check of the load.
number_of_annotations = len(df_annotations)
annotation_headers = df_annotations.columns
list_of_unique_files = df_annotations["filename"].unique()

print(
    f"Number of unique files: {len(list_of_unique_files)}",
    f"Number of annotations: {number_of_annotations}",
    f"Headers: {list(annotation_headers)}",
    sep="\n",
)

Number of unique files: 718
Number of annotations: 1934
Headers: ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']


In [6]:
class Counter:
    """Hand out consecutive integers, starting at 1."""

    # Class-level default; the first call to get_count() shadows it with a
    # per-instance value, so separate instances count independently.
    count = 0

    def get_count(self):
        """Advance this counter by one and return the new value."""
        next_value = self.count + 1
        self.count = next_value
        return next_value


# Module-level instance that numbers the exported crop files.
counter = Counter()

def get_box_image(file_uri, filename, width, height, class_, xmin, ymin, xmax, ymax, factor=4):
    """Crop an enlarged window centred on a bounding box out of an image.

    The window shares its centre with the box and is ``factor`` times as wide
    and as tall as the box. It is clipped to the image bounds, so a box close
    to a border yields a smaller crop instead of an empty or wrapped-around one.

    Parameters
    ----------
    file_uri : str
        Path of the image file to open.
    filename, width, height, class_
        Unused. Accepted so that a whole annotation row can be unpacked
        positionally into the call.
    xmin, ymin, xmax, ymax : number
        Corners of the bounding box, in pixel coordinates.
    factor : number, default 4
        Ratio between the side lengths of the window and of the box.

    Returns
    -------
    numpy.ndarray
        The cropped pixels, indexed as ``[row (y), column (x), ...]``.
        A zero-area box still produces an empty array.
    """
    # Convert to an array while the file is open; the context manager then
    # releases the file handle instead of leaving it to the garbage collector.
    with Image.open(file_uri) as img:
        img_array = np.asarray(img)
    img_height, img_width = img_array.shape[:2]

    mean_x, mean_y = int((xmin + xmax) / 2), int((ymin + ymax) / 2)
    range_x, range_y = int(factor * (xmax - xmin) / 2), int(factor * (ymax - ymin) / 2)

    # A negative slice start is interpreted by NumPy as an offset from the far
    # edge, which silently turns the crop into an empty (or wrong) region.
    # Clipping both ends to the image keeps the window inside the picture.
    x_start, x_stop = np.clip([mean_x - range_x, mean_x + range_x], 0, img_width)
    y_start, y_stop = np.clip([mean_y - range_y, mean_y + range_y], 0, img_height)

    return img_array[y_start:y_stop, x_start:x_stop]
    
def plot_image(img, mean_x, mean_y, range_x, range_y):
    """Show an image with a rectangular window and its centre drawn on top.

    The rectangle spans ``mean_x ± range_x`` horizontally and
    ``mean_y ± range_y`` vertically; it is drawn in black and the centre
    point in red.
    """
    x_low, x_high = mean_x - range_x, mean_x + range_x
    y_low, y_high = mean_y - range_y, mean_y + range_y

    # Walk the four corners and return to the first one to close the outline.
    outline_x = [x_low, x_high, x_high, x_low, x_low]
    outline_y = [y_low, y_low, y_high, y_high, y_low]

    plt.figure(figsize=(10, 10))
    plt.imshow(img)
    plt.plot(outline_x, outline_y, c="k")
    plt.scatter(mean_x, mean_y, c="r")
    plt.show()
    
   

In [7]:
# Export one crop per image that carries exactly one annotation, and record
# "<crop file name> <class>" for each crop in a plain-text label file.
OUTPUT_DIR = "../data/semi-processed"
OUTPUT_ANNOTATION_FILE = os.path.join(OUTPUT_DIR, "_annotations.txt")

# Create the destination up front so the first save cannot fail on a missing folder.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# The label file is opened once, in write mode: appending on every run would
# duplicate all records each time the notebook is restarted and re-executed.
with open(OUTPUT_ANNOTATION_FILE, "w") as annotation_file:
    # sort=False yields the files in order of first appearance in the CSV,
    # and each group already holds that file's rows (no per-file re-scan).
    for file_name, annotations in df_annotations.groupby("filename", sort=False):
        # Images with several boxes are skipped; only single-box images are exported.
        if len(annotations) != 1:
            continue

        annotation = annotations.iloc[0]
        file_uri = os.path.join(IMAGE_FILE_URI, file_name)
        new_file_name = f"image_{counter.get_count()}.jpeg"

        # The row is unpacked positionally, so this relies on the CSV columns
        # being in the same order as get_box_image's parameters.
        new_img_array = get_box_image(file_uri, *annotation.values)
        img_class = annotation["class"]
        new_img = Image.fromarray(new_img_array)
        new_img.save(os.path.join(OUTPUT_DIR, new_file_name), "JPEG")

        # Every record ends with " \n" (space + newline), exactly as before,
        # so existing readers of the label file keep working.
        annotation_file.write(f"{new_file_name} {img_class} \n")

        print(f"File: {new_file_name} added.")

File: image_1.jpeg added.
File: image_2.jpeg added.
File: image_3.jpeg added.
File: image_4.jpeg added.
File: image_5.jpeg added.
File: image_6.jpeg added.
File: image_7.jpeg added.
File: image_8.jpeg added.
File: image_9.jpeg added.
File: image_10.jpeg added.
File: image_11.jpeg added.
File: image_12.jpeg added.
File: image_13.jpeg added.
File: image_14.jpeg added.
File: image_15.jpeg added.
File: image_16.jpeg added.
File: image_17.jpeg added.
File: image_18.jpeg added.
File: image_19.jpeg added.
File: image_20.jpeg added.
File: image_21.jpeg added.
File: image_22.jpeg added.
File: image_23.jpeg added.
File: image_24.jpeg added.
File: image_25.jpeg added.
File: image_26.jpeg added.
File: image_27.jpeg added.
File: image_28.jpeg added.
File: image_29.jpeg added.
File: image_30.jpeg added.
File: image_31.jpeg added.
File: image_32.jpeg added.
File: image_33.jpeg added.
File: image_34.jpeg added.
File: image_35.jpeg added.
File: image_36.jpeg added.
File: image_37.jpeg added.
File: imag