In [1]:
# %%

import os
import shutil
import json
import pandas as pd
from sklearn.model_selection import train_test_split
import albumentations as A
import random
from loguru import logger
import cv2
import numpy as np
import matplotlib.pyplot as plt

GOAL = 300
REGION_ATTRIBUTE = "Defects"
IMG_PATH = r"train"
VIA_PATH = r"train.json"

RANDOM_STATE = 0
random.seed(RANDOM_STATE)


# %%
BOX_COLOR = (255, 0, 0) # Red
TEXT_COLOR = (255, 255, 255) # White

sample_output = {
    "_via_settings":{"ui":{"annotation_editor_height":25,"annotation_editor_fontsize":0.8,"leftsidebar_width":18,"image_grid":{"img_height":80,"rshape_fill":"none","rshape_fill_opacity":0.3,"rshape_stroke":"yellow","rshape_stroke_width":2,"show_region_shape":True,"show_image_policy":"all"},"image":{"region_label":"class","region_color":"class","region_label_font":"10pxSans","on_image_annotation_editor_placement":"NEAR_REGION"}},"core":{"buffer_size":18,"filepath":{},"default_filepath":""},"project":{"name":"via_project_val.5.12"}},
    "_via_img_metadata": {}, 
    "_via_attributes":{"regions":{REGION_ATTRIBUTE:{"type":"radio","description":"","options":{},"default_options":{}}},"file":{}}
}


def visualize_bbox(img, bbox, class_name, color=BOX_COLOR, thickness=2):
    """Visualizes a single bounding box on the image"""
    x_min, y_min, w, h = bbox
    x_min, x_max, y_min, y_max = int(x_min), int(x_min + w), int(y_min), int(y_min + h)
   
    cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color=color, thickness=thickness)
    
    ((text_width, text_height), _) = cv2.getTextSize(class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.35, 1)    
    cv2.rectangle(img, (x_min, y_min - int(1.3 * text_height)), (x_min + text_width, y_min), BOX_COLOR, -1)
    cv2.putText(
        img,
        text=class_name,
        org=(x_min, y_min - int(0.3 * text_height)),
        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=0.35, 
        color=TEXT_COLOR, 
        lineType=cv2.LINE_AA,
    )
    return img


def visualize(image, bboxes):
    img = image.copy()
    for bbox in bboxes:
        class_name = bbox[-1]
        img = visualize_bbox(img, bbox[:-1], class_name)
    plt.figure(figsize=(12, 12))
    plt.axis('off')
    plt.imshow(img)

def generate_annotation(filename, bboxes, transformed_img_path):
    filesize = os.stat(transformed_img_path).st_size 

    regions = []
    for bbox in bboxes: 
        region = {
            "shape_attributes": {
                "name": "rect", 
                "x": int(bbox[0]), 
                "y": int(bbox[1]),
                "width": int(bbox[2]), 
                "height": int(bbox[3])
            }, 
            "region_attributes": {
                "Defects": bbox[-1]
            }
        }
        regions.append(region)

    annot = {
        f"{filename}{filesize}": {
            "filename": filename, 
            "size": filesize, 
            "regions": regions,
            "file_attributes": {}
        }
    }

    return annot

def get_features(via_json): 
    feature = []
    for key, value in via_json.items(): 
        for regions in value["regions"]: 
            feat = regions["region_attributes"][REGION_ATTRIBUTE]
            if feat not in feature:
                feature.append(feat)

    return feature



# %%
with open(VIA_PATH, "r") as fs: 
    annot = json.load(fs)["_via_img_metadata"]

features = get_features(annot)

# update via output
sample_output["_via_attributes"]["regions"]["options"] = {feature: feature for feature in features}

image_files = {
    "filekey":  [],
    "filename": [], 
    "filesize": [], 
    "annot": []
}

for feature in features: 
    image_files.update({feature:[]})


for key, value in annot.items(): 
    print(value)

    features_count = {feature: 0 for feature in features}

    bbox = []

    for regions in value["regions"]: 
        feat = regions["region_attributes"][REGION_ATTRIBUTE]
        features_count[feat] += 1

        coor = regions["shape_attributes"]

        bbox.append([coor["x"], coor["y"], coor["width"], coor["height"], feat])

    # print(features_count)

    image_files["filekey"].append(key)
    image_files["filename"].append(value["filename"])
    image_files["filesize"].append(value["size"])
    image_files["annot"].append(json.dumps(bbox))
    for feature, cnt in features_count.items(): 
        image_files[feature].append(cnt)

image_files
# %%
image_files_df = pd.DataFrame(image_files)
# image_files_df = pd.read_csv("annotationsv2.csv")

# %%

final_annotation = []
for feature in features: 

    goal =  GOAL

    # print(feature)
    # print(goal)
    # print(image_files[feature])

    # create albumentations pipeline

    # get relavant file names
    feature_images_df = image_files_df[image_files_df[feature] != 0]
    feature_images_df.reset_index(drop=True, inplace=True)
    feature_count = feature_images_df[feature].sum()
    # print(len(feature_images_df))

    aug = goal - feature_count

    logger.info(f"Feature: {feature}, Goal: {goal}, Images to augment: {aug}")

    img_save_path = os.path.join(IMG_PATH, "augmentation", feature)

    # print(aug)
    if not os.path.exists(img_save_path): 
        os.makedirs(img_save_path)
        logger.info(f"Path for {feature} created. ")

    _via_img_metadata = {}

    while aug > 0: 

        try: 

            # randomly choose files to perform augmentation
            key = random.randint(0, len(feature_images_df)-1)
            filename = feature_images_df.loc[key, "filename"]
            annots = json.loads(feature_images_df.loc[key, "annot"])

            print(filename)
            print(annots)

            file_path = os.path.join(IMG_PATH, filename)

            if os.path.exists(file_path): 

                img = cv2.imread(file_path)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                img_height = img.shape[0]
                img_width = img.shape[1]

                defects = [annot[-1] for annot in annots]
                print(defects)

                bbox = [annot for annot in annots if feature in annot]
                
                for box in bbox: 
                    for i in range(3): 
                        if box[i] < 0: 
                            box[i] = 0

                print(bbox)

                # augmentation pipeline
                transform = A.Compose([
                    A.BBoxSafeRandomCrop(erosion_rate=0), 
 #                  A.RandomRotate90(p=0.5), 
                    A.VerticalFlip(p=0.5), 
                    A.HorizontalFlip(p=0.5), 
                    A.RandomBrightnessContrast(p=0.5), 
                    A.RandomGamma(p=0.5)
                ], bbox_params=A.BboxParams(format="coco"))

                transformed = transform(image=img, bboxes=bbox)

                # visualize(
                #     transformed["image"],
                #     transformed["bboxes"]
                # )
                transformed_img_file = f"{aug}_{feature}_{filename}"
                transformed_img_path = os.path.join(img_save_path, transformed_img_file)

                if cv2.imwrite(transformed_img_path, cv2.cvtColor(transformed["image"], cv2.COLOR_BGR2RGB)): 
                    print("after_cv", transformed["bboxes"])
                    
                    transformed_annotation = generate_annotation(transformed_img_file, transformed["bboxes"], transformed_img_path)
                    _via_img_metadata.update(transformed_annotation)
                    aug -= len(bbox)

            else: 
                pass

        except Exception as e: 
            raise e

    final_json = sample_output

    final_json["_via_img_metadata"] = _via_img_metadata

    final_json_path = os.path.join(img_save_path, f"{feature}.json")

    with open(final_json_path, "w") as f:
        json.dump(final_json, f)


# %% join all json and move all files into one new folder
import shutil

final_annotation = annot.copy()

if not os.path.exists("final"):
    os.mkdir("final")

final_folder = os.path.join("final", IMG_PATH)

if not os.path.exists(final_folder): 
    for i in range(len(IMG_PATH.split(os.sep))): 
        folder = os.path.join("final", *IMG_PATH.split(os.sep)[:i+1])
        try: 
            os.mkdir(folder)
        except Exception as e: 
            raise e


for top, dirs, files in os.walk(IMG_PATH): 
    for file in files: 
        if ".jpg" in file: 
            from_file = os.path.join(top, file)
            to_file = os.path.join(final_folder, file)
            shutil.copy(from_file, to_file)

for feature in features: 
    annot_file = os.path.join(IMG_PATH, "augmentation", feature, f"{feature}.json")

    with open(annot_file, "r") as f:
        aug_annot = json.load(f)["_via_img_metadata"]

    final_annotation.update(aug_annot)

final_json = sample_output

final_json["_via_img_metadata"] = final_annotation

final_json_path = os.path.join(final_folder, "final.json")

with open(final_json_path, "w") as f:
    json.dump(final_json, f)

[32m2024-10-08 17:06:41.411[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m177[0m - [1mFeature: Component knockoff, Goal: 300, Images to augment: 56[0m
[32m2024-10-08 17:06:41.413[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m184[0m - [1mPath for Component knockoff created. [0m


{'filename': '22047C801910_142711_part1.jpg', 'size': 200354, 'regions': [{'shape_attributes': {'name': 'rect', 'x': 379, 'y': 95, 'width': 37, 'height': 59}, 'region_attributes': {'Defects': 'Component knockoff'}}], 'file_attributes': {}}
{'filename': '22213S802744_214402_part1.jpg', 'size': 174634, 'regions': [{'shape_attributes': {'name': 'rect', 'x': 157, 'y': 50, 'width': 28, 'height': 55}, 'region_attributes': {'Defects': 'Component knockoff'}}], 'file_attributes': {}}
{'filename': '22213S803350_231145_part1.jpg', 'size': 184286, 'regions': [{'shape_attributes': {'name': 'rect', 'x': 153, 'y': 44, 'width': 32, 'height': 62}, 'region_attributes': {'Defects': 'Component knockoff'}}], 'file_attributes': {}}
{'filename': '22213S812250_110525_part1.jpg', 'size': 216751, 'regions': [{'shape_attributes': {'name': 'rect', 'x': 403, 'y': 98, 'width': 28, 'height': 59}, 'region_attributes': {'Defects': 'Component knockoff'}}], 'file_attributes': {}}
{'filename': '22214H800095_022716_part1.

[32m2024-10-08 17:06:42.312[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m177[0m - [1mFeature: Pass, Goal: 300, Images to augment: 218[0m
[32m2024-10-08 17:06:42.312[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m184[0m - [1mPath for Pass created. [0m


after_cv [(354.0, 12.0, 35.0, 18.0, 'Component knockoff')]
241009801086_144725_part3.jpg
[[119, 560, 47, 28, 'Component knockoff'], [57, 559, 48, 30, 'Component knockoff']]
['Component knockoff', 'Component knockoff']
[[119, 560, 47, 28, 'Component knockoff'], [57, 559, 48, 30, 'Component knockoff']]
after_cv [(311.0, 369.0, 47.0, 28.0, 'Component knockoff'), (372.0, 368.0, 48.0, 30.0, 'Component knockoff')]
24093A802874_061618_part3.jpg
[[319, 565, 21, 14, 'Component knockoff']]
['Component knockoff']
[[319, 565, 21, 14, 'Component knockoff']]
after_cv [(224.0, 16.999999999999996, 20.99999999999997, 14.000000000000007, 'Component knockoff')]
23334M802358_051023_A3_part3.jpg
[[75, 526, 41, 24, 'Component knockoff']]
['Component knockoff']
[[75, 526, 41, 24, 'Component knockoff']]
after_cv [(55.0, 173.0, 40.0, 24.0, 'Component knockoff')]
233468803552_025735_part1.jpg
[[131, 71, 28, 57, 'Component knockoff']]
['Component knockoff']
[[131, 71, 28, 57, 'Component knockoff']]
after_cv [(18