Imports

In [None]:
import os
from PIL import Image
from tqdm import tqdm
import json as js
import cv2
import matplotlib.pyplot as plt

Loading paths from the config file

In [None]:
# Read the JSON config that maps logical names (used as keys below) to
# file-system paths; the parsed mapping is kept in `paths`.
with open("config_CNN.json", mode='r') as config_file:
    paths = js.load(config_file)

Image Resizing

In [None]:
def resize_images(input_folder, output_folder, size=(256, 256)):
    """Resize every entry of ``input_folder`` and save it under the same name.

    Args:
        input_folder: Directory whose entries are opened as images.
        output_folder: Directory receiving the resized copies; created when
            it does not exist yet.
        size: Target ``(width, height)`` passed to ``Image.resize``.

    Entries that cannot be opened, resized or saved are reported on stdout
    and skipped, so one bad file does not abort the whole run.
    """
    output_missing = not os.path.exists(output_folder)
    if output_missing:
        os.makedirs(output_folder)

    for entry in tqdm(os.listdir(input_folder)):
        source = os.path.join(input_folder, entry)
        target = os.path.join(output_folder, entry)

        try:
            with Image.open(source) as original:
                resized = original.resize(size, Image.LANCZOS)
                resized.save(target)
        except Exception as error:
            # Best-effort: report the problem and carry on with the next entry.
            print(f"Skipping {entry}: {error}")


# Source and destination folders for both splits, taken from the config.
input_folder_train = paths["Train_folder"]
output_folder_train = paths["Train_resized"]
input_folder_Val = paths["Validation_folder"]
output_folder_Val = paths["Validation_resized"]

# Resize the training split first, then the validation split; a missing
# source folder is only reported, it does not stop the other split.
for source_dir, target_dir in (
    (input_folder_train, output_folder_train),
    (input_folder_Val, output_folder_Val),
):
    if not os.path.exists(source_dir):
        print("Wrong path")
        continue
    resize_images(source_dir, target_dir)

Counter

In [None]:
# Input annotation files and output frequency files, taken from the config.
train_json_path = paths["Preprocessed_Train_json"]
val_json_path = paths["Preprocessed_Validation_json"]
Train_Label_Frequency = paths["Train_Label_Frequency"]
Validation_Label_Frequency = paths["Validation_Label_Frequency"]

with open(train_json_path, "r") as file:
    train_json = js.load(file)

with open(val_json_path, "r") as file:
    val_json = js.load(file)

# label -> number of entries carrying that label, one table per split.
train_label_freq = {}
val_label_freq = {}

for ann in train_json:
    label = ann["label"]
    train_label_freq[label] = train_label_freq.get(label, 0) + 1

print(f"Unique labels in Train: {len(train_label_freq)}")
with open(Train_Label_Frequency, "w") as f:
    js.dump(train_label_freq, f)

for ann in val_json:
    label = ann["label"]
    val_label_freq[label] = val_label_freq.get(label, 0) + 1

print(f"Unique labels in Validation: {len(val_label_freq)}")
with open(Validation_Label_Frequency, "w") as f:
    js.dump(val_label_freq, f)

Preprocessing utilities (Train)

In [None]:
def get_img_name(id:int):
    """Return the ``file_name`` of the first image in ``data['images']`` whose
    ``id`` equals ``id``, or ``None`` when there is no such image.

    Reads the module-level ``data`` mapping.
    """
    matching_names = (
        image["file_name"] for image in data['images'] if image["id"] == id
    )
    return next(matching_names, None)

def get_category_name(id:int):
    """Return the ``name`` of the first category in ``data['categories']``
    whose ``id`` equals ``id``, or ``None`` when there is no such category.

    Reads the module-level ``data`` mapping.
    """
    matching_names = (
        category["name"] for category in data['categories'] if category['id'] == id
    )
    return next(matching_names, None)
        
def get_image_size(id:int):
    """Return ``(width, height)`` of the first image in ``data['images']``
    whose ``id`` equals ``id``, or ``None`` when there is no such image.

    Reads the module-level ``data`` mapping.
    """
    matching_sizes = (
        (image["width"], image["height"])
        for image in data['images']
        if image["id"] == id
    )
    return next(matching_sizes, None)
        
def get_largest_bounding_box(id:int):
    """Return the bbox and category id of an image's largest annotation.

    Scans the module-level ``data['annotations']`` for entries whose
    ``image_id`` equals ``id`` and keeps the one with the greatest ``area``
    (the first one wins on ties).

    Returns:
        A ``(bbox, category_id)`` tuple. When the image has no annotation
        with a positive area the sentinel ``(0, 0)`` is returned, so callers
        can filter such images with ``bbox != 0``.
    """
    # Start from the zero sentinel so "nothing found" and "largest area is
    # not positive" both fall through to (0, 0).
    best_area, best_bbox, best_category = 0, 0, 0

    for annotation in data['annotations']:
        if annotation['image_id'] != id:
            continue
        area = annotation["area"]
        # Strict comparison keeps the earliest annotation among equal areas.
        if area > best_area:
            best_area = area
            best_bbox = annotation["bbox"]
            best_category = annotation["category_id"]

    return best_bbox, best_category

# Reload the path configuration and the training annotation file that this
# cell condenses into one record per image.
with open("config_CNN.json",'r') as file:
    paths = js.load(file)

with open(paths["Max_Area_Train_Json"], "r") as file:
    data = js.load(file)

# Build the lookups once instead of rescanning images / annotations /
# categories for every image (which is quadratic in the dataset size).
# `setdefault` keeps the first entry per id, like a first-match linear scan.
_size_by_image = {}
for _image in data['images']:
    _size_by_image.setdefault(_image["id"], (_image["width"], _image["height"]))

_name_by_category = {}
for _category in data['categories']:
    _name_by_category.setdefault(_category['id'], _category["name"])

# image id -> (area, bbox, category_id) of its largest annotation so far.
_no_annotation = (0, 0, 0)
_largest_by_image = {}
for _annotation in data['annotations']:
    _image_id = _annotation['image_id']
    if _image_id not in _size_by_image:
        # Annotation of an image that is not listed; nothing to attach it to.
        continue
    _area = _annotation["area"]
    # Strict comparison: first annotation wins ties, non-positive areas never
    # replace the zero sentinel.
    if _area > _largest_by_image.get(_image_id, _no_annotation)[0]:
        _largest_by_image[_image_id] = (
            _area,
            _annotation["bbox"],
            _annotation["category_id"],
        )

l = []
for i in tqdm(data['images']):
    _largest = _largest_by_image.get(i["id"], _no_annotation)
    img = {}
    img["img_id"] = i["file_name"]
    img['size'] = _size_by_image[i["id"]]
    img["bbox"], img["category_id"] = _largest[1], _largest[2]
    img["label"] = _name_by_category.get(img["category_id"])
    # A bbox of 0 is the "no usable annotation" sentinel; drop such images.
    if img["bbox"] != 0:
        l.append(img)

with open(paths["Preprocessed_Train_json"],'w') as file:
    js.dump(l, file, indent=4)

Preprocessing utilities (Validation)

In [None]:
def get_img_name(id:int):
    """Return the ``file_name`` of the first image in ``data['images']`` whose
    ``id`` equals ``id``, or ``None`` when there is no such image.

    Reads the module-level ``data`` mapping.
    """
    matching_names = (
        image["file_name"] for image in data['images'] if image["id"] == id
    )
    return next(matching_names, None)

def get_category_name(id:int):
    """Return the ``name`` of the first category in ``data['categories']``
    whose ``id`` equals ``id``, or ``None`` when there is no such category.

    Reads the module-level ``data`` mapping.
    """
    matching_names = (
        category["name"] for category in data['categories'] if category['id'] == id
    )
    return next(matching_names, None)
        
def get_image_size(id:int):
    """Return ``(width, height)`` of the first image in ``data['images']``
    whose ``id`` equals ``id``, or ``None`` when there is no such image.

    Reads the module-level ``data`` mapping.
    """
    matching_sizes = (
        (image["width"], image["height"])
        for image in data['images']
        if image["id"] == id
    )
    return next(matching_sizes, None)
        
def get_largest_bounding_box(id:int):
    """Return the bbox and category id of an image's largest annotation.

    Scans the module-level ``data['annotations']`` for entries whose
    ``image_id`` equals ``id`` and keeps the one with the greatest ``area``
    (the first one wins on ties).

    Returns:
        A ``(bbox, category_id)`` tuple. When the image has no annotation
        with a positive area the sentinel ``(0, 0)`` is returned, so callers
        can filter such images with ``bbox != 0``.
    """
    # Start from the zero sentinel so "nothing found" and "largest area is
    # not positive" both fall through to (0, 0).
    best_area, best_bbox, best_category = 0, 0, 0

    for annotation in data['annotations']:
        if annotation['image_id'] != id:
            continue
        area = annotation["area"]
        # Strict comparison keeps the earliest annotation among equal areas.
        if area > best_area:
            best_area = area
            best_bbox = annotation["bbox"]
            best_category = annotation["category_id"]

    return best_bbox, best_category

# Reload the path configuration and the validation annotation file that this
# cell condenses into one record per image.
with open("config_CNN.json",'r') as file:
    paths = js.load(file)

with open(paths["Max_Area_Val_Json"], "r") as file:
    data = js.load(file)

# Build the lookups once instead of rescanning images / annotations /
# categories for every image (which is quadratic in the dataset size).
# `setdefault` keeps the first entry per id, like a first-match linear scan.
_size_by_image = {}
for _image in data['images']:
    _size_by_image.setdefault(_image["id"], (_image["width"], _image["height"]))

_name_by_category = {}
for _category in data['categories']:
    _name_by_category.setdefault(_category['id'], _category["name"])

# image id -> (area, bbox, category_id) of its largest annotation so far.
_no_annotation = (0, 0, 0)
_largest_by_image = {}
for _annotation in data['annotations']:
    _image_id = _annotation['image_id']
    if _image_id not in _size_by_image:
        # Annotation of an image that is not listed; nothing to attach it to.
        continue
    _area = _annotation["area"]
    # Strict comparison: first annotation wins ties, non-positive areas never
    # replace the zero sentinel.
    if _area > _largest_by_image.get(_image_id, _no_annotation)[0]:
        _largest_by_image[_image_id] = (
            _area,
            _annotation["bbox"],
            _annotation["category_id"],
        )

l = []
for i in data['images']:
    _largest = _largest_by_image.get(i["id"], _no_annotation)
    img = {}
    img["img_id"] = i["file_name"]
    img['size'] = _size_by_image[i["id"]]
    img["bbox"], img["category_id"] = _largest[1], _largest[2]
    img["label"] = _name_by_category.get(img["category_id"])
    # A bbox of 0 is the "no usable annotation" sentinel; drop such images.
    if img["bbox"] != 0:
        l.append(img)

with open(paths["Preprocessed_Validation_json"],'w') as file:
    js.dump(l, file, indent=4)

Visualization

In [None]:
# Draw each preprocessed training record's bounding box on the corresponding
# image from the resized-images folder and display it.
json_path = paths["Preprocessed_Train_json"]
train_path = paths["Train_resized"]

with open(json_path, 'r') as f:
    coco_data = js.load(f)

for img in coco_data:
    img_path = os.path.join(train_path, img["img_id"])
    if not os.path.exists(img_path):
        print(f"Image not found: {img_path}")
        continue

    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals an unreadable/corrupt file by returning None
        # instead of raising; skip it rather than crash in cvtColor.
        print(f"Could not read image: {img_path}")
        continue
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    x, y, w, h = img["bbox"]

    # The bbox is expressed in the coordinate space of the recorded "size"
    # (width, height), while the displayed image comes from the resized
    # folder. Rescale so the rectangle lands on the object; when both sizes
    # agree the factors are 1 and nothing changes.
    # NOTE(review): assumes "size" is the size the bbox refers to - confirm.
    recorded_w, recorded_h = img.get("size") or (0, 0)
    shown_h, shown_w = image.shape[:2]
    scale_x = shown_w / recorded_w if recorded_w else 1.0
    scale_y = shown_h / recorded_h if recorded_h else 1.0

    top_left = (int(x * scale_x), int(y * scale_y))
    bottom_right = (int((x + w) * scale_x), int((y + h) * scale_y))
    cv2.rectangle(image, top_left, bottom_right, (255, 0, 0), 2)

    plt.figure(figsize=(8, 8))
    plt.imshow(image)
    plt.axis("off")
    plt.show()