In [49]:
import os 
import tensorflow 
from PIL import Image, ImageDraw
import math
import pandas as pd 
from tqdm import tqdm 
import random 
import shutil
import numpy as np 

from statistics import mean 

In [50]:
folder_sources = []
Orig_Dataset_path = "Segmented Herbal Leaf images"
New_Dataset_path = "Transformed"

for file in os.listdir(Orig_Dataset_path):
    folder_sources.append(file)
        
print(folder_sources)

['Vitex Negundo (Lagundi)']


In [51]:
for folder in folder_sources:
    f_ = os.path.join(New_Dataset_path, folder)
    if not os.path.exists(f_):
        os.mkdir(f_)
        print("Created Path:", f_)

Created Path: Transformed\Vitex Negundo (Lagundi)


In [52]:
# Degrees is in radians
def calculate_new_coords(degrees, x, y, iw, ih):
    # qx = ox + math.cos(angle) * (px - ox) - math.sin(angle) * (py - oy)
    # qy = oy + math.sin(angle) * (px - ox) + math.cos(angle) * (py - oy)
    # res_x = iw / 2 + (x - iw / 2) * math.cos(degrees) + (y - ih / 2) * math.sin(degrees)
    # res_y = ih / 2 - (x - iw / 2) * math.sin(degrees) + (y - ih / 2) * math.cos(degrees)

    res_x = iw / 2 + (x - iw / 2) * math.cos(degrees) - (y - ih / 2) * math.sin(degrees)
    res_y = ih / 2 + (x - iw / 2) * math.sin(degrees) + (y - ih / 2) * math.cos(degrees)
    
    return (res_x, res_y)

def mean_upper_limit(limit, mean): return mean + (limit - mean) / 2

def mean_lower_limit(limit, mean): return mean - (limit - mean) / 2 

In [53]:
BASE_IMAGE_SIZE = ((100, 100))
BASE_CANVAS_SIZE = ((200, 200))
X_VARIATIONS = 1
Y_VARIATIONS = 1
ROTATION_VARIATION = 16
ROTATION_DEGREES = 360 / ROTATION_VARIATION
DRAW_BOUNDING_BOX = False
BORDER_WIDTH = 5 
BORDER_COLOR = "Blue"

In [54]:
for folder in tqdm(folder_sources):
    current_folder_dict = {
        "image_name" : [],
        "x0" : [],
        "y0" : [],
        "x1" : [],
        "y1" : [],
        "class" : []
    }

    for image in os.listdir(os.path.join(Orig_Dataset_path, folder)):
        print(image)

        current_image = Image.open(os.path.join(Orig_Dataset_path, folder, image))
        _ = current_image.copy()
        r = list(_.getdata(0))
        g = list(_.getdata(1))
        b = list(_.getdata(2))

        
        upper_bounds = (mean_upper_limit(255, mean(r)), mean_upper_limit(255, mean(g)), mean_upper_limit(255, mean(b)))
        lower_bounds = (mean_lower_limit(20, mean(r)), mean_lower_limit(0, mean(g)), mean_lower_limit(0, mean(b)))
        upper_bounds = tuple([int(x) for x in upper_bounds])
        lower_bounds = tuple([int(x) for x in lower_bounds])
        # print(max(upper_bounds))
        # print(max(lower_bounds))
        upper_bounds = tuple([x if x == max(upper_bounds) else 255 for x in upper_bounds])
        lower_bounds = tuple([x if x == min(lower_bounds) else 0 for x in lower_bounds])
        
        current_image.thumbnail(BASE_IMAGE_SIZE)
        
        x0, y0 = (0, 0)
        x1, y1 = current_image.size 
        iw, ih = current_image.size 
        
        # Rotate Image
        for i in range(ROTATION_VARIATION + 1):
            current_degrees = int(i * ROTATION_DEGREES)

            rotated_image = current_image.copy()
            rotated_image = rotated_image.rotate(current_degrees, expand=True)

            r_i_width, r_i_height = rotated_image.size

            original_bbox_coords = [(x0, y0), (x1, y0), (x1, y1), (x0, y1)]
            _degrees = math.radians(current_degrees)
            

            new_coords = [calculate_new_coords(_degrees, x, y, iw, ih) for x, y in original_bbox_coords]
            x_min, x_max = 0, iw 
            y_min, y_max = 0, ih 

            for x, y in new_coords:
                x_min = min(x, x_min)
                y_min = min(y, y_min)
                x_max = max(x, x_max)
                y_max = max(y, y_max)

            excess_neg_x = abs(min(0, x_min))
            excess_neg_y = abs(min(0, y_min))
            
            final_bbox_coords = [(x_min, y_min), (x_max, y_max)]
            final_bbox_coords = [(x + excess_neg_x, y + excess_neg_y) for x, y in final_bbox_coords]
            #final_bbox_coords = new_coords
                
            # Transposition
            c_w , c_h = BASE_CANVAS_SIZE

            x_steps = abs(r_i_width - c_w) / X_VARIATIONS
            y_steps = abs(r_i_height - c_h) / Y_VARIATIONS
            
            for y in range(Y_VARIATIONS + 1):
                for x in range(X_VARIATIONS + 1):
                    

                    min_r, min_g, min_b = lower_bounds
                    max_r, max_g, max_b = upper_bounds
                    
                    r_arr = np.random.randint(min_r, max_r + 1, size=(BASE_CANVAS_SIZE[0], BASE_CANVAS_SIZE[1], 1))
                    g_arr = np.random.randint(min_g, max_g + 1, size=(BASE_CANVAS_SIZE[0], BASE_CANVAS_SIZE[1], 1))
                    b_arr = np.random.randint(min_b, max_b + 1, size=(BASE_CANVAS_SIZE[0], BASE_CANVAS_SIZE[1], 1))

                    img_arr = np.concatenate((r_arr, g_arr, b_arr), axis=2)

                    canvas = Image.fromarray(img_arr.astype('uint8')).convert('RGB')

                    x_movement, y_movement = int(x_steps) * x, int(y_steps) * y

                    canvas.paste(rotated_image, (x_movement, y_movement))
                    

                    transposed_bbox_coords = [(x + x_movement, y + y_movement) for x, y in final_bbox_coords]
                    #print(transposed_bbox_coords)

                    if DRAW_BOUNDING_BOX:
                        _d = ImageDraw.Draw(canvas)
                        _d.rectangle(transposed_bbox_coords, width=BORDER_WIDTH, outline=BORDER_COLOR)

                    # display(canvas)

                    c0, c1 = transposed_bbox_coords
                    _x0, _y0 = c0 
                    _x1, _y1 = c1

                    
                    current_folder_dict["x0"].append(_x0 / c_w) 
                    current_folder_dict["y0"].append(_y0 / c_h)
                    current_folder_dict["x1"].append(_x1 / c_w) 
                    current_folder_dict["y1"].append(_y1 / c_h)
                    current_folder_dict["class"].append(folder)
                    
                    image_name = f"{New_Dataset_path}/{folder}/{image.replace('.jpg', '')}_{current_degrees}_{x}_{y}.jpg"
                    canvas.save(image_name)
                    current_folder_dict["image_name"].append(image_name)

                    # Only 1
                    # break   

                # Only 1
                # break         
            # Only 1 Rotation
            # break
        # Test Only 1 Picture
        # break
        
    
    df_current_folder = pd.DataFrame(current_folder_dict)
    #print(df_current_folder.head())
    df_current_folder.to_csv(f"{New_Dataset_path}/{folder}/annotations.csv")

    # Test Only 1 Folder
    # break
     


  0%|          | 0/1 [00:00<?, ?it/s]

1.jpg
10.jpg
11.jpg
12.jpg
14.jpg
15.jpg
16.jpg
17.jpg
18.jpg
19.jpg
2.jpg
20.jpg
21.jpg
22.jpg
23.jpg
24.jpg
25.jpg
26.jpg
27.jpg
28.jpg
29.jpg
3.jpg
30.jpg
31.jpg
32.jpg
33.jpg
34.jpg
35.jpg
36.jpg
37.jpg
38.jpg
39.jpg
4.jpg
40.jpg
41.jpg
42.jpg
43.jpg
44.jpg
45.jpg
46.jpg
47.jpg
48.jpg
49.jpg
5.jpg
50.jpg
51.jpg
52.jpg
53.jpg
6.jpg
7.jpg
8.jpg
9.jpg


100%|██████████| 1/1 [03:44<00:00, 224.07s/it]


In [55]:
for folder in tqdm(os.listdir(New_Dataset_path)):
    train_folder = os.path.join(New_Dataset_path, folder, "train")
    test_folder = os.path.join(New_Dataset_path, folder, "test")

    if os.path.exists(train_folder):
        continue

    print(folder)
    current_annotation_csv = list(filter(lambda x: "csv" in x, os.listdir(os.path.join(New_Dataset_path, folder))))[0]
    #print(current_annotation_csv)

    raw_df = pd.read_csv(os.path.join(New_Dataset_path, folder, current_annotation_csv))
    current_image_size = len(raw_df["image_name"])
    train_size = int(0.8 * current_image_size)
    
    df = raw_df.copy()

    train_df = df.sample(train_size)
    test_df = df.drop(train_df.index, axis=0)
    
    #print(len(train_df), train_size, len(test_df))


    
    
    if not os.path.exists(train_folder):
        os.mkdir(train_folder)

    if not os.path.exists(test_folder):
        os.mkdir(test_folder)

    for file in train_df["image_name"].tolist():
        image_file = file.split("/")[-1]
        new_image_path = os.path.join(train_folder, image_file)
        shutil.move(file, new_image_path)

    train_df["image_name"] = train_df["image_name"].map(lambda x: os.path.join(train_folder, x.split("/")[-1]))
    train_df.to_csv(os.path.join(New_Dataset_path, folder, "train_annotations.csv"))
    
    

    for file in test_df["image_name"].tolist():
        image_file = file.split("/")[-1]
        new_image_path = os.path.join(test_folder, image_file)

        shutil.move(file, new_image_path)
    
    test_df["image_name"] = test_df["image_name"].map(lambda x: os.path.join(test_folder, x.split("/")[-1]))
    test_df.to_csv(os.path.join(New_Dataset_path, folder, "test_annotations.csv"))

    
    
    

  0%|          | 0/10 [00:00<?, ?it/s]

Artocarpus Heterophyllus (Jackfruit)


 10%|█         | 1/10 [00:08<01:12,  8.10s/it]

Blumea Balsamifera (Sambong)


 20%|██        | 2/10 [00:11<00:42,  5.35s/it]

Citrus Limon (Lemon)


 30%|███       | 3/10 [00:14<00:29,  4.25s/it]

Jasminum (Jasmine)


 40%|████      | 4/10 [00:17<00:22,  3.83s/it]

Mangifera Indica (Mango)


 50%|█████     | 5/10 [00:20<00:17,  3.58s/it]

Mentha (Mint)


 60%|██████    | 6/10 [00:24<00:14,  3.50s/it]

Momordica Charantia (Ampalaya)


 70%|███████   | 7/10 [00:27<00:10,  3.48s/it]

Moringa Oleifera (Malunggay)


 80%|████████  | 8/10 [00:32<00:07,  3.92s/it]

Psidium Guajava (Guava)


 90%|█████████ | 9/10 [00:35<00:03,  3.68s/it]

Vitex Negundo (Lagundi)


100%|██████████| 10/10 [00:38<00:00,  3.87s/it]


In [56]:
appended_files = []
for folder in os.listdir(New_Dataset_path):
    csv_files = []
    for file in os.listdir(os.path.join(New_Dataset_path, folder)):
        if ("train" in file or "test" in file) and ".csv" in file:
            df = pd.read_csv(os.path.join(New_Dataset_path, folder, file))
            csv_files.append(df)

    x = pd.concat(csv_files)
    appended_files.append(x)

In [57]:
final_csv = pd.concat(appended_files)
final_csv

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,image_name,x0,y0,x1,y1,class
0,2,2,Transformed\Artocarpus Heterophyllus (Jackfrui...,0.000,0.500,0.375000,1.000000,Artocarpus Heterophyllus (Jackfruit)
1,3,3,Transformed\Artocarpus Heterophyllus (Jackfrui...,0.625,0.500,1.000000,1.000000,Artocarpus Heterophyllus (Jackfruit)
2,7,7,Transformed\Artocarpus Heterophyllus (Jackfrui...,0.465,0.390,0.999997,0.994069,Artocarpus Heterophyllus (Jackfruit)
3,11,11,Transformed\Artocarpus Heterophyllus (Jackfrui...,0.375,0.380,0.993718,0.998718,Artocarpus Heterophyllus (Jackfruit)
4,17,17,Transformed\Artocarpus Heterophyllus (Jackfrui...,0.500,0.000,1.000000,0.500000,Artocarpus Heterophyllus (Jackfruit)
...,...,...,...,...,...,...,...,...
2823,1098,1098,Transformed\Vitex Negundo (Lagundi)\train\25_4...,0.000,0.375,0.618718,0.993718,Vitex Negundo (Lagundi)
2824,3419,3419,Transformed\Vitex Negundo (Lagundi)\train\8_90...,0.500,0.620,1.000000,1.120000,Vitex Negundo (Lagundi)
2825,3328,3328,Transformed\Vitex Negundo (Lagundi)\train\6_36...,0.000,0.000,0.500000,0.375000,Vitex Negundo (Lagundi)
2826,2219,2219,Transformed\Vitex Negundo (Lagundi)\train\4_22...,0.410,0.410,0.996899,0.996899,Vitex Negundo (Lagundi)


In [58]:
final_csv.to_csv("Full Dataset Annotations.csv")

In [59]:
final_csv = pd.read_csv("Full Dataset Annotations.csv")
allowed_columns = ["image_name","x0","y0","x1","y1","class"]
unrecognized_columns = list(filter(lambda x: x not in allowed_columns, final_csv.columns.tolist()))
final_csv.drop(labels=unrecognized_columns, axis=1, inplace=True)
final_csv.to_csv("Full Dataset Annotations.csv", index=False)

In [60]:
fixed_csv = pd.read_csv("Full Dataset Annotations.csv")
fixed_csv

Unnamed: 0,image_name,x0,y0,x1,y1,class
0,Transformed\Artocarpus Heterophyllus (Jackfrui...,0.000,0.500,0.375000,1.000000,Artocarpus Heterophyllus (Jackfruit)
1,Transformed\Artocarpus Heterophyllus (Jackfrui...,0.625,0.500,1.000000,1.000000,Artocarpus Heterophyllus (Jackfruit)
2,Transformed\Artocarpus Heterophyllus (Jackfrui...,0.465,0.390,0.999997,0.994069,Artocarpus Heterophyllus (Jackfruit)
3,Transformed\Artocarpus Heterophyllus (Jackfrui...,0.375,0.380,0.993718,0.998718,Artocarpus Heterophyllus (Jackfruit)
4,Transformed\Artocarpus Heterophyllus (Jackfrui...,0.500,0.000,1.000000,0.500000,Artocarpus Heterophyllus (Jackfruit)
...,...,...,...,...,...,...
36307,Transformed\Vitex Negundo (Lagundi)\train\25_4...,0.000,0.375,0.618718,0.993718,Vitex Negundo (Lagundi)
36308,Transformed\Vitex Negundo (Lagundi)\train\8_90...,0.500,0.620,1.000000,1.120000,Vitex Negundo (Lagundi)
36309,Transformed\Vitex Negundo (Lagundi)\train\6_36...,0.000,0.000,0.500000,0.375000,Vitex Negundo (Lagundi)
36310,Transformed\Vitex Negundo (Lagundi)\train\4_22...,0.410,0.410,0.996899,0.996899,Vitex Negundo (Lagundi)


In [61]:
tf_Dataset_path = "TF_DATASET"

# -- Data
# ----- Train
# ------- Class A
# ------- Class B
# ------- Class C
# ----- Val
# ------- Class A
# ------- Class B
# ------- Class C


if not os.path.exists(tf_Dataset_path):
    os.mkdir(tf_Dataset_path)

if not os.path.exists(os.path.join(tf_Dataset_path, "train")):
    os.mkdir(os.path.join(tf_Dataset_path, "train"))

if not os.path.exists(os.path.join(tf_Dataset_path, "valid")):
    os.mkdir(os.path.join(tf_Dataset_path, "valid"))

if not os.path.exists(os.path.join(tf_Dataset_path, "test")):
    os.mkdir(os.path.join(tf_Dataset_path, "test"))

for folder in folder_sources:
    if not os.path.exists(os.path.join(tf_Dataset_path, "train", folder)):
        os.mkdir(os.path.join(tf_Dataset_path, "train", folder))

    if not os.path.exists(os.path.join(tf_Dataset_path, "test", folder)):
        os.mkdir(os.path.join(tf_Dataset_path, "test", folder))


In [62]:
for folder in tqdm(os.listdir(os.path.join(New_Dataset_path))):
    for sub_folder in os.listdir(os.path.join(New_Dataset_path, folder)):
        if sub_folder == "test" or sub_folder == "train":
            for file in os.listdir(os.path.join(New_Dataset_path, folder, sub_folder)):
                src_path = os.path.join(New_Dataset_path, folder, sub_folder, file)
                dst_path = os.path.join(tf_Dataset_path, sub_folder, folder, file)
                shutil.copy(src_path, dst_path)

        

100%|██████████| 10/10 [04:06<00:00, 24.69s/it]


In [8]:
full_train_annotations = []
full_test_annotations = []

for folder in os.listdir(New_Dataset_path):
    for file in os.listdir(os.path.join(New_Dataset_path, folder)):
        if file == "test_annotations.csv":
            df = pd.read_csv(os.path.join(New_Dataset_path, folder, file))
            full_test_annotations.append(df)

        elif file == "train_annotations.csv":
            df = pd.read_csv(os.path.join(New_Dataset_path, folder, file))
            full_train_annotations.append(df)

full_train_df = pd.concat(full_train_annotations)
full_train_df.to_csv("Full Train Annotations.csv", index=False)

full_test_df = pd.concat(full_test_annotations)
full_test_df.to_csv("Full Test Annotations.csv", index=False)