In [1]:
import os
from pathlib import Path
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np

# Root folder of the raw dataset; every sub-directory is treated as one class.
root = Path("C:/Users/razer/Documents/workspace/AI F.R.I.E.N.D.S/data_raw/")

# Class labels are the sub-directory names, kept in sorted order.
classes = [entry.name for entry in root.iterdir() if entry.is_dir()]
classes.sort()

# Display the number of classes together with a preview of the first ten.
len(classes), classes[:10]

(47,
 ['African Violet (Saintpaulia ionantha)',
  'Aloe Vera',
  'Anthurium (Anthurium andraeanum)',
  'Areca Palm (Dypsis lutescens)',
  'Asparagus Fern (Asparagus setaceus)',
  'Begonia (Begonia spp.)',
  'Bird of Paradise (Strelitzia reginae)',
  'Birds Nest Fern (Asplenium nidus)',
  'Boston Fern (Nephrolepis exaltata)',
  'Calathea'])

In [2]:
# Number of directory entries found directly under each class folder.
# Note: glob("*") matches every entry, not only image files.
class_counts = {
    label: len(list((root / label).glob("*")))
    for label in classes
}

class_counts

{'African Violet (Saintpaulia ionantha)': 337,
 'Aloe Vera': 252,
 'Anthurium (Anthurium andraeanum)': 455,
 'Areca Palm (Dypsis lutescens)': 189,
 'Asparagus Fern (Asparagus setaceus)': 169,
 'Begonia (Begonia spp.)': 236,
 'Bird of Paradise (Strelitzia reginae)': 180,
 'Birds Nest Fern (Asplenium nidus)': 290,
 'Boston Fern (Nephrolepis exaltata)': 307,
 'Calathea': 330,
 'Cast Iron Plant (Aspidistra elatior)': 266,
 'Chinese Money Plant (Pilea peperomioides)': 382,
 'Chinese evergreen (Aglaonema)': 514,
 'Christmas Cactus (Schlumbergera bridgesii)': 312,
 'Chrysanthemum': 209,
 'Ctenanthe': 347,
 'Daffodils (Narcissus spp.)': 421,
 'Dracaena': 261,
 'Dumb Cane (Dieffenbachia spp.)': 541,
 'Elephant Ear (Alocasia spp.)': 332,
 'English Ivy (Hedera helix)': 240,
 'Hyacinth (Hyacinthus orientalis)': 318,
 'Iron Cross begonia (Begonia masoniana)': 266,
 'Jade plant (Crassula ovata)': 353,
 'Kalanchoe': 130,
 'Lilium (Hemerocallis)': 480,
 'Lily of the valley (Convallaria majalis)': 416,

In [3]:
# Gather the pixel width and height of every entry under each class folder.
w_list, h_list = [], []

for cls in classes:
    class_path = root / cls
    for img_path in class_path.glob("*"):
        # Image.open only reads the header and keeps the file handle open;
        # the context manager releases it as soon as the size has been read,
        # so scanning thousands of files does not pile up open handles.
        with Image.open(img_path) as img:
            w, h = img.size
        w_list.append(w)
        h_list.append(h)

print(f"Average width: {np.mean(w_list)}")
print(f"Average height: {np.mean(h_list)}")

# Width and height extremes are computed independently, so each printed pair
# is not necessarily the size of one actual image.
print(f"Min Size: ({min(w_list)}, {min(h_list)})")
print(f"Max Size: ({max(w_list)}, {max(h_list)})")

Average width: 1306.9533468559837
Average height: 1334.4279242731575
Min Size: (90, 120
Max Size: (6720, 8021


In [4]:
import json
import re

def clean_class_name(name):
    """Convert a raw class (folder) name into a lowercase snake_case label.

    Steps:
      1. lowercase the name and turn spaces into underscores;
      2. drop every character outside ``[a-z0-9_]`` (parentheses, dots,
         hyphens, slashes, ...);
      3. collapse runs of underscores and trim them from both ends.

    Collapsing happens *after* stripping, so a name such as ``"Foo - Bar"``
    yields ``"foo_bar"`` rather than ``"foo__bar"``.

    Args:
        name: Original class name as found on disk.

    Returns:
        The cleaned label containing only ``a-z``, ``0-9`` and ``_``.
    """
    clean = name.lower().replace(" ", "_")
    clean = re.sub(r"[^a-z0-9_]", "", clean)
    # Removed characters can leave adjacent or dangling separators behind.
    clean = re.sub(r"_+", "_", clean)
    return clean.strip("_")


clean_names = [clean_class_name(name) for name in classes]

# Two different folders collapsing to the same label would silently merge
# classes downstream, so fail loudly here instead.
assert len(set(clean_names)) == len(clean_names), (
    "Cleaned class names are not unique; adjust clean_class_name()."
)

# Lookup from the original folder name to its cleaned label.
mapping = dict(zip(classes, clean_names))
mapping


{'African Violet (Saintpaulia ionantha)': 'african_violet_saintpaulia_ionantha',
 'Aloe Vera': 'aloe_vera',
 'Anthurium (Anthurium andraeanum)': 'anthurium_anthurium_andraeanum',
 'Areca Palm (Dypsis lutescens)': 'areca_palm_dypsis_lutescens',
 'Asparagus Fern (Asparagus setaceus)': 'asparagus_fern_asparagus_setaceus',
 'Begonia (Begonia spp.)': 'begonia_begonia_spp',
 'Bird of Paradise (Strelitzia reginae)': 'bird_of_paradise_strelitzia_reginae',
 'Birds Nest Fern (Asplenium nidus)': 'birds_nest_fern_asplenium_nidus',
 'Boston Fern (Nephrolepis exaltata)': 'boston_fern_nephrolepis_exaltata',
 'Calathea': 'calathea',
 'Cast Iron Plant (Aspidistra elatior)': 'cast_iron_plant_aspidistra_elatior',
 'Chinese Money Plant (Pilea peperomioides)': 'chinese_money_plant_pilea_peperomioides',
 'Chinese evergreen (Aglaonema)': 'chinese_evergreen_aglaonema',
 'Christmas Cactus (Schlumbergera bridgesii)': 'christmas_cactus_schlumbergera_bridgesii',
 'Chrysanthemum': 'chrysanthemum',
 'Ctenanthe': 'c

In [5]:
# Save the cleaned class names, one per line.
with open("class_names_clean.txt", "w") as names_file:
    names_file.writelines(cleaned + "\n" for cleaned in clean_names)

# Save the original -> cleaned name lookup as indented JSON.
with open("class_name_mapping.json", "w") as mapping_file:
    mapping_file.write(json.dumps(mapping, indent=4))
