In [43]:
import os
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split

## consts

In [27]:
path = '../Maize or Corn/data'
TARGET_SIZE = (224, 224)

## working

In [14]:
labels = []
for p in os.listdir(path):
    labels.append(p)
print(labels)
df_labels = []
df_images = []

for lab in labels:
    dir_path = os.path.join(path, lab)

    for file in os.listdir(dir_path):
        df_images.append(file)
        df_labels.append(lab)

data = pd.DataFrame({'image': df_images, 'label': df_labels})
data

['Gray_Leaf_Spot', 'Common_Rust', 'Blight', 'Healthy']


Unnamed: 0,image,label
0,Corn_Gray_Spot (211).JPG,Gray_Leaf_Spot
1,Corn_Gray_Spot (222).JPG,Gray_Leaf_Spot
2,Corn_Gray_Spot (528).JPG,Gray_Leaf_Spot
3,Corn_Gray_Spot (99).JPG,Gray_Leaf_Spot
4,Corn_Gray_Spot (80).JPG,Gray_Leaf_Spot
...,...,...
4183,Corn_Health (668).jpg,Healthy
4184,Corn_Health (294).jpg,Healthy
4185,Corn_Health (700).jpg,Healthy
4186,Corn_Health (269).jpg,Healthy


In [23]:
strat = data['label']
df_train, df_dummy = train_test_split(data, shuffle=True, train_size=0.8, stratify=strat)

strat = df_dummy['label']
df_valid, df_test = train_test_split(df_dummy, train_size=0.5, shuffle=True, stratify=strat)
df_train, df_valid, df_test

(                           image           label
 2286       Corn_Blight (941).JPG          Blight
 1804   Corn_Common_Rust (60).jpg     Common_Rust
 3185       Corn_Health (470).jpg         Healthy
 2367      Corn_Blight (1026).JPG          Blight
 781   Corn_Common_Rust (653).JPG     Common_Rust
 ...                          ...             ...
 2559       Corn_Blight (104).jpg          Blight
 1934       Corn_Blight (194).JPG          Blight
 1042  Corn_Common_Rust (235).JPG     Common_Rust
 1411   Corn_Common_Rust (70).jpg     Common_Rust
 220     Corn_Gray_Spot (348).jpg  Gray_Leaf_Spot
 
 [3350 rows x 2 columns],
                            image           label
 1518  Corn_Common_Rust (190).JPG     Common_Rust
 3980       Corn_Health (401).jpg         Healthy
 4151       Corn_Health (423).jpg         Healthy
 9        Corn_Gray_Spot (55).jpg  Gray_Leaf_Spot
 4104       Corn_Health (585).jpg         Healthy
 ...                          ...             ...
 3954       Corn_Healt

In [41]:
def df_to_folder(df: pd.DataFrame, dest_path: str):
    for _, row in df.iterrows():
        image_name, label = row['image'], row['label']
        image_path = os.path.join(path, label, image_name)
        img = Image.open(image_path)

        img = img.resize(TARGET_SIZE)
        #img.show()

        if img.mode == 'RGBA':
            img = img.convert('RGB')

        label_path = os.path.join(dest_path, label)
        if not os.path.exists(label_path):
            os.mkdir(label_path)

        result_path = os.path.join(label_path, image_name)
        img.save(result_path)
        

In [42]:
dataset_path = '../datasets/corn_or_leaf'
dest_train_path = '../datasets/corn_or_leaf/train/'
dest_val_path = '../datasets/corn_or_leaf/val/'
dest_test_path = '../datasets/corn_or_leaf/test/'

if not os.path.exists(dataset_path):
    os.mkdir(dataset_path)
if not os.path.exists(dest_train_path):     
    os.mkdir(dest_train_path)
if not os.path.exists(dest_val_path):
    os.mkdir(dest_val_path)
if not os.path.exists(dest_test_path):
    os.mkdir(dest_test_path)

df_to_folder(df_train, dest_path=dest_train_path)
df_to_folder(df_valid, dest_path=dest_val_path)
df_to_folder(df_test, dest_path=dest_test_path)


## data.yaml

In [44]:
with open('../datasets/corn_or_leaf/dataset.yaml', 'w') as f:
  f.write('path: corn_or_leaf\n')
  f.write('names:\n')
  for j in labels:
    f.write(f'- {j}\n')

  f.write('train: train\n')
  f.write('val: val\n')
  f.write('test: test\n')

  