In [18]:
import os
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup as bs

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import load_img, img_to_array

In [4]:
# Directory whose entries are read as XML annotation files by the parsing cell below.
# NOTE(review): this cell's recorded output is 3120 entries, while the recorded
# DataFrame further down lists ~2.3k plantdoc*.xml rows -- the saved outputs look
# like they came from runs against different directories; confirm this path
# before a Restart & Run All.
root_dic = '../data/new data/coffee/Annotations/RoCoLe-voc/export'

# Number of entries (files and sub-directories alike) directly inside root_dic.
annotation_entries = os.listdir(root_dic)
len(annotation_entries)

3120

In [11]:
# Collect one [annotation file name, label] pair per XML annotation file in root_dic.
image_labels = []

# sorted(): os.listdir returns entries in arbitrary order, which would make the
# row order of the resulting table differ between runs / machines.
for file in sorted(os.listdir(root_dic)):
    # Skip anything that is not an annotation file (sub-directories, .DS_Store, ...).
    if not file.lower().endswith('.xml'):
        continue

    with open(os.path.join(root_dic, file), 'r') as xml:
        content = xml.read()

    soup = bs(content, 'lxml')

    # Only the FIRST <name> element is used as the label.
    # NOTE(review): if an annotation can describe several objects with different
    # names, the remaining ones are ignored here -- confirm that is intended.
    name = soup.find('name')

    # get_text() yields the element's text content directly (entities unescaped,
    # no markup), instead of string-stripping the serialized tag.
    # Files without a <name> element keep the literal 'None' placeholder, because
    # later cells select and drop those rows with `label == 'None'`.
    label = name.get_text().strip() if name is not None else 'None'
    image_labels.append([file, label])

In [13]:
# Tabulate the collected [file, label] pairs: one row per annotation file.
label_columns = ['file', 'label']
train_data = pd.DataFrame(data=image_labels, columns=label_columns)

# Last expression of the cell -> rendered as the cell output.
train_data

Unnamed: 0,file,label
0,plantdoc0.xml,Tomato Septoria leaf spot
1,plantdoc1.xml,Tomato leaf late blight
2,plantdoc10.xml,Peach leaf
3,plantdoc100.xml,Corn rust leaf
4,plantdoc1000.xml,Apple leaf
...,...,...
2325,plantdoc995.xml,Tomato leaf
2326,plantdoc996.xml,Tomato leaf mosaic virus
2327,plantdoc997.xml,Tomato leaf mosaic virus
2328,plantdoc998.xml,Tomato Septoria leaf spot


In [15]:
# Distinct label values, in order of first appearance.
train_data['label'].unique()

array(['Tomato Septoria leaf spot', 'Tomato leaf late blight',
       'Peach leaf', 'Corn rust leaf', 'Apple leaf', 'Corn leaf blight',
       'Potato leaf late blight', 'Bell_pepper leaf spot',
       'Apple rust leaf', 'Tomato mold leaf',
       'Squash Powdery mildew leaf', 'Tomato leaf yellow virus',
       'Potato leaf early blight', 'Cherry leaf',
       'Tomato leaf mosaic virus', 'Soyabean leaf',
       'Tomato leaf bacterial spot', 'Blueberry leaf', 'Bell_pepper leaf',
       'Apple Scab Leaf', 'Tomato Early blight leaf', 'Raspberry leaf',
       'Corn Gray leaf spot', 'grape leaf', 'Strawberry leaf',
       'grape leaf black rot', 'Tomato leaf', 'None',
       'Tomato two spotted spider mites leaf', 'Soybean leaf'],
      dtype=object)

In [16]:
# Number of annotation files per label, most frequent first.
train_data['label'].value_counts()

Corn leaf blight                        178
Tomato Septoria leaf spot               137
Squash Powdery mildew leaf              124
Raspberry leaf                          112
Corn rust leaf                          106
Potato leaf early blight                105
Blueberry leaf                          103
Peach leaf                              102
Tomato leaf bacterial spot              101
Tomato leaf late blight                  99
Potato leaf late blight                  95
Strawberry leaf                          88
Tomato mold leaf                         86
Apple Scab Leaf                          83
Apple leaf                               82
Apple rust leaf                          78
Tomato Early blight leaf                 77
Tomato leaf yellow virus                 68
Bell_pepper leaf spot                    62
Corn Gray leaf spot                      61
grape leaf                               57
Soyabean leaf                            57
grape leaf black rot            

In [17]:
# Rows whose label is the literal string 'None', i.e. annotation files in which
# no <name> element was found by the parsing cell.
train_data.loc[train_data['label'] == 'None']

Unnamed: 0,file,label
166,plantdoc1147.xml,
234,plantdoc1208.xml,
693,plantdoc1621.xml,
1248,plantdoc2120.xml,
1375,plantdoc2235.xml,
1623,plantdoc362.xml,
1664,plantdoc4.xml,
1789,plantdoc511.xml,
1863,plantdoc579.xml,
1873,plantdoc588.xml,


In [23]:
# Remove the rows labelled 'None' from train_data in place (the original index
# values of the remaining rows are kept; re-running the cell is then a no-op).
missing_label_rows = train_data.index[train_data['label'] == 'None']
train_data.drop(index=missing_label_rows, inplace=True)

In [24]:
# Label names of the other label set, listed in index order: the position of a
# name in this list is the integer id it is mapped to (0..41).
# The trailing space in 'corn (maize) common rust ' is part of the key and is
# kept exactly as it is.
_other_label_names = [
    'apple apple scab',
    'apple black rot',
    'apple cedar apple rust',
    'apple healthy',
    'blueberry healthy',
    'cherry (including sour) healthy',
    'cherry (including sour) powdery mildew',
    'corn (maize) cercospora leaf spot gray leaf spot',
    'corn (maize) common rust ',
    'corn (maize) healthy',
    'corn (maize) northern leaf blight',
    'grape black rot',
    'grape esca (black measles)',
    'grape healthy',
    'grape leaf blight (isariopsis leaf spot)',
    'orange haunglongbing (citrus greening)',
    'peach bacterial spot',
    'peach healthy',
    'pepper, bell bacterial spot',
    'pepper, bell healthy',
    'potato early blight',
    'potato healthy',
    'potato late blight',
    'raspberry healthy',
    'rice brownspot',
    'rice healthy',
    'rice hispa',
    'rice leafblast',
    'soybean healthy',
    'squash powdery mildew',
    'strawberry healthy',
    'strawberry leaf scorch',
    'tomato bacterial spot',
    'tomato early blight',
    'tomato healthy',
    'tomato late blight',
    'tomato leaf mold',
    'tomato septoria leaf spot',
    'tomato spider mites two-spotted spider mite',
    'tomato target spot',
    'tomato tomato mosaic virus',
    'tomato tomato yellow leaf curl virus',
]

# Mapping label name -> integer id, in the same insertion order as the list.
train_data_labels_other = {
    label_name: label_id for label_id, label_name in enumerate(_other_label_names)
}

In [25]:
# Distinct label values, in order of first appearance.
train_data['label'].unique()

array(['Tomato Septoria leaf spot', 'Tomato leaf late blight',
       'Peach leaf', 'Corn rust leaf', 'Apple leaf', 'Corn leaf blight',
       'Potato leaf late blight', 'Bell_pepper leaf spot',
       'Apple rust leaf', 'Tomato mold leaf',
       'Squash Powdery mildew leaf', 'Tomato leaf yellow virus',
       'Potato leaf early blight', 'Cherry leaf',
       'Tomato leaf mosaic virus', 'Soyabean leaf',
       'Tomato leaf bacterial spot', 'Blueberry leaf', 'Bell_pepper leaf',
       'Apple Scab Leaf', 'Tomato Early blight leaf', 'Raspberry leaf',
       'Corn Gray leaf spot', 'grape leaf', 'Strawberry leaf',
       'grape leaf black rot', 'Tomato leaf',
       'Tomato two spotted spider mites leaf', 'Soybean leaf'],
      dtype=object)

In [13]:
# Random augmentation settings, kept in one mapping so they can be inspected or
# tweaked separately from the generator construction: rotation, horizontal and
# vertical shifts, brightness, shear, zoom, channel shift and both flips.
# fill_mode='wrap' selects how pixels exposed by a transform are filled.
augmentation_options = dict(
    rotation_range=90,
    width_shift_range=0.3,
    height_shift_range=0.3,
    brightness_range=(0.1, 1.0),
    shear_range=0.3,
    zoom_range=(0.3, 1.0),
    channel_shift_range=150,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='wrap',
)

generator = ImageDataGenerator(**augmentation_options)

In [30]:
# For every sub-directory of the experiment folder, load its images at 256x256,
# run them through `generator`, and let Keras write the augmented images into a
# same-named sub-directory of the augmented folder.
src_root = '../data/new data/experiment'
dst_root = '../data/new data/augmented'
n_passes = 3  # number of full passes over each sub-directory's images

for sub in sorted(os.listdir(src_root)):
    src_path = os.path.join(src_root, sub)

    # Stray files next to the sub-directories (e.g. .DS_Store) would make the
    # inner os.listdir raise NotADirectoryError.
    if not os.path.isdir(src_path):
        continue

    files_in_sub = [
        img_to_array(load_img(os.path.join(src_path, img_file), target_size=(256, 256)))
        for img_file in sorted(os.listdir(src_path))
        if os.path.isfile(os.path.join(src_path, img_file))
    ]

    # An empty folder would give an array of shape (0,), which flow() rejects
    # because it expects a rank-4 batch of images.
    if not files_in_sub:
        continue

    dst_path = os.path.join(dst_root, sub)
    # makedirs(..., exist_ok=True) also creates the missing parent folder and does
    # not fail when the cell is re-run (os.mkdir raised FileExistsError then).
    # Note that a re-run adds further augmented files next to the existing ones.
    os.makedirs(dst_path, exist_ok=True)

    files_in_sub = np.asarray(files_in_sub)

    img_gen = generator.flow(files_in_sub,
                             save_to_dir=dst_path,
                             save_format='jpg',
                             save_prefix='aug-')

    # Drawing a batch is what triggers the save to dst_path; the batch itself is
    # discarded. len(img_gen) batches make up one pass over all images.
    # The builtin next() is used instead of the iterator's .next() method, which
    # newer Keras releases no longer provide.
    for _ in range(n_passes):
        for _ in range(len(img_gen)):
            next(img_gen)