In [3]:
# to handle datasets
import pandas as pd
import numpy as np

from glob import glob
import os

import matplotlib.pyplot as plt
%matplotlib inline
import cv2

# to display all the columns of the dataframe in the notebook
pd.pandas.set_option('display.max_columns', None)

In [4]:
# data_preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# evaluate model and separate train and test
from sklearn.metrics import confusion_matrix

In [5]:
import warnings
warnings.filterwarnings('ignore')

In [6]:
# for the convolutional network
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Flatten
from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras.preprocessing import image
import np_utils

# Load Images / Data

In [10]:
# here are all our images
DATA_FOLDER = 'v2-plant-seedling-dataset'

In [11]:
# each wee class is in a dedicated folder
os.listdir(DATA_FOLDER)

['Cleavers',
 'Sugar beet',
 'Common Chickweed',
 'Loose Silky-bent',
 'Scentless Mayweed',
 'Shepherd’s Purse',
 'Fat Hen',
 'Common wheat',
 'Black-grass',
 'nonsegmentedv2',
 'Small-flowered Cranesbill',
 'Charlock',
 'Maize']

In [12]:
# Let's walk over the directory structure, so we understand
# how the images are stored

for class_folder_name in os.listdir(DATA_FOLDER):
    class_folder_path = os.path.join(DATA_FOLDER, class_folder_name)
    for image_path in glob(os.path.join(class_folder_path, '*.png')):
        print(image_path)

v2-plant-seedling-dataset/Cleavers/348.png
v2-plant-seedling-dataset/Cleavers/176.png
v2-plant-seedling-dataset/Cleavers/88.png
v2-plant-seedling-dataset/Cleavers/162.png
v2-plant-seedling-dataset/Cleavers/189.png
v2-plant-seedling-dataset/Cleavers/77.png
v2-plant-seedling-dataset/Cleavers/63.png
v2-plant-seedling-dataset/Cleavers/228.png
v2-plant-seedling-dataset/Cleavers/200.png
v2-plant-seedling-dataset/Cleavers/214.png
v2-plant-seedling-dataset/Cleavers/215.png
v2-plant-seedling-dataset/Cleavers/201.png
v2-plant-seedling-dataset/Cleavers/229.png
v2-plant-seedling-dataset/Cleavers/62.png
v2-plant-seedling-dataset/Cleavers/188.png
v2-plant-seedling-dataset/Cleavers/76.png
v2-plant-seedling-dataset/Cleavers/163.png
v2-plant-seedling-dataset/Cleavers/177.png
v2-plant-seedling-dataset/Cleavers/89.png
v2-plant-seedling-dataset/Cleavers/149.png
v2-plant-seedling-dataset/Cleavers/161.png
v2-plant-seedling-dataset/Cleavers/175.png
v2-plant-seedling-dataset/Cleavers/60.png
v2-plant-seedling-

In [14]:
# Let's creare a dataframe:
# the dataframe stores the path to the image in one column
# and the class of the weed (the target) in the next column

images_df = []

# navigate within each folder
for class_folder_name in os.listdir(DATA_FOLDER):
    class_folder_path = os.path.join(DATA_FOLDER, class_folder_name)

    # collect everry image path
    for images_path in glob(os.path.join(class_folder_path, '*.png')):
        tmp = pd.DataFrame([image_path, class_folder_name]).T
        images_df.append(tmp)

# concatenate the final df
images_df = pd.concat(images_df, axis=0, ignore_index=True)
images_df.columns = ['image', 'target']
images_df.head(10)

Unnamed: 0,image,target
0,v2-plant-seedling-dataset/Maize/153.png,Cleavers
1,v2-plant-seedling-dataset/Maize/153.png,Cleavers
2,v2-plant-seedling-dataset/Maize/153.png,Cleavers
3,v2-plant-seedling-dataset/Maize/153.png,Cleavers
4,v2-plant-seedling-dataset/Maize/153.png,Cleavers
5,v2-plant-seedling-dataset/Maize/153.png,Cleavers
6,v2-plant-seedling-dataset/Maize/153.png,Cleavers
7,v2-plant-seedling-dataset/Maize/153.png,Cleavers
8,v2-plant-seedling-dataset/Maize/153.png,Cleavers
9,v2-plant-seedling-dataset/Maize/153.png,Cleavers
