In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from  PIL import Image
import numpy as np
from tensorflow.keras import Input, Model
from tensorflow.keras.applications.efficientnet import EfficientNetB0
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import split_dataset

# Seed passed to every dataset split in this notebook so the splits are reproducible.
RSEED = 42
# Root folder of the image dataset; each sub-folder is treated as one class.
dataset_path = '../data/images/'

In [None]:


# Build an index of every image on disk: one row per file, labelled with the
# name of the class folder that contains it.
# `dataset_path` is defined in the configuration cell at the top of the notebook.
data = []

# os.listdir gives no ordering guarantee; sorting keeps the row order stable
# between runs and machines.
for plant_class in sorted(os.listdir(dataset_path)):
    class_path = os.path.join(dataset_path, plant_class)

    # Stray files next to the class folders (e.g. .DS_Store) are not classes;
    # calling os.listdir on them would raise NotADirectoryError.
    if not os.path.isdir(class_path):
        continue

    for image_file in sorted(os.listdir(class_path)):
        image_path = os.path.join(class_path, image_file)
        data.append({'Image_Path': image_path, 'Class': plant_class})

# Explicit columns keep the frame well-formed even if no image was found.
df = pd.DataFrame(data, columns=['Image_Path', 'Class'])

# Show only a preview instead of echoing the whole frame.
df.head()

In [None]:
# Horizontal bar chart of the number of images per class, ordered by the
# class frequencies computed with value_counts().
class_counts = df['Class'].value_counts()
fig, ax = plt.subplots(figsize=(12, 12))
sns.countplot(data=df, y='Class', order=class_counts.index, ax=ax)
ax.set_title('Class Distribution')
plt.show()

In [None]:
# The plant name is the part of the class label before the '___' separator.
df['plant_type'] = df['Class'].str.split('___').str[0]

# Tally how many images belong to each plant and print the tally.
plant_type_counts = df['plant_type'].value_counts()
print(plant_type_counts)

In [None]:
# A class label that lacks the '___' separator cannot be split into a
# plant part and a disease part; list those rows.
has_separator = df['Class'].str.contains('___', regex=False)
problematic_rows = df[~has_separator]
print("Problematic Rows:")
print(problematic_rows)

In [None]:

# Labels containing 'Grape_leaf_blight' use a single underscore where the other
# labels use the '___' plant/disease separator. Rewrite them in the frame so
# they can be split like the rest (the folders on disk are left untouched).
df['Class'] = df['Class'].replace('Grape_leaf_blight', 'Grape___leaf_blight', regex=True)

# plant_type was derived before this correction, so the affected rows still
# hold the whole unsplit label; recompute it from the corrected class names.
df['plant_type'] = df['Class'].apply(lambda x: x.split('___')[0])


In [None]:
# The disease name is the part of the class label after the '___' separator.
# Indexing [1] raises IndexError for any label without the separator.
df['disease_type'] = [label.split('___')[1] for label in df['Class']]

# Tally how many images show each disease and print the tally.
disease_type_counts = df['disease_type'].value_counts()
print(disease_type_counts)

In [None]:
# Number of distinct disease names across all classes.
df['disease_type'].nunique()

In [None]:

# Display sample images from each class
# NOTE: the code below is wrapped in a triple-quoted string, so none of it is
# executed; the cell only evaluates (and echoes) the string itself.
# Remove the surrounding quotes to show the first image of every class folder.
'''
class_folders = os.listdir(dataset_path)
for class_folder in class_folders:
    class_path = os.path.join(dataset_path, class_folder)
    sample_image = os.listdir(class_path)[0]
    image_path = os.path.join(class_path, sample_image)
    # Display the image
    img = Image.open(image_path)
    plt.imshow(img)
    plt.title(f"Class: {class_folder}")
    plt.show()
'''

In [None]:
# Sorted list of the distinct disease names.
labels = sorted(df['disease_type'].unique())

labels

In [None]:
# Disabled: this call raised NameError (`ts` is never defined) and passed no
# dataset to split_dataset. Its results were not used anywhere; the datasets
# used by the model are created by the loader cells further down.
# test_data, train_data = ts.keras.utils.split_dataset()

In [None]:
import tensorflow as tf
from tensorflow import keras


In [None]:
def load_preprocess_train_data(data_path):
    """Load the training subset of the images stored under ``data_path``.

    ``validation_split=0.3`` reserves 30% of the files for validation, so the
    "training" subset returned here holds the remaining 70%. The global
    ``RSEED`` is passed as the split seed. Images are requested at
    224 x 224 with ``crop_to_aspect_ratio=True``.

    Parameters
    ----------
    data_path : path of the directory handed to
        ``image_dataset_from_directory``.

    Returns
    -------
    The dataset object created by ``image_dataset_from_directory`` for the
    "training" subset.
    """
    return tf.keras.utils.image_dataset_from_directory(
        data_path,
        subset="training",
        validation_split=0.3,
        seed=RSEED,
        image_size=(224, 224),
        crop_to_aspect_ratio=True,
    )

In [None]:
# Training subset of the images under dataset_path.
train_ds = load_preprocess_train_data(dataset_path)


In [None]:
# Sanity check of the training dataset object (the messages previously said
# "test_ds" although train_ds is what gets inspected here).
print("Type of train_ds:", type(train_ds))

# cardinality() counts the elements of the dataset, not images or a shape;
# with the loader's default batching each element is one batch.
n_train_batches = tf.data.experimental.cardinality(train_ds).numpy()
print("Number of batches in train_ds:", n_train_batches)

In [None]:
def load_preprocess_validation_data_and_test(data_path):
    """Load the held-out subset of the images stored under ``data_path``.

    ``validation_split=0.3`` together with ``subset="validation"`` returns the
    30% of the files that the training loader leaves out, provided both use
    the same seed; the global ``RSEED`` is passed as the seed here. Images are
    requested at 224 x 224 with ``crop_to_aspect_ratio=True``.

    Parameters
    ----------
    data_path : path of the directory handed to
        ``image_dataset_from_directory``.

    Returns
    -------
    The dataset object created by ``image_dataset_from_directory`` for the
    "validation" subset. This function does not split it any further.
    """
    return tf.keras.utils.image_dataset_from_directory(
        data_path,
        subset="validation",
        validation_split=0.3,
        seed=RSEED,
        image_size=(224, 224),
        crop_to_aspect_ratio=True,
    )


In [None]:
# Held-out (validation) subset of the images under dataset_path.
val_ds = load_preprocess_validation_data_and_test(dataset_path)

In [None]:
def extract_test_from_val(val_ds):
    """Split ``val_ds`` into two halves with ``tf.keras.utils.split_dataset``.

    The split uses ``left_size=0.5``, shuffles before splitting and is seeded
    with the global ``RSEED``.

    Returns
    -------
    The value returned by ``split_dataset`` unchanged, i.e. a pair of
    datasets (left half, right half) - not a single test dataset.
    """
    return tf.keras.utils.split_dataset(
        val_ds,
        left_size=0.5,
        shuffle=True,
        seed=RSEED,
    )

In [None]:
# extract_test_from_val returns a pair of datasets, so unpack both halves:
# the first half stays the validation set, the second becomes the test set.
# Binding the whole pair to test_ds left it a tuple instead of a dataset.
# NOTE: re-running this cell halves val_ds again; re-create val_ds first.
val_ds, test_ds = extract_test_from_val(val_ds)

In [None]:
#train_ds = prepare(train_ds, shuffle=True, augment=True)
#val_ds = prepare(val_ds)
#test_ds = prepare(test_ds)

In [None]:
# EfficientNetB0 used as the base network. include_top=False requests the
# network without its final classification layers; the notebook attaches its
# own pooling + Dense head further down.
model = keras.applications.EfficientNetB0(
    include_top=False
)

In [None]:
# Print the layer-by-layer overview of the base network.
model.summary()

In [None]:
# Freeze the base network so that its weights are not updated during training;
# only the layers added on top of it remain trainable.

model.trainable = False 

In [None]:
# Classification head on top of the frozen EfficientNetB0 base.

# The output layer needs exactly one unit per class. Take the count from the
# class folders discovered by image_dataset_from_directory instead of
# hard-coding it, so the model keeps matching the data if folders change.
num_classes = len(train_ds.class_names)

# Same 224 x 224 RGB size that the dataset loaders request.
inputs = Input(shape=(224, 224, 3))

base = model(inputs)

# Collapse the spatial feature map into one feature vector per image.
flatten = GlobalAveragePooling2D()(base)

outputs = Dense(num_classes, activation='softmax')(flatten)

model_enB0 = Model(inputs, outputs)


In [None]:
# The datasets are built by image_dataset_from_directory without a label_mode
# argument, so labels are integer class indices (the default 'int' mode), not
# one-hot vectors. 'categorical_crossentropy' expects one-hot targets and
# fails on them; the sparse variant is the matching loss.
model_enB0.compile(optimizer=Adam(learning_rate=0.001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


In [None]:
#train_np = np.asarray(train)
#train_y = np.asarray(train_y)
#test_np = np.asarray(test)
#validation_y = np.asarray(validation_y)

In [None]:
# Train on the training dataset and monitor on the validation dataset.
# (`train` and `test` were never defined in this notebook, so the previous
# call raised NameError; train_ds / val_ds are the datasets built above.)
history = model_enB0.fit(train_ds,
          verbose=2, # one log line per epoch
          epochs=10,
          validation_data=val_ds)

In [None]:
import tensorflow as tf

def extract_test_from_val(val_ds, seed=None):
    """Return a fixed test subset made of half the elements of ``val_ds``.

    ``val_ds`` is shuffled once and the first ``len(val_ds) // 2`` elements
    are taken. ``val_ds`` itself is not modified, so the returned subset
    overlaps with it.

    Parameters
    ----------
    val_ds : dataset supporting ``len()``, ``shuffle()`` and ``take()``.
    seed : optional shuffle seed; defaults to the global ``RSEED``.

    Returns
    -------
    The dataset produced by ``take()`` on the shuffled ``val_ds``.
    """
    if seed is None:
        seed = RSEED

    n_elements = len(val_ds)

    # Without reshuffle_each_iteration=False the dataset is reshuffled on every
    # pass, so take() would hand out a different "test set" each time it is
    # iterated (e.g. evaluate() followed by predict()).
    shuffled = val_ds.shuffle(
        buffer_size=n_elements,
        seed=seed,
        reshuffle_each_iteration=False,
    )

    return shuffled.take(n_elements // 2)

# Draw the test subset from the validation dataset.
# NOTE(review): val_ds is not reduced by this call, so test_ds overlaps with
# val_ds - confirm this is intended before reporting test metrics.
test_ds = extract_test_from_val(val_ds)

In [None]:
# Inspect the test dataset: first its Python type ...
print("Type of test_ds:", type(test_ds))

# ... then its cardinality, i.e. the number of elements it yields.
dataset_shape = test_ds.cardinality().numpy()
print("Shape of test_ds:", dataset_shape)