In [39]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Data Preparation

In [40]:
# Create dictionary for category classes
output_dict = {0 : 'aloevera',
                1 : 'banana',
                2 : 'bilimbi',
                3 : 'cantaloupe',
                4 : 'cassava',
                5 : 'coconut',
                6 : 'corn',
                7 : 'cucumber',
                8 : 'curcuma',
                9 : 'eggplant',
                10 : 'galangal',
                11 : 'ginger',
                12 : 'guava',
                13 : 'kale',
                14 : 'longbeans',
                15 : 'mango',
                16 : 'melon',
                17 : 'orange',
                18 : 'paddy',
                19 : 'papaya',
                20 : 'peperchili',
                21 : 'pineapple',
                22 : 'pomelo',
                23 : 'shallot',
                24 : 'soybeans',
                25 : 'spinach',
                26 : 'sweetpotatoes',
                27 : 'tobacco',
                28 : 'waterapple',
                29 : 'watermelon'}

In [41]:
def map_values(category_num):
    return output_dict.get(category_num)

In [42]:
# Load and preprocess images from file paths
def load_image(file_path, target_size=(32, 32)):
    image = load_img(file_path, target_size=target_size)
    image_array = img_to_array(image) / 255.0
    return image_array

In [43]:
def prepare_data(file_path):
    # Read the csv file
    df = pd.read_csv(file_path)

    # Trim dataset for faster testing for now
    df = df.sample(n=100, random_state=42)

    # Add new column for name of class category
    df['label'] = df['category'].apply(map_values)

    # Append data/ in front of every image file path
    df['image:FILE'] = 'data/' + df['image:FILE']
    
    # Add new column with loaded image
    df['image'] = df['image:FILE'].apply(lambda x: load_image(x))

    # Convert images to numpy arrays
    X = np.array(df['image'])
    y = np.array(df['label'])

    return X, y


In [51]:
X_train, y_train = prepare_data('data/test.csv')
X_val, y_val = prepare_data('data/val.csv')
X_test, y_test = prepare_data('data/test.csv')

(100,)

# Train Model

In [45]:
def CNN_train():
    # Define the CNN architecture
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(30)  # 30 unique labels in dataset
    ])

    # Compile the model
    model.compile(optimizer='adam',
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'])
    
    # Train the model
    model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))

    # Evaluate the model
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
    print(f'Test accuracy: {test_acc}')

    # Save the model
    #model.save('CNN_model.h5')

In [47]:
CNN_train()

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).