In [None]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPooling2D, GlobalAveragePooling2D, Dense, Dropout, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy, SparseCategoricalCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy, FalseNegatives
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error
from matplotlib import pyplot as plt

In [None]:
"""
Get data
"""
def loader(dataset_locs: str | list[str]) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Load dataset
    Args:
        dataset_locs: Location of the CSV file containing information regarding the dataset.
    Return:
        soil_images: (n, 120, 120, 3) nparray of rgb values for each image
        lux_values: (n, 1) nparray of lux value of each image
        moisture_labels: (n, 1) nparray of moisture value of each image
    """
    soil_images, lux_values, moisture_labels = [], [], []

    if isinstance(dataset_locs, str):
        dataset_locs = [dataset_locs]
    for dataset_loc in dataset_locs:
        dataset = pd.read_csv(dataset_loc)

        paths = dataset.iloc[:, 2] # Get image paths
        moistures = dataset.iloc[:, 3] # Get moisture levels
        luxs = dataset.iloc[:, 4] # Get lux levels

        # Get data
        n = len(paths)
        for i in range(n):
            path = paths[i]
            moisture = moistures[i]
            lux = luxs[i]

            if not os.path.exists(path):
                print(path)
                continue

            image = tf.io.read_file(path)
            image = tf.io.decode_png(image, channels=3)
            
            # Crop image to center 120x120 px
            height, width, _ = tf.shape(image)
            offset_height = (height - 120) // 2
            offset_width = (width - 120) // 2
            image = tf.image.crop_to_bounding_box(image, offset_height, offset_width, 120, 120)

            image = tf.cast(image, tf.uint8)
            
            soil_images.append(image)
            moisture_labels.append(moisture)
            lux_values.append(lux)

    soil_images, lux_values, moisture_labels = np.array(soil_images), np.array(lux_values).reshape(-1, 1), np.array(moisture_labels).reshape(-1, 1)
    return soil_images, lux_values, moisture_labels

# Dataset CSV(s) to load. Swap in one of the commented alternatives to use a different dataset.
paths = ['Data_i11_ds/dataset_i11_ds.csv']
# paths = ['Data_i11_is/dataset_i11_is.csv']
# paths = ['Data_i11_ds/dataset_i11_ds.csv', 'Data_i11_is/dataset_i11_is.csv']
# paths = ['Data_fpbicc/Dataset_fpbicc.csv']
# paths = ['Data_fpbicc/Dataset_fpbicc_filtered.csv']
soil_images, lux_values, moisture_labels = loader(paths)

# Report the shape of each loaded array, then show how the moisture labels are distributed.
shape_report = 'Image data: {}\nLux data: {}\nMoisture data: {}'
print(shape_report.format(soil_images.shape, lux_values.shape, moisture_labels.shape))
plt.hist(moisture_labels)
plt.title('Moisture')
plt.show()

In [None]:
"""
Preprocess data
"""
# The scalers live at module level so the statistics fitted in preprocess()
# stay available afterwards (e.g. to invert the moisture scaling on predictions).
scaler_images = MinMaxScaler()  # pixel values; fitted on rows of 3 colour-channel values
scaler_lux = StandardScaler()  # lux readings
scaler_moisture = StandardScaler()  # moisture labels

def clean_iqr(soil_images_train, lux_values_train, moisture_labels_train, k=1.5):
    """
    Remove samples whose moisture label is an outlier under the IQR rule.

    A label is kept when it lies inside [Q1 - k * IQR, Q3 + k * IQR], where Q1 and Q3
    are the 25th and 75th percentiles of the labels and IQR = Q3 - Q1.
    (Keeping only the values between Q1 and Q3 would throw away roughly half of the
    data; that behaviour is still reachable with k=0.)

    Args:
        soil_images_train: array of images, indexed by sample along axis 0.
        lux_values_train: array of lux values, indexed by sample along axis 0.
        moisture_labels_train: array of moisture labels, one per sample.
        k: fence multiplier applied to the IQR; 1.5 is the conventional choice.
    Return:
        The three input arrays restricted to the samples that were kept, in the same order.
    """
    Q1, Q3 = np.percentile(moisture_labels_train, [25, 75])
    iqr = Q3 - Q1
    lower_fence = Q1 - k * iqr
    upper_fence = Q3 + k * iqr

    # Flatten the mask so it can index the sample axis of every array.
    mask = ((moisture_labels_train >= lower_fence) & (moisture_labels_train <= upper_fence)).ravel()
    return soil_images_train[mask], lux_values_train[mask], moisture_labels_train[mask]

def preprocess(soil_images_train, soil_images_test, lux_values_train, lux_values_test, moisture_labels_train):
    """
    Scale the images, lux values and training labels with the module-level scalers.

    Every scaler is fitted on the training split only and then applied to the test
    split, so no test statistics leak into the fit. The moisture test labels are not
    handled here; they stay on their original scale unless the caller transforms them
    with `scaler_moisture`.

    Args:
        soil_images_train: (n_train, H, W, C) array of training images.
        soil_images_test: (n_test, H, W, C) array of test images with the same C.
        lux_values_train: (n_train, 1) array of training lux values.
        lux_values_test: (n_test, 1) array of test lux values.
        moisture_labels_train: (n_train, 1) array of training moisture labels.
    Return:
        The five inputs after scaling, in the same order; image arrays keep their input shape.
    """
    # soil_images_train, lux_values_train, moisture_labels_train = clean_iqr(soil_images_train, lux_values_train, moisture_labels_train)

    def _scale_pixels(images, fit):
        # Flatten to one row per pixel so the scaler works per colour channel,
        # then restore whatever shape the images arrived in.
        original_shape = images.shape
        flat = images.reshape(-1, original_shape[-1])
        scaled = scaler_images.fit_transform(flat) if fit else scaler_images.transform(flat)
        return scaled.reshape(original_shape)

    soil_images_train = _scale_pixels(soil_images_train, fit=True)
    soil_images_test = _scale_pixels(soil_images_test, fit=False)

    lux_values_train = scaler_lux.fit_transform(lux_values_train)
    lux_values_test = scaler_lux.transform(lux_values_test)

    moisture_labels_train = scaler_moisture.fit_transform(moisture_labels_train)

    return soil_images_train, soil_images_test, lux_values_train, lux_values_test, moisture_labels_train

# Fixed seed so the train/test partition (and everything fitted on it) is
# reproducible across kernel restarts.
SPLIT_SEED = 42

(
    soil_images_train, soil_images_test,
    lux_values_train, lux_values_test,
    moisture_labels_train, moisture_labels_test,
) = train_test_split(
    soil_images, lux_values, moisture_labels,
    test_size=0.2,
    random_state=SPLIT_SEED,
)

# Scale features and training labels; moisture_labels_test is left on its original scale.
(
    soil_images_train, soil_images_test,
    lux_values_train, lux_values_test,
    moisture_labels_train,
) = preprocess(
    soil_images_train, soil_images_test,
    lux_values_train, lux_values_test,
    moisture_labels_train,
)

In [None]:
def soil_moisture_model(output_activation='linear'):
    """
    Build the two-branch CNN regression model.

    One branch is a small convolutional stack over a (120, 120, 3) soil image, the
    other a dense stack over a single lux value; both are concatenated and fed to a
    single-unit output layer.

    Args:
        output_activation: activation of the output unit. The default is 'linear'
            because the regression target is standardised with StandardScaler and is
            therefore unbounded; a 'tanh' output can never produce values outside
            (-1, 1). Pass 'tanh' to get the bounded output back.
    Return:
        An uncompiled Keras Model taking [image, lux] inputs and producing one value per sample.
    """
    # Input layers
    soil_img_input = Input(shape=(120, 120, 3))
    lux_value_input = Input(shape=(1,))

    # Layers for soil image: two conv/batch-norm/max-pool stages ...
    x1 = soil_img_input
    for filters in (32, 64):
        x1 = Conv2D(filters=filters, kernel_size=(3, 3), activation='relu', padding='same')(x1)
        x1 = BatchNormalization()(x1)
        x1 = MaxPooling2D(pool_size=(2, 2))(x1)
    # ... followed by a final conv stage that is globally average-pooled to a 128-vector.
    x1 = Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same')(x1)
    x1 = BatchNormalization()(x1)
    x1 = GlobalAveragePooling2D()(x1)

    # Layers for lux value
    x2 = Dense(units=128, activation='relu')(lux_value_input)
    x2 = Dropout(rate=0.3)(x2)
    x2 = Dense(units=64, activation='relu')(x2)
    x2 = Dense(units=32, activation='relu')(x2)

    # Output layer
    concatenated = Concatenate()([x1, x2])
    output = Dense(units=1, activation=output_activation)(concatenated)

    # Create model
    model = Model(inputs=[soil_img_input, lux_value_input], outputs=output)
    return model

# Build the two-input network and print its layer-by-layer summary.
model = soil_moisture_model()
model.summary()

In [5]:
"""
Train model
"""
# Compile and train
# NOTE(review): 'accuracy' is not informative for a continuous regression target;
# the loss / val_loss entries of the history are the meaningful curves.
model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss='mse',
    metrics=['accuracy']
)

# The model is fitted on standardised moisture labels, so the validation labels must
# go through the same fitted scaler. Otherwise val_loss compares scaled predictions
# with raw moisture values. moisture_labels_test itself is left untouched.
moisture_labels_val_scaled = scaler_moisture.transform(moisture_labels_test)

history = model.fit(
    x=[soil_images_train, lux_values_train],
    y=moisture_labels_train,
    batch_size=32,
    epochs=100,
    validation_data=([soil_images_test, lux_values_test], moisture_labels_val_scaled),
)

# Persist the trained weights so they can be reloaded without retraining.
model.save_weights('cnn.weights.h5')

KeyboardInterrupt: 

In [None]:
# Plot the training and validation loss per epoch. The model is a regressor trained
# with an MSE loss, so the loss curves (always recorded by fit) are the meaningful
# learning curves; classification accuracy carries no information here.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='val')
ax.set_title('model loss (MSE)')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(loc='upper right')
plt.show()

In [None]:
"""
Evaluate model
"""
# Reload the weights written after training.
model.load_weights('cnn.weights.h5')

# Predict on the test split and map the predictions back to the original moisture scale.
moisture_pred_scaled = model.predict(
    x=[soil_images_test, lux_values_test]
)
moisture_pred = scaler_moisture.inverse_transform(moisture_pred_scaled)
rmse = root_mean_squared_error(moisture_labels_test, moisture_pred)

# Relative error per sample (a fraction, not multiplied by 100). Samples whose true
# moisture is 0 have no defined relative error; they are reported as nan and left
# out of the average instead of turning it into inf/nan.
actual_moisture = moisture_labels_test.ravel().astype(float)
signed_error = moisture_pred.ravel() - actual_moisture
percent_error = np.full(actual_moisture.shape, np.nan)
np.divide(signed_error, actual_moisture, out=percent_error, where=actual_moisture != 0)
average_percent_error = np.nanmean(np.abs(percent_error))

print('Pred: {}'.format(moisture_pred.ravel()))
print('Actual: {}'.format(moisture_labels_test.ravel()))
print('Root Mean Squared Error: {}'.format(rmse))
print('Percent Error: {}'.format(np.round(percent_error, 2)))
print('Average Percent Error: {}'.format(average_percent_error))