In [1]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem; the paths used later in this
# notebook live under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [3]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, Concatenate, Dropout
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import PIL
from PIL import Image

# ---- Dataset locations and column roles ----
image_folder = "/content/drive/MyDrive/fishpond_dataset/images"  # directory holding the .jpg files
numerical_features = ["Temp", "TDS"]
image_column = "images"
target_column = "pH"

# ---- Read the tabular data ----
df = pd.read_csv(
    '/content/drive/MyDrive/fishpond_dataset/pond_dataset.csv',
    encoding='unicode_escape',
)

# Pull the numeric inputs, the image file stems and the regression target
# out of the frame as arrays.
X_numerical = df[numerical_features].values
image_paths = df[image_column].values
y = df[target_column].values

# ---- 80/20 hold-out split (fixed seed so the partition is repeatable) ----
(
    X_n_train, X_n_test,
    X_img_train, X_img_test,
    y_train, y_test,
) = train_test_split(X_numerical, image_paths, y, test_size=0.2, random_state=42)

# ---- Standardise the numeric inputs ----
# The scaler is fitted on the training rows only and then applied to both splits.
scaler = StandardScaler().fit(X_n_train)
X_n_train_scaled = scaler.transform(X_n_train)
X_n_test_scaled = scaler.transform(X_n_test)

# ---- Load & Process Images ----
img_size = (128, 128)  # Image size for CNN input


def load_and_preprocess_image(image_name, folder=None, size=None):
    """Load ``<image_name>.jpg``, resize it and divide the pixel values by 255.

    Parameters
    ----------
    image_name : Any
        File stem of the image. It is converted with ``str()`` and ``".jpg"``
        is appended.
    folder : str, optional
        Directory to look in. Defaults to the module-level ``image_folder``.
    size : tuple, optional
        ``target_size`` handed to ``load_img``. Defaults to the module-level
        ``img_size``.

    Returns
    -------
    numpy.ndarray or None
        ``img_to_array(img) / 255.0`` for the resized image, or ``None`` after
        printing a warning when the file does not exist.
    """
    folder = image_folder if folder is None else folder
    size = img_size if size is None else size
    img_path = os.path.join(folder, str(image_name) + ".jpg")

    # isfile() rather than exists(): a directory that happens to be named
    # "<name>.jpg" must be reported as missing instead of failing in load_img.
    if not os.path.isfile(img_path):
        print(f"Warning: Image file not found: {img_path}")
        return None

    img = load_img(img_path, target_size=size)  # Load and resize image
    return img_to_array(img) / 255.0  # Normalize pixel values

# ---- Load every image once and drop the rows whose image is missing ----
def _load_existing_images(image_names):
    """Load each named image a single time and keep only those that were found.

    Returns ``(images, kept_indices)``: the stacked image array and the
    positions in ``image_names`` that produced an image, so the matching
    numeric and target rows can be selected with the same indices.

    Raises ``ValueError`` when not a single image could be loaded, because the
    model cannot be trained or evaluated on an empty array.
    """
    kept_indices, images = [], []
    for position, image_name in enumerate(image_names):
        img = load_and_preprocess_image(image_name)
        if img is not None:
            kept_indices.append(position)
            images.append(img)
    if not images:
        raise ValueError(
            "No image files could be loaded; check image_folder and the image names in the CSV."
        )
    return np.array(images), kept_indices


# Each image is decoded exactly once; the same index list then keeps the
# numeric features and targets aligned with the surviving images.
X_img_train_processed, valid_indices = _load_existing_images(X_img_train)
X_n_train_filtered = X_n_train_scaled[valid_indices]
y_train_filtered = y_train[valid_indices]

X_img_test_processed, valid_indices_test = _load_existing_images(X_img_test)
X_n_test_filtered = X_n_test_scaled[valid_indices_test]
y_test_filtered = y_test[valid_indices_test]
# ---- Define CNN for Image Data ----
image_input = Input(shape=(img_size[0], img_size[1], 3), name="Image_Input")

# Two conv + max-pool stages, flattened into a 64-unit dense embedding.
x = Conv2D(32, (3, 3), activation='relu', padding='same')(image_input)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2))(x)
x = Flatten()(x)

cnn_output = Dense(64, activation='relu')(x)

# ---- Define DNN for Numerical Data ----
numerical_input = Input(shape=(len(numerical_features),), name="Numerical_Input")

# Named `num_branch` rather than `y` so the target array `y` loaded earlier in
# this cell is not silently replaced by a Keras tensor.
num_branch = Dense(32, activation='relu')(numerical_input)
num_branch = Dense(16, activation='relu')(num_branch)

# ---- Merge CNN & DNN Outputs ----
merged = Concatenate()([cnn_output, num_branch])
final_output = Dense(1, activation='linear')(merged)  # Linear activation for regression

# ---- Build & Compile Model ----
# Input order is [image, numerical]; fit/predict must pass arrays in that order.
model = Model(inputs=[image_input, numerical_input], outputs=final_output)
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# ---- Train Model ----
# Inputs are listed as [images, numeric features], matching the model's input order.
train_inputs = [X_img_train_processed, X_n_train_filtered]
test_inputs = [X_img_test_processed, X_n_test_filtered]

# The held-out split doubles as validation data, so the val_* curves are
# computed on the same samples that are scored below.
history = model.fit(
    train_inputs,
    y_train_filtered,
    validation_data=(test_inputs, y_test_filtered),
    epochs=50,
    batch_size=32,
    verbose=1,
)

# ---- Evaluate Model ----
y_pred = model.predict(test_inputs)
mse = mean_squared_error(y_test_filtered, y_pred)
mae = mean_absolute_error(y_test_filtered, y_pred)
rmse = np.sqrt(mse)

print(f"MAE: {mae:.3f}, MSE: {mse:.3f}, RMSE: {rmse:.3f}")

# ---- Plot Training History ----
# One entry per panel: (train key, validation key, train label, validation label, title, y-axis label).
panels = [
    ('loss', 'val_loss', 'Train Loss', 'Validation Loss', 'Loss Over Epochs', 'Loss'),
    ('mae', 'val_mae', 'Train MAE', 'Validation MAE', 'Mean Absolute Error Over Epochs', 'MAE'),
]

plt.figure(figsize=(10, 5))
for position, (train_key, val_key, train_label, val_label, title, y_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()

plt.show()

# ---- Save Model ----
# Relative path: the file is written to the notebook's current working directory.
save_path = "hybrid_ph_prediction.h5"
model.save(save_path)
print("Model saved as 'hybrid_ph_prediction.h5'.")

FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/fishpond_dataset/pond_dataset.csv'

In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Dense, Flatten, Conv2D, MaxPooling2D, Concatenate
)
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

# ---- Column roles and dataset locations ----
numerical_features = ["Temp", "TDS"]
image_column = "images"
target_column = "pH"
image_folder = "/content/drive/MyDrive/fishpond_dataset/images"

# ---- Read the CSV ----
df = pd.read_csv(
    '/content/drive/MyDrive/fishpond_dataset/pond_dataset.csv',
    encoding='unicode_escape',
)

# Numeric inputs, image file stems and the target, each as an array.
X_numerical = df[numerical_features].values
image_paths = df[image_column].values
y = df[target_column].values

# ---- Hold out 20% of the rows; the seed keeps the split repeatable ----
split_parts = train_test_split(
    X_numerical, image_paths, y, test_size=0.2, random_state=42
)
X_n_train, X_n_test, X_img_train, X_img_test, y_train, y_test = split_parts

# ---- Standardise numeric inputs using training-set statistics only ----
scaler = StandardScaler()
scaler.fit(X_n_train)
X_n_train_scaled = scaler.transform(X_n_train)
X_n_test_scaled = scaler.transform(X_n_test)

# ---- Load & Process Images ----
img_size = (128, 128)  # Image size for CNN input


def load_and_preprocess_image(image_name, folder=None, size=None):
    """Load ``<image_name>.jpg``, resize it and divide the pixel values by 255.

    Parameters
    ----------
    image_name : Any
        File stem of the image. It is converted with ``str()`` and ``".jpg"``
        is appended.
    folder : str, optional
        Directory to look in. Defaults to the module-level ``image_folder``.
    size : tuple, optional
        ``target_size`` handed to ``load_img``. Defaults to the module-level
        ``img_size``.

    Returns
    -------
    numpy.ndarray or None
        ``img_to_array(img) / 255.0`` for the resized image, or ``None`` after
        printing a warning when the file does not exist.
    """
    folder = image_folder if folder is None else folder
    size = img_size if size is None else size
    img_path = os.path.join(folder, str(image_name) + ".jpg")

    # isfile() rather than exists(): a directory that happens to be named
    # "<name>.jpg" must be reported as missing instead of failing in load_img.
    if not os.path.isfile(img_path):
        print(f"Warning: Image file not found: {img_path}")
        return None

    img = load_img(img_path, target_size=size)
    return img_to_array(img) / 255.0  # Normalize pixel values


# ---- Load every image once and drop the rows whose image is missing ----
def _load_existing_images(image_names):
    """Load each named image a single time and keep only those that were found.

    Returns ``(images, kept_indices)``: the stacked image array and the
    positions in ``image_names`` that produced an image, so the matching
    numeric and target rows can be selected with the same indices.

    Raises ``ValueError`` when not a single image could be loaded, because the
    model cannot be trained or evaluated on an empty array.
    """
    kept_indices, images = [], []
    for position, image_name in enumerate(image_names):
        img = load_and_preprocess_image(image_name)
        if img is not None:
            kept_indices.append(position)
            images.append(img)
    if not images:
        raise ValueError(
            "No image files could be loaded; check image_folder and the image names in the CSV."
        )
    return np.array(images), kept_indices


# Each image is decoded exactly once; the same index list then keeps the
# numeric features and targets aligned with the surviving images.
X_img_train_processed, valid_indices = _load_existing_images(X_img_train)
X_n_train_filtered = X_n_train_scaled[valid_indices]
y_train_filtered = y_train[valid_indices]

X_img_test_processed, valid_indices_test = _load_existing_images(X_img_test)
X_n_test_filtered = X_n_test_scaled[valid_indices_test]
y_test_filtered = y_test[valid_indices_test]

# ---- Define CNN for Image Data ----
image_input = Input(shape=(img_size[0], img_size[1], 3), name="Image_Input")

# Two conv + max-pool stages, flattened into a 64-unit dense embedding.
x = Conv2D(32, (3, 3), activation='relu', padding='same')(image_input)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2))(x)
x = Flatten()(x)

cnn_output = Dense(64, activation='relu')(x)

# ---- Define DNN for Numerical Data ----
numerical_input = Input(shape=(len(numerical_features),), name="Numerical_Input")

# Named `num_branch` rather than `y` so the target array `y` loaded earlier in
# this cell is not silently replaced by a Keras tensor.
num_branch = Dense(32, activation='relu')(numerical_input)
num_branch = Dense(16, activation='relu')(num_branch)

# ---- Merge CNN & DNN Outputs ----
merged = Concatenate()([cnn_output, num_branch])
final_output = Dense(1, activation='linear')(merged)  # Linear activation for regression

# ---- Build & Compile Model ----
# Input order is [image, numerical]; fit/predict must pass arrays in that order.
# 'mse' is tracked as a metric in addition to being the loss.
model = Model(inputs=[image_input, numerical_input], outputs=final_output)
model.compile(optimizer='adam', loss='mse', metrics=['mae', 'mse'])

# ---- Train Model ----
# Inputs are listed as [images, numeric features], matching the model's input order.
fit_inputs = [X_img_train_processed, X_n_train_filtered]
holdout_inputs = [X_img_test_processed, X_n_test_filtered]

# The held-out split is also used as validation data during fitting.
history = model.fit(
    fit_inputs,
    y_train_filtered,
    validation_data=(holdout_inputs, y_test_filtered),
    epochs=10,
    batch_size=32,
    verbose=1,
)

# ---- Evaluate Model ----
y_pred = model.predict(holdout_inputs)
mse = mean_squared_error(y_test_filtered, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test_filtered, y_pred)

print(f"MAE: {mae:.3f}, MSE: {mse:.3f}, RMSE: {rmse:.3f}")

# ---- Plot Training History ----
# Left panel: loss per epoch. Right panel: mean absolute error per epoch.
fig, (loss_ax, mae_ax) = plt.subplots(1, 2, figsize=(10, 5))

loss_ax.plot(history.history['loss'], label='Train Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Loss Over Epochs')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

mae_ax.plot(history.history['mae'], label='Train MAE')
mae_ax.plot(history.history['val_mae'], label='Validation MAE')
mae_ax.set_title('Mean Absolute Error Over Epochs')
mae_ax.set_xlabel('Epochs')
mae_ax.set_ylabel('MAE')
mae_ax.legend()

plt.show()

# ---- Save Model ----
#model.save("hybrid_ph_prediction.h5")
#print("Model saved as 'hybrid_ph_prediction.h5'.")


In [None]:
# Loss values recorded for the last epoch of the `history` object currently in memory.
final_train_loss, final_val_loss = (
    history.history[key][-1] for key in ('loss', 'val_loss')
)

print(f"Final Training Loss: {final_train_loss:.4f}")
print(f"Final Validation Loss: {final_val_loss:.4f}")


In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Dense, Flatten, Conv2D, MaxPooling2D, Concatenate
)
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

# ---- Load Dataset ----
image_folder = "/content/drive/MyDrive/fishpond_dataset/images"

df = pd.read_csv('/content/drive/MyDrive/fishpond_dataset/pond_dataset.csv',
                 encoding='unicode_escape')

# ---- Separate Features ----
image_column = "images"
target_columns = ["pH", "TDS"]

# A column that is being predicted must not also be fed in as an input.
# With "TDS" on both sides the TDS head can simply copy its (scaled) input,
# which makes its reported error meaningless, so any target column is
# removed from the candidate input list.
candidate_features = ["Temp", "TDS"]
numerical_features = [col for col in candidate_features if col not in target_columns]

X_numerical = df[numerical_features].values  # Extract numerical data
image_paths = df[image_column].values  # Extract image filenames
y = df[target_columns].values  # Extract targets; columns follow target_columns order (pH, TDS)

# ---- Train-Test Split ----
X_n_train, X_n_test, X_img_train, X_img_test, y_train, y_test = train_test_split(
    X_numerical, image_paths, y, test_size=0.2, random_state=42
)

# ---- Scale Numerical Features ----
# Fitted on the training rows only; the test rows reuse the same statistics.
scaler = StandardScaler()
X_n_train_scaled = scaler.fit_transform(X_n_train)
X_n_test_scaled = scaler.transform(X_n_test)

# ---- Load & Process Images ----
img_size = (128, 128)  # Image size for CNN input

def load_and_preprocess_image(image_name, folder=None, size=None):
    """Load ``<image_name>.jpg``, resize it and divide the pixel values by 255.

    Parameters
    ----------
    image_name : Any
        File stem of the image. It is converted with ``str()`` and ``".jpg"``
        is appended.
    folder : str, optional
        Directory to look in. Defaults to the module-level ``image_folder``.
    size : tuple, optional
        ``target_size`` handed to ``load_img``. Defaults to the module-level
        ``img_size``.

    Returns
    -------
    numpy.ndarray or None
        ``img_to_array(img) / 255.0`` for the resized image, or ``None`` after
        printing a warning when the file does not exist.
    """
    folder = image_folder if folder is None else folder
    size = img_size if size is None else size
    img_path = os.path.join(folder, str(image_name) + ".jpg")

    # isfile() rather than exists(): a directory that happens to be named
    # "<name>.jpg" must be reported as missing instead of failing in load_img.
    if not os.path.isfile(img_path):
        print(f"Warning: Image file not found: {img_path}")
        return None

    img = load_img(img_path, target_size=size)
    return img_to_array(img) / 255.0  # Normalize pixel values

# ---- Load every image once and drop the rows whose image is missing ----
def _load_existing_images(image_names):
    """Load each named image a single time and keep only those that were found.

    Returns ``(images, kept_indices)``: the stacked image array and the
    positions in ``image_names`` that produced an image, so the matching
    numeric and target rows can be selected with the same indices.

    Raises ``ValueError`` when not a single image could be loaded, because the
    model cannot be trained or evaluated on an empty array.
    """
    kept_indices, images = [], []
    for position, image_name in enumerate(image_names):
        img = load_and_preprocess_image(image_name)
        if img is not None:
            kept_indices.append(position)
            images.append(img)
    if not images:
        raise ValueError(
            "No image files could be loaded; check image_folder and the image names in the CSV."
        )
    return np.array(images), kept_indices


# Each image is decoded exactly once; the same index list then keeps the
# numeric features and both target columns aligned with the surviving images.
X_img_train_processed, valid_indices = _load_existing_images(X_img_train)
X_n_train_filtered = X_n_train_scaled[valid_indices]
y_train_filtered = y_train[valid_indices]

X_img_test_processed, valid_indices_test = _load_existing_images(X_img_test)
X_n_test_filtered = X_n_test_scaled[valid_indices_test]
y_test_filtered = y_test[valid_indices_test]

# ---- Define CNN for Image Data ----
image_input = Input(shape=(img_size[0], img_size[1], 3), name="Image_Input")

# Two conv + max-pool stages, flattened into a 64-unit dense embedding.
x = Conv2D(32, (3, 3), activation='relu', padding='same')(image_input)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2))(x)
x = Flatten()(x)

cnn_output = Dense(64, activation='relu')(x)

# ---- Define DNN for Numerical Data ----
numerical_input = Input(shape=(len(numerical_features),), name="Numerical_Input")

# Named `num_branch` rather than `y` so the target array `y` loaded earlier in
# this cell is not silently replaced by a Keras tensor.
num_branch = Dense(32, activation='relu')(numerical_input)
num_branch = Dense(16, activation='relu')(num_branch)

# ---- Merge CNN & DNN Outputs ----
merged = Concatenate()([cnn_output, num_branch])

# Define two outputs: one for pH and one for TDS.
# The layer names become the prefixes of the per-output entries in the
# training history (e.g. 'pH_mae', 'val_TDS_mae').
pH_output = Dense(1, activation='linear', name='pH')(merged)
TDS_output = Dense(1, activation='linear', name='TDS')(merged)

# ---- Build & Compile Model ----
# Output order is [pH, TDS]; targets passed to fit/evaluate must follow it.
# The single 'mse' loss string is applied to both heads.
model = Model(inputs=[image_input, numerical_input], outputs=[pH_output, TDS_output])
model.compile(
    optimizer='adam',
    loss='mse',
    metrics={'pH': ['mae', 'mse'], 'TDS': ['mae', 'mse']},
)

# ---- Train Model ----
# Inputs are [images, numeric features]; targets are one vector per output
# head, taken from column 0 (pH) and column 1 (TDS) of the target arrays.
train_inputs = [X_img_train_processed, X_n_train_filtered]
test_inputs = [X_img_test_processed, X_n_test_filtered]
train_targets = [y_train_filtered[:, 0], y_train_filtered[:, 1]]
test_targets = [y_test_filtered[:, 0], y_test_filtered[:, 1]]

history = model.fit(
    train_inputs,
    train_targets,
    validation_data=(test_inputs, test_targets),
    epochs=25,
    batch_size=32,
    verbose=1,
)

# ---- Evaluate Model ----
# predict() yields one prediction array per output head: index 0 is pH, index 1 is TDS.
y_pred = model.predict(test_inputs)
pH_pred = y_pred[0]
TDS_pred = y_pred[1]

# Mean absolute error of each head against its own target column.
pH_mae = mean_absolute_error(test_targets[0], pH_pred)
TDS_mae = mean_absolute_error(test_targets[1], TDS_pred)

print(f"pH MAE: {pH_mae:.3f}, TDS MAE: {TDS_mae:.3f}")

# ---- Plot Training History ----
def _per_output_loss(history_log, output_name, prefix=""):
    """Return the per-epoch loss series of one named model output.

    The series is looked up as ``<prefix><output_name>_loss``. If that key is
    absent, the output's ``<prefix><output_name>_mse`` metric is used instead;
    the model is compiled with ``loss='mse'`` and an ``'mse'`` metric per
    output, so both describe the same quantity.
    """
    loss_key = f"{prefix}{output_name}_loss"
    if loss_key in history_log:
        return history_log[loss_key]
    return history_log[f"{prefix}{output_name}_mse"]


plt.figure(figsize=(12, 6))

# Plot pH loss. The combined 'loss' entry is the total over both heads, so
# each panel uses its own head's series instead of the shared total.
plt.subplot(1, 2, 1)
plt.plot(_per_output_loss(history.history, 'pH'), label='Train Loss (pH)', color='blue')
plt.plot(_per_output_loss(history.history, 'pH', prefix='val_'), label='Validation Loss (pH)', color='orange')
plt.title('pH Loss Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Plot TDS loss
plt.subplot(1, 2, 2)
plt.plot(_per_output_loss(history.history, 'TDS'), label='Train Loss (TDS)', color='green')
plt.plot(_per_output_loss(history.history, 'TDS', prefix='val_'), label='Validation Loss (TDS)', color='red')
plt.title('TDS Loss Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()

# ---- Plot MAE History ----
# One panel per output head: (output name, train colour, validation colour).
mae_panels = [
    ('pH', 'blue', 'orange'),
    ('TDS', 'green', 'red'),
]

plt.figure(figsize=(12, 6))
for position, (output_name, train_color, val_color) in enumerate(mae_panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[f'{output_name}_mae'],
             label=f'Train MAE ({output_name})', color=train_color)
    plt.plot(history.history[f'val_{output_name}_mae'],
             label=f'Validation MAE ({output_name})', color=val_color)
    plt.title(f'{output_name} MAE Over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('MAE')
    plt.legend()

plt.tight_layout()
plt.show()


In [None]:
# Evaluate model on test data
# return_dict=True gives the results keyed by metric name, so this cell does
# not depend on how many values evaluate() returns or in which order.
eval_results = model.evaluate(
    [X_img_test_processed, X_n_test_filtered],
    [y_test_filtered[:, 0], y_test_filtered[:, 1]],
    verbose=1,
    return_dict=True,
)

final_mse = eval_results['loss']  # combined loss over both output heads
pH_mse, pH_mae = eval_results['pH_mse'], eval_results['pH_mae']
TDS_mse, TDS_mae = eval_results['TDS_mse'], eval_results['TDS_mae']

# Kept only so any later cell that still reads these names keeps working.
# They hold the per-output MSE metric, not a variance.
pH_var, TDS_var = pH_mse, TDS_mse

print(f"Final pH MSE: {pH_mse:.4f}, Final pH MAE: {pH_mae:.4f}")
print(f"Final TDS MSE: {TDS_mse:.4f}, Final TDS MAE: {TDS_mae:.4f}")