In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
# Dataset locations. Every entry other than `base_dir` keeps its leading slash
# because full paths are built by plain concatenation (`base_dir + <entry>`).
base_dir = '../data/planttraits2024'

# Image directories
train_images_path = '/train_images'
test_images_path = '/test_images'

# Tabular files
train_data = '/train.csv'
test_data = '/test.csv'
data_dict = '/target_name_meta.tsv'

In [3]:
# Read the tab-separated trait dictionary, then the train and test tables.
df_dict = pd.read_csv(f'{base_dir}{data_dict}', sep='\t')
df_train = pd.read_csv(f'{base_dir}{train_data}')
df_test = pd.read_csv(f'{base_dir}{test_data}')

In [4]:
# Print "<first column>: <second column>" for every row of the trait dictionary.
# The underlying array is materialised once instead of once per row.
for dict_row in df_dict.values:
    trait_code, trait_description = dict_row[0], dict_row[1]
    print(f'{trait_code}: {trait_description}')

X4: Stem specific density (SSD) or wood density (stem dry mass per stem fresh volume) 
X11: Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA) 
X18: Plant height 
X26: Seed dry mass 
X50: Leaf nitrogen (N) content per leaf area 
X3112: Leaf area (in case of compound leaves: leaf, undefined if petiole in- or excluded) 


In [5]:
# List the entries of the training and test image directories.
train_images = os.listdir(base_dir+train_images_path)
# Fixed: this used to list `train_images_path` a second time, so `test_images`
# silently contained the training file names.
test_images = os.listdir(base_dir+test_images_path)

In [6]:
# Number of entries returned by os.listdir for the training image directory.
len(train_images)

55489

## CNN

In [7]:
# Columns of the training table used as the prediction targets
# (one `<trait>_mean` column per trait code).
target_vars = [
    'X4_mean',
    'X11_mean',
    'X18_mean',
    'X26_mean',
    'X50_mean',
    'X3112_mean',
]

# Target frame: the six columns above, in df_train row order.
y = df_train[target_vars]

In [12]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import os
import glob
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.callbacks import Callback
from PIL import ImageOps

# Define a custom callback to print metrics at the end of each epoch
class PrintEpochMetrics(Callback):
    """Keras callback that prints a one-line loss/MAE summary after each epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Print training and validation loss/MAE for the epoch that just ended.

        Args:
            epoch: Epoch index supplied by Keras; printed as ``epoch + 1``.
            logs: Mapping of metric name to value for the epoch. May be ``None``
                (the declared default), in which case every metric is reported
                as ``nan``.
        """
        # The signature allows logs=None, so guard before calling .get().
        logs = logs or {}

        # Report a missing metric as NaN rather than 0: a printed 0.0000 is
        # indistinguishable from a perfect score.
        missing = float('nan')
        loss = logs.get('loss', missing)
        mae = logs.get('mae', missing)
        val_loss = logs.get('val_loss', missing)
        val_mae = logs.get('val_mae', missing)

        print(f"Epoch {epoch + 1}: Loss={loss:.4f}, MAE={mae:.4f}, "
              f"Val Loss={val_loss:.4f}, Val MAE={val_mae:.4f}")

print('starting')

# --- Hyper-parameters -------------------------------------------------------
EPOCHS = 10           # number of training epochs; tune as needed
BATCH_SIZE = 64       # samples per gradient step
NUM_CHANNELS = 3      # RGB input is assumed
IMG_SIZE = (512, 512)  # size every image is resized to before training

# Data loading and preprocessing
def load_data(image_folder, labels, img_size, image_ids=None):
    """Load JPEG images from a folder into one normalized float32 array.

    Args:
        image_folder: Directory that contains the ``*.jpeg`` files.
        labels: Label rows for the images; returned unchanged.
        img_size: ``(width, height)`` tuple handed to ``PIL.Image.resize``.
        image_ids: Optional iterable of file stems. When given, image ``k`` is
            read from ``<image_folder>/<image_ids[k]>.jpeg`` so that the
            returned images are in the same order as ``labels``. When omitted,
            every ``*.jpeg`` in the folder is loaded in sorted file-name order
            (deterministic, but NOT guaranteed to match the label order).

    Returns:
        ``(images, labels)`` where ``images`` is a float32 array of shape
        ``(n, height, width, 3)`` with pixel values divided by 255.

    Raises:
        ValueError: If the number of images differs from ``len(labels)``.
        FileNotFoundError: Propagated from ``load_img`` when a file named by
            ``image_ids`` does not exist.

    Note:
        All images are held in memory at once. At 512x512x3 float32 each image
        takes 3 MiB, so ~55k images need roughly 160 GiB; lower ``img_size``
        or stream batches from disk if that does not fit.
    """
    if image_ids is None:
        # glob returns files in arbitrary order; sort so runs are repeatable.
        image_paths = sorted(glob.glob(os.path.join(image_folder, '*.jpeg')))
    else:
        image_paths = [os.path.join(image_folder, f'{image_id}.jpeg')
                       for image_id in image_ids]

    total = len(image_paths)
    if labels is not None and len(labels) != total:
        raise ValueError(
            f'Found {total} images but {len(labels)} label rows; '
            'images and labels must correspond one-to-one.')

    # Pre-allocate the output instead of growing a list and copying it with
    # np.array(), which would briefly need twice the memory.
    width, height = img_size
    # load_img defaults to color_mode='rgb', hence 3 channels.
    images = np.empty((total, height, width, 3), dtype=np.float32)

    # Load and resize images
    for i, path in enumerate(image_paths):
        img = load_img(path)
#         img = ImageOps.grayscale(img)
        img = img.resize(img_size)
        images[i] = img_to_array(img) / 255.0  # Normalize to [0, 1]
        print(f'{i + 1} of {total}', end='\r')

    return images, labels

print('data loading')
# Load images and labels
image_folder = base_dir+train_images_path+"/"  # Your folder containing images
y_train = df_train[target_vars]

# Ensure images and labels are in the same order: load the images row by row
# in df_train order so that images[k] belongs to label row k.
# NOTE(review): assumes train.csv has an `id` column whose values are the image
# file stems (`<id>.jpeg`) and that it is read as integers/strings — confirm
# against the dataset description.
images, labels = load_data(image_folder, y_train, IMG_SIZE,
                           image_ids=df_train['id'])

# Split data into training and validation sets
x_train, x_val, y_train, y_val = train_test_split(images, labels, test_size=0.2, random_state=42)

# Define the CNN model
class SimpleCNN(keras.Model):
    """Small CNN: two conv + max-pool stages, then a dense head with 6 linear outputs."""

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor
        self.conv1 = layers.Conv2D(filters=32, kernel_size=(3, 3),
                                   activation='relu', padding='same')
        self.pool1 = layers.MaxPooling2D(pool_size=(2, 2))
        self.conv2 = layers.Conv2D(filters=64, kernel_size=(3, 3),
                                   activation='relu', padding='same')
        self.pool2 = layers.MaxPooling2D(pool_size=(2, 2))
        # Regression head
        self.flatten = layers.Flatten()
        self.dense1 = layers.Dense(units=128, activation='relu')
        self.dense2 = layers.Dense(units=6, activation='linear')  # one output per target

    def call(self, inputs):
        """Forward pass: conv1 -> pool1 -> conv2 -> pool2 -> flatten -> dense1 -> dense2."""
        hidden = inputs
        for stage in (self.conv1, self.pool1,
                      self.conv2, self.pool2,
                      self.flatten, self.dense1):
            hidden = stage(hidden)
        return self.dense2(hidden)

# --- Build -------------------------------------------------------------------
print(' Initialize the model')
model = SimpleCNN()

print('Compile the model')
model.compile(loss='mse', optimizer='adam', metrics=['mae'])

# --- Train -------------------------------------------------------------------
print('Training with the custom callback')
model.fit(
    x=x_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=[PrintEpochMetrics()],
    validation_data=(x_val, y_val),
)

# --- Evaluate on the held-out validation split ------------------------------
print('Predict on the validation set')
y_pred = model.predict(x_val)

print('Calculate R² score')
r2 = r2_score(y_val, y_pred)

print('Calculate MSE and MAE')
mse, mae = mean_squared_error(y_val, y_pred), mean_absolute_error(y_val, y_pred)

# Report all three metrics rounded to two decimals.
print(f"R² Score: {r2:.2f}")
print(f"Mean Squared Error: {mse:.2f}")
print(f"Mean Absolute Error: {mae:.2f}")


starting
data loading
7806 of 55489

: 

aa