In [None]:
import pandas as pd
import numpy as np
import os
import torch
from torchvision import transforms
from PIL import Image
from sklearn.svm import SVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from sklearn.preprocessing import StandardScaler, MinMaxScaler, normalize
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error
from matplotlib import pyplot as plt

In [None]:
"""
Get data
"""
def loader(dataset_locs: str | list[str]) -> tuple[np.ndarray, np.ndarray]:
    """
    Load dataset
    Args:
        dataset_locs: Location of the CSV file containing information regarding the dataset.
    Return:
        X: (n, 4) data nparray with columns average red, average green, average blue, and lux in moisture
        y: (n,) label nparray with column moisture
    """
    X, y = [], []

    transform = transforms.ToTensor()

    if isinstance(dataset_locs, str):
        dataset_locs = [dataset_locs]
    for dataset_loc in dataset_locs:
        dataset = pd.read_csv(dataset_loc)

        paths = dataset.iloc[:, 2] # Get image paths
        moistures = dataset.iloc[:, 3] # Get moisture levels
        luxs = dataset.iloc[:, 4] # Get lux levels

        # Get data
        n = len(paths)
        for i in range(n):
            path = paths[i]
            moitsure = moistures[i]
            lux = luxs[i]

            # Get image
            if not os.path.exists(path):
                continue
            image = Image.open(path)

            # Crop image to center 120x120 px
            height, width = image.size
            left = (width - 120) / 2
            top = (height - 120) / 2
            right = (width + 120) / 2
            bottom = (height + 120) / 2
            image = image.crop((left, top, right, bottom))

            # Get average RGB
            image_tensor = transform(image)
            avg_rgb = (torch.mean(image_tensor, dim=(1, 2)) * 255).numpy()
            avg_r, avg_g, avg_b = avg_rgb
            
            datapoint = [avg_r, avg_g, avg_b, lux]
            X.append(datapoint)
            y.append(moitsure)
    
    X, y = np.array(X), np.array(y)
    return X, y

# Dataset CSVs fed to loader(). Alternative selections, kept for quick switching:
#   ['Data_i11_ds/dataset_i11_ds.csv']
#   ['Data_i11_is/dataset_i11_is.csv']
#   ['Data_fpbicc/Dataset_fpbicc_filtered.csv']
paths = [
    'Data_i11_ds/dataset_i11_ds.csv',
    'Data_i11_is/dataset_i11_is.csv',
]
X, y = loader(paths)

In [None]:
"""
Preprocess data
"""
# Module-level scalers: preprocess() fits them in place, so the fitted
# instances remain accessible after it has run.
scaler_X, scaler_y = MinMaxScaler(), MinMaxScaler()

def preprocess(X_train, X_test, y_train, y_test):
    """
    Scale features and labels with the module-level scaler_X / scaler_y.

    Both scalers are fitted on the training split only and then applied to the
    test split. Labels are reshaped to a single column for the scaler and
    flattened back to 1-D before being returned.

    Return:
        Scaled X_train, X_test, y_train, y_test (labels as 1-D arrays).
    """
    # Features: fit on train, reuse the fitted parameters for test.
    X_train_scaled = scaler_X.fit_transform(X_train)
    X_test_scaled = scaler_X.transform(X_test)

    # Labels: the scaler is given a 2-D (n, 1) column here.
    y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1))
    y_test_scaled = scaler_y.transform(y_test.reshape(-1, 1))

    return X_train_scaled, X_test_scaled, y_train_scaled.ravel(), y_test_scaled.ravel()

# Fixed seed so a fresh Restart & Run All reproduces the same 80/20 split
# (and therefore the same downstream metrics).
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SPLIT_SEED)
# Scale features and labels; the scalers are fitted on the training split only.
X_train, X_test, y_train, y_test = preprocess(X_train, X_test, y_train, y_test)

In [None]:
# One panel per colour channel: feature value on the x-axis, moisture label on
# the y-axis, matching the axis labels set below.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3)
ax1.scatter(X_train[:, 0], y_train, c='red')
ax1.set_xlabel("avg r")
ax1.set_ylabel("moisture")
ax2.scatter(X_train[:, 1], y_train, c='green')
ax2.set_xlabel("avg g")
ax3.scatter(X_train[:, 2], y_train, c='blue')
ax3.set_xlabel("avg b")

In [None]:
"""
SVR model
"""
# Support-vector regressor with an RBF kernel, fitted on the training split.
svr = SVR(kernel='rbf')
svr.fit(X=X_train, y=y_train)

In [None]:
"""
SVR test
"""
# Evaluate on the held-out split. Both metrics are computed on whatever scale
# y_test is in at this point.
y_pred = svr.predict(X_test)
rmse = root_mean_squared_error(y_test, y_pred)
r2 = svr.score(X_test, y_test)  # coefficient of determination of the SVR on the test split

print('SVR r2: {}'.format(r2))
print('SVR Root Mean Squared Error: {}'.format(rmse))

# n = y_pred.shape[0]
# for i in range(n):
#     pred = y_pred[i]
#     actual = y_test[i]
#     print(f'Pred: {pred}, Actual: {actual}')