# Characterization of the Inverse Problem in Color Reproduction Using Random Forest Regression (RFR) and Support Vector Regression (SVR)

In [1]:
import pandas as pd
from sklearn.metrics import mean_squared_error
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.model_selection import KFold

In [2]:
# Remind the user which spreadsheets must be present in the working directory.
print('In your folder, there should be a TrainingRGB.xls, TrainingXYZ.xls, ValidationRGB.xls, and ValidationXYZ.xls.')

# Read the four workbooks in a fixed order; header=None means no row is
# treated as column names, so columns end up labelled 0, 1, 2, ...
TrainingRGB, TrainingXYZ, ValidationRGB, ValidationXYZ = (
    pd.read_excel(workbook, header=None)
    for workbook in (
        'TrainingRGB.xls',
        'TrainingXYZ.xls',
        'ValidationRGB.xls',
        'ValidationXYZ.xls',
    )
)

In your folder, there should be a TrainingRGB.xls, TrainingXYZ.xls, ValidationRGB.xls, and ValidationXYZ.xls.


# Random Forest Regression (RFR)

In [3]:
def custom_gradient_boosting_rfr(rf_regressor, samples, labels, hyperparameters, k=5,
                                 tolerance=1, max_iter=None, random_state=None):
    """Tune a regressor by random search scored with K-fold cross-validation.

    Despite its name, this routine does not perform gradient boosting. Each
    round draws one random integer per hyperparameter, scores that candidate
    by its mean MSE over ``k`` folds and remembers the best candidate seen.
    The search stops as soon as the mean MSEs of two consecutive candidates
    differ by less than ``tolerance`` (or when ``max_iter`` rounds have run).

    Parameters
    ----------
    rf_regressor : estimator
        Object exposing ``set_params``, ``fit`` and ``predict``. It is
        reconfigured and refitted in place on every round.
    samples, labels : pandas objects
        Training inputs and targets; rows are selected with ``.iloc``.
    hyperparameters : dict
        Maps a parameter name to an inclusive integer range ``[low, high]``.
    k : int, default 5
        Number of cross-validation folds.
    tolerance : float, default 1
        Stop when ``abs(previous_mse - current_mse)`` drops below this value.
    max_iter : int or None, default None
        Upper bound on the number of candidates evaluated. ``None`` keeps the
        original unbounded behaviour, which may run for a very long time when
        consecutive scores rarely land within ``tolerance`` of each other.
    random_state : int or None, default None
        Seed for the hyperparameter sampling. ``None`` uses NumPy's global
        generator, exactly as before.

    Returns
    -------
    tuple
        ``(best_hyperparameters, best_mse, min_gradient_step)`` where
        ``min_gradient_step`` is the smallest difference observed between
        consecutive mean MSEs (``inf`` if only one round was run).

    Raises
    ------
    ValueError
        If ``max_iter`` is given but smaller than 1.
    """
    if max_iter is not None and max_iter < 1:
        raise ValueError("max_iter must be a positive integer or None")

    # Fall back to NumPy's global generator so unseeded calls behave as before.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    best_mse = float('inf')
    best_hyperparameters = {}
    prev_mse = float('inf')
    min_gradient_step = float('inf')

    # The splitter is seeded, so the folds are the same on every round;
    # compute them once instead of re-splitting for each candidate.
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    folds = list(kf.split(samples))

    rounds = 0
    while max_iter is None or rounds < max_iter:
        rounds += 1

        # Draw one integer per hyperparameter; "+ 1" makes the upper bound inclusive.
        sampled_params = {
            param: int(rng.randint(param_range[0], param_range[1] + 1))
            for param, param_range in hyperparameters.items()
        }
        rf_regressor.set_params(**sampled_params)

        # Score the candidate by its mean MSE across the folds.
        mse_folds = []
        for train_index, val_index in folds:
            X_train_fold, X_val_fold = samples.iloc[train_index], samples.iloc[val_index]
            y_train_fold, y_val_fold = labels.iloc[train_index], labels.iloc[val_index]

            rf_regressor.fit(X_train_fold, y_train_fold)
            predictions = rf_regressor.predict(X_val_fold)
            mse_folds.append(mean_squared_error(y_val_fold, predictions))

        mse_mean = np.mean(mse_folds)

        # Keep the best candidate seen so far.
        if mse_mean < best_mse:
            best_mse = mse_mean
            best_hyperparameters = sampled_params

        # "Gradient step" here is simply the change in score between two
        # consecutive random candidates; it is inf on the first round.
        gradient_step = abs(prev_mse - mse_mean)
        if gradient_step < min_gradient_step:
            min_gradient_step = gradient_step

        # Stopping rule: two consecutive candidates scored almost the same.
        if gradient_step < tolerance:
            break

        prev_mse = mse_mean

    return best_hyperparameters, best_mse, min_gradient_step


In [9]:
# Validation-set predictions, keyed by channel name.
predictions_dict = {}

# Number of independent random-search restarts per channel (asked interactively).
iterations = int(input('How many iterations do you want? '))
if iterations < 1:
    # With zero restarts no hyperparameters are ever selected and the final
    # model could not be built, so fail early with a clear message.
    raise ValueError(f"The number of iterations must be at least 1, got {iterations}.")

channels = ['Red', 'Green', 'Blue']
best_results = {}

# Search space: inclusive integer [low, high] range per hyperparameter.
# It is the same for every channel and restart, so it is defined once.
hyperparameters = {
    'n_estimators': [10, 100],      # 10 to 100
    'max_depth': [10, 100],         # 10 to 100
    'min_samples_split': [2, 10],   # 2 to 10
    'min_samples_leaf': [1, 10]     # 1 to 10
}

# Inputs are the first three XYZ columns; they do not depend on the channel.
x_train = TrainingXYZ.iloc[:, :3]
x_val = ValidationXYZ.iloc[:, :3]

for i, channel in enumerate(channels):
    # Targets for this channel: column i of the RGB sheets.
    y_train = TrainingRGB.iloc[:, i]
    y_val = ValidationRGB.iloc[:, i]  # kept for later inspection; not scored in this cell

    best_overall_hyperparameters = None
    best_overall_mse = float('inf')
    best_overall_min_gradient_step = None

    for j in range(iterations):
        print(f"Iteration {j+1}/{iterations} for {channel} channel")

        # Fresh estimator per restart; the search reconfigures it in place.
        random_forest = RandomForestRegressor(random_state=42)

        # Run one random search and get its best cross-validated candidate.
        best_hyperparameters, best_mse, min_gradient_step = custom_gradient_boosting_rfr(
            random_forest, x_train, y_train, hyperparameters, k=5
        )

        # Keep the best candidate across all restarts.
        if best_mse < best_overall_mse:
            best_overall_hyperparameters = best_hyperparameters
            best_overall_mse = best_mse
            best_overall_min_gradient_step = min_gradient_step

    # Refit on the full training set with the winning hyperparameters.
    best_rf = RandomForestRegressor(**best_overall_hyperparameters, random_state=42)
    best_rf.fit(x_train, y_train)

    # Predict the validation set and round the predicted channel values.
    predictions_RFR = best_rf.predict(x_val)
    predictions_dict[channel] = predictions_RFR.round()

    # Record the winning configuration; the MSE is the cross-validated one.
    best_results[channel] = {
        'Best Hyperparameters': best_overall_hyperparameters,
        'Mean Squared Error': best_overall_mse,
        'Minimum Gradient Step': best_overall_min_gradient_step
    }

# One column of predictions per channel.
predictions_df = pd.DataFrame(predictions_dict)

# Persist the predictions as an Excel workbook.
predictions_df.to_excel('Predictions_RFR.xlsx', index=False)

# Report the best configuration found for each channel.
for channel, result in best_results.items():
    print(f"\nBest results for RFR {channel} channel:")
    print("Best RFR Hyperparameters:", result['Best Hyperparameters'])
    print("Mean Squared Error for each RFR RGB channel:", result['Mean Squared Error'])

print("File 'Predictions_RFR.xlsx' successfully created.")


How many iterations do you want? 2
Iteration 1/2 for Red channel
Iteration 2/2 for Red channel
Iteration 1/2 for Green channel
Iteration 2/2 for Green channel
Iteration 1/2 for Blue channel
Iteration 2/2 for Blue channel

Best results for RFR Red channel:
Best RFR Hyperparameters: {'n_estimators': 83, 'max_depth': 20, 'min_samples_split': 4, 'min_samples_leaf': 1}
Mean Squared Error for each RFR RGB channel: 50.10007569775935

Best results for RFR Green channel:
Best RFR Hyperparameters: {'n_estimators': 98, 'max_depth': 41, 'min_samples_split': 3, 'min_samples_leaf': 3}
Mean Squared Error for each RFR RGB channel: 17.18352938393928

Best results for RFR Blue channel:
Best RFR Hyperparameters: {'n_estimators': 92, 'max_depth': 12, 'min_samples_split': 8, 'min_samples_leaf': 10}
Mean Squared Error for each RFR RGB channel: 6.383870039085738
File 'Predictions_RFR.xlsx' successfully created.


# Support Vector Regression (SVR)

In [3]:
def custom_gradient_boosting_svr(svr_regressor, samples, labels, param_ranges, k=5,
                                 tolerance=1, max_iter=None, random_state=None):
    """Tune a regressor by random search scored with K-fold cross-validation.

    Despite its name, this routine does not perform gradient boosting. Each
    round draws one uniform random value per numeric hyperparameter plus one
    kernel from the list of candidates, scores that configuration by its mean
    MSE over ``k`` folds and remembers the best one seen. The search stops as
    soon as the mean MSEs of two consecutive candidates differ by less than
    ``tolerance`` (or when ``max_iter`` rounds have run).

    Parameters
    ----------
    svr_regressor : estimator
        Object exposing ``set_params``, ``fit`` and ``predict``. It is
        reconfigured and refitted in place on every round.
    samples, labels : pandas objects
        Training inputs and targets; rows are selected with ``.iloc``.
    param_ranges : dict
        Maps a parameter name to a ``[low, high]`` range sampled uniformly.
        The optional ``'kernel'`` entry is instead a list of candidates from
        which one is picked at random.
    k : int, default 5
        Number of cross-validation folds.
    tolerance : float, default 1
        Stop when ``abs(previous_mse - current_mse)`` drops below this value.
    max_iter : int or None, default None
        Upper bound on the number of candidates evaluated. ``None`` keeps the
        original unbounded behaviour, which may run for a very long time when
        consecutive scores rarely land within ``tolerance`` of each other.
    random_state : int or None, default None
        Seed for the hyperparameter sampling. ``None`` uses NumPy's global
        generator, exactly as before.

    Returns
    -------
    tuple
        ``(best_hyperparameters, best_mse, min_gradient_step)`` where
        ``min_gradient_step`` is the smallest difference observed between
        consecutive mean MSEs (``inf`` if only one round was run).

    Raises
    ------
    ValueError
        If ``max_iter`` is given but smaller than 1.
    """
    if max_iter is not None and max_iter < 1:
        raise ValueError("max_iter must be a positive integer or None")

    # Fall back to NumPy's global generator so unseeded calls behave as before.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    best_mse = float('inf')
    best_hyperparameters = {}
    prev_mse = float('inf')
    min_gradient_step = float('inf')

    # The splitter is seeded, so the folds are the same on every round;
    # compute them once instead of re-splitting for each candidate.
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    folds = list(kf.split(samples))

    rounds = 0
    while max_iter is None or rounds < max_iter:
        rounds += 1

        # Numeric hyperparameters: one uniform draw from each [low, high] range.
        sampled_params = {
            param: float(rng.uniform(param_range[0], param_range[1]))
            for param, param_range in param_ranges.items() if param != 'kernel'
        }

        # Kernel: pick one candidate. NumPy hands back a numpy scalar
        # (e.g. numpy.str_); unwrap it so callers receive a builtin value.
        if 'kernel' in param_ranges:
            kernel = rng.choice(param_ranges['kernel'])
            if isinstance(kernel, np.generic):
                kernel = kernel.item()
            sampled_params['kernel'] = kernel

        svr_regressor.set_params(**sampled_params)

        # Score the candidate by its mean MSE across the folds.
        mse_folds = []
        for train_index, val_index in folds:
            X_train_fold, X_val_fold = samples.iloc[train_index], samples.iloc[val_index]
            y_train_fold, y_val_fold = labels.iloc[train_index], labels.iloc[val_index]

            svr_regressor.fit(X_train_fold, y_train_fold)
            predictions = svr_regressor.predict(X_val_fold)
            mse_folds.append(mean_squared_error(y_val_fold, predictions))

        mse_mean = np.mean(mse_folds)

        # Keep the best candidate seen so far.
        if mse_mean < best_mse:
            best_mse = mse_mean
            best_hyperparameters = sampled_params

        # "Gradient step" here is simply the change in score between two
        # consecutive random candidates; it is inf on the first round.
        gradient_step = abs(prev_mse - mse_mean)
        if gradient_step < min_gradient_step:
            min_gradient_step = gradient_step

        # Stopping rule: two consecutive candidates scored almost the same.
        if gradient_step < tolerance:
            break

        prev_mse = mse_mean

    return best_hyperparameters, best_mse, min_gradient_step

In [4]:
# Validation-set predictions, keyed by channel name.
predictions_dict = {}

# Number of independent random-search restarts per channel (asked interactively).
iterations = int(input('How many iterations do you want? '))
if iterations < 1:
    # With zero restarts no hyperparameters are ever selected and the final
    # model could not be built, so fail early with a clear message.
    raise ValueError(f"The number of iterations must be at least 1, got {iterations}.")

channels = ['Red', 'Green', 'Blue']
best_results = {}

# Search space: [low, high] ranges sampled uniformly, plus the kernel candidates.
# It is the same for every channel and restart, so it is defined once.
param_ranges = {
    'C': [1, 15],         # 1 to 15
    'epsilon': [0.1, 1],  # 0.1 to 1
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']  # candidate kernels
}

# Inputs are the first three XYZ columns; they do not depend on the channel.
x_train = TrainingXYZ.iloc[:, :3]
x_val = ValidationXYZ.iloc[:, :3]

for i, channel in enumerate(channels):
    # Targets for this channel: column i of the RGB sheets.
    y_train = TrainingRGB.iloc[:, i]
    y_val = ValidationRGB.iloc[:, i]  # kept for later inspection; not scored in this cell

    best_overall_hyperparameters = None
    best_overall_mse = float('inf')
    best_overall_min_gradient_step = None

    for j in range(iterations):
        print(f"Iteration {j+1}/{iterations} for {channel} channel")

        # Fresh estimator per restart; the search reconfigures it in place.
        svm_regressor = SVR()

        # Run one random search and get its best cross-validated candidate.
        best_hyperparameters, best_mse, min_gradient_step = custom_gradient_boosting_svr(
            svm_regressor, x_train, y_train, param_ranges, k=5
        )

        # Keep the best candidate across all restarts.
        if best_mse < best_overall_mse:
            best_overall_hyperparameters = best_hyperparameters
            best_overall_mse = best_mse
            best_overall_min_gradient_step = min_gradient_step

    # Refit on the full training set with the winning hyperparameters.
    best_svr = SVR(**best_overall_hyperparameters)
    best_svr.fit(x_train, y_train)

    # Predict the validation set and round the predicted channel values.
    predictions_SVR = best_svr.predict(x_val)
    predictions_dict[channel] = predictions_SVR.round()

    # Record the winning configuration; the MSE is the cross-validated one.
    best_results[channel] = {
        'Best Hyperparameters': best_overall_hyperparameters,
        'Mean Squared Error': best_overall_mse,
        'Minimum Gradient Step': best_overall_min_gradient_step
    }

# One column of predictions per channel.
predictions_df = pd.DataFrame(predictions_dict)

# Persist the predictions as an Excel workbook.
predictions_df.to_excel('Predictions_SVR.xlsx', index=False)

# Report the best configuration found for each channel.
for channel, result in best_results.items():
    print(f"\nBest results for SVR {channel} channel:")
    print("Best SVR Hyperparameters:", result['Best Hyperparameters'])
    print("Mean Squared Error for each SVR RGB channel:", result['Mean Squared Error'])

print("File 'Predictions_SVR.xlsx' successfully created.")


How many iterations do you want? 2
Iteration 1/2 for Red channel
Iteration 2/2 for Red channel
Iteration 1/2 for Green channel
Iteration 2/2 for Green channel
Iteration 1/2 for Blue channel
Iteration 2/2 for Blue channel

Best results for SVR Red channel:
Best SVR Hyperparameters: {'C': 1.0, 'epsilon': 0.46453976670634456, 'kernel': 'linear'}
Mean Squared Error for each SVR RGB channel: 318.35765597526586

Best results for SVR Green channel:
Best SVR Hyperparameters: {'C': 1.0, 'epsilon': 0.24883025887488486, 'kernel': 'rbf'}
Mean Squared Error for each SVR RGB channel: 190.56194140536527

Best results for SVR Blue channel:
Best SVR Hyperparameters: {'C': 1.0, 'epsilon': 0.306728178409453, 'kernel': 'rbf'}
Mean Squared Error for each SVR RGB channel: 78.17538316786737
File 'Predictions_SVR.xlsx' successfully created.
