# Characterization of Inverse Problem in Color Reproduction Using RFR and SVR

In [1]:
import pandas as pd
from sklearn.metrics import mean_squared_error
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

In [2]:
print('In your folder, there should be a TrainingRGB.xls, TrainingXYZ.xls, ValidationRGB.xls, and ValidationXYZ.xls.')

TrainingRGB = pd.read_excel('TrainingRGB.xls', header=None)
TrainingXYZ = pd.read_excel('TrainingXYZ.xls', header=None)

ValidationRGB = pd.read_excel('ValidationRGB.xls', header=None)
ValidationXYZ = pd.read_excel('ValidationXYZ.xls', header=None)

In your folder, there should be a TrainingRGB.xls, TrainingXYZ.xls, ValidationRGB.xls, and ValidationXYZ.xls.


# RFR

In [3]:
def custom_gradient_boosting_rfr(rf_regressor, samples, labels, hyperparameters):
    best_mse = float('inf')
    best_hyperparameters = {}
    prev_mse = float('inf')
    min_gradient_step = float('inf')
    tolerance = 1  # Tolerance for stopping criteria

    while True:
        # Sample hyperparameters from the ranges
        sampled_params = {
            param: np.random.randint(param_range[0], param_range[1] + 1)
            for param, param_range in hyperparameters.items()
        }
        
        rf_regressor.set_params(**sampled_params)
            
        # Train the Random Forest with the sampled hyperparameters
        rf_regressor.fit(samples, labels)
            
        # Predictions
        predictions = rf_regressor.predict(samples)
        mse = mean_squared_error(labels, predictions, multioutput='raw_values')
            
        # Check if this combination of hyperparameters is the best so far
        if np.mean(mse) < best_mse:
            best_mse = np.mean(mse)
            best_hyperparameters = sampled_params
        
        # Calculate gradient step
        gradient_step = abs(prev_mse - np.mean(mse))
        if gradient_step < min_gradient_step:
            min_gradient_step = gradient_step
        
        # Check stopping criteria
        if gradient_step < tolerance:
            break
            
        prev_mse = np.mean(mse)
    
    return best_hyperparameters, best_mse, min_gradient_step

In [12]:
# Crear diccionario para almacenar las predicciones
predictions_dict = {}

# Obtener el número de iteraciones del usuario
iterations = int(input('How many iterations do you want? '))

channels = ['Red', 'Green', 'Blue']
best_results = {}

for i, channel in enumerate(channels):
    best_overall_hyperparameters = None
    best_overall_mse = float('inf')
    best_overall_min_gradient_step = None

    for j in range(iterations):
        print(f"Iteration {j+1}/{iterations} for {channel} channel")
        
        x_train = TrainingXYZ.iloc[:, :3]  
        y_train = TrainingRGB.iloc[:, i] 

        x_val = ValidationXYZ.iloc[:, :3] 
        y_val = ValidationRGB.iloc[:, i] 

        # Inicializar Random Forest Regressor
        random_forest = RandomForestRegressor(random_state=42)

        # Definir el espacio de búsqueda de hiperparámetros (rangos)
        hyperparameters = {
            'n_estimators': [10, 10],  # Rango de 10 a 100
            'max_depth': [10, 10],  # Rango de 10 a 100
            'min_samples_split': [2, 2],  # Rango de 2 a 10
            'min_samples_leaf': [1, 10]  # Rango de 1 a 10
        }

        # Explorar hiperparámetros y seleccionar el mejor resultado
        best_hyperparameters, best_mse, min_gradient_step = custom_gradient_boosting_rfr(random_forest, x_train, y_train, hyperparameters)
        
        # Verificar si esta iteración tiene el mejor MSE hasta ahora
        if best_mse < best_overall_mse:
            best_overall_hyperparameters = best_hyperparameters
            best_overall_mse = best_mse
            best_overall_min_gradient_step = min_gradient_step

    # Entrenar Random Forest con los mejores hiperparámetros obtenidos en todas las iteraciones
    best_rf = RandomForestRegressor(**best_overall_hyperparameters, random_state=42)
    best_rf.fit(x_train, y_train)

    # Hacer predicciones en el conjunto de validación
    predictions_RFR = best_rf.predict(x_val)
    predictions_dict[channel] = predictions_RFR.round()
    
    # Guardar los mejores resultados
    best_results[channel] = {
        'Best Hyperparameters': best_overall_hyperparameters,
        'Mean Squared Error': best_overall_mse,
        'Minimum Gradient Step': best_overall_min_gradient_step
    }

# Crear el DataFrame con las predicciones
predictions_df = pd.DataFrame(predictions_dict)

# Guardar el DataFrame en un archivo Excel
predictions_df.to_excel('Predictions_RFR.xlsx', index=False)

# Mostrar los mejores resultados para cada canal
for channel, result in best_results.items():
    print(f"\nBest results for RFR {channel} channel:")
    print("Best RFR Hyperparameters:", result['Best Hyperparameters'])
    print("Mean Squared Error for each RFR RGB channel:", result['Mean Squared Error'])

print("Archivo 'Predictions_RFR.xlsx' creado con éxito.")


How many iterations do you want? 2
Iteration 1/2 for Red channel
Iteration 2/2 for Red channel
Iteration 1/2 for Green channel
Iteration 2/2 for Green channel
Iteration 1/2 for Blue channel
Iteration 2/2 for Blue channel

Best results for RFR Red channel:
Best RFR Hyperparameters: {'n_estimators': 10, 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 2}
Mean Squared Error for each RFR RGB channel: 56.970838632622666

Best results for RFR Green channel:
Best RFR Hyperparameters: {'n_estimators': 10, 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1}
Mean Squared Error for each RFR RGB channel: 15.568856693192258

Best results for RFR Blue channel:
Best RFR Hyperparameters: {'n_estimators': 10, 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 4}
Mean Squared Error for each RFR RGB channel: 3.9090087262706574
Archivo 'Predictions_RFR.xlsx' creado con éxito.


# SVR

In [5]:
def custom_gradient_boosting_svr(svr_regressor, samples, labels, param_ranges):
    best_mse = float('inf')
    best_hyperparameters = {}
    prev_mse = float('inf')
    min_gradient_step = float('inf')
    tolerance = 1  # Tolerance for stopping criteria

    while True:
        # Sample hyperparameters from the ranges
        sampled_params = {
            param: np.random.uniform(param_range[0], param_range[1])
            for param, param_range in param_ranges.items() if param != 'kernel'
        }
        
        # Sample kernel from list of possible kernels
        sampled_params['kernel'] = np.random.choice(param_ranges['kernel'])
        
        svr_regressor.set_params(**sampled_params)
            
        # Train the SVR with the sampled hyperparameters
        svr_regressor.fit(samples, labels)
            
        # Predictions
        predictions = svr_regressor.predict(samples)
        mse = mean_squared_error(labels, predictions)
            
        # Check if this combination of hyperparameters is the best so far
        if mse < best_mse:
            best_mse = mse
            best_hyperparameters = sampled_params
        
        # Calculate gradient step
        gradient_step = abs(prev_mse - mse)
        if gradient_step < min_gradient_step:
            min_gradient_step = gradient_step
        
        # Check stopping criteria
        if gradient_step < tolerance:
            break
            
        prev_mse = mse
    
    return best_hyperparameters, best_mse, min_gradient_step

In [13]:
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

# Crear diccionario para almacenar las predicciones
predictions_dict = {}

# Obtener el número de iteraciones del usuario
iterations = int(input('How many iterations do you want? '))

channels = ['Red', 'Green', 'Blue']
best_results = {}

for i, channel in enumerate(channels):
    best_overall_hyperparameters = None
    best_overall_mse = float('inf')
    best_overall_min_gradient_step = None

    for j in range(iterations):
        print(f"Iteration {j+1}/{iterations} for {channel} channel")
        
        x_train = TrainingXYZ.iloc[:, :3]  
        y_train = TrainingRGB.iloc[:, i] 

        x_val = ValidationXYZ.iloc[:, :3] 
        y_val = ValidationRGB.iloc[:, i] 

        # Inicializar SVR Regressor
        svm_regressor = SVR()

        # Definir el espacio de búsqueda de hiperparámetros (rangos)
        param_ranges = {
            'C': [1, 1],        # Rango de 1 a 15
            'epsilon': [0.1, 1],  # Rango de 0.1 a 1
            'kernel': ['rbf']  # Lista de posibles kernels 'linear', 'poly', 'rbf', 'sigmoid'
        }

        # Explorar hiperparámetros y seleccionar el mejor resultado
        best_hyperparameters, best_mse, min_gradient_step = custom_gradient_boosting_svr(svm_regressor, x_train, y_train, param_ranges)
        
        # Verificar si esta iteración tiene el mejor MSE hasta ahora
        if best_mse < best_overall_mse:
            best_overall_hyperparameters = best_hyperparameters
            best_overall_mse = best_mse
            best_overall_min_gradient_step = min_gradient_step

    # Entrenar SVR con los mejores hiperparámetros obtenidos en todas las iteraciones
    best_svr = SVR(**best_overall_hyperparameters)
    best_svr.fit(x_train, y_train)

    # Hacer predicciones en el conjunto de validación
    predictions_SVR = best_svr.predict(x_val)
    predictions_dict[channel] = predictions_SVR.round()
    
    # Guardar los mejores resultados
    best_results[channel] = {
        'Best Hyperparameters': best_overall_hyperparameters,
        'Mean Squared Error': best_overall_mse,
        'Minimum Gradient Step': best_overall_min_gradient_step
    }

# Crear el DataFrame con las predicciones
predictions_df = pd.DataFrame(predictions_dict)

# Guardar el DataFrame en un archivo Excel
predictions_df.to_excel('Predictions_SVR.xlsx', index=False)

# Mostrar los mejores resultados para cada canal
for channel, result in best_results.items():
    print(f"\nBest results for SVR {channel} channel:")
    print("Best SVR Hyperparameters:", result['Best Hyperparameters'])
    print("Mean Squared Error for each SVR RGB channel:", result['Mean Squared Error'])

print("Archivo 'Predictions_SVR.xlsx' creado con éxito.")


How many iterations do you want? 2
Iteration 1/2 for Red channel
Iteration 2/2 for Red channel
Iteration 1/2 for Green channel
Iteration 2/2 for Green channel
Iteration 1/2 for Blue channel
Iteration 2/2 for Blue channel

Best results for SVR Red channel:
Best SVR Hyperparameters: {'C': 1.0, 'epsilon': 0.16038953693371025, 'kernel': 'rbf'}
Mean Squared Error for each SVR RGB channel: 415.5912218083325

Best results for SVR Green channel:
Best SVR Hyperparameters: {'C': 1.0, 'epsilon': 0.6626809393615963, 'kernel': 'rbf'}
Mean Squared Error for each SVR RGB channel: 163.51941301215356

Best results for SVR Blue channel:
Best SVR Hyperparameters: {'C': 1.0, 'epsilon': 0.6569616220319419, 'kernel': 'rbf'}
Mean Squared Error for each SVR RGB channel: 67.5203282235224
Archivo 'Predictions_SVR.xlsx' creado con éxito.
