<a href="https://colab.research.google.com/github/hsandaver/hsandaver/blob/main/Machine_Learning_Fading_Simulator_Complex_Model_with_Hyperparameters_V4_Added_Feature_Engineering_with_Polynomial_Features-no-autodownload.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# -*- coding: utf-8 -*-
"""
Updated Script with Enhanced Machine Learning Model:
- Replaced Random Forest with XGBoost
- Added Hyperparameter Tuning with GridSearchCV
- Implemented Cross-Validation
- Enhanced Synthetic Data Generation
- Added Feature Engineering with Polynomial Features
"""

import sys
import subprocess
import importlib
import logging

# Configure logging: INFO level and a "LEVEL: message" line format
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

# Requested name -> (importable module name, pip distribution name).
# Several distributions are imported under a different name than the one pip
# installs them by, so the import check and the install need different strings.
_PACKAGE_NAME_MAP = {
    'scikit-image': ('skimage', 'scikit-image'),
    'skimage': ('skimage', 'scikit-image'),
    'Pillow': ('PIL', 'Pillow'),
    'PIL': ('PIL', 'Pillow'),
    'scikit-learn': ('sklearn', 'scikit-learn'),
    'sklearn': ('sklearn', 'scikit-learn'),
}


def install_packages(packages):
    """
    Make sure every package in `packages` is importable, installing it with pip if not.

    Args:
        packages: Iterable of package names. Either the import name or the pip
            distribution name may be given for the packages listed in
            _PACKAGE_NAME_MAP; any other name is used for both purposes.

    Raises:
        subprocess.CalledProcessError: if `pip install` exits with a non-zero status.
    """
    for package in packages:
        if package == 'google.colab':
            continue  # Skip installation as it's already available in Colab
        import_name, pip_name = _PACKAGE_NAME_MAP.get(package, (package, package))
        try:
            importlib.import_module(import_name)
        except ImportError:
            print(f"Installing package: {pip_name}")
            subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name])
            # Let the import system notice the freshly installed distribution.
            importlib.invalidate_caches()
        else:
            logging.info(f"Package '{package}' is already installed.")

# List of required packages, by pip distribution name.
# 'scikit-learn' is the real distribution (the 'sklearn' name on PyPI is a
# deprecated alias), and 'tqdm' is listed because the script imports tqdm.notebook.
required_packages = [
    'scikit-image', 'numpy', 'pandas', 'matplotlib',
    'Pillow', 'scipy', 'scikit-learn', 'ipywidgets', 'xgboost', 'tqdm'
]
install_packages(required_packages)

# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from skimage import color
from skimage.color import deltaE_ciede2000
from sklearn.model_selection import train_test_split, GridSearchCV, KFold, cross_val_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder, PolynomialFeatures, MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
import ipywidgets as widgets
from IPython.display import display, HTML
from ipywidgets import VBox, Layout
from xgboost import XGBRegressor
from sklearn.multioutput import MultiOutputRegressor
from tqdm.notebook import tqdm

# File transfer: use Colab's `files` module when it can be imported, otherwise
# bind `files` to a stand-in object that offers the same two methods.
try:
    from google.colab import files
except ImportError:
    class DummyFiles:
        """Replacement for google.colab.files when Colab is not available."""

        def upload(self):
            """Print a notice and return an empty mapping (no file was uploaded)."""
            notice = "File upload functionality is not implemented in this environment."
            print(notice)
            return {}

        def download(self, filename):
            """Print a notice naming `filename`; nothing is transferred."""
            notice = f"File download functionality is not available. File '{filename}' saved locally."
            print(notice)

    files = DummyFiles()

# -----------------------------
# Step 1: Data Upload and Preprocessing
# -----------------------------

def upload_file(prompt_message, file_types=None):
    """
    Prompt for a file upload and return the name of the first uploaded file.

    Args:
        prompt_message: Text printed before files.upload() is called.
        file_types: Optional allowed extension(s), e.g. '.csv' or
            ('.png', '.jpg'). A single string or any iterable of strings is
            accepted; matching against the filename is case-insensitive.

    Returns:
        The first key of the mapping returned by files.upload().

    Exits:
        Calls sys.exit(1) when nothing was uploaded or the extension is not allowed.
    """
    print(prompt_message)
    uploaded = files.upload()
    if not uploaded:
        logging.error("No file uploaded.")
        sys.exit(1)
    filename = next(iter(uploaded))
    if file_types:
        # str.endswith accepts only a str or a tuple of str, so lists/sets are
        # converted; suffixes are lower-cased to match the lower-cased filename.
        if isinstance(file_types, str):
            allowed_suffixes = (file_types.lower(),)
        else:
            allowed_suffixes = tuple(suffix.lower() for suffix in file_types)
        if not filename.lower().endswith(allowed_suffixes):
            logging.error(f"Uploaded file must be one of the following types: {file_types}")
            sys.exit(1)
    logging.info(f"Uploaded file: {filename}")
    return filename

def load_and_clean_dataset(csv_filename):
    """
    Read the LAB colour CSV and drop rows without usable L/A/B values.

    The file must contain the columns 'L', 'A', 'B' and 'Color Name'. Infinite
    values anywhere in the table are replaced by NaN, after which rows with a
    NaN in 'L', 'A' or 'B' are removed.

    Returns:
        The cleaned pandas DataFrame.

    Exits:
        Calls sys.exit(1) when a required column is missing, or when reading or
        cleaning raises an exception.
    """
    expected_columns = {'L', 'A', 'B', 'Color Name'}
    try:
        frame = pd.read_csv(csv_filename)
        missing = expected_columns - set(frame.columns)
        if missing:
            logging.error(f"Dataset is missing required columns: {missing}")
            sys.exit(1)
        frame = frame.replace([np.inf, -np.inf], np.nan)
        frame = frame.dropna(subset=['L', 'A', 'B'])
        logging.info(f"Dataset loaded with {len(frame)} entries after cleaning.")
        return frame
    except Exception as err:
        logging.error(f"Failed to load dataset: {err}")
        sys.exit(1)

def upload_and_process_image():
    """
    Ask for an image upload and return it both as a PIL image and as a LAB array.

    Returns:
        Tuple (image, lab_image): the uploaded picture converted to RGB, and
        the result of skimage's rgb2lab on its pixel values scaled to [0, 1].

    Exits:
        Calls sys.exit(1) if opening or converting the image raises an exception
        (upload_file itself exits when no suitable file is uploaded).
    """
    image_filename = upload_file("Please upload the image file you want to analyze.", file_types=('.png', '.jpg', '.jpeg'))
    try:
        # Image.open keeps the underlying file open; the context manager closes
        # it once convert() has produced an in-memory RGB copy.
        with Image.open(image_filename) as source_image:
            image = source_image.convert('RGB')
        image_array = np.array(image).astype(np.float32) / 255.0
        lab_image = color.rgb2lab(image_array)
        logging.info(f"Image '{image_filename}' loaded and converted to LAB color space.")
        return image, lab_image
    except Exception as e:
        logging.error(f"Failed to process image: {e}")
        sys.exit(1)

# -----------------------------
# Step 2: Machine Learning Model Training
# -----------------------------

def create_synthetic_data(art_types, material_types, dye_types, valid_combinations, num_samples_per_combination=500):
    """
    Build the synthetic training table of environmental conditions and fading offsets.

    For every (art type, material type) pair, each dye listed for that pair in
    `valid_combinations` gets `num_samples_per_combination` rows of uniformly
    drawn conditions plus the fading values produced by generate_fading_data.
    The global NumPy random generator is seeded with 42 first. `dye_types` is
    accepted but not read by this function.

    Returns:
        One pandas DataFrame with all rows concatenated and re-indexed.
    """
    np.random.seed(42)  # Ensure reproducibility
    n = num_samples_per_combination
    frames = []

    for art_type in art_types:
        for material_type in material_types:
            # Dyes registered for this exact art/material pair (may be [None]).
            matching_dyes = [
                combo_dye
                for combo_art, combo_material, combo_dye in valid_combinations
                if combo_art == art_type and combo_material == material_type
            ]
            for dye_type in matching_dyes:
                # Environmental factors; the draw order fixes the random stream.
                lux_hours = np.random.uniform(1000, 100000, n)
                uv_exposure = np.random.uniform(0.0, 1.0, n)
                temperature = np.random.uniform(-10, 50, n)
                humidity = np.random.uniform(0, 100, n)
                pollution = np.random.uniform(0, 1.0, n)
                year_of_manufacture = np.random.randint(1455, 2020, n)

                L_fading, A_fading, B_fading = generate_fading_data(
                    art_type, material_type, dye_type,
                    lux_hours, uv_exposure, temperature, humidity,
                    pollution, year_of_manufacture, n,
                )

                columns = {
                    'art_type': art_type,
                    'material_type': material_type,
                    'dye_type': dye_type,
                    'lux_hours': lux_hours,
                    'uv_exposure': uv_exposure,
                    'temperature': temperature,
                    'humidity': humidity,
                    'pollution': pollution,
                    'year_of_manufacture': year_of_manufacture,
                    'L_fading': L_fading,
                    'A_fading': A_fading,
                    'B_fading': B_fading,
                }
                frames.append(pd.DataFrame(columns))

    return pd.concat(frames, ignore_index=True)

def generate_fading_data(art_type, material_type, dye_type, lux_hours, uv_exposure, temperature, humidity, pollution, year_of_manufacture, num_samples):
    """
    Simulate per-sample fading offsets in LAB color space.

    Args:
        art_type: Accepted for interface symmetry; not used in the computation.
        material_type: Material name; 'Textiles', 'Paper with Black Text' and any
            name containing 'Acidic' trigger material-specific terms.
        dye_type: Dye name; only 'Natural' (with 'Textiles') has an effect.
        lux_hours: Light exposure per sample; divided by 100000 to normalize.
        uv_exposure: UV level per sample; capped at 1.0 for the exposure factor.
        temperature, humidity: Accepted but not used in the computation.
        pollution: Pollution level per sample, used as-is as a 0..1 factor.
        year_of_manufacture: Year per sample; (2020 - year) / 220 scales the
            material-specific terms, so it exceeds 1 for years before 1800.
        num_samples: Number of samples; the array inputs must broadcast to it.

    Returns:
        Tuple (L_fading, A_fading, B_fading) of arrays with `num_samples`
        entries, clipped to [-20, 0], [-10, 10] and [-10, 10] respectively.
    """
    lux_hours = np.asarray(lux_hours, dtype=float)
    uv_exposure = np.asarray(uv_exposure, dtype=float)
    pollution = np.asarray(pollution, dtype=float)
    year_of_manufacture = np.asarray(year_of_manufacture, dtype=float)

    # Initialize fading values for LAB color components
    L_fading = np.zeros(num_samples)
    A_fading = np.zeros(num_samples)
    B_fading = np.zeros(num_samples)

    # Normalize lux_hours, uv_exposure, pollution (scale from 0 to 1)
    lux_normalized = lux_hours / 100000
    uv_normalized = np.minimum(uv_exposure, 1.0)  # Element-wise cap at 1.0
    pollution_normalized = pollution

    # UV threshold for safe exposure (75 µW/lm according to the original author)
    uv_threshold = 0.075

    # Exposure factor including light, UV, and pollution
    exposure_factor = lux_normalized + uv_normalized + pollution_normalized

    # Year factor: older materials are treated as more fragile
    year_factor = (2020 - year_of_manufacture) / 220.0

    # Material-specific fading (the random draws keep their original order)
    if material_type == 'Textiles' and dye_type == 'Natural':
        L_fading += np.random.normal(loc=-5, scale=1.5, size=num_samples) * exposure_factor * year_factor
        A_fading += np.random.normal(loc=-2, scale=1, size=num_samples) * exposure_factor * year_factor
        B_fading += np.random.normal(loc=-2, scale=1, size=num_samples) * exposure_factor * year_factor
    elif material_type == 'Paper with Black Text':
        L_fading += np.random.normal(loc=-1, scale=0.5, size=num_samples) * lux_normalized * year_factor

    # Extra UV damage applies per sample and only to the amount above the
    # threshold. (A single np.any() gate would also push samples below the
    # threshold in the opposite direction, because their excess is negative.)
    uv_impact = np.maximum(uv_exposure - uv_threshold, 0.0) * 10
    L_fading -= uv_impact
    A_fading -= uv_impact / 2
    B_fading -= uv_impact / 2

    # Pollution darkens and yellows acidic paper. The draw has a negative mean,
    # so it is added (subtracting it would lighten L and be clipped away).
    if 'Acidic' in material_type:
        L_fading += np.random.normal(loc=-2, scale=1, size=num_samples) * pollution_normalized
        B_fading += np.random.normal(loc=3, scale=1, size=num_samples) * pollution_normalized  # Increase yellowing

    # Keep the offsets inside the ranges the rest of the pipeline expects
    L_fading = np.clip(L_fading, -20, 0)
    A_fading = np.clip(A_fading, -10, 10)
    B_fading = np.clip(B_fading, -10, 10)

    return L_fading, A_fading, B_fading

def prepare_features(synthetic_data):
    """
    Turn the synthetic table into a feature matrix and a target matrix.

    Numeric columns are expanded with degree-2 interaction terms (no bias, no
    squares); categorical columns have missing values replaced by the string
    'None' and are one-hot encoded. The two parts are stacked side by side,
    numeric part first.

    Returns:
        Tuple (X, Y, encoder, poly): features, the L/A/B fading targets, and
        the fitted OneHotEncoder and PolynomialFeatures objects.
    """
    numeric_columns = ['lux_hours', 'uv_exposure', 'temperature', 'humidity', 'pollution', 'year_of_manufacture']
    categorical_columns = ['art_type', 'material_type', 'dye_type']
    target_columns = ['L_fading', 'A_fading', 'B_fading']

    # Categorical part: fill gaps, then one-hot encode as a dense array.
    categories = synthetic_data[categorical_columns].fillna('None')
    encoder = OneHotEncoder(sparse_output=False)
    encoded_categories = encoder.fit_transform(categories)

    # Numeric part: original columns plus pairwise interaction terms.
    poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
    interaction_terms = poly.fit_transform(synthetic_data[numeric_columns])

    X = np.hstack((interaction_terms, encoded_categories))
    Y = synthetic_data[target_columns].values
    return X, Y, encoder, poly

def tqdm_grid_search_cv(grid_search, X, Y):
    """
    Fit a GridSearchCV object and show a tqdm bar sized to the whole search.

    scikit-learn exposes no per-fit callback, so the bar cannot advance while
    the search is running; it is filled when fit() returns. Per-fold progress
    is printed by scikit-learn itself because `verbose` is set to 3.

    Args:
        grid_search: An unfitted GridSearchCV instance (its `verbose`
            attribute is overwritten with 3).
        X: Feature matrix passed to fit().
        Y: Target matrix passed to fit().

    Returns:
        Tuple (best_estimator_, best_params_) of the fitted search.
    """
    import numbers
    from sklearn.model_selection import ParameterGrid

    # Number of parameter combinations. len(param_grid) would only count the
    # parameter names, not the candidates built from their values.
    n_candidates = len(ParameterGrid(grid_search.param_grid))

    # `cv` may be None, an integer, a splitter object or an iterable of splits.
    cv = grid_search.cv
    if cv is None:
        n_folds = 5  # scikit-learn's documented default
    elif isinstance(cv, numbers.Integral):
        n_folds = int(cv)
    elif hasattr(cv, 'get_n_splits'):
        n_folds = cv.get_n_splits(X, Y)
    else:
        n_folds = None  # iterable of splits: do not consume it just to count
    total_fits = n_candidates * n_folds if n_folds is not None else None

    with tqdm(total=total_fits, desc="GridSearchCV Progress") as pbar:
        grid_search.verbose = 3  # Detailed output per fold
        grid_search.fit(X, Y)
        pbar.update(total_fits if total_fits is not None else 1)

    return grid_search.best_estimator_, grid_search.best_params_

def train_ml_model(X, Y):
    """
    Tune and evaluate a multi-output XGBoost regressor for fading prediction.

    Steps: min-max scale X, grid-search n_estimators / max_depth /
    learning_rate with 3-fold cross-validation, then score the best model with
    a shuffled 5-fold cross-validation. Best parameters and the averaged MSE
    are printed.

    Args:
        X: Feature matrix (rows are samples).
        Y: Target matrix with one column per predicted fading component.

    Returns:
        Tuple (best_model, scaler, avg_mse): the refitted best estimator, the
        fitted MinMaxScaler, and the mean cross-validated MSE.
    """
    # Step 1: Normalize (scale) the data
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    Y = np.asarray(Y)

    # An integer `cv` makes GridSearchCV split the rows in order without
    # shuffling. The synthetic rows arrive grouped by art/material/dye
    # combination, so consecutive folds would hold out whole categories.
    # Shuffle once with a private generator (the global RNG state is untouched).
    shuffled_rows = np.random.RandomState(42).permutation(len(X_scaled))
    X_search = X_scaled[shuffled_rows]
    Y_search = Y[shuffled_rows]

    # Step 2: Define the model (XGBoost)
    xgb = XGBRegressor(objective='reg:squarederror', random_state=42)
    multi_xgb = MultiOutputRegressor(xgb)

    # Step 3: Define the parameter grid for GridSearchCV
    param_grid = {
        'estimator__n_estimators': [100, 200],
        'estimator__max_depth': [3, 5, 7],
        'estimator__learning_rate': [0.01, 0.1, 0.2]
    }

    # Step 4: Set up GridSearchCV for hyperparameter tuning
    grid_search = GridSearchCV(
        multi_xgb, param_grid, cv=3, scoring='neg_mean_squared_error'
    )

    # Step 5: Run the search (with progress reporting) on the shuffled rows
    best_model, best_params = tqdm_grid_search_cv(grid_search, X_search, Y_search)

    # Step 6: Print the best parameters found by the grid search
    print(f"Best parameters found: {best_params}")

    # Step 7: Perform 5-fold cross-validation with the best model
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    mse_scores = cross_val_score(
        best_model, X_scaled, Y, cv=kf, scoring='neg_mean_squared_error'
    )
    avg_mse = -np.mean(mse_scores)  # scores are negated MSE values

    # Step 8: Print the average MSE from cross-validation
    print(f"Cross-validated MSE: {avg_mse:.4f}")

    # Return the best model and scaler for future use
    return best_model, scaler, avg_mse

# -----------------------------
# Step 3: Art Type, Material Type, and Dye Type Selection and Exposure Simulation Functions
# -----------------------------

# Art Types ('None' stands for materials without artwork)
art_types = ['Chromolithograph Print', 'Sanguine Etching', 'Steel Engraving', 'None']

# Material Types
material_types = [
    'Acidic Wove Paper', 'Acidic Rag Paper',
    'Alkaline Wove Paper', 'Alkaline Rag Paper',
    'Textiles', 'Paper with Black Text',
]

# Dye Types (for Textiles)
dye_types = ['Natural', 'Synthetic']

# The four plain paper stocks, in the order they appear in the combination table.
_paper_materials = (
    'Acidic Wove Paper', 'Acidic Rag Paper',
    'Alkaline Wove Paper', 'Alkaline Rag Paper',
)

# Valid combinations as (Art Type, Material Type, Dye Type) tuples.
# Dye Type is None for everything except textiles.
# Add other valid combinations as necessary.
valid_combinations = (
    [('Chromolithograph Print', 'Acidic Wove Paper', None)]
    + [('Sanguine Etching', paper, None) for paper in _paper_materials]
    + [('Steel Engraving', 'Acidic Wove Paper', None)]
    + [('None', 'Textiles', dye) for dye in ('Natural', 'Synthetic')]
    + [('None', 'Paper with Black Text', None)]
    + [('None', paper, None) for paper in _paper_materials]
)

# Art Type dropdown: lists every art type and starts on the first one
art_type_dropdown = widgets.Dropdown(
    description='Art Type:',
    options=art_types,
    value=art_types[0],
    disabled=False,
    layout=widgets.Layout(width='500px'),
    style={'description_width': 'initial'},
)

# Material Type dropdown: starts empty, its options are assigned later
material_type_dropdown = widgets.Dropdown(
    description='Material Type:',
    options=[],
    disabled=False,
    layout=widgets.Layout(width='500px'),
    style={'description_width': 'initial'},
)

# Dye Type dropdown: starts empty, unselected and hidden
dye_type_dropdown = widgets.Dropdown(
    description='Dye Type:',
    options=[],
    value=None,
    disabled=False,
    style={'description_width': 'initial'},
    layout=widgets.Layout(visibility='hidden', width='500px'),
)

# Keeps the Material Type dropdown in step with the selected Art Type
def update_material_type_options(*args):
    """
    Offer only the materials that `valid_combinations` lists for the chosen art type.

    The options are de-duplicated and sorted. If the current selection is not
    among them, the first option is selected (or None when there are none).
    The dye dropdown is refreshed afterwards. Positional arguments (the change
    event passed by `observe`) are ignored.
    """
    chosen_art = art_type_dropdown.value
    materials_for_art = sorted({
        material
        for art, material, dye in valid_combinations
        if art == chosen_art
    })
    material_type_dropdown.options = materials_for_art
    if material_type_dropdown.value not in materials_for_art:
        if materials_for_art:
            material_type_dropdown.value = materials_for_art[0]
        else:
            material_type_dropdown.value = None
    update_dye_type_visibility()

def update_dye_type_visibility(*args):
    """
    Show the dye dropdown for 'Textiles' and hide and empty it for anything else.

    When shown, its options are the sorted, de-duplicated non-None dyes that
    `valid_combinations` lists for the selected material; a selection that is
    not among them is replaced by the first option (or None). Positional
    arguments (the change event passed by `observe`) are ignored.
    """
    chosen_material = material_type_dropdown.value

    if chosen_material != 'Textiles':
        dye_type_dropdown.layout.visibility = 'hidden'
        dye_type_dropdown.options = []
        dye_type_dropdown.value = None
        return

    dye_type_dropdown.layout.visibility = 'visible'
    dyes_for_material = sorted({
        dye
        for art, material, dye in valid_combinations
        if material == chosen_material and dye is not None
    })
    dye_type_dropdown.options = dyes_for_material
    if dye_type_dropdown.value not in dyes_for_material:
        if dyes_for_material:
            dye_type_dropdown.value = dyes_for_material[0]
        else:
            dye_type_dropdown.value = None

# Attach the update functions to the dropdowns: each runs whenever the
# corresponding dropdown's 'value' trait changes
art_type_dropdown.observe(update_material_type_options, names='value')
material_type_dropdown.observe(update_dye_type_visibility, names='value')

# Initial update of Material Type options (the material dropdown was created
# with an empty option list, so it is populated here for the default art type)
update_material_type_options()

# Arrange dropdowns vertically in a 500px-wide column
dropdown_layout = Layout(display='flex', flex_flow='column', align_items='stretch', width='500px')
dropdowns = VBox([art_type_dropdown, material_type_dropdown, dye_type_dropdown], layout=dropdown_layout)

# Display the dropdowns
display(dropdowns)

# Slider factory: all sliders use the same label style and a 500px width, but
# each one receives its own style dict and its own Layout instance.
def _make_slider(slider_class, description, value, minimum, maximum, step):
    return slider_class(
        value=value,
        min=minimum,
        max=maximum,
        step=step,
        description=description,
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='500px'),
    )


# Environmental parameter sliders (label, default, minimum, maximum, step)
time_slider = _make_slider(widgets.FloatSlider, 'Years of Aging:', 5, 0, 100, 1)
uv_slider = _make_slider(widgets.FloatSlider, 'UV Exposure:', 0.5, 0.0, 1.0, 0.01)
lux_slider = _make_slider(widgets.FloatSlider, 'Lux Hours:', 50000, 0, 100000, 1000)
humidity_slider = _make_slider(widgets.FloatSlider, 'Humidity (%):', 50, 0, 100, 1)
temp_slider = _make_slider(widgets.FloatSlider, 'Temperature (°C):', 20, -10, 50, 1)
pollution_slider = _make_slider(widgets.FloatSlider, 'Pollution Level:', 0.5, 0, 1.0, 0.01)

# Explanation of the pollution scale (HTML text; nothing in this section displays it)
pollution_explanation = """
<h4>Pollution Scale Explanation:</h4>
<ul>
    <li><strong>0.0</strong>: Clean air with minimal pollution (e.g., rural or controlled environments).</li>
    <li><strong>0.1 - 0.3</strong>: Low pollution, typical of areas with good air quality.</li>
    <li><strong>0.4 - 0.6</strong>: Moderate pollution, like urban environments with traffic or industry.</li>
    <li><strong>0.7 - 1.0</strong>: High pollution, found in industrial or heavily polluted areas.</li>
</ul>
<p>Higher pollution levels will increase the rate of fading and degradation of materials.</p>
"""

# Year of manufacture is the only integer-valued slider
year_slider = _make_slider(widgets.IntSlider, 'Year of Manufacture:', 2000, 1455, 2020, 1)

# Stack the sliders in a 500px-wide column
slider_layout = Layout(display='flex', flex_flow='column', align_items='stretch', width='500px')
sliders = VBox(
    [time_slider, uv_slider, lux_slider, humidity_slider, temp_slider, pollution_slider, year_slider],
    layout=slider_layout,
)

# Display the sliders
display(sliders)

def simulate_exposure_by_material(lab_image, art_type, material_type, dye_type, exposure_years, uv_exposure, lux_hours, humidity, temperature):
    """
    Apply rule-based light/UV fading to a LAB image and return the result.

    The input array is copied, never modified. Shifts depend on the art type
    first and the material type (plus dye type for textiles) second; the
    result is clipped to L in [0, 100] and A/B in [-128, 127].

    Args:
        lab_image: Float array whose last axis holds the L, A, B channels.
        art_type: 'Chromolithograph Print', 'Sanguine Etching' or
            'Steel Engraving' add an artwork shift; other values add none.
        material_type: Names containing 'Acidic' or 'Alkaline', 'Textiles'
            and 'Paper with Black Text' each add a material shift.
        dye_type: 'Natural' or 'Synthetic'; only read for 'Textiles'.
        uv_exposure: UV level, used directly as a 0..1 factor.
        lux_hours: Light exposure; divided by 100000 to get a 0..1 factor.
        exposure_years, humidity, temperature: Accepted for interface
            compatibility but currently have no effect on the result.

    Returns:
        A new array of the same shape as `lab_image` with the shifted values.
    """
    lab_exposed = lab_image.copy()

    # Normalize lux_hours and uv_exposure
    lux_normalized = lux_hours / 100000  # 0.0 to 1.0 for the slider range
    uv_normalized = uv_exposure  # Already between 0.0 and 1.0

    # Artwork-specific fading (art types not listed here add nothing)
    if art_type == 'Chromolithograph Print':
        lab_exposed[:, :, 0] -= ((lux_normalized * 10) + (uv_normalized * 10))
        lab_exposed[:, :, 1] -= ((lux_normalized * 5) + (uv_normalized * 5))
        lab_exposed[:, :, 2] -= ((lux_normalized * 5) + (uv_normalized * 5))
    elif art_type == 'Sanguine Etching':
        lab_exposed[:, :, 1] -= (lux_normalized * 10)
    elif art_type == 'Steel Engraving':
        lab_exposed[:, :, 0] -= (lux_normalized * 5)

    # Adjustments based on material type
    if 'Acidic' in material_type:
        lab_exposed[:, :, 0] -= uv_normalized * 10
        lab_exposed[:, :, 2] += uv_normalized * 10  # Increase yellowing
    elif 'Alkaline' in material_type:
        lab_exposed[:, :, 0] -= lux_normalized * 5
    elif material_type == 'Textiles':
        if dye_type == 'Natural':
            # Logarithmic light response: 0 at 0 lux hours, 1 at 100000
            fading_multiplier = np.log(lux_hours + 1) / np.log(100000 + 1)
            lab_exposed[:, :, 0] -= uv_normalized * 15 * fading_multiplier
            lab_exposed[:, :, 1] -= uv_normalized * 15 * fading_multiplier
            lab_exposed[:, :, 2] -= uv_normalized * 15 * fading_multiplier
        elif dye_type == 'Synthetic':
            lab_exposed[:, :, 0] -= uv_normalized * 10
            lab_exposed[:, :, 1] -= uv_normalized * 10
            lab_exposed[:, :, 2] -= uv_normalized * 10
    elif material_type == 'Paper with Black Text':
        lab_exposed[:, :, 0] -= lux_normalized * 2

    # Ensure values stay within valid LAB ranges
    lab_exposed[:, :, 0] = np.clip(lab_exposed[:, :, 0], 0, 100)
    lab_exposed[:, :, 1] = np.clip(lab_exposed[:, :, 1], -128, 127)
    lab_exposed[:, :, 2] = np.clip(lab_exposed[:, :, 2], -128, 127)

    logging.info(f"Simulated exposure for {art_type} on {material_type} with dye type {dye_type}.")
    return lab_exposed

def lab_to_rgb(lab_image):
    """Convert a LAB image to an 8-bit RGB array (values are clipped to [0, 1] before scaling by 255)."""
    unit_rgb = np.clip(color.lab2rgb(lab_image), 0, 1)
    return (unit_rgb * 255).astype(np.uint8)

def display_image(image, title='Image', save_fig=False, filename=None):
    """
    Show `image` in an 8x6 inch figure without axes.

    The figure is written to `filename` before being shown, but only when
    `save_fig` is true and a filename is given.
    """
    should_save = save_fig and filename

    plt.figure(figsize=(8, 6))
    plt.imshow(image)
    plt.title(title)
    plt.axis('off')

    if should_save:
        plt.savefig(filename, bbox_inches='tight')
        logging.info(f"Image saved as {filename}")

    plt.show()

def apply_fading(lab_image, predicted_fading):
    """
    Add one fading offset per LAB channel to a copy of `lab_image`.

    `predicted_fading` is indexed 0..2 for L, A and B. After the shift, L is
    clipped to [0, 100] and A/B to [-128, 127]. The input array is untouched.
    """
    lab_faded = lab_image.copy()
    channel_limits = ((0, 100), (-128, 127), (-128, 127))
    for channel, (lower, upper) in enumerate(channel_limits):
        lab_faded[:, :, channel] += predicted_fading[channel]
        lab_faded[:, :, channel] = np.clip(lab_faded[:, :, channel], lower, upper)
    logging.info("Applied predicted fading to the image.")
    return lab_faded

# -----------------------------
# Step 4: Visualization and Delta-E
# -----------------------------

def compute_delta_e(lab1, lab2):
    """Return the CIEDE2000 colour difference between two LAB arrays, as computed by deltaE_ciede2000."""
    difference = deltaE_ciede2000(lab1, lab2)
    logging.info("Delta-E between the two images calculated.")
    return difference

def display_color_difference(delta_e, title='Color Difference Map (∆E)', save_fig=False, filename=None):
    """
    Show a Delta-E map as a 'hot' heat map with a colour bar and no axes.

    The figure is written to `filename` before being shown, but only when
    `save_fig` is true and a filename is given.
    """
    should_save = save_fig and filename

    plt.figure(figsize=(8, 6))
    plt.imshow(delta_e, cmap='hot')
    plt.colorbar(label='∆E')
    plt.title(title)
    plt.axis('off')

    if should_save:
        plt.savefig(filename, bbox_inches='tight')
        logging.info(f"Color difference map saved as {filename}")

    plt.show()

def plot_histograms(image1, image2, title_suffix='', save_fig=False, filename=None):
    """
    Plot overlaid red, green and blue histograms of two images side by side.

    Each channel gets its own subplot with 256 bins; the first image is drawn
    in the plain channel colour and the second in its 'dark' variant. Subplot
    titles are wrapped at 25 characters. The figure is saved to `filename`
    first when `save_fig` is true and a filename is given.
    """
    import textwrap

    first_pixels = np.array(image1)
    second_pixels = np.array(image2)
    fig, axs = plt.subplots(1, 3, figsize=(15, 5))

    for channel, (axis, color_name) in enumerate(zip(axs, ['Red', 'Green', 'Blue'])):
        base_color = color_name.lower()
        axis.hist(first_pixels[..., channel].flatten(), bins=256, alpha=0.5,
                  label=f'{color_name} (Image 1)', color=base_color)
        axis.hist(second_pixels[..., channel].flatten(), bins=256, alpha=0.5,
                  label=f'{color_name} (Image 2)', color=f'dark{base_color}')
        # Wrap the title to a maximum width
        axis.set_title(textwrap.fill(f'{color_name} Channel {title_suffix}', width=25), fontsize=10)
        axis.legend()

    plt.tight_layout()
    plt.subplots_adjust(wspace=0.4)  # Increase horizontal space between subplots

    if save_fig and filename:
        plt.savefig(filename, bbox_inches='tight')
        logging.info(f"Histogram saved as {filename}")

    plt.show()

def display_average_color(image_lab, title='Average Color', save_fig=False, filename=None):
    """
    Show a swatch of the mean LAB colour of an image and return that mean.

    The mean is taken over the first two axes, converted to RGB, clipped to
    [0, 1] and drawn as a 100x100 patch. The figure is saved to `filename`
    first when `save_fig` is true and a filename is given. The mean L, A and
    B values are logged.

    Returns:
        The mean LAB values as an array.
    """
    average_lab = image_lab.mean(axis=(0, 1))

    swatch_rgb = color.lab2rgb(np.reshape(average_lab, (1, 1, 3))).reshape(1, 1, 3)
    swatch_rgb = np.clip(swatch_rgb, 0, 1)

    plt.figure(figsize=(2, 2))
    plt.imshow(np.ones((100, 100, 3)) * swatch_rgb)
    plt.title(title)
    plt.axis('off')
    if save_fig and filename:
        plt.savefig(filename, bbox_inches='tight')
        logging.info(f"Average color saved as {filename}")
    plt.show()

    mean_l, mean_a, mean_b = average_lab
    logging.info(f"{title}: L={mean_l:.2f}, A={mean_a:.2f}, B={mean_b:.2f}")
    return average_lab

def compute_average_delta_e(avg_lab1, avg_lab2):
    """Return (and log) the CIEDE2000 difference between two single LAB colours."""
    # deltaE_ciede2000 is given one-row arrays; [0] extracts the single result.
    delta_e = deltaE_ciede2000(np.array([avg_lab1]), np.array([avg_lab2]))[0]
    logging.info(f"Delta-E between average colors: {delta_e:.2f}")
    return delta_e

# -----------------------------
# Step 5: Main Execution Flow
# -----------------------------

def main():
    """
    Run one complete simulation with the current widget settings.

    Sequence: upload and validate the LAB dataset CSV, upload the image, train
    the fading model on freshly generated synthetic data, apply the rule-based
    exposure simulation, apply the model's predicted fading on top of it, and
    display/save the images, average colours, Delta-E maps and histograms for
    each stage. Figures are written as PNG files in the working directory.
    """
    # Upload and load dataset
    # NOTE: `dataset` is validated by load_and_clean_dataset but is not read
    # again anywhere in this function.
    csv_filename = upload_file("Please upload your LAB color dataset CSV file.", file_types=('.csv',))
    dataset = load_and_clean_dataset(csv_filename)

    # Upload and process image
    original_image, original_lab = upload_and_process_image()

    # Display average color before fading
    avg_lab_before = display_average_color(original_lab, title='Average Color - Original Image', save_fig=True, filename='average_color_before.png')

    # Create synthetic data and train model (repeated on every call of main)
    synthetic_data = create_synthetic_data(art_types, material_types, dye_types, valid_combinations, num_samples_per_combination=500)
    X, Y, encoder, poly = prepare_features(synthetic_data)
    model, scaler, mse = train_ml_model(X, Y)
    # train_ml_model already prints this value; it is repeated here with a longer label
    print(f"Cross-validated Mean Squared Error for Fading Prediction: {mse:.4f}")

    # Get environmental parameters
    art_type = art_type_dropdown.value
    material_type = material_type_dropdown.value
    # A hidden dye dropdown maps to the string 'None', the same placeholder
    # prepare_features uses for missing dye types
    dye_type = dye_type_dropdown.value if dye_type_dropdown.layout.visibility == 'visible' else 'None'
    exposure_years = time_slider.value
    uv_exposure = uv_slider.value
    lux_hours = lux_slider.value
    humidity = humidity_slider.value
    temperature = temp_slider.value
    pollution = pollution_slider.value
    year_of_manufacture = year_slider.value

    # Simulate exposure by material
    lab_exposed = simulate_exposure_by_material(original_lab, art_type, material_type, dye_type, exposure_years, uv_exposure, lux_hours, humidity, temperature)
    exposed_image = lab_to_rgb(lab_exposed)
    display_image(exposed_image, title=f'Simulated Exposure: {art_type} on {material_type}', save_fig=True, filename='exposed_image.png')

    # Display average color after simulated exposure
    avg_lab_exposed = display_average_color(lab_exposed, title='Average Color - Simulated Exposure', save_fig=True, filename='average_color_exposed.png')

    # Compute Delta-E between Original and Simulated Exposure
    delta_e_simulation = compute_delta_e(original_lab, lab_exposed)
    display_color_difference(delta_e_simulation, title='Color Difference Map (∆E) - Original vs Simulated Exposure', save_fig=True, filename='delta_e_simulation.png')
    delta_e_avg_simulation = compute_average_delta_e(avg_lab_before, avg_lab_exposed)
    print(f"Delta-E between average colors (Original vs Simulated Exposure): {delta_e_avg_simulation:.2f}")

    # Plot histograms between Original and Simulated Exposure
    plot_histograms(original_image, exposed_image, title_suffix='Original vs Simulated Exposure', save_fig=True, filename='histograms_simulation.png')

    # Prepare features for prediction
    # One-hot encode the selected art type, material type, and dye type
    categorical_input = pd.DataFrame({'art_type': [art_type], 'material_type': [material_type], 'dye_type': [dye_type]})
    categorical_input = categorical_input.fillna('None')
    categorical_encoded = encoder.transform(categorical_input)

    # Create polynomial features for the numeric input
    # (column order must match the numeric columns used in prepare_features)
    X_input_numeric = np.array([[lux_hours, uv_exposure, temperature, humidity, pollution, year_of_manufacture]])
    X_input_numeric_poly = poly.transform(X_input_numeric)

    # Combine numeric and categorical features, numeric part first as in prepare_features
    X_input = np.hstack((X_input_numeric_poly, categorical_encoded))
    X_input_scaled = scaler.transform(X_input)

    # Predict fading and apply it on top of the simulated exposure
    predicted_fading = model.predict(X_input_scaled)[0]
    lab_faded = apply_fading(lab_exposed, predicted_fading)
    faded_image = lab_to_rgb(lab_faded)
    display_image(faded_image, title=f'Faded Image After ML Prediction', save_fig=True, filename='faded_image.png')

    # Display average color after fading
    avg_lab_after = display_average_color(lab_faded, title='Average Color - After ML Prediction', save_fig=True, filename='average_color_after.png')

    # Compute Delta-E between Simulated Exposure and ML Prediction
    delta_e_ml = compute_delta_e(lab_exposed, lab_faded)
    display_color_difference(delta_e_ml, title='Color Difference Map (∆E) - Simulated Exposure vs ML Prediction', save_fig=True, filename='delta_e_ml.png')
    delta_e_avg_ml = compute_average_delta_e(avg_lab_exposed, avg_lab_after)
    print(f"Delta-E between average colors (Simulated Exposure vs ML Prediction): {delta_e_avg_ml:.2f}")

    # Compute Delta-E between Original and Final Faded Image
    delta_e_total = compute_delta_e(original_lab, lab_faded)
    display_color_difference(delta_e_total, title='Color Difference Map (∆E) - Original vs Final Faded', save_fig=True, filename='delta_e_total.png')
    delta_e_avg_total = compute_average_delta_e(avg_lab_before, avg_lab_after)
    print(f"Delta-E between average colors (Original vs Final Faded): {delta_e_avg_total:.2f}")

    # Plot histograms between Original and Final Faded Image
    plot_histograms(original_image, faded_image, title_suffix='Original vs Final Faded', save_fig=True, filename='histograms_final.png')

# Create a 'Run Simulation' button and the output area that main() writes into
run_button = widgets.Button(description='Run Simulation', layout=widgets.Layout(width='200px'))
output = widgets.Output()

def on_button_clicked(b):
    """Button callback: clear the output area, then run main() inside it."""
    # Everything main() prints or plots is captured by the Output widget
    with output:
        output.clear_output()
        main()

run_button.on_click(on_button_clicked)

# Arrange button and output vertically, centered
interface_layout = VBox([run_button, output], layout=Layout(align_items='center'))

# Display the button and output
display(interface_layout)