# In [None]:
import os
import json
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import matplotlib.pyplot as plt
from tqdm import tqdm
import logging
import warnings

# Suppress the Keras warning emitted when a data generator does not forward
# **kwargs to PyDataset.__init__. `message` is a regular expression matched
# against the start of the warning text, so the literal "()", "." and "**"
# of `super().__init__(**kwargs)` have to be escaped.
PYDATASET_WARNING_PATTERN = (
    r"Your `PyDataset` class should call `super\(\)\.__init__\(\*\*kwargs\)`"
)
warnings.filterwarnings("ignore", message=PYDATASET_WARNING_PATTERN)

# Set up logging: records at INFO level and above go to a log file in the
# current working directory, each line prefixed with a timestamp.
logging.basicConfig(
    filename='evolutionary_optimization.log',
    level=logging.INFO,
    format='%(asctime)s %(message)s',
)

# Load the base configuration from JSON. The rest of the script reads its
# 'data_augmentation', 'paths' and 'training' sections.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale encoding.
config_path = '/Users/andreshofmann/Desktop/Studies/Uol/7t/FP/stage_2/CNN/config_v6_86_acc.json'
with open(config_path, 'r', encoding='utf-8') as f:
    base_config = json.load(f)

# Search space for the optimiser: every hyperparameter name maps to the list
# of discrete candidate values an individual may take for it.
hyperparameter_space = dict(
    batch_size=[16, 32, 64],
    epochs=[3, 5, 10],  # Reduced for testing
    initial_learning_rate=[0.001, 0.0001, 0.00001],
    dropout_rate=[0.3, 0.5, 0.7],
    rotation_range=[10, 20, 30],
    width_shift_range=[0.1, 0.2, 0.3],
    height_shift_range=[0.1, 0.2, 0.3],
    zoom_range=[0.1, 0.2, 0.3],
)

def generate_random_hyperparameters():
    """Draw one random individual from the search space.

    Returns:
        A dict with one entry per key of ``hyperparameter_space``; each value
        is picked with ``random.choice`` from that key's candidate list.
    """
    individual = {}
    for name in hyperparameter_space:
        individual[name] = random.choice(hyperparameter_space[name])
    return individual

def generate_initial_population(size=100):
    """Build the starting population of random individuals.

    Args:
        size: Number of individuals to generate.

    Returns:
        A list of ``size`` hyperparameter dicts, each produced by
        ``generate_random_hyperparameters``.
    """
    population = []
    for _ in range(size):
        population.append(generate_random_hyperparameters())
    return population

def evaluate_fitness(hyperparameters, train_data, val_data):
    """Train a ResNet50-based binary classifier and score one individual.

    Args:
        hyperparameters: Dict providing 'batch_size', 'epochs',
            'initial_learning_rate', 'dropout_rate', 'rotation_range',
            'width_shift_range', 'height_shift_range' and 'zoom_range'.
        train_data: DataFrame with a 'path' column (image file) and a
            'label' column, used for training.
        val_data: DataFrame with the same columns, used for validation.

    Returns:
        The highest validation accuracy reached in any epoch, as a float.
    """
    print(f"Evaluating hyperparameters: {hyperparameters}")

    # A fresh ResNet50 is built on every call. Drop the global Keras state left
    # over from the previous evaluation so memory does not keep growing while
    # the optimiser evaluates one model after another.
    tf.keras.backend.clear_session()

    augmentation = base_config['data_augmentation']

    # Create the data generators: augmentation strength comes from the
    # individual, the remaining settings from the base configuration.
    train_datagen = ImageDataGenerator(
        rescale=augmentation['rescale'],
        rotation_range=hyperparameters['rotation_range'],
        width_shift_range=hyperparameters['width_shift_range'],
        height_shift_range=hyperparameters['height_shift_range'],
        zoom_range=hyperparameters['zoom_range'],
        horizontal_flip=augmentation['horizontal_flip'],
        vertical_flip=augmentation['vertical_flip'],
        brightness_range=augmentation['brightness_range']
    )

    # Validation images are only rescaled, never augmented.
    val_datagen = ImageDataGenerator(rescale=augmentation['rescale'])

    # Create training and validation generators
    train_generator = train_datagen.flow_from_dataframe(
        train_data,
        x_col='path',
        y_col='label',
        target_size=(64, 64),
        batch_size=hyperparameters['batch_size'],
        class_mode='binary'
    )

    validation_generator = val_datagen.flow_from_dataframe(
        val_data,
        x_col='path',
        y_col='label',
        target_size=(64, 64),
        batch_size=hyperparameters['batch_size'],
        class_mode='binary'
    )

    # Define the pre-trained model with ResNet50; every layer except the last
    # four of the backbone is frozen.
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(64, 64, 3))
    for layer in base_model.layers[:-4]:
        layer.trainable = False

    model = Sequential([
        base_model,
        GlobalAveragePooling2D(),
        Dense(512, activation='relu'),
        Dropout(hyperparameters['dropout_rate']),
        Dense(1, activation='sigmoid')
    ])

    optimizer = Adam(learning_rate=hyperparameters['initial_learning_rate'])
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    # Train the model
    print("Starting model training...")
    history = model.fit(
        train_generator,
        epochs=hyperparameters['epochs'],
        validation_data=validation_generator,
        verbose=1  # Enable verbose output
    )
    print("Model training completed.")

    # Fitness is the best epoch, not the last one.
    val_accuracy = float(np.max(history.history['val_accuracy']))
    return val_accuracy

def select_parents(population, fitnesses, num_parents=20):
    """Pick the fittest individuals of a population.

    Args:
        population: List of individuals.
        fitnesses: Fitness values, ``fitnesses[i]`` belonging to
            ``population[i]``.
        num_parents: Maximum number of individuals to select.

    Returns:
        Up to ``num_parents`` individuals with the highest fitness, ordered
        from lowest to highest fitness. An empty list when ``num_parents`` is
        not positive.
    """
    # A slice of [-0:] would return the whole population, so handle the
    # "select nothing" case explicitly.
    if num_parents <= 0:
        return []
    ranked_indices = np.argsort(fitnesses)
    return [population[index] for index in ranked_indices[-num_parents:]]

def crossover(parents, num_offsprings=80):
    """Create children by uniform crossover between random parent pairs.

    Args:
        parents: List of parent hyperparameter dicts; two distinct entries
            are sampled for every child.
        num_offsprings: Number of children to create.

    Returns:
        A list of new dicts. For each key of ``hyperparameter_space`` a child
        takes the value of one of its two parents, chosen at random.
    """
    children = []
    for _ in range(num_offsprings):
        first, second = random.sample(parents, 2)
        children.append({
            gene: random.choice([first[gene], second[gene]])
            for gene in hyperparameter_space
        })
    return children

def mutate(offspring):
    """Randomly re-draw some hyperparameters of an individual, in place.

    Each key of ``hyperparameter_space`` is replaced, with probability 0.1,
    by a random value from that key's candidate list.

    Args:
        offspring: Hyperparameter dict; it is modified directly.

    Returns:
        The same dict object that was passed in.
    """
    mutation_probability = 0.1
    for gene, candidates in hyperparameter_space.items():
        if random.random() >= mutation_probability:
            continue
        offspring[gene] = random.choice(candidates)
    return offspring

def generate_new_population(parents, offsprings):
    """Assemble the next generation from parents and offspring.

    Every individual, the selected parents included, is passed through
    ``mutate`` before being placed in the new population.

    Args:
        parents: Individuals selected from the current generation.
        offsprings: Children produced by crossover.

    Returns:
        A list with the mutated parents first, followed by the mutated
        offspring.
    """
    next_generation = []
    for individual in parents + offsprings:
        next_generation.append(mutate(individual))
    return next_generation

def evolutionary_optimization(train_data, val_data, generations=2, population_size=10):  # Reduced for testing
    """Search the hyperparameter space with a simple genetic algorithm.

    Each generation evaluates every individual with ``evaluate_fitness`` and
    records the best score. Every generation except the last then breeds its
    successor through selection, crossover and mutation.

    Args:
        train_data: Training data, forwarded unchanged to ``evaluate_fitness``.
        val_data: Validation data, forwarded unchanged to ``evaluate_fitness``.
        generations: Number of generations to evaluate.
        population_size: Number of individuals in every generation.

    Returns:
        Tuple ``(population, fitnesses, fitness_history)``: the population
        evaluated in the last generation, its fitness values in the same
        order (``population[i]`` scored ``fitnesses[i]``), and the best
        fitness of each generation. With ``generations=0`` both fitness lists
        are empty.
    """
    population = generate_initial_population(population_size)
    fitnesses = []  # stays empty (instead of unbound) when generations == 0
    fitness_history = []

    # Keep every generation at population_size: roughly one fifth of the
    # individuals become parents (at least two so crossover can pair them, but
    # never more than exist) and offspring fill the remaining slots.
    num_parents = min(population_size, max(2, population_size // 5))
    num_offsprings = population_size - num_parents

    for generation in range(generations):
        logging.info(f"Generation {generation+1} started.")
        print(f"Generation {generation+1}")

        fitnesses = []
        for ind in tqdm(population, desc=f"Evaluating Generation {generation+1}", position=0, leave=True):
            fitness = evaluate_fitness(ind, train_data, val_data)
            fitnesses.append(fitness)

        best_fitness = max(fitnesses)
        fitness_history.append(best_fitness)

        logging.info(f"Generation {generation+1} best fitness: {best_fitness}")
        print(f"Best fitness: {best_fitness}")

        # Do not breed after the final generation: the returned population
        # must be the one the returned fitnesses were measured on.
        if generation + 1 < generations:
            parents = select_parents(population, fitnesses, num_parents)
            offsprings = crossover(parents, num_offsprings)
            population = generate_new_population(parents, offsprings)

    return population, fitnesses, fitness_history

def plot_fitness_history(fitness_history):
    """Show a line chart of the best fitness reached in each generation.

    Args:
        fitness_history: Sequence of best-fitness values, one per generation;
            they are plotted against generation numbers starting at 1.
    """
    generation_numbers = range(1, len(fitness_history) + 1)

    _, axes = plt.subplots(figsize=(10, 5))
    axes.plot(generation_numbers, fitness_history, marker='o')
    axes.set_title('Fitness Over Generations')
    axes.set_xlabel('Generation')
    axes.set_ylabel('Best Fitness (Validation Accuracy)')
    axes.grid(True)
    plt.show()

# Read the CSV named in the configuration; the code below uses its
# 'Patch_id' and 'label' columns.
csv_path = base_config['paths']['csv_path']
data = pd.read_csv(csv_path)

def get_patch_folder(patch_id):
    """Derive the patch folder name from a patch file name.

    The text after the last underscore, cut at its first dot, is appended to
    the prefix ``Patch_``.

    Args:
        patch_id: Patch file name as a string.

    Returns:
        The folder name, e.g. ``'Patch_12'`` for ``'img_patch_12.png'``.
    """
    last_segment = patch_id.rsplit('_', 1)[-1]
    patch_num, _, _ = last_segment.partition('.')
    return 'Patch_' + patch_num

def get_full_path(row):
    """Build the image path for one row of the data table.

    Rows whose 'label' equals 1 are looked up under ``mel_patches``, all other
    rows under ``bkl_patches``, inside the configured image directory and the
    folder returned by ``get_patch_folder``. A path that does not exist on
    disk is reported on stdout but still returned.

    Args:
        row: Mapping-like row providing 'Patch_id' and 'label'.

    Returns:
        The path as a string, or ``None`` when any error occurs while
        processing the row (the error is printed, not raised).
    """
    try:
        folder = get_patch_folder(row['Patch_id'])
        class_dir = 'mel_patches' if row['label'] == 1 else 'bkl_patches'
        candidate = os.path.join(
            base_config['paths']['img_dir'], class_dir, folder, row['Patch_id']
        )

        path_is_missing = not os.path.exists(candidate)
        if path_is_missing:
            print(f"Invalid path: {candidate}")

        return candidate
    except Exception as e:
        # Best effort: report the row and keep going with the other rows.
        print(f"Error processing row: {row}")
        print(f"Error message: {str(e)}")
        return None

# Resolve every row to an image path and keep only rows whose file exists.
data['path'] = data.apply(get_full_path, axis=1)
# get_full_path returns None for rows it could not process, and
# os.path.exists(None) raises TypeError, so any non-string path counts as
# missing.
data['path_exists'] = data['path'].apply(lambda p: isinstance(p, str) and os.path.exists(p))
# .copy() gives an independent frame, so the label assignment below does not
# write into a slice of the unfiltered data (avoids SettingWithCopyWarning).
data = data[data['path_exists']].copy()
# Labels are converted to strings before being used as the y_col of
# flow_from_dataframe with class_mode='binary'.
# NOTE(review): Keras appears to require string labels there - confirm.
data['label'] = data['label'].astype(str)

# Reduce dataset size for testing
data = data.sample(frac=0.1, random_state=42)

# Stratified train/validation split, then subsample both parts by the
# fraction set in the configuration.
train_data, val_data = train_test_split(data, test_size=base_config['training']['validation_split'], stratify=data['label'], random_state=42)
train_data = train_data.sample(frac=base_config['training']['fraction'], random_state=42)
val_data = val_data.sample(frac=base_config['training']['fraction'], random_state=42)

# Run the evolutionary search on the prepared training/validation splits
final_population, final_fitnesses, fitness_history = evolutionary_optimization(train_data, val_data)

# Report the individual at the position of the highest returned fitness
best_index = np.argmax(final_fitnesses)
best_hyperparameters = final_population[best_index]
print("Best Hyperparameters:", best_hyperparameters)

# Show how the best fitness developed from generation to generation
plot_fitness_history(fitness_history)