In [13]:
from sklearn.model_selection import cross_val_score, KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from DataProcessor import DataProcessor
import numpy as np
import math
from Chromosome import *
from Recombination import Recombination
import random
from Fast_NSGAII import NSGAII
import matplotlib.pyplot as plt
from pymoo.indicators.hv import HV
from pymoo.indicators.igd import IGD
import time
import pandas as pd
from datetime import datetime
from sklearn.ensemble import RandomForestClassifier
from Configuration import NSGAConfig

In [14]:
def select_features_rf(X, y, num_features=10, random_state=None):
    """Rank features with a random forest and return the indices of the top ones.

    Args:
        X: Feature matrix passed to ``RandomForestClassifier.fit``.
        y: Target labels passed to ``RandomForestClassifier.fit``.
        num_features: Number of top-ranked features to keep. Values larger than
            the number of columns return every column index.
        random_state: Optional seed forwarded to the forest so the ranking can
            be reproduced. ``None`` (default) keeps the previous unseeded run.

    Returns:
        numpy.ndarray: Column indices sorted by ascending importance (the most
        important feature is last). Empty when ``num_features`` is 0.

    Raises:
        ValueError: If ``num_features`` is negative.
    """
    if num_features < 0:
        raise ValueError("num_features must be non-negative")

    model = RandomForestClassifier(random_state=random_state)
    model.fit(X, y)
    ranking = np.argsort(model.feature_importances_)

    # Slice from an explicit start index: ranking[-0:] would be ranking[0:],
    # i.e. *all* features, when zero features are requested.
    keep = min(num_features, len(ranking))
    return ranking[len(ranking) - keep:]

In [15]:
def plot_pareto_front(data):
    """Draw a scatter plot of the given solutions in objective space and show it.

    Args:
        data: Collection of solutions exposing an ``objectives`` sequence.
            ``objectives[1]`` is plotted on the x-axis (labelled solution size)
            and ``objectives[0]`` on the y-axis (labelled classification error).
    """
    def objective_column(index):
        # Collect one objective across all solutions.
        return [solution.objectives[index] for solution in data]

    sizes = objective_column(1)
    errors = objective_column(0)

    plt.scatter(sizes, errors, marker='o', color='b')
    plt.xlabel('Solution Size')
    plt.ylabel('Classification Error')
    plt.title('Final Pareto Front')
    plt.show()

In [16]:
def fast_non_dominated_sort(population):
    """Partition a population into successive non-dominated fronts.

    Every individual gets ``domination_count``, ``dominated_solutions`` and
    ``rank`` attributes assigned as a side effect.

    Args:
        population: Re-iterable collection of individuals exposing
            ``dominate(other)``.

    Returns:
        list: Fronts ordered by rank (index 0 holds the non-dominated
        individuals). The list always ends with an empty front.
    """
    first_front = []
    for candidate in population:
        candidate.domination_count = 0
        candidate.dominated_solutions = set()

        # Compare the candidate against every individual in the population.
        for other in population:
            if candidate.dominate(other):
                candidate.dominated_solutions.add(other)
            elif other.dominate(candidate):
                candidate.domination_count += 1

        if candidate.domination_count == 0:
            candidate.rank = 0
            first_front.append(candidate)

    fronts = [first_front]
    current = first_front
    while current:
        # The front being built is appended next, so its rank is the current length.
        next_rank = len(fronts)
        following = []
        for member in current:
            for dominated in member.dominated_solutions:
                dominated.domination_count -= 1
                if dominated.domination_count == 0:
                    dominated.rank = next_rank
                    following.append(dominated)

        fronts.append(following)
        current = following
    return fronts


In [17]:
# NOTE: the gene values are random placeholders (not real solutions); only the objectives come from the row.
def create_chromosome(row, num_features):
    """Wrap one record of objective values in a ``Chromosome``.

    Args:
        row: Record indexable by 'Classification-error' and 'Solution-size'.
        num_features: Length of the randomly drawn 0/1 gene vector.

    Returns:
        Chromosome: Instance built from random genes whose ``objectives`` are
        ``[classification error, solution size]`` read from ``row``.
    """
    random_genes = np.random.randint(2, size=num_features)
    individual = Chromosome(random_genes)
    error = row['Classification-error']
    size = row['Solution-size']
    individual.objectives = [error, size]
    return individual

In [18]:
def calc_true_pareto_front(U_PF_dataset_path, num_features):
    """Read reference solutions from a CSV file and return their first front.

    Args:
        U_PF_dataset_path: Path of the CSV file; each row is converted into a
            chromosome by ``create_chromosome``.
        num_features: Forwarded to ``create_chromosome`` for every row.

    Returns:
        list: The ``objectives`` of each chromosome in the first front produced
        by ``fast_non_dominated_sort``.
    """
    reference_table = pd.read_csv(U_PF_dataset_path)
    # One chromosome per CSV row; ``args`` hands num_features to the row function.
    candidates = reference_table.apply(create_chromosome, axis=1, args=(num_features,))
    first_front = fast_non_dominated_sort(candidates)[0]
    return [member.objectives for member in first_front]

In [19]:
def plot_values(current_solutions, igd_values, hypervolume_values, dataset_name=None):
    """Show the final front next to the IGD and hypervolume histories.

    Draws a 1x3 figure: a scatter of the solutions in objective space, the IGD
    series and the hypervolume series (each legend carries mean and std).

    Args:
        current_solutions: Solutions exposing an ``objectives`` sequence;
            index 1 is plotted on x and index 0 on y.
        igd_values: Sequence of IGD values to plot.
        hypervolume_values: Sequence of hypervolume values to plot.
        dataset_name: Optional name appended to the front's title. When omitted,
            a module-level ``dataset_name`` is used if one exists; otherwise
            the title carries no dataset suffix.
    """
    if dataset_name is None:
        # Backward compatibility: this function used to read a module-level
        # ``dataset_name``. Honour it when present instead of raising NameError.
        dataset_name = globals().get("dataset_name")
    if dataset_name is None:
        front_title = 'Final Pareto front'
    else:
        front_title = f'Final Pareto front for {dataset_name}'

    plt.figure(figsize=(15, 5))

    # Panel 1: solutions in objective space.
    plt.subplot(1, 3, 1)
    x_values = [item.objectives[1] for item in current_solutions]
    y_values = [item.objectives[0] for item in current_solutions]
    plt.scatter(x_values, y_values, marker='o', color='b')
    plt.xlabel('Solution Size')
    plt.ylabel('Classification Error')
    plt.title(front_title)

    # Panel 2: IGD history.
    plt.subplot(1, 3, 2)
    plt.plot(igd_values, color='r', label=f'Mean: {np.mean(igd_values):.2f}\nStd: {np.std(igd_values):.2f}')
    plt.title('IGD')
    plt.legend()

    # Panel 3: hypervolume history.
    plt.subplot(1, 3, 3)
    plt.plot(hypervolume_values, color='g', label=f'Mean: {np.mean(hypervolume_values):.2f}\nStd: {np.std(hypervolume_values):.2f}')
    plt.title('Hypervolume')
    plt.legend()

    plt.tight_layout()
    plt.show()

In [20]:
def run_nsga_on_dataset(dataset_name, config_file_path="config.json",
                        datasets_dir="Datasets", pareto_dir="union-pareto",
                        num_selected_features=100):
    """Load one dataset, configure NSGA-II for it and run the optimisation.

    Args:
        dataset_name: File name of the dataset; also the key passed to
            ``NSGAConfig.get_parameters`` and the file name looked up in
            ``pareto_dir``.
        config_file_path: Path handed to ``NSGAConfig``.
        datasets_dir: Directory containing the dataset file.
        pareto_dir: Directory containing the CSV used by
            ``calc_true_pareto_front``.
        num_selected_features: Number of features requested from
            ``select_features_rf``.

    Returns:
        tuple: ``(current_solutions, igd_values, hypervolume_values)`` as
        returned by ``NSGAII.nsga2``.
    """
    import os  # local import: the notebook's import cell does not bring in os

    dataset_path = os.path.join(datasets_dir, dataset_name)
    dp = DataProcessor(dataset_path)
    dp.load_data()

    config = NSGAConfig(config_file_path)
    # Q is part of the configuration tuple but is not used by this runner.
    (population_size, _Q, LP, igd_threshold, hv_threshold,
     no_improvement_limit, maxFEs) = config.get_parameters(dataset_name)

    # Split the loaded frame into the label vector and the feature matrix.
    y = np.array(dp.dataset["LABEL"])
    X = np.array(dp.dataset.drop(columns="LABEL"))
    selected_features = select_features_rf(X, y, num_features=num_selected_features)
    print(len(selected_features))

    num_features = dp.num_features()
    U_PF_dataset_path = os.path.join(pareto_dir, dataset_name)
    true_pareto_front = calc_true_pareto_front(U_PF_dataset_path, num_features)
    print("number of features: ", num_features)

    nsga2 = NSGAII(X, y, population_size, LP, num_features, true_pareto_front,
                   selected_features, maxFEs, no_improvement_limit,
                   igd_threshold, hv_threshold)
    current_solutions, igd_values, hypervolume_values = nsga2.nsga2()
    return current_solutions, igd_values, hypervolume_values

In [21]:
def run_different_dataset(dataset_name):
    """Run NSGA-II on one dataset, record its statistics and plot the results.

    Appends one entry per call to the module-level lists ``start_times``,
    ``end_times``, ``elapsed_times``, ``generations``, ``hv_mean``,
    ``igd_mean``, ``hv_std`` and ``igd_std``.

    Args:
        dataset_name: Dataset file name forwarded to ``run_nsga_on_dataset``.
    """
    start_time = time.time()
    current_solutions, igd_values, hypervolume_values = run_nsga_on_dataset(dataset_name)
    end_time = time.time()

    # Record wall-clock start/end so the summary table has a row for this run.
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    start_times.append(datetime.fromtimestamp(start_time).strftime(timestamp_format))
    end_times.append(datetime.fromtimestamp(end_time).strftime(timestamp_format))
    # Duration in minutes; true division keeps the fraction (floor division
    # reported 0 for any run shorter than a minute).
    elapsed_times.append((end_time - start_time) / 60)

    generations.append(len(igd_values))
    hv_mean.append(np.mean(hypervolume_values))
    igd_mean.append(np.mean(igd_values))
    hv_std.append(np.std(hypervolume_values))
    igd_std.append(np.std(igd_values))

    plot_values(current_solutions, igd_values, hypervolume_values)
    

In [22]:
# Dataset file names available for the experiments.
datasets = ["DS02.csv", "DS04.csv", "DS05.csv","DS07.csv", "DS08.csv", "DS10.csv"]

# Timing columns of the summary table.
elapsed_times = []
start_times = []
end_times = []

# Per-run statistics that run_different_dataset appends to; they must exist
# before the first run, otherwise the run ends with a NameError.
generations = []
hv_mean = []
igd_mean = []
hv_std = []
igd_std = []

In [None]:
# Run NSGA-II on a single dataset; the run's statistics are recorded and its results plotted.
run_different_dataset("DS02.csv")

In [20]:
from tabulate import tabulate
# Build one row per dataset. zip() stops at the shortest list, so only
# datasets that have recorded start/end/duration values appear in the table.
data = list(zip(datasets, start_times, end_times, elapsed_times))
# Render the rows as a grid-style text table and print it.
table = tabulate(data, headers=['Dataset','Start', 'End', 'Duration'], tablefmt='grid')
print(table)

+-----------+---------------------+---------------------+------------+
| Dataset   | Start               | End                 |   Duration |
| DS02.csv  | 2024-01-05 21:33:16 | 2024-01-05 21:33:56 |   0.666889 |
+-----------+---------------------+---------------------+------------+
| DS04.csv  | 2024-01-05 21:35:03 | 2024-01-05 21:41:04 |   6.01673  |
+-----------+---------------------+---------------------+------------+
