# Möbius vs. Capacities: fitness function comparison
In this notebook, we compare the performance of the fitness function when it is driven by Möbius measures versus capacities. We test on both dynamically generated positive Gaussian data and the `iris` dataset.

In [None]:
# Import basic libraries
from palmerpenguins import load_penguins
from sklearn.datasets import load_iris
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style="darkgrid")
import time
from sklearn.preprocessing import LabelEncoder
import os
# Switch to the project root before the project imports in the next cell
# (presumably so that the `fuzz` package resolves -- TODO confirm).
# The location can be overridden with the FUZZSIM_ROOT environment variable;
# the original developer path is kept as the fallback so existing setups
# behave exactly as before.
os.chdir(os.environ.get("FUZZSIM_ROOT", "/Users/hoangthuyduongvu/Desktop/FuzzSIM"))


In [None]:
# Import personalized libraries
from fuzz.src.capacity import *
from fuzz.choquet.choquet import *
from fuzz.src.norm import *
from fuzz.src.knn import KNNFuzz
from fuzz.src.sim import S1, S2, S3
from fuzz.optim import *
from fuzz.utils import *
from fuzz.eval import leave_one_out
from fuzz.dataloader import *
from fuzz.choquet.d_choquet import *

## Dynamic Positive Gaussian Data

In [None]:
# Synthetic benchmark: request 3 classes with 50 points per class in
# 3 dimensions, using a fixed seed (42) for the generator.
data, labels = dynamic_generate_positive_gaussian_data(dim=3, nb_classes=3, nb_points_per_class=50, seed=42)

# Normalise with the project's batch_norm helper
# (exact scaling is defined in the project code -- TODO confirm).
data = batch_norm(data)

# Preview the first five rows.
data[:5]

In [None]:
# Build the pool of candidate Möbius measures (and their capacities) for training.
features = [*range(len(data[0]))]    # one integer id per feature column

nb_total = 5    # number of freshly generated seed measures

# Seed measures: generate each one, then drop its first entry
# (presumably the empty-set term -- TODO confirm against generate_mobius).
lst_mobius = []
for _ in range(nb_total):
    seed_measure = generate_mobius(features, 2)
    seed_measure.pop(0)
    lst_mobius.append(seed_measure)

# One mutated variant per seed measure; appending them doubles the pool.
mutated = [mutate(measure, mutation_rate=0.8) for measure in lst_mobius]
lst_mobius.extend(mutated)

# Cross-overs between randomly drawn pool members. The loop bounds only decide
# HOW MANY children are produced; both parents are picked with np.random.randint.
pool_size = len(lst_mobius)
crossed_over = []
for i in range(pool_size):
    for _ in range(i + 1, pool_size // 2):
        first_parent = lst_mobius[np.random.randint(pool_size)]
        second_parent = lst_mobius[np.random.randint(pool_size)]
        crossed_over.append(crossover(first_parent, second_parent))

# Children join the pool after all of them have been created.
lst_mobius.extend(crossed_over)
print(f"Total number of Möbius measures: {len(lst_mobius)}")

# Capacity counterpart of every measure, index-aligned with lst_mobius.
capacities_list = [mobius_to_capacity(measure, features) for measure in lst_mobius]
print('Test Möbius measures completed!')
print(f"Total number of capacities: {len(capacities_list)}")

### Test on Möbius


In [None]:
# Hyperparameter grid and evaluation settings for the Möbius run
p = np.arange(0.25, 3, 0.25)  # candidate p values for the Choquet integral
q = np.arange(0.25, 3, 0.25)  # candidate q values for the Choquet integral
sim = S2  # similarity measure
choquet_version = 'linear'
verbose = False
eval_type = 'crossval'  # evaluation protocol
sim_agent = 'mobius'  # candidates are passed as Möbius measures

# Per grid point we keep: index of the winning candidate, its accuracy, the measure itself
best_indexes, best_accuracies, best_mobius = [], [], []

# Arguments that stay the same for every (p, q) combination
shared_kwargs = dict(
    DS=(data, labels),
    sim=sim,
    choquet_version=choquet_version,
    time_counter=False,
    verbose=verbose,
    eval_type=eval_type,
    sim_agent=sim_agent,
)
n_combinations = len(p) * len(q)

print("Starting training loop...")
start_time = time.time()
i = 0
for p_val in p:
    for q_val in q:
        i += 1
        print(f"Training iteration {i}/{n_combinations}: p={p_val}, q={q_val}")
        FF_res = fitness_function(capacities_list=lst_mobius, p=p_val, q=q_val, **shared_kwargs)
        # The smallest fitness value wins; its negation is stored as the accuracy.
        winner = np.argmin(FF_res)
        best_indexes.append(winner)
        best_accuracies.append(-FF_res[winner])
        best_mobius.append(lst_mobius[winner])

end_time = time.time()
print(f"Training loop completed in {end_time - start_time:.2f} seconds.")

In [None]:
# Pick the (p, q) grid point whose winning candidate reached the highest accuracy
best_index = np.argmax(best_accuracies)
best_accuracy = best_accuracies[best_index]
print(f"Best index: {best_index}, Best accuracy: {best_accuracy}")

# Möbius measure that won at that grid point
mobius = best_mobius[best_index]
print(f"Best Möbius measure: {[f'{m.X}: {m.mu:.3f}' for m in mobius]}")

# Convert the winning Möbius measure to a capacity and list its values
best_capacity = mobius_to_capacity(mobius, features)
print("\nBest Capacity:")
for c in best_capacity:
    print(f"Capacity of {c.X} is {c.mu:.3f}")

# Re-evaluate the winning capacity with FuzzLOO under fixed settings
# (S1, d_choquet, p = q = 1). NOTE: this overwrites `best_accuracy` from above.
best_accuracy = FuzzLOO(
    DS=(data, labels),
    mu=best_capacity,
    sim=S1,
    choquet_version='d_choquet',
    p=1,
    q=1,
    time_counter=True
)
print(f"Best accuracy: {best_accuracy:.4f}")

# Plot the best accuracy obtained at each grid point, in iteration order
plt.figure(figsize=(10, 6))
plt.plot(best_accuracies, marker='o', linestyle='-', color='b')
plt.title("Best Accuracies")
plt.xlabel("Iterations")
plt.ylabel("Accuracy")
# A bare plt.grid() TOGGLES the grid; the seaborn "darkgrid" theme already has it
# on, so the bare call would switch it off. Request it explicitly instead.
plt.grid(True)
plt.show()

### Test on Capacities

In [None]:
# Hyperparameter grid and evaluation settings for the capacity run
p = np.arange(0.25, 3, 0.25)  # candidate p values for the Choquet integral
q = np.arange(0.25, 3, 0.25)  # candidate q values for the Choquet integral
sim = S2  # similarity measure
choquet_version = 'd_choquet'
verbose = False
eval_type = 'crossval'  # evaluation protocol
sim_agent = 'capacity'  # candidates are passed as capacities

# Per grid point we keep: index of the winning candidate, its accuracy, and the
# Möbius measure stored at the same index as the winning capacity
best_indexes, best_accuracies, best_mobius = [], [], []

# Arguments that stay the same for every (p, q) combination
shared_kwargs = dict(
    DS=(data, labels),
    sim=sim,
    choquet_version=choquet_version,
    time_counter=False,
    verbose=verbose,
    eval_type=eval_type,
    sim_agent=sim_agent,
)
n_combinations = len(p) * len(q)

print("Starting training loop...")
start_time = time.time()
i = 0
for p_val in p:
    for q_val in q:
        i += 1
        print(f"Training iteration {i}/{n_combinations}: p={p_val}, q={q_val}")
        FF_res = fitness_function(capacities_list=capacities_list, p=p_val, q=q_val, **shared_kwargs)
        # The smallest fitness value wins; its negation is stored as the accuracy.
        winner = np.argmin(FF_res)
        best_indexes.append(winner)
        best_accuracies.append(-FF_res[winner])
        best_mobius.append(lst_mobius[winner])

end_time = time.time()
print(f"Training loop completed in {end_time - start_time:.2f} seconds.")

In [None]:
# Pick the (p, q) grid point whose winning candidate reached the highest accuracy
best_index = np.argmax(best_accuracies)
best_accuracy = best_accuracies[best_index]
print(f"Best index: {best_index}, Best accuracy: {best_accuracy}")

# Möbius measure stored for the winning candidate
mobius = best_mobius[best_index]
print(f"Best Möbius measure: {[f'{m.X}: {m.mu:.3f}' for m in mobius]}")

# FuzzLOO is meant to evaluate a capacity here, so convert the stored Möbius
# measure first (the Möbius test cell does the same) rather than passing the
# raw Möbius measure as `mu`.
best_capacity = mobius_to_capacity(mobius, features)

# Re-evaluate the winning capacity with FuzzLOO under fixed settings
# (S1, d_choquet, p = q = 1). NOTE: this overwrites `best_accuracy` from above.
best_accuracy = FuzzLOO(
    DS=(data, labels),
    mu=best_capacity,
    sim=S1,
    choquet_version='d_choquet',
    p=1,
    q=1,
    time_counter=True
)
print(f"Best accuracy: {best_accuracy:.4f}")

# Plot the best accuracy obtained at each grid point, in iteration order
plt.figure(figsize=(10, 6))
plt.plot(best_accuracies, marker='o', linestyle='-', color='b')
plt.title("Best Accuracies")
plt.xlabel("Iterations")
plt.ylabel("Accuracy")
# A bare plt.grid() TOGGLES the grid; the seaborn "darkgrid" theme already has it
# on, so the bare call would switch it off. Request it explicitly instead.
plt.grid(True)
plt.show()

## `Iris` Dataset

In [None]:
# Load the Iris data set into a DataFrame: feature columns first, 'target' last
iris_data = load_iris()
iris = pd.DataFrame(iris_data.data, columns=iris_data.feature_names)
iris['target'] = iris_data.target

# Every column except the last is a feature; the last column is the class label
feature_values = iris.iloc[:, :-1].values
labels = iris.iloc[:, -1].values

# Cast to float32, then normalise with the project's batch_norm helper
data = batch_norm(np.array(feature_values, dtype=np.float32))

# Preview the first five rows.
data[:5]

In [None]:
# Build the pool of candidate Möbius measures (and their capacities) for training.
features = [*range(len(data[0]))]    # one integer id per feature column

nb_total = 5    # number of freshly generated seed measures

# Seed measures: generate each one, then drop its first entry
# (presumably the empty-set term -- TODO confirm against generate_mobius).
lst_mobius = []
for _ in range(nb_total):
    seed_measure = generate_mobius(features, 2)
    seed_measure.pop(0)
    lst_mobius.append(seed_measure)

# One mutated variant per seed measure; appending them doubles the pool.
mutated = [mutate(measure, mutation_rate=0.8) for measure in lst_mobius]
lst_mobius.extend(mutated)

# Cross-overs between randomly drawn pool members. The loop bounds only decide
# HOW MANY children are produced; both parents are picked with np.random.randint.
pool_size = len(lst_mobius)
crossed_over = []
for i in range(pool_size):
    for _ in range(i + 1, pool_size // 2):
        first_parent = lst_mobius[np.random.randint(pool_size)]
        second_parent = lst_mobius[np.random.randint(pool_size)]
        crossed_over.append(crossover(first_parent, second_parent))

# Children join the pool after all of them have been created.
lst_mobius.extend(crossed_over)
print(f"Total number of Möbius measures: {len(lst_mobius)}")

# Capacity counterpart of every measure, index-aligned with lst_mobius.
capacities_list = [mobius_to_capacity(measure, features) for measure in lst_mobius]
print('Test Möbius measures completed!')
print(f"Total number of capacities: {len(capacities_list)}")

### Test on Möbius

In [None]:
# Hyperparameter grid and evaluation settings for the Möbius run
p = np.arange(0.25, 3, 0.25)  # candidate p values for the Choquet integral
q = np.arange(0.25, 3, 0.25)  # candidate q values for the Choquet integral
sim = S2  # similarity measure
choquet_version = 'linear'
verbose = False
eval_type = 'crossval'  # evaluation protocol
sim_agent = 'mobius'  # candidates are passed as Möbius measures

# Per grid point we keep: index of the winning candidate, its accuracy, the measure itself
best_indexes, best_accuracies, best_mobius = [], [], []

# Arguments that stay the same for every (p, q) combination
shared_kwargs = dict(
    DS=(data, labels),
    sim=sim,
    choquet_version=choquet_version,
    time_counter=False,
    verbose=verbose,
    eval_type=eval_type,
    sim_agent=sim_agent,
)
n_combinations = len(p) * len(q)

print("Starting training loop...")
start_time = time.time()
i = 0
for p_val in p:
    for q_val in q:
        i += 1
        print(f"Training iteration {i}/{n_combinations}: p={p_val}, q={q_val}")
        FF_res = fitness_function(capacities_list=lst_mobius, p=p_val, q=q_val, **shared_kwargs)
        # The smallest fitness value wins; its negation is stored as the accuracy.
        winner = np.argmin(FF_res)
        best_indexes.append(winner)
        best_accuracies.append(-FF_res[winner])
        best_mobius.append(lst_mobius[winner])

end_time = time.time()
print(f"Training loop completed in {end_time - start_time:.2f} seconds.")

In [None]:
# Pick the (p, q) grid point whose winning candidate reached the highest accuracy
best_index = np.argmax(best_accuracies)
best_accuracy = best_accuracies[best_index]
print(f"Best index: {best_index}, Best accuracy: {best_accuracy}")

# Möbius measure that won at that grid point
mobius = best_mobius[best_index]
print(f"Best Möbius measure: {[f'{m.X}: {m.mu:.3f}' for m in mobius]}")

# Convert the winning Möbius measure to a capacity and list its values
best_capacity = mobius_to_capacity(mobius, features)
print("\nBest Capacity:")
for c in best_capacity:
    print(f"Capacity of {c.X} is {c.mu:.3f}")

# Re-evaluate the winning capacity with FuzzLOO under fixed settings
# (S1, d_choquet, p = q = 1). NOTE: this overwrites `best_accuracy` from above.
best_accuracy = FuzzLOO(
    DS=(data, labels),
    mu=best_capacity,
    sim=S1,
    choquet_version='d_choquet',
    p=1,
    q=1,
    time_counter=True
)
print(f"Best accuracy: {best_accuracy:.4f}")

# Plot the best accuracy obtained at each grid point, in iteration order
plt.figure(figsize=(10, 6))
plt.plot(best_accuracies, marker='o', linestyle='-', color='b')
plt.title("Best Accuracies")
plt.xlabel("Iterations")
plt.ylabel("Accuracy")
# A bare plt.grid() TOGGLES the grid; the seaborn "darkgrid" theme already has it
# on, so the bare call would switch it off. Request it explicitly instead.
plt.grid(True)
plt.show()

### Test on Capacities

In [None]:
# Hyperparameter grid and evaluation settings for the capacity run
p = np.arange(0.25, 3, 0.25)  # candidate p values for the Choquet integral
q = np.arange(0.25, 3, 0.25)  # candidate q values for the Choquet integral
sim = S2  # similarity measure
choquet_version = 'd_choquet'
verbose = False
eval_type = 'crossval'  # evaluation protocol
sim_agent = 'capacity'  # candidates are passed as capacities

# Per grid point we keep: index of the winning candidate, its accuracy, and the
# Möbius measure stored at the same index as the winning capacity
best_indexes = []
best_accuracies = []
best_mobius = []
print("Starting training loop...")
start_time = time.time()
i = 0
for p_val in p:
    for q_val in q:
        i += 1
        print(f"Training iteration {i}/{len(p) * len(q)}: p={p_val}, q={q_val}")
        FF_res = fitness_function(
            # With sim_agent='capacity' the candidates must be the converted
            # capacities, not the raw Möbius measures (the Gaussian capacity
            # run already passes capacities_list here).
            capacities_list=capacities_list,
            DS=(data, labels),
            sim=sim,
            choquet_version=choquet_version,
            p=p_val,
            q=q_val,
            time_counter=False,
            verbose=verbose,
            eval_type=eval_type,
            sim_agent=sim_agent
        )
        # The smallest fitness value wins; its negation is stored as the accuracy.
        winner = np.argmin(FF_res)
        best_indexes.append(winner)
        best_accuracies.append(-FF_res[winner])
        # capacities_list is index-aligned with lst_mobius, so this is the
        # Möbius measure behind the winning capacity.
        best_mobius.append(lst_mobius[winner])

end_time = time.time()
print(f"Training loop completed in {end_time - start_time:.2f} seconds.")

In [None]:
# Pick the (p, q) grid point whose winning candidate reached the highest accuracy
best_index = np.argmax(best_accuracies)
best_accuracy = best_accuracies[best_index]
print(f"Best index: {best_index}, Best accuracy: {best_accuracy}")

# Möbius measure stored for the winning candidate
mobius = best_mobius[best_index]
print(f"Best Möbius measure: {[f'{m.X}: {m.mu:.3f}' for m in mobius]}")

# FuzzLOO is meant to evaluate a capacity here, so convert the stored Möbius
# measure first (the Möbius test cell does the same) rather than passing the
# raw Möbius measure as `mu`.
best_capacity = mobius_to_capacity(mobius, features)

# Re-evaluate the winning capacity with FuzzLOO under fixed settings
# (S1, d_choquet, p = q = 1). NOTE: this overwrites `best_accuracy` from above.
best_accuracy = FuzzLOO(
    DS=(data, labels),
    mu=best_capacity,
    sim=S1,
    choquet_version='d_choquet',
    p=1,
    q=1,
    time_counter=True
)
print(f"Best accuracy: {best_accuracy:.4f}")

# Plot the best accuracy obtained at each grid point, in iteration order
plt.figure(figsize=(10, 6))
plt.plot(best_accuracies, marker='o', linestyle='-', color='b')
plt.title("Best Accuracies")
plt.xlabel("Iterations")
plt.ylabel("Accuracy")
# A bare plt.grid() TOGGLES the grid; the seaborn "darkgrid" theme already has it
# on, so the bare call would switch it off. Request it explicitly instead.
plt.grid(True)
plt.show()