In [1]:
import pandas as pd
import numpy as np
from typing import Union

# To do
## Pareto Script
* <input type="checkbox"/> function
* <input type="checkbox"/> test
* <input type="checkbox"/> reporting (pdf)


In [4]:
def find_pareto_optimal_points(
    data: Union[pd.DataFrame, np.ndarray],
    accuracy_minimise: bool = True,
    fairness_minimise: bool = True
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Find Pareto-optimal points in the given dataset based on user preferences.

    Parameters:
    - data (Union[pd.DataFrame, np.ndarray]): 2D data with two columns representing accuracy and fairness scores.
    - accuracy_minimise (bool): Whether to minimize the first objective (accuracy).
    - fairness_minimise (bool): Whether to minimize the second objective (fairness score).

    Returns:
    - Union[pd.DataFrame, np.ndarray]: Pareto-optimal points from the input data.

    This function calculates the Pareto-optimal points in the dataset by comparing pairs of scores based on
    user preferences for minimization or maximization of objectives.
    """

    if isinstance(data, pd.DataFrame):
        data = data.to_numpy()

    # creates an empty list which will be appended later on
    pareto_points = []

    # iterating across accuracy and fairness
    for i, (accuracy_1, fair_1) in enumerate(data):
        # assume True until proven otherwise
        is_pareto = True

        # iterate across other data points/rows, j, to compare against current data point/row, i.
        for j, (accuracy_2, fair_2) in enumerate(data):
            if i == j:
                continue  # Skip self-comparison

            # Check Pareto dominance based on user preferences: minimise or maximise
            if (
                (accuracy_1 < accuracy_2 if accuracy_minimise else accuracy_1 > accuracy_2) and
                (fair_1 < fair_2 if fairness_minimise else fair_1 > fair_2)
            ):
                is_pareto = False
                break

        if is_pareto:
            pareto_points.append((accuracy_1, fair_1))

    return np.array(pareto_points)


In [40]:
# Sample data as a Pandas DataFrame
data = pd.DataFrame({
    'Accuracy': [0.9, 0.85, 0.92, 0.88, 0.94, 0.91, 0.91, 0.81, 0.87, 0.83],
    'Fairness': [0.75, 0.88, 0.81, 0.69, 0.82, 0.77, 0.69, 0.8, 0.83, 0.79]
})
data

Unnamed: 0,Accuracy,Fairness
0,0.9,0.75
1,0.85,0.88
2,0.92,0.81
3,0.88,0.69
4,0.94,0.82
5,0.91,0.77
6,0.91,0.69
7,0.81,0.8
8,0.87,0.83
9,0.83,0.79


In [41]:
# Find Pareto-optimal points with minimization of both objectives
pareto_points = find_pareto_optimal_points(data, accuracy_minimise=True, fairness_minimise=True)

print("Pareto-optimal points with minimization of both objectives:")
print(pareto_points)

Pareto-optimal points with minimization of both objectives:
[[0.85 0.88]
 [0.94 0.82]
 [0.87 0.83]]


In [42]:
# Find Pareto-optimal points with maximization of both objectives
pareto_points_max = find_pareto_optimal_points(data, accuracy_minimise=False, fairness_minimise=False)

print("\nPareto-optimal points with maximization of both objectives:")
print(pareto_points_max)


Pareto-optimal points with maximization of both objectives:
[[0.88 0.69]
 [0.91 0.69]
 [0.81 0.8 ]
 [0.83 0.79]]


In [35]:
from typing import Union, List, Dict, Tuple

def find_pareto_optimal_points_with_explanations(
    data: Union[pd.DataFrame, np.ndarray],
    accuracy_minimise: bool = True,
    fairness_minimise: bool = True
) -> Tuple[Union[pd.DataFrame, np.ndarray], List[Dict[str, str]]]:
    """
    Find Pareto-optimal points in the given dataset based on user preferences and provide explanations.

    Parameters:
    - data (Union[pd.DataFrame, np.ndarray]): 2D data with two columns representing accuracy and fairness scores.
    - accuracy_minimise (bool): Whether to minimize the first objective (accuracy).
    - fairness_minimise (bool): Whether to minimize the second objective (fairness score).

    Returns:
    - Tuple[Union[pd.DataFrame, np.ndarray], List[Dict[str, str]]]:
        - Pareto-optimal points from the input data.
        - List of explanations for each Pareto-optimal point.

    This function calculates the Pareto-optimal points in the dataset by comparing pairs of scores based on
    user preferences for minimization or maximization of objectives. It provides explanations for why each
    point is considered Pareto-optimal.
    """

    if isinstance(data, pd.DataFrame):
        data = data.to_numpy()

    pareto_points = []
    explanations = []

    for i, (accuracy_1, fair_1) in enumerate(data):
        is_pareto = True
        explanation = {
            'Point': f'Point {i + 1}',
            'Explanation': 'This point satisfies the following criteria:'
        }

        for j, (accuracy_2, fair_2) in enumerate(data):
            if i == j:
                continue  # Skip self-comparison

            # Check Pareto dominance based on user preferences
            if (
                (accuracy_1 < accuracy_2 if accuracy_minimise else accuracy_1 > accuracy_2) and
                (fair_1 < fair_2 if fairness_minimise else fair_1 > fair_2)
            ):
                is_pareto = False
                break
            else:
                explanation[f'Criteria {j + 1}'] = f'{accuracy_1=:.2f} vs. {accuracy_2=:.2f}, {fair_1=:.2f} vs. {fair_2=:.2f}'

        if is_pareto:
            pareto_points.append((accuracy_1, fair_1))
            explanations.append(explanation)

    return np.array(pareto_points), explanations


In [36]:
pareto_points, explanations = find_pareto_optimal_points_with_explanations(
    data, accuracy_minimise=True, fairness_minimise=True)

for point, explanation in zip(pareto_points, explanations):
    print(f"Pareto-optimal point: {point}")
    print("Explanation:")
    for key, value in explanation.items():
        if key != 'Point':
            print(f"  {key}: {value}")
    print()


Pareto-optimal point: [0.85 0.88]
Explanation:
  Explanation: This point satisfies the following criteria:
  Criteria 1: accuracy_1=0.85 vs. accuracy_2=0.90, fair_1=0.88 vs. fair_2=0.75
  Criteria 3: accuracy_1=0.85 vs. accuracy_2=0.92, fair_1=0.88 vs. fair_2=0.81
  Criteria 4: accuracy_1=0.85 vs. accuracy_2=0.88, fair_1=0.88 vs. fair_2=0.69
  Criteria 5: accuracy_1=0.85 vs. accuracy_2=0.94, fair_1=0.88 vs. fair_2=0.82
  Criteria 6: accuracy_1=0.85 vs. accuracy_2=0.91, fair_1=0.88 vs. fair_2=0.77
  Criteria 7: accuracy_1=0.85 vs. accuracy_2=0.91, fair_1=0.88 vs. fair_2=0.69
  Criteria 8: accuracy_1=0.85 vs. accuracy_2=0.81, fair_1=0.88 vs. fair_2=0.80
  Criteria 9: accuracy_1=0.85 vs. accuracy_2=0.87, fair_1=0.88 vs. fair_2=0.83
  Criteria 10: accuracy_1=0.85 vs. accuracy_2=0.83, fair_1=0.88 vs. fair_2=0.79

Pareto-optimal point: [0.94 0.82]
Explanation:
  Explanation: This point satisfies the following criteria:
  Criteria 1: accuracy_1=0.94 vs. accuracy_2=0.90, fair_1=0.82 vs. fair_2

In [37]:
from typing import Union, List, Dict, Tuple

def find_pareto_optimal_points_with_explanations(
    data: Union[pd.DataFrame, np.ndarray],
    objective1_minimize: bool = True,
    objective2_minimize: bool = True
) -> Tuple[Union[pd.DataFrame, np.ndarray], List[str]]:
    """
    Find Pareto-optimal points in the given dataset based on user preferences and provide explanations.

    Parameters:
    - data (Union[pd.DataFrame, np.ndarray]): 2D data with two columns representing accuracy and fairness scores.
    - objective1_minimize (bool): Whether to minimize the first objective (accuracy).
    - objective2_minimize (bool): Whether to minimize the second objective (fairness score).

    Returns:
    - Tuple[Union[pd.DataFrame, np.ndarray], List[str]]:
        - Pareto-optimal points from the input data.
        - List of explanations for each Pareto-optimal point.

    This function calculates the Pareto-optimal points in the dataset by comparing pairs of scores based on
    user preferences for minimization or maximization of objectives. It provides explanations for why each
    point is considered Pareto-optimal.
    """

    if isinstance(data, pd.DataFrame):
        data = data.to_numpy()

    pareto_points = []
    explanations = []

    for i, (acc1, fair1) in enumerate(data):
        is_pareto = True
        explanation = []

        for j, (acc2, fair2) in enumerate(data):
            if i == j:
                continue  # Skip self-comparison

            acc1_better = (acc1 < acc2 if objective1_minimize else acc1 > acc2)
            fair1_better = (fair1 < fair2 if objective2_minimize else fair1 > fair2)

            if acc1_better and fair1_better:
                explanation.append(f'Point {i + 1} ({acc1}, {fair1}) is better than Point {j + 1} in both accuracy ({acc2}) and fairness ({fair2}).')
            elif acc1_better:
                explanation.append(f'Point {i + 1} is better than Point {j + 1} in accuracy ({acc2}).')
            elif fair1_better:
                explanation.append(f'Point {i + 1} is better than Point {j + 1} in fairness ({fair2}).')

            if not (acc1_better or fair1_better):
                is_pareto = False

        if is_pareto:
            pareto_points.append((acc1, fair1))
            explanations.append('\n'.join(explanation))

    return np.array(pareto_points), explanations


In [43]:
# import numpy as np
# import pandas as pd

# # Sample data as a Pandas DataFrame
# data = pd.DataFrame({
#     'Accuracy': [0.9, 0.85, 0.92, 0.88, 0.94],
#     'Fairness': [0.95, 0.88, 0.91, 0.89, 0.92]
# })

# Find Pareto-optimal points with minimization of both objectives and get explanations
pareto_points, explanations = find_pareto_optimal_points_with_explanations(
    data, objective1_minimize=True, objective2_minimize=True)

print("-------------------------------------------------------------")
print("Pareto-optimal points with minimization of both objectives:")
print("-------------------------------------------------------------")
for i, (point, explanation) in enumerate(zip(pareto_points, explanations)):
    print(f"Point {i + 1}: {point}")
    print("Explanation:")
    print(explanation)
    print("**************************************************************")
    print()

# Find Pareto-optimal points with maximization of both objectives and get explanations
pareto_points_max, explanations_max = find_pareto_optimal_points_with_explanations(
    data, objective1_minimize=False, objective2_minimize=False)

print("-------------------------------------------------------------")
print("Pareto-optimal points with maximization of both objectives:")
print("-------------------------------------------------------------")
for i, (point, explanation) in enumerate(zip(pareto_points_max, explanations_max)):
    print(f"Point {i + 1}: {point}")
    print("Explanation:")
    print(explanation)
    print("**************************************************************")
    print()


-------------------------------------------------------------
Pareto-optimal points with minimization of both objectives:
-------------------------------------------------------------
Point 1: [0.88 0.69]
Explanation:
Point 4 (0.88, 0.69) is better than Point 1 in both accuracy (0.9) and fairness (0.75).
Point 4 is better than Point 2 in fairness (0.88).
Point 4 (0.88, 0.69) is better than Point 3 in both accuracy (0.92) and fairness (0.81).
Point 4 (0.88, 0.69) is better than Point 5 in both accuracy (0.94) and fairness (0.82).
Point 4 (0.88, 0.69) is better than Point 6 in both accuracy (0.91) and fairness (0.77).
Point 4 is better than Point 7 in accuracy (0.91).
Point 4 is better than Point 8 in fairness (0.8).
Point 4 is better than Point 9 in fairness (0.83).
Point 4 is better than Point 10 in fairness (0.79).
**************************************************************

Point 2: [0.81 0.8 ]
Explanation:
Point 8 is better than Point 1 in accuracy (0.9).
Point 8 (0.81, 0.8) is b