In [51]:
import argparse
from collections import defaultdict
from pathlib import Path
from typing import List, Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from joblib import Parallel, delayed
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import Isomap, LocallyLinearEmbedding
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from mcda.dataset import MCDADataset
from mcda.report import calculate_heuristics
from mcda.uta import Criterion, check_uta_feasibility
from methods.mvu import MaximumVarianceUnfolding

In [111]:
# Load the dataset from its CSV file through the project's MCDADataset reader.
dataset = MCDADataset.read_csv("./data/regions-italy/dataset.csv")
# Keep a short alias for the dataset's `data` attribute; the cells below use it
# through `.index` and `.loc`, one row per alternative.
# NOTE(review): presumably a pandas DataFrame with one column per criterion — confirm.
df = dataset.data

In [113]:
def is_dominating(a_values: List[float], b_values: List[float]) -> bool:
    """Return True when ``a_values`` dominates ``b_values``.

    Domination here means that every paired value of ``a_values`` is greater
    than or equal to its counterpart in ``b_values`` and at least one pair is
    strictly greater. Values are paired positionally with ``zip``, so any
    surplus elements of the longer sequence are ignored.

    Args:
        a_values: Values of the candidate dominating alternative.
        b_values: Values of the alternative being compared against.

    Returns:
        True if ``a_values`` dominates ``b_values``, otherwise False
        (including when both sequences are equal or empty).
    """
    found_strict_gain = False
    for left, right in zip(a_values, b_values):
        # A single pair that fails ``left >= right`` rules domination out.
        if not left >= right:
            return False
        if left > right:
            found_strict_gain = True
    return found_strict_gain

# Find every ordered pair (alt_1, alt_2) of distinct alternatives in `df`
# where the row of alt_1 dominates the row of alt_2 (see `is_dominating`).
# Each dominating pair is printed and stored in `domination_org`.

# Convert each row to a plain list once up front; doing the `.loc` lookup and
# `.tolist()` inside the pair loop would repeat that work for every pair.
_row_values = {alt: df.loc[alt, :].tolist() for alt in df.index}

domination_org = []
for alt_1 in df.index:
    for alt_2 in df.index:
        # An alternative is never compared with itself.
        if alt_1 == alt_2:
            continue

        if is_dominating(_row_values[alt_1], _row_values[alt_2]):
            print(alt_1, " > ", alt_2)
            domination_org.append((alt_1, alt_2))

# Summary line: total number of domination pairs found in the original data.
print(f"Oryginalnie wszystkich dominacji: {len(domination_org)}")

# Number of output dimensions requested from every reduction method.
N = 2


def _scaled(step_name, reducer):
    """Build a two-step pipeline: a fresh StandardScaler, then ``reducer``.

    ``step_name`` is the pipeline step name under which ``reducer`` is stored.
    """
    return Pipeline([("scaler", StandardScaler()), (step_name, reducer)])


# Dimensionality-reduction pipelines, keyed by the label printed in the report.
methods = {
    "PCA": _scaled("pca", PCA(n_components=N, random_state=42)),
    "KernelPCA": _scaled("kpca", KernelPCA(n_components=N, random_state=42)),
    "LLE": _scaled("lle", LocallyLinearEmbedding(n_components=N, random_state=42)),
    "Isomap": _scaled("isomap", Isomap(n_components=N)),
    "MVU": _scaled("mvu", MaximumVarianceUnfolding(n_components=N, seed=42)),
}

# Stand-alone scaler + 2-component PCA pipeline; it is not used by the code
# visible in this cell.
pipeline = _scaled("pca", PCA(n_components=2, random_state=42))

# For each reduction method: embed the data into N dimensions, then count how
# many of the original domination pairs no longer hold between the embedded rows.
# NOTE(review): the comparison uses the raw embedded coordinates; confirm that
# the orientation of the embedding axes is meaningful for a dominance check.
for method_name, method in methods.items():
    embedded = method.fit_transform(df)
    dim_labels = [f"Dim {i+1}" for i in range(N)]
    df_m = pd.DataFrame(embedded, index=df.index, columns=dim_labels)

    counter = 0
    for alt_1, alt_2 in domination_org:
        reduced_1 = df_m.loc[alt_1, :].tolist()
        reduced_2 = df_m.loc[alt_2, :].tolist()
        # Pairs whose domination survives the embedding are not counted.
        if is_dominating(reduced_1, reduced_2):
            continue
        counter += 1

    # Report line: method label padded to 12 characters, then the count of
    # domination pairs that were not preserved.
    print(f"{method_name.ljust(12, ' ')}niespełnionych dominacji: {counter}")

Villa Castelli  >  San Vito dei Normanni
Latiano  >  Ceglie Messapica
Latiano  >  Cisternino
Latiano  >  Ostuni
San Michele Salentino  >  Francavilla Fontana
San Michele Salentino  >  Ceglie Messapica
San Michele Salentino  >  Cisternino
San Michele Salentino  >  San Vito dei Normanni
San Michele Salentino  >  Ostuni
Oria  >  Francavilla Fontana
Oria  >  Ceglie Messapica
Oria  >  Cisternino
Oria  >  Ostuni
Francavilla Fontana  >  Ostuni
Carovigno  >  Ostuni
Torchiarolo  >  Mesagne
San Pietro Vernotico  >  Mesagne
Oryginalnie wszystkich dominacji: 17
PCA         niespełnionych dominacji: 0
KernelPCA   niespełnionych dominacji: 17
LLE         niespełnionych dominacji: 17
Isomap      niespełnionych dominacji: 14
MVU         niespełnionych dominacji: 10
