# Imports

In [3]:
from collections import defaultdict

import pandas as pd
import numpy as np
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import TSNE, MDS, Isomap
from sklearn.preprocessing import StandardScaler

from reader.reader import read_csv
from mcda.uta import Criterion
from mcda.report import calculate_heuristics
from methods.autoencoder import AutoencoderModel

# Read data

In [4]:
# Load the alternatives table and the criteria definitions from the CSV file.
df, CRITERIA = read_csv("data/s2.csv")
print(CRITERIA)
# Bare last expression so the notebook renders the frame.
df

[Criterion(name='g1', type=True, points=2), Criterion(name='g2', type=True, points=2), Criterion(name='g3', type=True, points=2), Criterion(name='g4', type=True, points=2), Criterion(name='g5', type=True, points=2)]


Unnamed: 0,g1,g2,g3,g4,g5
a01,-18342.0,-30.7,-37.2,2.33,3.0
a02,-15335.0,-30.2,-41.6,2.0,2.5
a03,-16973.0,-29.0,-34.9,2.66,2.5
a04,-15460.0,-30.4,-35.8,1.66,1.5
a05,-15131.0,-29.7,-35.6,1.66,1.75
a06,-13841.0,-30.8,-36.5,1.33,2.0
a07,-18971.0,-28.0,-35.6,2.33,2.0
a08,-18319.0,-28.9,-35.3,1.66,2.0
a09,-19800.0,-29.4,-34.7,2.0,1.75
a10,-16966.0,-30.0,-37.7,2.33,3.25


In [6]:
# Pairs of alternative labels (row index values of df) handed to
# calculate_heuristics as the reference preference information.
# NOTE(review): presumably each tuple reads (preferred, less preferred) — confirm against mcda.report.
PREFERENCES = [
    ('a08', 'a09'),
    ('a10', 'a03')
]

# Calculations

In [7]:
# Standardize the criteria columns before any dimensionality reduction is fitted.
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df)

# Experiment grid: values passed as Criterion(points=...) and the target
# embedding sizes passed as n_components / encoded_dim to each reducer.
available_points = [2, 3, 4]
n_components = [2, 3, 4]

In [8]:
# Baseline: heuristics computed on the original (unreduced) criteria,
# once for every points setting in the grid.
results_original = defaultdict(dict)
for points in available_points:
    print(f"points: {points}, method: original")
    criteria = [Criterion(column, points=points) for column in df.columns]
    f_nec, f_era, f_pwi, f_rai = calculate_heuristics(df, PREFERENCES, criteria)
    row_label = f"points: {points}"
    # Keys are (points label, metric name); insertion order fixes the row order
    # of the frame built from this mapping.
    results_original['original'].update({
        (row_label, 'f_nec'): f_nec,
        (row_label, 'f_era'): f_era,
        (row_label, 'f_pwi'): f_pwi,
        (row_label, 'f_rai'): f_rai,
    })

points: 2, method: original
points: 3, method: original
points: 4, method: original


In [9]:
# Rows are (points, metric) pairs; the single column holds the unreduced baseline.
df_results_original = pd.DataFrame(results_original)
df_results_original

Unnamed: 0,Unnamed: 1,original
points: 2,f_nec,24.0
points: 2,f_era,8.285714
points: 2,f_pwi,0.151608
points: 2,f_rai,2.097971
points: 3,f_nec,18.0
points: 3,f_era,9.785714
points: 3,f_pwi,0.181546
points: 3,f_rai,2.381436
points: 4,f_nec,10.0
points: 4,f_era,11.5


In [11]:
# Evaluate every (reduction method, embedding size) pair under each points setting.
#
# The embedding depends only on the method and on n, never on `points`, so each
# reducer is fitted once per (method, n) and the resulting frame is reused for
# all points values. Refitting inside the points loop tripled the training cost
# and, for reducers without a fixed seed (the autoencoder), compared different
# embeddings across the points rows of the same column.
#
# NOTE(review): the recorded run printed "Infeasible system." for MDS with
# 4 components and stored NaN for f_pwi / f_rai in that column — treat the
# f_nec / f_era values next to it with care.
results = defaultdict(dict)
for n in n_components:
    methods = {
        'PCA': PCA(n_components=n, random_state=42),
        'Autoencoder': AutoencoderModel(encoded_dim=n, epochs=300, batch_size=16),
        'KernelPCA': KernelPCA(n_components=n, random_state=42),
        # method='exact' is kept from the original configuration for every n.
        't-SNE': TSNE(n_components=n, perplexity=10, method='exact', random_state=42),
        'MDS': MDS(n_components=n, random_state=42),
        'Isomap': Isomap(n_components=n)
    }
    for method_name, method in methods.items():
        # Components become the new criteria columns; values are rounded to
        # 4 decimals through text formatting, exactly as before.
        df_m = pd.DataFrame(method.fit_transform(df_scaled), index=df.index, columns=range(n)).map(lambda x: f"{x:.4f}").astype(np.float64)
        column_key = (method_name, f"dims: {n}")
        for points in available_points:
            print(f"points: {points}, components: {n}, method: {method_name}")
            criteria = [Criterion(name, points=points) for name in df_m.columns]
            # Pass a copy so every evaluation starts from an untouched frame,
            # as it did when the frame was rebuilt on each iteration.
            f_nec, f_era, f_pwi, f_rai = calculate_heuristics(df_m.copy(), PREFERENCES, criteria)
            row_label = f"points: {points}"
            results[column_key].update({
                (row_label, 'f_nec'): f_nec,
                (row_label, 'f_era'): f_era,
                (row_label, 'f_pwi'): f_pwi,
                (row_label, 'f_rai'): f_rai,
            })

points: 2, components: 2, method: PCA
points: 2, components: 2, method: Autoencoder
points: 2, components: 2, method: KernelPCA
points: 2, components: 2, method: t-SNE
points: 2, components: 2, method: MDS
points: 2, components: 2, method: Isomap
points: 2, components: 3, method: PCA
points: 2, components: 3, method: Autoencoder
points: 2, components: 3, method: KernelPCA
points: 2, components: 3, method: t-SNE
points: 2, components: 3, method: MDS
points: 2, components: 3, method: Isomap
points: 2, components: 4, method: PCA
points: 2, components: 4, method: Autoencoder
points: 2, components: 4, method: KernelPCA
points: 2, components: 4, method: t-SNE
points: 2, components: 4, method: MDS
Infeasible system.

points: 2, components: 4, method: Isomap
points: 3, components: 2, method: PCA
points: 3, components: 2, method: Autoencoder
points: 3, components: 2, method: KernelPCA
points: 3, components: 2, method: t-SNE
points: 3, components: 2, method: MDS
points: 3, components: 2, method:

In [12]:
# Columns are (method, "dims: n") pairs and rows are (points, metric) pairs.
df_results = pd.DataFrame(results)
df_results

Unnamed: 0_level_0,Unnamed: 1_level_0,PCA,Autoencoder,KernelPCA,t-SNE,MDS,Isomap,PCA,Autoencoder,KernelPCA,t-SNE,MDS,Isomap,PCA,Autoencoder,KernelPCA,t-SNE,MDS,Isomap
Unnamed: 0_level_1,Unnamed: 1_level_1,dims: 2,dims: 2,dims: 2,dims: 2,dims: 2,dims: 2,dims: 3,dims: 3,dims: 3,dims: 3,dims: 3,dims: 3,dims: 4,dims: 4,dims: 4,dims: 4,dims: 4,dims: 4
points: 2,f_nec,84.0,75.0,48.0,52.0,49.0,68.0,63.0,31.0,34.0,31.0,53.0,50.0,41.0,32.0,32.0,16.0,182.0,13.0
points: 2,f_era,1.0,2.142857,5.571429,4.785714,5.357143,2.714286,3.571429,6.785714,7.357143,7.214286,4.642857,4.857143,6.0,7.142857,7.571429,8.857143,13.0,9.5
points: 2,f_pwi,0.019905,0.03881,0.172038,0.14286,0.176436,0.061366,0.08979,0.144171,0.181535,0.16821,0.092192,0.088232,0.119285,0.159816,0.158312,0.248183,,0.193152
points: 2,f_rai,0.472557,0.8865,2.048643,1.969907,2.23065,0.958893,1.5667,1.971929,2.34345,2.102757,1.622971,1.457786,1.913171,2.052679,2.216036,2.469643,,2.437693
points: 3,f_nec,62.0,62.0,48.0,43.0,49.0,50.0,39.0,18.0,24.0,29.0,39.0,25.0,30.0,13.0,15.0,11.0,35.0,11.0
points: 3,f_era,3.714286,3.785714,6.642857,7.285714,6.071429,6.142857,7.428571,9.5,9.785714,8.571429,7.071429,9.214286,9.0,10.285714,10.5,10.5,7.571429,11.285714
points: 3,f_pwi,0.096811,0.079618,0.163325,0.155192,0.156611,0.08909,0.150353,0.208332,0.209062,0.195608,0.147762,0.142399,0.180759,0.211025,0.168929,0.238848,0.102684,0.22206
points: 3,f_rai,1.426514,1.4384,2.073679,2.20655,2.24075,1.405193,2.072636,2.5077,2.551407,2.371657,2.117664,1.986807,2.394243,2.545329,2.325021,2.60255,1.744586,2.6133
points: 4,f_nec,59.0,62.0,48.0,43.0,49.0,48.0,42.0,10.0,23.0,28.0,32.0,22.0,29.0,12.0,13.0,10.0,26.0,12.0
points: 4,f_era,4.214286,4.714286,7.357143,8.214286,7.357143,6.714286,7.357143,11.285714,10.214286,9.785714,8.642857,10.214286,9.928571,11.142857,11.285714,11.5,9.071429,11.285714


# Original dataset

In [4]:
# Heuristics for the original (unreduced) data; the call returns a 4-tuple that
# other cells unpack as (f_nec, f_era, f_pwi, f_rai).
calculate_heuristics(df, PREFERENCES, CRITERIA)

Input to the sampler:
0 1 0 1 0 1 0 0 1 0 1 = 1
0 0 1 0 0 0 0 0 0 0 0 = 0
0 0 0 0 1 0 0 0 0 0 0 = 0
0 0 0 0 0 0 1 0 0 0 0 = 0
0 0 0 0 0 0 0 1 0 0 0 = 0
0 0 0 0 0 0 0 0 0 1 0 = 0
0 -1 1 0 0 0 0 0 0 0 0 <= 0
0 0 0 -1 1 0 0 0 0 0 0 <= 0
0 0 0 0 0 -1 1 0 0 0 0 <= 0
0 0 0 0 0 0 0 1 -1 0 0 <= 0
0 0 0 0 0 0 0 0 0 1 -1 <= 0
-1 0.1977 -0.1977 0.1786 -0.1786 -0.087 0.087 0.2557 -0.2557 -0.125 0.125 >= -0.0
-1 0.0009 -0.0009 -0.3572 0.3572 -0.4058 0.4058 0.2481 -0.2481 -0.375 0.375 >= -0.0
-1 -0.4013 0.4013 -0.1786 0.1786 0.6377 -0.6377 -0.2481 0.2481 -0.25 0.25 >= -0.0
1 0 0 0 0 0 0 0 0 0 0 >= 0.000001


(np.int64(36),
 np.float64(6.785714285714286),
 np.float64(0.1162368131868132),
 np.float64(1.8043142857142855))

# Standardization

In [5]:
# Same standardization as in the "Calculations" section; running this cell
# rebinds scaler and df_scaled to freshly fitted equivalents.
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df)

# PCA

In [6]:
# N_COMPONENTS is not assigned in any earlier cell of this notebook, so a
# "Restart Kernel -> Run All" stopped here with a NameError. The value 3 matches
# the three-column frame recorded as this cell's output.
N_COMPONENTS = 3

# Project the standardized data onto N_COMPONENTS principal components; the
# seed mirrors the PCA configuration used in the "Calculations" section.
pca = PCA(n_components=N_COMPONENTS, random_state=42)
# Values are rounded to 4 decimals through text formatting and parsed back to floats.
df_pca = pd.DataFrame(pca.fit_transform(df_scaled), index=df.index, columns=range(N_COMPONENTS)).map(lambda x: f"{x:.4f}").astype(np.float64)
# Bare last expression so the projected frame is displayed.
df_pca

Unnamed: 0,0,1,2
a01,0.2439,1.7736,-0.2748
a02,-1.6764,2.3254,-1.2425
a03,1.6384,0.3946,1.0339
a04,-1.7974,-0.964,0.4173
a05,-1.2544,-0.9294,0.6849
a06,-2.8053,-0.2102,0.7608
a07,1.9283,-0.872,-0.3193
a08,0.2463,-1.316,-0.2723
a09,0.7564,-1.4173,-0.4856
a10,0.406,2.1308,0.0992


In [7]:
# One criterion per PCA component column, each with points=2.
# NOTE(review): this rebinds CRITERIA, which until here held the criteria
# returned by read_csv; any cell run after this one sees the component-based list.
CRITERIA = [Criterion(name, points=2) for name in df_pca.columns]
calculate_heuristics(df_pca, PREFERENCES, CRITERIA)

Input to the sampler:
0 1 0 1 0 1 0 = 1
0 0 1 0 0 0 0 = 0
0 0 0 0 1 0 0 = 0
0 0 0 0 0 0 1 = 0
0 -1 1 0 0 0 0 <= 0
0 0 0 -1 1 0 0 <= 0
0 0 0 0 0 -1 1 <= 0
-1 -0.1077 0.1077 0.0271 -0.0271 0.0758 -0.0758 >= -0.0
-1 -0.2604 0.2604 0.4639 -0.4639 -0.3318 0.3318 >= -0.0
-1 0.4057 -0.4057 -0.1474 0.1474 0.3435 -0.3435 >= -0.0
1 0 0 0 0 0 0 >= 0.000001


(np.int64(66),
 np.float64(3.2857142857142856),
 np.float64(0.06945054945054946),
 np.float64(1.4326285714285716))

# t-SNE

In [8]:
# Embed the standardized data with t-SNE. t-SNE is stochastic, so a fixed
# random_state (the same seed as in the "Calculations" section) keeps the
# embedding and the heuristics derived from it repeatable across runs.
tsne = TSNE(n_components=N_COMPONENTS, perplexity=10, random_state=42)
# Values are rounded to 4 decimals through text formatting and parsed back to floats.
df_tsne = pd.DataFrame(tsne.fit_transform(df_scaled), index=df.index, columns=range(N_COMPONENTS)).map(lambda x: f"{x:.4f}").astype(np.float64)
# Build the criteria from this frame's own columns instead of depending on the
# CRITERIA list that the PCA cell happened to rebind earlier.
criteria_tsne = [Criterion(name, points=2) for name in df_tsne.columns]
calculate_heuristics(df_tsne, PREFERENCES, criteria_tsne)

Input to the sampler:
0 1 0 1 0 1 0 = 1
0 0 1 0 0 0 0 = 0
0 0 0 0 1 0 0 = 0
0 0 0 0 0 0 1 = 0
0 -1 1 0 0 0 0 <= 0
0 0 0 -1 1 0 0 <= 0
0 0 0 0 0 -1 1 <= 0
-1 0.5558 -0.5558 -0.1914 0.1914 -0.2794 0.2794 >= -0.0
-1 0.2231 -0.2231 -0.0325 0.0325 0.3872 -0.3872 >= -0.0
-1 0.4641 -0.4641 -0.3803 0.3803 -0.243 0.243 >= -0.0
1 0 0 0 0 0 0 >= 0.000001


(np.int64(42),
 np.float64(5.857142857142857),
 np.float64(0.12306813186813186),
 np.float64(1.8376785714285715))

# MDS

In [9]:
# Embed the standardized data with MDS. The fit starts from a random
# configuration, so a fixed random_state (the same seed as in the "Calculations"
# section) keeps the result repeatable across runs.
# NOTE(review): the recorded, unseeded run of this cell printed
# "Infeasible system." and returned None for the last two values.
mds = MDS(n_components=N_COMPONENTS, random_state=42)
# Values are rounded to 4 decimals through text formatting and parsed back to floats.
df_mds = pd.DataFrame(mds.fit_transform(df_scaled), index=df.index, columns=range(N_COMPONENTS)).map(lambda x: f"{x:.4f}").astype(np.float64)
# Build the criteria from this frame's own columns instead of depending on the
# CRITERIA list that the PCA cell happened to rebind earlier.
criteria_mds = [Criterion(name, points=2) for name in df_mds.columns]
calculate_heuristics(df_mds, PREFERENCES, criteria_mds)

Input to the sampler:
0 1 0 1 0 1 0 = 1
0 0 1 0 0 0 0 = 0
0 0 0 0 1 0 0 = 0
0 0 0 0 0 0 1 = 0
0 -1 1 0 0 0 0 <= 0
0 0 0 -1 1 0 0 <= 0
0 0 0 0 0 -1 1 <= 0
-1 0.2658 -0.2658 0.0773 -0.0773 -0.0773 0.0773 >= -0.0
-1 -0.5399 0.5399 0.2882 -0.2882 0.0751 -0.0751 >= -0.0
-1 -0.233 0.233 -0.4748 0.4748 -0.0158 0.0158 >= -0.0
1 0 0 0 0 0 0 >= 0.000001
Infeasible system.



(np.int64(182), np.float64(13.0), None, None)