# Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE, MDS
from sklearn.preprocessing import StandardScaler

from reader import read_csv
from uta import Criterion
from report import calculate_heuristics

# Read data

In [2]:
df, CRITERIA = read_csv("data/s2.csv")
print(CRITERIA)
df

[Criterion(name='g1', type=True, points=2), Criterion(name='g2', type=True, points=2), Criterion(name='g3', type=True, points=2), Criterion(name='g4', type=True, points=2), Criterion(name='g5', type=True, points=2)]


Unnamed: 0,g1,g2,g3,g4,g5
a01,-18342.0,-30.7,-37.2,2.33,3.0
a02,-15335.0,-30.2,-41.6,2.0,2.5
a03,-16973.0,-29.0,-34.9,2.66,2.5
a04,-15460.0,-30.4,-35.8,1.66,1.5
a05,-15131.0,-29.7,-35.6,1.66,1.75
a06,-13841.0,-30.8,-36.5,1.33,2.0
a07,-18971.0,-28.0,-35.6,2.33,2.0
a08,-18319.0,-28.9,-35.3,1.66,2.0
a09,-19800.0,-29.4,-34.7,2.0,1.75
a10,-16966.0,-30.0,-37.7,2.33,3.25


In [3]:
PREFERENCES = [
    ('a08', 'a09'),
    ('a10', 'a03'),
    ('a01', 'a02')
]
N_COMPONENTS = 3

# Calculations

In [None]:
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df)

n_components = [2, 3, 4]
for n in n_components:
    methods = [PCA(n_components=n_components), TSNE(n_components=N_COMPONENTS, perplexity=10), MDS(n_components=N_COMPONENTS)]
    for method in methods:
        df_m = pd.DataFrame(pca.fit_transform(df_scaled), index=df.index, columns=range(n)).map(lambda x: f"{x:.4f}").astype(np.float64)
        CRITERIA = [Criterion(name, points=2) for name in df_m.columns]

# Original dataset

In [4]:
calculate_heuristics(df, PREFERENCES, CRITERIA)

Input to the sampler:
0 1 0 1 0 1 0 0 1 0 1 = 1
0 0 1 0 0 0 0 0 0 0 0 = 0
0 0 0 0 1 0 0 0 0 0 0 = 0
0 0 0 0 0 0 1 0 0 0 0 = 0
0 0 0 0 0 0 0 1 0 0 0 = 0
0 0 0 0 0 0 0 0 0 1 0 = 0
0 -1 1 0 0 0 0 0 0 0 0 <= 0
0 0 0 -1 1 0 0 0 0 0 0 <= 0
0 0 0 0 0 -1 1 0 0 0 0 <= 0
0 0 0 0 0 0 0 1 -1 0 0 <= 0
0 0 0 0 0 0 0 0 0 1 -1 <= 0
-1 0.1977 -0.1977 0.1786 -0.1786 -0.087 0.087 0.2557 -0.2557 -0.125 0.125 >= -0.0
-1 0.0009 -0.0009 -0.3572 0.3572 -0.4058 0.4058 0.2481 -0.2481 -0.375 0.375 >= -0.0
-1 -0.4013 0.4013 -0.1786 0.1786 0.6377 -0.6377 -0.2481 0.2481 -0.25 0.25 >= -0.0
1 0 0 0 0 0 0 0 0 0 0 >= 0.000001


(np.int64(36),
 np.float64(6.785714285714286),
 np.float64(0.1162368131868132),
 np.float64(1.8043142857142855))

# Standarization

In [5]:
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df)

# PCA

In [6]:
pca = PCA(n_components=N_COMPONENTS)
df_pca = pd.DataFrame(pca.fit_transform(df_scaled), index=df.index, columns=range(N_COMPONENTS)).map(lambda x: f"{x:.4f}").astype(np.float64)

Unnamed: 0,0,1,2
a01,0.2439,1.7736,-0.2748
a02,-1.6764,2.3254,-1.2425
a03,1.6384,0.3946,1.0339
a04,-1.7974,-0.964,0.4173
a05,-1.2544,-0.9294,0.6849
a06,-2.8053,-0.2102,0.7608
a07,1.9283,-0.872,-0.3193
a08,0.2463,-1.316,-0.2723
a09,0.7564,-1.4173,-0.4856
a10,0.406,2.1308,0.0992


In [7]:
CRITERIA = [Criterion(name, points=2) for name in df_pca.columns]
calculate_heuristics(df_pca, PREFERENCES, CRITERIA)

Input to the sampler:
0 1 0 1 0 1 0 = 1
0 0 1 0 0 0 0 = 0
0 0 0 0 1 0 0 = 0
0 0 0 0 0 0 1 = 0
0 -1 1 0 0 0 0 <= 0
0 0 0 -1 1 0 0 <= 0
0 0 0 0 0 -1 1 <= 0
-1 -0.1077 0.1077 0.0271 -0.0271 0.0758 -0.0758 >= -0.0
-1 -0.2604 0.2604 0.4639 -0.4639 -0.3318 0.3318 >= -0.0
-1 0.4057 -0.4057 -0.1474 0.1474 0.3435 -0.3435 >= -0.0
1 0 0 0 0 0 0 >= 0.000001


(np.int64(66),
 np.float64(3.2857142857142856),
 np.float64(0.06945054945054946),
 np.float64(1.4326285714285716))

# t-SNE

In [8]:
tsne = TSNE(n_components=N_COMPONENTS, perplexity=10)
df_tsne = pd.DataFrame(tsne.fit_transform(df_scaled), index=df.index, columns=range(N_COMPONENTS)).map(lambda x: f"{x:.4f}").astype(np.float64)
calculate_heuristics(df_tsne, PREFERENCES, CRITERIA)

Input to the sampler:
0 1 0 1 0 1 0 = 1
0 0 1 0 0 0 0 = 0
0 0 0 0 1 0 0 = 0
0 0 0 0 0 0 1 = 0
0 -1 1 0 0 0 0 <= 0
0 0 0 -1 1 0 0 <= 0
0 0 0 0 0 -1 1 <= 0
-1 0.5558 -0.5558 -0.1914 0.1914 -0.2794 0.2794 >= -0.0
-1 0.2231 -0.2231 -0.0325 0.0325 0.3872 -0.3872 >= -0.0
-1 0.4641 -0.4641 -0.3803 0.3803 -0.243 0.243 >= -0.0
1 0 0 0 0 0 0 >= 0.000001


(np.int64(42),
 np.float64(5.857142857142857),
 np.float64(0.12306813186813186),
 np.float64(1.8376785714285715))

# MDS

In [9]:
mds = MDS(n_components=N_COMPONENTS)
df_mds = pd.DataFrame(mds.fit_transform(df_scaled), index=df.index, columns=range(N_COMPONENTS)).map(lambda x: f"{x:.4f}").astype(np.float64)
calculate_heuristics(df_mds, PREFERENCES, CRITERIA)

Input to the sampler:
0 1 0 1 0 1 0 = 1
0 0 1 0 0 0 0 = 0
0 0 0 0 1 0 0 = 0
0 0 0 0 0 0 1 = 0
0 -1 1 0 0 0 0 <= 0
0 0 0 -1 1 0 0 <= 0
0 0 0 0 0 -1 1 <= 0
-1 0.2658 -0.2658 0.0773 -0.0773 -0.0773 0.0773 >= -0.0
-1 -0.5399 0.5399 0.2882 -0.2882 0.0751 -0.0751 >= -0.0
-1 -0.233 0.233 -0.4748 0.4748 -0.0158 0.0158 >= -0.0
1 0 0 0 0 0 0 >= 0.000001
Infeasible system.



(np.int64(182), np.float64(13.0), None, None)