In [1]:
# imports
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import itertools
import math 
import statistics
from itertools import permutations
import json
import time

In [2]:
cars = pd.read_csv("data/cars/cars.csv", index_col=0)

In [3]:
cars = cars[cars.horsepower != "?"]

## FS1

In [4]:
def isfloat(num):
    try:
        float(num)
        return True
    except ValueError:
        return False

def calculate_FS1_for_each_column(df,group_label):
    
    col = df.columns
    colu = []
    for c in col:
        if (isfloat(df[c][0])):
            colu.append(c)
            
    for c in colu:
        df[c] = df[c].astype(float)
    
    string_columns_float = list(df.select_dtypes(exclude=['object']).columns)
    string_columns_object = list(df.select_dtypes(include=['object']).columns)
    
    # print(f"Group label is: {group_label}")
    # print(f"Group label has {len(df[group_label].unique())} unique values")
    no_of_datasets = len(df[group_label].unique())
    df = df.drop(labels=string_columns_object,axis=1)
    list_of_datasets = []
    origin_dict = {}
    dict_of_mean_values = {} #key is the origin and the values are list of means for each column of a datas

    #creating separated datasetst for each group
    for count,value in enumerate(df[group_label].unique()):
        list_of_datasets.append( df[df[group_label] == value])
        list_of_datasets[count] = list_of_datasets[count].reset_index()
    
    
    #drop origin column for each group and save it as a key in dictionary where 
    #the value of the key will be the corresponding dataset
    
    for dataset in list_of_datasets:
        
        group = int(dataset[group_label].unique()[0])
        
        origin_dict[group] = dataset.drop(labels=[group_label],axis=1)
        
        columns = origin_dict[group].columns
        scaler = MinMaxScaler()
        scaler.fit(origin_dict[group])
        origin_dict[group] = scaler.transform(origin_dict[group])
        origin_dict[group] = pd.DataFrame(origin_dict[group], columns = columns)
        origin_dict[group] = origin_dict[group].drop(labels=["index"] ,axis=1)
        dict_of_mean_values[group] = [origin_dict[group][c].mean() for c in origin_dict[group].columns]
        
    #calculation of diff and QDS between each combination of the groups.
    diff = []

    for key1, key2 in itertools.combinations(dict_of_mean_values.keys(), 2):
        diff.append(abs(np.subtract(np.array(dict_of_mean_values[key1]),np.array(dict_of_mean_values[key2]))))
            
    #creation of the matrix W
    D = origin_dict[1].shape[1]
    W = np.ones((D, D))
    
    for row in range(len(W)):
        for col in range(len(W[0])):
            dij = abs(row-col)
            W[row][col] = 1 - (dij/(D-1))
            
    #calculating QFD
    QFD = []
    for count,d in enumerate(diff):
        QFD.append(math.sqrt(np.dot(np.dot(d,W),np.transpose(d))))
        
    tds = statistics.mean(QFD)
    
    return tds

In [5]:
print(calculate_FS1_for_each_column(cars,"origin"))

0.6268655640398301


# FS2

In [6]:
# FS2 funkcija

def calculate_FS2_for_each_column(df, group_label):
    
    col = df.columns
    colu = []
    for c in col:
        if (isfloat(df[c][0])):
            colu.append(c)
            
    for c in colu:
        df[c] = df[c].astype(float)
    
    string_columns_float = list(df.select_dtypes(exclude=['object']).columns)
    string_columns_object = list(df.select_dtypes(include=['object']).columns)
    
    # print(f"Group label is: {group_label}")
    # print(f"Group label has {len(df[group_label].unique())} unique values")
    no_of_datasets = len(df[group_label].unique())
    df = df.drop(labels=string_columns_object,axis=1)
    list_of_datasets = []
    origin_dict = {}
    dict_of_mean_values = {} #key is the origin and the values are list of means for each column of a datas
    dict_of_std_values_minus = {}
    dict_of_std_values_plus = {}
    final_dict = {}

    #creating separated datasetst for each group
    for count,value in enumerate(df[group_label].unique()):
        list_of_datasets.append( df[df[group_label] == value])
        list_of_datasets[count] = list_of_datasets[count].reset_index()
    
    
    #drop origin column for each group and save it as a key in dictionary where 
    #the value of the key will be the corresponding dataset
    
    for dataset in list_of_datasets:
        
        group = int(dataset[group_label].unique()[0])
        
        origin_dict[group] = dataset.drop(labels=[group_label],axis=1)
        
        columns = origin_dict[group].columns
        scaler = MinMaxScaler()
        scaler.fit(origin_dict[group])
        origin_dict[group] = scaler.transform(origin_dict[group])
        origin_dict[group] = pd.DataFrame(origin_dict[group], columns = columns)
        origin_dict[group] = origin_dict[group].drop(labels=["index"] ,axis=1)
        dict_of_std_values_minus[group] = [origin_dict[group][c].mean() - origin_dict[group][c].std() for c in origin_dict[group].columns]
        dict_of_mean_values[group] = [origin_dict[group][c].mean() for c in origin_dict[group].columns]
        dict_of_std_values_plus[group] = [origin_dict[group][c].mean() + origin_dict[group][c].std() for c in origin_dict[group].columns]
        matrix = np.array([dict_of_std_values_minus[group], dict_of_mean_values[group], dict_of_std_values_plus[group]])
        final_dict[group] = matrix
        
    #calculation of diff and QDS between each combination of the groups.
    diff = []

    for key1, key2 in itertools.combinations(final_dict.keys(), 2):
        diff.append(abs(np.subtract(np.array(final_dict[key1]),np.array(final_dict[key2]))))
            
    #creation of the matrix W
    D = origin_dict[1].shape[1]
    W = np.ones((D, D))
        
    for row in range(len(W)):
        for col in range(len(W[0])):
            dij = abs(row-col)
            W[row][col] = 1 - (dij/(D-1))
            
    # print(diff)
                        
    #calculating QFD
    QFD = []
    for count,d in enumerate(diff):
        # print(d.shape)
        # print(W.shape)
        QFD.append(np.sqrt(np.dot(np.dot(d,W),np.transpose(d))))
            
    TDS = sum(QFD)/len(QFD)
    TDS = sum(TDS)/len(TDS)
    TDS = sum(TDS)/len(TDS)
    
    return TDS

In [7]:
print(calculate_FS2_for_each_column(cars,"origin"))

0.625254807610549


# FS3 

In [8]:
# FS3 funkcija

def calc_hist(numbers):
    range_0_2 = 0
    range_2_4 = 0
    range_4_6 = 0
    range_6_8 = 0
    range_8_10 = 0

    # Iterate through the numbers and count occurrences in each range
    for number in numbers:
        if 0 <= number < 0.2:
            range_0_2 += 1
        elif 0.2 <= number < 0.4:
            range_2_4 += 1
        elif 0.4 <= number < 0.6:
            range_4_6 += 1
        elif 0.6 <= number < 0.8:
            range_6_8 += 1
        elif 0.8 <= number <= 1.0:
            range_8_10 += 1

    ranges = [range_0_2, range_2_4, range_4_6, range_6_8, range_8_10]
    
    return(np.array(ranges))

def calculate_FS3_for_each_column(df, group_label):
    
    col = df.columns
    colu = []
    for c in col:
        if (isfloat(df[c][0])):
            colu.append(c)
            
    for c in colu:
        df[c] = df[c].astype(float)
    
    string_columns_float = list(df.select_dtypes(exclude=['object']).columns)
    string_columns_object = list(df.select_dtypes(include=['object']).columns)
    
    # print(f"Group label is: {group_label}")
    # print(f"Group label has {len(df[group_label].unique())} unique values")
    no_of_datasets = len(df[group_label].unique())
    df = df.drop(labels=string_columns_object,axis=1)
    list_of_datasets = []
    origin_dict = {}
    final_dict = {}

    #creating separated datasetst for each group
    for count,value in enumerate(df[group_label].unique()):
        list_of_datasets.append( df[df[group_label] == value])
        list_of_datasets[count] = list_of_datasets[count].reset_index()
    
    
    #drop origin column for each group and save it as a key in dictionary where 
    #the value of the key will be the corresponding dataset
    
    for dataset in list_of_datasets:
        
        group = int(dataset[group_label].unique()[0])
        
        origin_dict[group] = dataset.drop(labels=[group_label],axis=1)
        
        columns = origin_dict[group].columns
        scaler = MinMaxScaler()
        scaler.fit(origin_dict[group])
        origin_dict[group] = scaler.transform(origin_dict[group])
        origin_dict[group] = pd.DataFrame(origin_dict[group], columns = columns)
        origin_dict[group] = origin_dict[group].drop(labels=["index"] ,axis=1)

        final_dict[group] = [(calc_hist(origin_dict[group][c]) / origin_dict[group][c].shape[0]) for c in origin_dict[group].columns]
        
    #calculation of diff and QDS between each combination of the groups.
    diff = []

    for key1, key2 in itertools.combinations(final_dict.keys(), 2):
        diff.append(abs(np.subtract(np.array(final_dict[key1]),np.array(final_dict[key2]))))
            
    # print(diff)
    
    #creation of the matrix W
    D = len(final_dict[1])
    W = np.ones((D, D))
        
    for row in range(len(W)):
        for col in range(len(W[0])):
            dij = abs(row-col)
            W[row][col] = 1 - (dij/(D-1))
                        
    #calculating QFD
    QFD = []
    for count,d in enumerate(diff):
        # print(W.shape)
        # print(d.shape)
        # QFD.append(np.sqrt(np.dot(np.dot(d,W),np.transpose(d))))
        QFD.append(np.sqrt(np.dot(np.dot(np.transpose(d),W),d)))
        
    TDS = sum(QFD)/len(QFD)
    TDS = sum(TDS)/len(TDS)
    TDS = sum(TDS)/len(TDS)
    
    return TDS

In [9]:
print(calculate_FS3_for_each_column(cars,"origin"))

0.6213322709152201


# Run the combinations for cars - fs1

In [10]:
cars = pd.read_csv("data/cars/cars.csv", index_col=0)

In [11]:
cars = cars[cars.horsepower != "?"]
df = cars.copy()

In [12]:
cars.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car_name
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150.0,3433.0,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140.0,3449.0,10.5,70,1,ford torino


In [13]:
og_order = df.columns.tolist()

In [14]:
col_keys = [i for i in range(0, len(df.columns.tolist()))]

In [15]:
tds_list = []
permutations_list = list(permutations(col_keys))
permutations_list = [list(tup) for tup in permutations_list]

# permutations_list = permutations_list[:100]

In [16]:
permutations_list_names = permutations_list.copy()

In [17]:
for i in range(len(permutations_list)):
    permutations_list_names[i] = [og_order[index] for index in permutations_list[i]]

In [22]:
total_time = 0

for i in range(len(permutations_list)):
    start_time = time.time()

    subset = df.iloc[:, permutations_list[i]]
    tds_list.append(calculate_FS1_for_each_column(subset,"origin"))
    
    end_time = time.time()
    total_time += (end_time - start_time)

    average_time = total_time / (i + 1)

    remaining_time = average_time * (len(permutations_list) - i)

    # Print remaining time dynamically
    print(f"Remaining time: {remaining_time:.2f}s", end="\r")

Remaining time: 3338.92s

KeyboardInterrupt: 

In [239]:
# the resulting dictionary along with the actual column names
result_dict_cols = {
    index: {"columns_order": permutations_list_names[index], "tds": tds_list[index]}
    for index, _ in enumerate(permutations_list_names)
}

# the resulting dictionary along with the indexes of the column names as was ordered in the original ordering
result_dict_indexes = {
    index: {"columns_order": permutations_list[index], "tds": tds_list[index]}
    for index, _ in enumerate(permutations_list)
}

In [242]:
# result_dict_cols

In [244]:
# result_dict_indexes

In [None]:
file_path = 'results/cars_results.json'

with open(file_path, "w") as json_file:
    json.dump(result_dict_cols, json_file)

# Run the combinations for glass data set - fs2

In [343]:
glass = pd.read_csv("data/glass/glass.csv", index_col=0)

In [344]:
df = glass.copy()
df.drop('id', axis=1, inplace=True)

In [345]:
df.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,type_of_glass
0,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
1,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
2,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
3,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1
4,1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0.0,0.26,1


In [346]:
og_order = df.columns.tolist()

In [347]:
col_keys = [i for i in range(0, len(df.columns.tolist()))]

In [348]:
tds_list = []
permutations_list = list(permutations(col_keys))
permutations_list = [list(tup) for tup in permutations_list]

permutations_list = permutations_list[:100]

In [349]:
permutations_list_names = permutations_list.copy()

In [350]:
for i in range(len(permutations_list)):
    permutations_list_names[i] = [og_order[index] for index in permutations_list[i]]

In [352]:
for i in range(len(permutations_list)):
    cols_ordering = permutations_list[i]
    subset = df.iloc[:, cols_ordering]
    tds_list.append(calculate_FS2_for_each_column(subset,"type_of_glass"))
    
    remaining_time = i
    print(f"Remaining time: {remaining_time:.2f}s", end="\r")

Remaining time: 99.00s

In [335]:
# the resulting dictionary along with the actual column names
result_dict_cols = {
    index: {"columns_order": permutations_list_names[index], "tds": tds_list[index]}
    for index, _ in enumerate(permutations_list_names)
}

# the resulting dictionary along with the indexes of the column names as was ordered in the original ordering
result_dict_indexes = {
    index: {"columns_order": permutations_list[index], "tds": tds_list[index]}
    for index, _ in enumerate(permutations_list)
}

In [336]:
# result_dict_cols

In [337]:
# result_dict_indexes

In [341]:
file_path = 'results/glass_results.json'

with open(file_path, "w") as json_file:
    json.dump(result_dict_cols, json_file)

# Run the combinations for seeds data set - fs3

In [313]:
seeds = pd.read_csv("data/seeds/seeds.csv", index_col=0)

In [314]:
df = seeds.copy()

In [315]:
df.head()

Unnamed: 0,area,perimeter,compactness,length_kernel,width_kernel,asymmetry_coeff,length_groove,class
0,15.26,14.84,0.871,5.763,3.312,2.221,5.22,1
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956,1
2,14.29,14.09,0.905,5.291,3.337,2.699,4.825,1
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805,1
4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175,1


In [316]:
og_order = df.columns.tolist()

In [317]:
col_keys = [i for i in range(0, len(df.columns.tolist()))]

In [318]:
tds_list = []
permutations_list = list(permutations(col_keys))
permutations_list = [list(tup) for tup in permutations_list]

permutations_list = permutations_list[:100]

In [319]:
permutations_list_names = permutations_list.copy()

In [320]:
for i in range(len(permutations_list)):
    permutations_list_names[i] = [og_order[index] for index in permutations_list[i]]

In [321]:
for cols_ordering in permutations_list:
    subset = df.iloc[:, cols_ordering]
    tds_list.append(calculate_FS3_for_each_column(subset,"class"))

In [322]:
# the resulting dictionary along with the actual column names
result_dict_cols = {
    index: {"columns_order": permutations_list_names[index], "tds": tds_list[index]}
    for index, _ in enumerate(permutations_list_names)
}

# the resulting dictionary along with the indexes of the column names as was ordered in the original ordering
result_dict_indexes = {
    index: {"columns_order": permutations_list[index], "tds": tds_list[index]}
    for index, _ in enumerate(permutations_list)
}

In [323]:
# result_dict_cols

In [325]:
# result_dict_indexes

In [None]:
file_path = 'results/seeds_results.json'

with open(file_path, "w") as json_file:
    json.dump(result_dict_cols, json_file)