In [14]:
# imports
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import itertools
import math 

In [15]:
cars = pd.read_csv("data/cars/cars.csv", index_col=0)

In [16]:
cars = cars[cars.horsepower != "?"]

In [17]:
#from sklearn.preprocessing import MinMaxScaler

def isfloat(num):
    try:
        float(num)
        return True
    except ValueError:
        return False

def calculate_FS1_for_each_column(df,group_label):
    
    col = df.columns
    colu = []
    for c in col:
        if (isfloat(df[c][0])):
            colu.append(c)
            
    for c in colu:
        df[c] = df[c].astype(float)
    
    string_columns_float = list(df.select_dtypes(exclude=['object']).columns)
    string_columns_object = list(df.select_dtypes(include=['object']).columns)
    
    print(f"Group label is: {group_label}")
    print(f"Group label has {len(df[group_label].unique())} unique values")
    no_of_datasets = len(df[group_label].unique())
    df = df.drop(labels=string_columns_object,axis=1)
    list_of_datasets = []
    origin_dict = {}
    dict_of_mean_values = {} #key is the origin and the values are list of means for each column of a datas

    #creating separated datasetst for each group
    for count,value in enumerate(df[group_label].unique()):
        list_of_datasets.append( df[df[group_label] == value])
        list_of_datasets[count] = list_of_datasets[count].reset_index()
    
    
    #drop origin column for each group and save it as a key in dictionary where 
    #the value of the key will be the corresponding dataset
    
    for dataset in list_of_datasets:
        
        group = int(dataset[group_label].unique()[0])
        
        origin_dict[group] = dataset.drop(labels=[group_label],axis=1)
        
        columns = origin_dict[group].columns
        scaler = MinMaxScaler()
        scaler.fit(origin_dict[group])
        origin_dict[group] = scaler.transform(origin_dict[group])
        origin_dict[group] = pd.DataFrame(origin_dict[group], columns = columns)
        origin_dict[group] = origin_dict[group].drop(labels=["index"] ,axis=1)
        dict_of_mean_values[group] = [origin_dict[group][c].mean() for c in origin_dict[group].columns]
        
    #calculation of diff and QDS between each combination of the groups.
    diff = []

    for key1, key2 in itertools.combinations(dict_of_mean_values.keys(), 2):
        diff.append(abs(np.subtract(np.array(dict_of_mean_values[key1]),np.array(dict_of_mean_values[key2]))))
            
    #creation of the matrix W
    D = origin_dict[1].shape[1]
    W = np.ones((D, D))
    
    for row in range(len(W)):
        for col in range(len(W[0])):
            dij = abs(row-col)
            W[row][col] = 1 - (dij/(D-1))
            
    #calculating QFD
    QFD = []
    for count,d in enumerate(diff):
        QFD.append(math.sqrt(np.dot(np.dot(d,W),np.transpose(d))))
    
    return origin_dict, dict_of_mean_values, diff, QFD

In [18]:
datasets,dict_of_mean_values,diff,QFD = calculate_FS1_for_each_column(cars,"origin")

Group label is: origin
Group label has 3 unique values


In [19]:
QFD

[0.44511152122143616, 0.7805869893625766, 0.6548981815354775]

In [20]:
len(list(datasets.keys()))

3

In [21]:
print(dict_of_mean_values[1])

[0.36778231292517016, 0.5693877551020409, 0.43922228350799786, 0.3766796606282962, 0.47080532811927156, 0.492267893072722, 0.46598639455782315]


In [102]:
# FS2 funkcija

def calculate_FS2_for_each_column(df, group_label):
    
    col = df.columns
    colu = []
    for c in col:
        if (isfloat(df[c][0])):
            colu.append(c)
            
    for c in colu:
        df[c] = df[c].astype(float)
    
    string_columns_float = list(df.select_dtypes(exclude=['object']).columns)
    string_columns_object = list(df.select_dtypes(include=['object']).columns)
    
    print(f"Group label is: {group_label}")
    print(f"Group label has {len(df[group_label].unique())} unique values")
    no_of_datasets = len(df[group_label].unique())
    df = df.drop(labels=string_columns_object,axis=1)
    list_of_datasets = []
    origin_dict = {}
    dict_of_mean_values = {} #key is the origin and the values are list of means for each column of a datas
    dict_of_std_values_minus = {}
    dict_of_std_values_plus = {}
    final_dict = {}

    #creating separated datasetst for each group
    for count,value in enumerate(df[group_label].unique()):
        list_of_datasets.append( df[df[group_label] == value])
        list_of_datasets[count] = list_of_datasets[count].reset_index()
    
    
    #drop origin column for each group and save it as a key in dictionary where 
    #the value of the key will be the corresponding dataset
    
    for dataset in list_of_datasets:
        
        group = int(dataset[group_label].unique()[0])
        
        origin_dict[group] = dataset.drop(labels=[group_label],axis=1)
        
        columns = origin_dict[group].columns
        scaler = MinMaxScaler()
        scaler.fit(origin_dict[group])
        origin_dict[group] = scaler.transform(origin_dict[group])
        origin_dict[group] = pd.DataFrame(origin_dict[group], columns = columns)
        origin_dict[group] = origin_dict[group].drop(labels=["index"] ,axis=1)
        dict_of_std_values_minus[group] = [origin_dict[group][c].mean() - origin_dict[group][c].std() for c in origin_dict[group].columns]
        dict_of_mean_values[group] = [origin_dict[group][c].mean() for c in origin_dict[group].columns]
        dict_of_std_values_plus[group] = [origin_dict[group][c].mean() + origin_dict[group][c].std() for c in origin_dict[group].columns]
        matrix = np.array([dict_of_std_values_minus[group], dict_of_mean_values[group], dict_of_std_values_plus[group]])
        final_dict[group] = matrix
        
    #calculation of diff and QDS between each combination of the groups.
    diff = []

    for key1, key2 in itertools.combinations(final_dict.keys(), 2):
        diff.append(abs(np.subtract(np.array(final_dict[key1]),np.array(final_dict[key2]))))
            
    #creation of the matrix W
    D = origin_dict[1].shape[1]
    W = np.ones((D, D))
        
    for row in range(len(W)):
        for col in range(len(W[0])):
            dij = abs(row-col)
            W[row][col] = 1 - (dij/(D-1))
            
    print(diff)
                        
    #calculating QFD
    QFD = []
    for count,d in enumerate(diff):
        print(d.shape)
        print(W.shape)
        QFD.append(np.sqrt(np.dot(np.dot(d,W),np.transpose(d))))
    
    return origin_dict, dict_of_mean_values, diff, QFD

In [103]:
datasets,dict_of_mean_values,diff,QFD  = calculate_FS2_for_each_column(cars,"origin")

Group label is: origin
Group label has 3 unique values
[array([[0.06929523, 0.01489528, 0.07570012, 0.02733172, 0.01420332,
        0.00613054, 0.15508121],
       [0.0675545 , 0.20229915, 0.1054584 , 0.02873662, 0.00897705,
        0.00483126, 0.15426677],
       [0.06581378, 0.41949357, 0.13521668, 0.03014152, 0.00375077,
        0.01579307, 0.15345233]]), array([[0.01852559, 0.327994  , 0.00865215, 0.01299342, 0.17420303,
        0.18004248, 0.02686108],
       [0.03801631, 0.4885054 , 0.07720182, 0.0205482 , 0.16580754,
        0.12765538, 0.00705282],
       [0.05750702, 0.64901681, 0.1457515 , 0.02810298, 0.15741205,
        0.07526829, 0.01275544]]), array([[0.05076964, 0.34288928, 0.06704796, 0.04032514, 0.15999971,
        0.17391193, 0.12822012],
       [0.0295382 , 0.28620625, 0.02825658, 0.04928482, 0.15683049,
        0.13248664, 0.14721395],
       [0.00830676, 0.22952323, 0.01053481, 0.0582445 , 0.15366128,
        0.09106136, 0.16620777]])]
(3, 7)
(7, 7)
(3, 7)
(7, 7)
(

In [63]:
for i in range(len(QFD)):
    QFD[i] = QFD[i].mean()

In [64]:
QFD

[0.44058384361353387, 0.7767591205481543, 0.658421458669959]

# FS3 

In [117]:
# FS3 funkcija

def calc_hist(numbers):
    range_0_2 = 0
    range_2_4 = 0
    range_4_6 = 0
    range_6_8 = 0
    range_8_10 = 0

    # Iterate through the numbers and count occurrences in each range
    for number in numbers:
        if 0 <= number < 0.2:
            range_0_2 += 1
        elif 0.2 <= number < 0.4:
            range_2_4 += 1
        elif 0.4 <= number < 0.6:
            range_4_6 += 1
        elif 0.6 <= number < 0.8:
            range_6_8 += 1
        elif 0.8 <= number <= 1.0:
            range_8_10 += 1

    ranges = [range_0_2, range_2_4, range_4_6, range_6_8, range_8_10]
    
    return(np.array(ranges))

def calculate_FS3_for_each_column(df, group_label):
    
    col = df.columns
    colu = []
    for c in col:
        if (isfloat(df[c][0])):
            colu.append(c)
            
    for c in colu:
        df[c] = df[c].astype(float)
    
    string_columns_float = list(df.select_dtypes(exclude=['object']).columns)
    string_columns_object = list(df.select_dtypes(include=['object']).columns)
    
    print(f"Group label is: {group_label}")
    print(f"Group label has {len(df[group_label].unique())} unique values")
    no_of_datasets = len(df[group_label].unique())
    df = df.drop(labels=string_columns_object,axis=1)
    list_of_datasets = []
    origin_dict = {}
    final_dict = {}

    #creating separated datasetst for each group
    for count,value in enumerate(df[group_label].unique()):
        list_of_datasets.append( df[df[group_label] == value])
        list_of_datasets[count] = list_of_datasets[count].reset_index()
    
    
    #drop origin column for each group and save it as a key in dictionary where 
    #the value of the key will be the corresponding dataset
    
    for dataset in list_of_datasets:
        
        group = int(dataset[group_label].unique()[0])
        
        origin_dict[group] = dataset.drop(labels=[group_label],axis=1)
        
        columns = origin_dict[group].columns
        scaler = MinMaxScaler()
        scaler.fit(origin_dict[group])
        origin_dict[group] = scaler.transform(origin_dict[group])
        origin_dict[group] = pd.DataFrame(origin_dict[group], columns = columns)
        origin_dict[group] = origin_dict[group].drop(labels=["index"] ,axis=1)

        ranges = [range_0_2, range_2_4, range_4_6, range_6_8, range_8_10]
        final_dict[group] = [calc_hist(origin_dict[group][c]) for c in origin_dict[group].columns]
        
        # dict_of_std_values_minus[group] = [origin_dict[group][c].mean() - origin_dict[group][c].std() for c in origin_dict[group].columns]
        # dict_of_mean_values[group] = [origin_dict[group][c].mean() for c in origin_dict[group].columns]
        # dict_of_std_values_plus[group] = [origin_dict[group][c].mean() + origin_dict[group][c].std() for c in origin_dict[group].columns]
        # matrix = np.array([dict_of_std_values_minus[group], dict_of_mean_values[group], dict_of_std_values_plus[group]])
        # final_dict[group] = matrix
        
    #calculation of diff and QDS between each combination of the groups.
    diff = []

    for key1, key2 in itertools.combinations(final_dict.keys(), 2):
        diff.append(abs(np.subtract(np.array(final_dict[key1]),np.array(final_dict[key2]))))
            
    print(diff)
    #creation of the matrix W
    D = origin_dict[1].shape[1]
    W = np.ones((D, D))
        
    for row in range(len(W)):
        for col in range(len(W[0])):
            dij = abs(row-col)
            W[row][col] = 1 - (dij/(D-1))
                        
    #calculating QFD
    QFD = []
    for count,d in enumerate(diff):
        print(W.shape)
        print(d.shape)
        QFD.append(np.sqrt(np.dot(np.dot(d,W),np.transpose(d))))
    
    return origin_dict, dict_of_mean_values, diff, QFD

In [118]:
datasets,dict_of_mean_values,diff,QFD  = calculate_FS3_for_each_column(cars, "origin")

Group label is: origin
Group label has 3 unique values
[array([[42, 91,  8, 13, 12],
       [65, 69, 73,  0, 97],
       [46, 18, 37, 44, 21],
       [30, 71, 34, 22,  9],
       [27, 34, 47, 48, 10],
       [ 8, 45, 73, 31,  9],
       [48, 33, 46, 35,  4]]), array([[41, 84, 22, 21,  9],
       [ 8,  0, 70,  0, 99],
       [53, 28, 28, 47, 21],
       [45, 62, 43, 17, 10],
       [ 4, 49, 55, 52, 17],
       [11, 49, 88, 41, 10],
       [45, 30, 42, 35, 25]]), array([[ 1,  7, 14,  8,  3],
       [57, 69,  3,  0,  2],
       [ 7, 10,  9,  3,  0],
       [15,  9,  9,  5,  1],
       [23, 15,  8,  4,  7],
       [19,  4, 15, 10,  1],
       [ 3,  3,  4,  0, 21]])]
(7, 7)
(7, 5)


ValueError: shapes (7,5) and (7,7) not aligned: 5 (dim 1) != 7 (dim 0)