In [112]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler

df = pd.read_csv('bodyPerformance.csv')
len_row, len_col = df.shape
df.fillna(df.mode().iloc[0], inplace=True) # Replace NaN values with mode of corresponding column
# use the factorize() function to convert the column of strings to integers
non_int_columns = df.select_dtypes(include=['object']).columns.tolist()
    
# Delete duplicate rows
df = df.drop_duplicates()

In [113]:
print(len_row, len_col)

13393 12


In [114]:
label_mapping = {'A': 0, 'B': 1, 'C': 2, 'D': 3}

df['class'] = df['class'].replace(label_mapping)

label_mapping = {'M': 1, 'F': 2}

df['gender'] = df['gender'].replace(label_mapping)

In [115]:
df.head(20)

Unnamed: 0,age,gender,height_cm,weight_kg,body fat_%,diastolic,systolic,gripForce,sit and bend forward_cm,sit-ups counts,broad jump_cm,class
0,27.0,1,172.3,75.24,21.3,80.0,130.0,54.9,18.4,60.0,217.0,2
1,25.0,1,165.0,55.8,15.7,77.0,126.0,36.4,16.3,53.0,229.0,0
2,31.0,1,179.6,78.0,20.1,92.0,152.0,44.8,12.0,49.0,181.0,2
3,32.0,1,174.5,71.1,18.4,76.0,147.0,41.4,15.2,53.0,219.0,1
4,28.0,1,173.8,67.7,17.1,70.0,127.0,43.5,27.1,45.0,217.0,1
5,36.0,2,165.4,55.4,22.0,64.0,119.0,23.8,21.0,27.0,153.0,1
6,42.0,2,164.5,63.7,32.2,72.0,135.0,22.7,0.8,18.0,146.0,3
7,33.0,1,174.9,77.2,36.9,84.0,137.0,45.9,12.3,42.0,234.0,1
8,54.0,1,166.8,67.5,27.6,85.0,165.0,40.4,18.6,34.0,148.0,2
9,28.0,1,185.0,84.6,14.4,81.0,156.0,57.9,12.1,55.0,213.0,1


In [116]:
# Membaca data
X = np.array(df.iloc[:, :-1])
y = np.array(df.iloc[:, -1])

X_col = df.iloc[:, :-1].columns.tolist()

# Create a StandardScaler object
scaler = StandardScaler()

# Fit the scaler to the dataset
scaler.fit(X)

# Transform the dataset using the scaler
X_std = scaler.transform(X)

df_X_std = pd.DataFrame(data=X_std, columns=X_col)

In [117]:
%run func.ipynb

In [118]:
# Assuming X_std and y are defined
feature_weights = reliefF(X_std, y)

combines = {'Features': df.iloc[:, :-1].columns, 'Weights': feature_weights}
weight_f = pd.DataFrame(combines)
weight_f = weight_f.sort_values('Weights', ascending=True)


In [119]:
weight_f.head(100)

Unnamed: 0,Features,Weights
0,age,-0.07648
9,sit-ups counts,-0.048126
7,gripForce,-0.040222
10,broad jump_cm,-0.035407
2,height_cm,-0.034446
3,weight_kg,-0.028426
4,body fat_%,-0.027585
6,systolic,-0.021937
5,diastolic,-0.021041
8,sit and bend forward_cm,-0.010361


In [131]:
def calculate_F1(X, y):
    c = len(np.unique(y))  # Number of classes
    means = np.zeros(c)  # Array to store means of each class
    variances = np.zeros(c)  # Array to store variances of each class
    proportions = np.zeros(c)  # Array to store proportions of each class
    
    # Calculate means, variances, and proportions of each class
    for i in range(c):
        means[i] = np.mean(X[y == i])
        
        variances[i] = np.var(X[y == i])
        proportions[i] = np.sum(y == i) / len(y)
        
    numerator = 0.0
    denominator = 0.0

    # Calculate numerator and denominator of F1
    for i in range(c):
        for j in range(c):
            if i != j:
                numerator += proportions[i] * proportions[j] * (means[i] - means[j]) ** 2
        denominator += proportions[i] * variances[i]

    F1 = numerator / denominator
    return F1

# Calculating the F1 score
F1_score = calculate_F1(X_std, y)

print("Maximum Fisher's discriminant ratio (F1):", F1_score)

Maximum Fisher's discriminant ratio (F1): 0.009551998997520652


In [146]:
# import numpy as np

# def calculate_F3(X_std, y):
#     num_classes = len(np.unique(y))
#     num_features = X_std.shape[1]
#     max_F3 = 0.0
    
#     for i in range(num_features):
#         for class1 in range(num_classes):
#             for class2 in range(class1 + 1, num_classes):
#                 feature_efficiency = 0.0
#                 for j in range(len(X_std)):
#                     min_val = min(max(X_std[j][i], y[j] == class1), max(X_std[j][i], y[j] == class2))
#                     max_val = max(min(X_std[j][i], y[j] == class1), min(X_std[j][i], y[j] == class2))
#                     if min_val <= X_std[j][i] <= max_val:
#                         feature_efficiency += 1
#                 feature_efficiency /= len(X_std)
#                 max_F3 = max(max_F3, feature_efficiency)
    
#     return max_F3

# F3 = calculate_F3(X_std, y)
# print("Maximum individual feature efficiency (F3):", F3)


Maximum individual feature efficiency (F3): 0.2968189964157706


In [147]:
import numpy as np

def calculate_f3(X_std, y):
    num_classes = len(np.unique(y))
    num_features = X_std.shape[1]
    max_f3 = 0.0
    
    for class_i in range(num_classes):
        for class_j in range(class_i + 1, num_classes):
            union_count = 0
            
            # Get feature values for class_i and class_j
            class_i_indices = np.where(y == class_i)[0]
            class_j_indices = np.where(y == class_j)[0]
            feature_values_i = X_std[class_i_indices]
            feature_values_j = X_std[class_j_indices]
            
            for feature in range(num_features):
                feature_values = np.concatenate((feature_values_i[:, feature], feature_values_j[:, feature]))
                max_val = max(np.max(feature_values_i[:, feature]), np.max(feature_values_j[:, feature]))
                min_val = min(np.min(feature_values_i[:, feature]), np.min(feature_values_j[:, feature]))
                count = np.sum((feature_values >= min_val) & (feature_values <= max_val))
                union_count += count
            
            f3 = union_count / X_std.shape[0]
            max_f3 = max(max_f3, f3)
    
    return max_f3

# Usage example
max_individual_feature_efficiency = calculate_f3(X_std, y)
print("Maximum Individual Feature Efficiency (F3):", max_individual_feature_efficiency)


Maximum Individual Feature Efficiency (F3): 5.501642771804062
