In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import opfython.math.general as g
import opfython.stream.loader as l
import opfython.stream.parser as p
import opfython.stream.splitter as s
from opfython.models import KNNSupervisedOPF

In [22]:
# Load data:
# Read the measurements from a CSV expected in the current working directory.

data = pd.read_csv("bodyfat.csv")
print(data.head())

# Preprocessing:
# Separate the target column from the features, then convert the continuous
# target value into an integer class label in 1..5.

predict = 'BodyFat'


def bodyfat_to_class(value):
    """Map a single body-fat value to an integer class label in 1..5.

    The intervals are the same ones the original nested conditional encoded:

        1: value <= 5
        2: 5 < value <= 14
        3: 14 < value < 16
        4: 16 <= value < 25
        5: value >= 25

    Each test only needs its upper bound because the preceding tests have
    already excluded everything below it.
    """
    if value <= 5:
        return 1
    if value <= 14:
        return 2
    if value < 16:
        return 3
    if value < 25:
        return 4
    return 5


Y = data[predict]
X = data.drop(columns=[predict])
Xlabels = X.columns
# NOTE(review): the feature matrix keeps the 'Density' column. If BodyFat in
# this file was computed from Density, that column leaks the target - confirm.

# Every comparison against NaN is False, so a missing target value would
# silently fall through to class 5. Fail loudly instead of mislabelling rows.
missing_targets = int(Y.isna().sum())
if missing_targets:
    raise ValueError(
        f"Column '{predict}' has {missing_targets} missing value(s); "
        "drop or impute them before assigning classes."
    )

Y = Y.map(bodyfat_to_class)

print(f'Target Class:\n{Y.head()}')
print(f'Feature Class:\n{X.head()}')

# Convert the pandas objects to NumPy arrays.
Y = Y.to_numpy()
X = X.to_numpy()

   Density  BodyFat  Age  Weight  Height  Neck  Chest  Abdomen    Hip  Thigh  \
0   1.0708     12.3   23  154.25   67.75  36.2   93.1     85.2   94.5   59.0   
1   1.0853      6.1   22  173.25   72.25  38.5   93.6     83.0   98.7   58.7   
2   1.0414     25.3   22  154.00   66.25  34.0   95.8     87.9   99.2   59.6   
3   1.0751     10.4   26  184.75   72.25  37.4  101.8     86.4  101.2   60.1   
4   1.0340     28.7   24  184.25   71.25  34.4   97.3    100.0  101.9   63.2   

   Knee  Ankle  Biceps  Forearm  Wrist  
0  37.3   21.9    32.0     27.4   17.1  
1  37.3   23.4    30.5     28.9   18.2  
2  38.9   24.0    28.8     25.2   16.6  
3  37.3   22.8    32.4     29.4   18.2  
4  42.2   24.0    32.2     27.7   17.7  
Target Class:
0    2
1    2
2    5
3    2
4    5
Name: BodyFat, dtype: int64
Feature Class:
   Density  Age  Weight  Height  Neck  Chest  Abdomen    Hip  Thigh  Knee  \
0   1.0708   23  154.25   67.75  36.2   93.1     85.2   94.5   59.0  37.3   
1   1.0853   22  173.25   7

In [24]:
# Hold out a test set first; the remaining samples are split again below.
X_trainval, X_test, Y_trainval, Y_test = s.split(
    X,
    Y,
    percentage=0.8,
    random_state=1,
)

# Divide the non-test samples into the training and validation sets.
# NOTE(review): with percentage=0.25 the recorded run logged
# "X_1: (50, 14) | X_2: (151, 14)" for this call, i.e. the first returned
# pair (bound to X_train/Y_train) appears to be the smaller one - confirm
# this ratio is intended.
X_train, X_val, Y_train, Y_val = s.split(
    X_trainval,
    Y_trainval,
    percentage=0.25,
    random_state=1,
)

# Build the KNN-based supervised OPF classifier.
opf = KNNSupervisedOPF(
    max_k=10,
    distance='log_squared_euclidean',
    pre_computed_distance=None,
)

# Fit on the training split; the validation split is passed alongside it.
opf.fit(X_train, Y_train, X_val, Y_val)

# Label the held-out test samples.
preds = opf.predict(X_test)

# Score the predictions against the true test labels.
acc = g.opf_accuracy(Y_test, preds)

print(f'Accuracy: {acc}')

2022-04-19 14:54:15,319 - opfython.stream.splitter — INFO — Splitting data ...
2022-04-19 14:54:15,320 - opfython.stream.splitter — DEBUG — X_1: (201, 14) | X_2: (51, 14) | Y_1: (201,) | Y_2: (51,).
2022-04-19 14:54:15,322 - opfython.stream.splitter — INFO — Data splitted.
2022-04-19 14:54:15,323 - opfython.stream.splitter — INFO — Splitting data ...
2022-04-19 14:54:15,325 - opfython.stream.splitter — DEBUG — X_1: (50, 14) | X_2: (151, 14) | Y_1: (50,) | Y_2: (151,).
2022-04-19 14:54:15,326 - opfython.stream.splitter — INFO — Data splitted.
2022-04-19 14:54:15,330 - opfython.models.knn_supervised — INFO — Overriding class: OPF -> KNNSupervisedOPF.
2022-04-19 14:54:15,332 - opfython.core.opf — INFO — Creating class: OPF.
2022-04-19 14:54:15,333 - opfython.core.opf — DEBUG — Distance: log_squared_euclidean | Pre-computed distance: False.
2022-04-19 14:54:15,335 - opfython.core.opf — INFO — Class created.
2022-04-19 14:54:15,336 - opfython.models.knn_supervised — INFO — Class overrided.
