In [7]:
import pandas as pd

# Small census sample of nine people: height ('tinggi'), weight ('berat')
# and gender ('jk'), stored column-wise.
sensus = dict(
    tinggi=[158, 170, 182, 175, 157, 155, 163, 141, 167],
    berat=[50, 64, 79, 44, 59, 63, 56, 60, 53],
    jk=['pria'] * 4 + ['wanita'] * 5,
)

# One row per person; the dict keys become the column names.
sensus_df = pd.DataFrame(data=sensus)
sensus_df

Unnamed: 0,tinggi,berat,jk
0,158,50,pria
1,170,64,pria
2,182,79,pria
3,175,44,pria
4,157,59,wanita
5,155,63,wanita
6,163,56,wanita
7,141,60,wanita
8,167,53,wanita


In [None]:
# Data visualisation: scatter plot of height against weight, one series per gender.
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
# groupby('jk') yields one sub-frame per gender, so each gender gets its own
# scatter series and legend entry.
for jk, d in sensus_df.groupby('jk'):
    ax.scatter(d['tinggi'], d['berat'],  label=jk)

plt.legend(loc='upper left')
plt.title('Sebaran Data Tinggi Badan, Berat Badan, dan Jenis Kelamin')
plt.xlabel('Tinggi Badan (cm)')
plt.ylabel('Berat Badan (kg)')
plt.grid(True)
plt.show()

In [None]:
# Dataset preprocessing: split the frame into a feature matrix and a target vector.
import numpy as np

# Features are the two numeric columns; the target is the gender label.
x_train = np.array(sensus_df[['tinggi', 'berat']])
y_train = np.array(sensus_df['jk'])

print(f'x_train:\n{x_train}\n')
print(f'y_train: {y_train}')

from sklearn.preprocessing import LabelBinarizer

# Encode the string gender labels as numbers. The fitted binarizer stays in
# `lb` because later cells reuse it to encode test labels and to decode
# predictions.
lb = LabelBinarizer()
y_train = lb.fit_transform(y_train)
print(f'y_train:\n{y_train}')


In [None]:
# Collapse the encoded target into a 1-D array before it is passed to the model.
y_train = y_train.flatten()
print(f'y_train: {y_train}')

In [None]:
# Train the KNN classification model.
from sklearn.neighbors import KNeighborsClassifier

K = 3  # number of neighbours that vote on each prediction
model = KNeighborsClassifier(n_neighbors=K)
model.fit(x_train, y_train)

In [None]:
# Gender prediction: measurements of the person to classify.
tinggi_badan, berat_badan = 157, 51
# Build the input as a single-row 2-D array (one sample, two features).
x_new = np.array([[tinggi_badan, berat_badan]])
x_new

In [None]:
# Ask the trained model for the encoded class of the new sample.
y_new = model.predict(X=x_new)
y_new

In [None]:
# Decode the numeric prediction back into its original gender label.
lb.inverse_transform(y_new)

In [None]:
# Nearest-neighbours visualisation: the census data plus the point to classify.

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
# One scatter series (and legend entry) per gender.
for jk, d in sensus_df.groupby('jk'):
    ax.scatter(d['tinggi'], d['berat'],  label=jk)

# The red square marks the person whose gender is being predicted.
plt.scatter(tinggi_badan,
            berat_badan,
            marker='s',
            color='red',
            label='misterius')

plt.legend(loc='upper left')
plt.title('Sebaran Data Tinggi Badan, Berat Badan, dan Jenis Kelamin')
plt.xlabel('Tinggi Badan (cm)')
plt.ylabel('Berat Badan (kg)')
plt.grid(True)
plt.show()
                


In [None]:
# Distance calculation (Euclidean distance)

# The unknown person's measurements as a 1-D vector, for point-to-point distances.
misterius = np.asarray([tinggi_badan, berat_badan])
misterius

In [None]:
# Show the training feature matrix whose rows are compared against the unknown point.
x_train

In [None]:
from scipy.spatial.distance import euclidean

# Euclidean distance from the unknown point to every training row, in row order.
data_jarak = [
    euclidean(misterius, baris)
    for baris in x_train
]
data_jarak

In [None]:
# Store each person's distance to the unknown point in a 'jarak' column,
# then display the frame ordered from nearest to farthest.
sensus_df['jarak'] = data_jarak
sensus_df.sort_values(by='jarak')

In [None]:
# Testing set: four samples with known genders.

x_test = np.array([
    [155, 51],
    [170, 58],
    [152, 43],
    [182, 63],
])
# Encode the expected labels with the binarizer fitted on the training target,
# then flatten them to a 1-D array.
y_test = lb.transform(
    np.array(['pria', 'pria', 'wanita', 'wanita'])
).flatten()

print(f'x_test:\n{x_test}\n')
print(f'y_test:\n{y_test}')

x_test:
[[155  51]
 [170  58]
 [152  43]
 [182  63]]

y_test:
0 0 1 1

In [None]:
# Prediction on the testing set.

# Encoded class predicted by the trained model for every test row.
y_pred = model.predict(X=x_test)
y_pred

In [None]:
# Accuracy: share of test samples whose predicted label equals the true label.

from sklearn.metrics import accuracy_score

acc = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f'Accuracy: {acc}')

In [None]:
# Precision: of the samples predicted as the positive class, the share that
# truly belong to it.

from sklearn.metrics import precision_score

prec = precision_score(y_true=y_test, y_pred=y_pred)

print(f'Precision: {prec}')

In [None]:
# Recall: of the samples that truly belong to the positive class, the share
# the model found.

from sklearn.metrics import recall_score

rec = recall_score(y_true=y_test, y_pred=y_pred)

print(f'Recall: {rec}')

In [None]:
# Classification report: per-class precision, recall and F1 in one text table.

from sklearn.metrics import classification_report

cls_report = classification_report(y_test, y_pred)

# The report is a multi-line, column-aligned table, so it starts on its own
# line; printing it right after the caption would shift the header row.
print(f'Classification Report:\n{cls_report}')

In [None]:
# Matthews Correlation Coefficient (MCC)

from sklearn.metrics import matthews_corrcoef

mcc = matthews_corrcoef(y_true=y_test, y_pred=y_pred)

print(f'MCC: {mcc}')

