# KNN
Harvest Walukow 164231104

In [1]:
import pandas as pd
import numpy as np
from collections import Counter

In [2]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [3]:
# Read the training and test splits from CSV files in the working directory.
train_data = pd.read_csv(filepath_or_buffer='train_rgb_dataset.csv')
test_data = pd.read_csv(filepath_or_buffer='test_rgb_dataset.csv')

In [4]:
# Features and target: the 'R', 'G', 'B' columns are the inputs and the
# 'RASA' column is the label to predict.
# `.to_numpy()` is used instead of `.values` because it always returns a
# plain NumPy array, whereas `.values` may hand back a pandas ExtensionArray
# depending on the column dtype.
X_train = train_data[['R', 'G', 'B']].to_numpy()
y_train = train_data['RASA'].to_numpy()
X_test = test_data[['R', 'G', 'B']].to_numpy()
y_test = test_data['RASA'].to_numpy()

## Manual

In [5]:
# Euclidean distance function
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Both arguments are converted to float arrays before subtracting, so
    unsigned integer inputs (e.g. uint8 colour channels) cannot wrap around
    and plain Python sequences are accepted as well.

    Args:
        x1: Array-like coordinates of the first point.
        x2: Array-like coordinates of the second point; must be
            broadcast-compatible with ``x1``.

    Returns:
        The distance as a NumPy float scalar.
    """
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

In [6]:
# KNN prediction function
def knn_predict(X_train, y_train, X_test, k=3):
    """Classify each test sample by majority vote of its k nearest neighbours.

    Distances are Euclidean and are computed against all training samples at
    once. Features are converted to float first so unsigned integer inputs
    (e.g. uint8 colour channels) cannot wrap around during subtraction.

    Args:
        X_train: Array-like of training features, one sample per row.
        y_train: Sequence of labels aligned with the rows of ``X_train``.
        X_test: Array-like of query samples, one per row, with the same
            number of features as ``X_train``.
        k: Number of neighbours that vote; must be at least 1. A value
            larger than the training set simply uses every training sample.

    Returns:
        A list containing one predicted label per row of ``X_test``.

    Raises:
        ValueError: If ``k`` is smaller than 1, the training set is empty,
            or ``X_train`` and ``y_train`` have different lengths.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    train = np.asarray(X_train, dtype=float)
    if len(train) == 0:
        raise ValueError("X_train must contain at least one sample")
    if len(train) != len(y_train):
        raise ValueError("X_train and y_train must have the same length")

    queries = np.asarray(X_test, dtype=float)
    if len(queries) == 0:
        return []

    # Flatten every sample to one feature vector so the row-wise reduction
    # below works for any per-sample shape (including a single scalar feature).
    train = train.reshape(len(train), -1)
    queries = queries.reshape(len(queries), -1)

    predictions = []
    for test_point in queries:
        # Euclidean distance from this query to every training sample.
        distances = np.sqrt(np.sum((train - test_point) ** 2, axis=1))

        # A stable sort keeps equally distant samples in training order,
        # which makes tie-breaking deterministic.
        k_indices = np.argsort(distances, kind="stable")[:k]
        k_nearest_labels = [y_train[i] for i in k_indices]

        # Majority label; on a vote tie the label seen first (i.e. belonging
        # to the nearer neighbour) wins.
        most_common = Counter(k_nearest_labels).most_common(1)
        predictions.append(most_common[0][0])

    return predictions

In [7]:
# Predict the test-set labels with the hand-written KNN, using k=3 neighbours.
predictions_manual = knn_predict(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    k=3,
)


In [8]:
# Compute the accuracy (percentage of test samples predicted correctly).
# Both sides are converted to arrays so the comparison is always element-wise;
# comparing a plain list directly only works when the other operand happens
# to be a NumPy array.
accuracy_manual = np.mean(np.asarray(predictions_manual) == np.asarray(y_test)) * 100
print(f"Akurasi: {accuracy_manual:.2f}%")


Akurasi: 93.33%


In [9]:
# Results: predicted vs. actual label for every test sample (numbered from 1).
pd.DataFrame(
    data={
        "Sampel": range(1, 1 + len(predictions_manual)),
        "Prediksi": predictions_manual,
        "Aktual": y_test,
    },
)

Unnamed: 0,Sampel,Prediksi,Aktual
0,1,Asam,Manis
1,2,Manis,Manis
2,3,Manis,Manis
3,4,Manis,Manis
4,5,Manis,Manis
5,6,Asam,Asam
6,7,Asam,Asam
7,8,Asam,Asam
8,9,Asam,Asam
9,10,Asam,Asam


## SKLEARN

In [10]:
# Build the scikit-learn KNN classifier and fit it on the training data.
k = 3
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X=X_train, y=y_train)

# Predict the labels of the test samples.
predictions_sklearn = knn.predict(X=X_test)

# Compute the accuracy as a percentage.
accuracy_sklearn = 100 * accuracy_score(y_true=y_test, y_pred=predictions_sklearn)
print(f"Accuracy: {accuracy_sklearn:.2f}%")

Accuracy: 93.33%


In [11]:
# Results: predicted vs. actual label for every test sample (numbered from 1).
pd.DataFrame(
    data={
        "Sampel": range(1, 1 + len(predictions_sklearn)),
        "Prediksi": predictions_sklearn,
        "Aktual": y_test,
    },
)

Unnamed: 0,Sampel,Prediksi,Aktual
0,1,Asam,Manis
1,2,Manis,Manis
2,3,Manis,Manis
3,4,Manis,Manis
4,5,Manis,Manis
5,6,Asam,Asam
6,7,Asam,Asam
7,8,Asam,Asam
8,9,Asam,Asam
9,10,Asam,Asam
