In [1]:
!pip install scikit-learn

Collecting scikit-learn
  Using cached scikit_learn-1.4.2-cp310-cp310-win_amd64.whl.metadata (11 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Using cached scipy-1.13.0-cp310-cp310-win_amd64.whl.metadata (60 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.4.0-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=2.0.0 (from scikit-learn)
  Using cached threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Using cached scikit_learn-1.4.2-cp310-cp310-win_amd64.whl (10.6 MB)
Using cached joblib-1.4.0-py3-none-any.whl (301 kB)
Using cached scipy-1.13.0-cp310-cp310-win_amd64.whl (46.2 MB)
Using cached threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, scipy, joblib, scikit-learn
Successfully installed joblib-1.4.0 scikit-learn-1.4.2 scipy-1.13.0 threadpoolctl-3.5.0


In [2]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler

# ==============================
# Number 1
# ==============================
# Step 1: read the complete milk dataset from disk.
dataset = pd.read_csv('../files/milk.csv')

# Step 2: convert the frame to a NumPy array once, then separate it into
# features (every column except the last) and labels (the last column,
# which is assumed to hold the class).
dataset_values = np.array(dataset)
train_data, train_label = dataset_values[:, 0:-1], dataset_values[:, -1]

# Show the features and labels that will be used for training.
print('Train data:\n', train_data)
print('\nTrain label:\n', train_label)

Train data:
 [[6.6 35 1 ... 1 0 254]
 [6.6 36 0 ... 0 1 253]
 [8.5 70 1 ... 1 1 246]
 ...
 [3.0 40 1 ... 1 1 255]
 [6.8 43 1 ... 1 0 250]
 [8.6 55 0 ... 1 1 255]]

Train label:
 ['high' 'high' 'low' ... 'low' 'high' 'low']


In [3]:
# ==============================
# Number 2
# ==============================
# Step 3: min-max normalisation of the training features.
# Create a scaler targeting the range [0, 1] and fit it to train_data.
sc = MinMaxScaler(feature_range=(0, 1)).fit(train_data)

# Replace train_data with its scaled counterpart.
train_data = sc.transform(train_data)

# Show the scaled training features.
print('Normalized train data:\n', train_data)

Normalized train data:
 [[0.55384615 0.01785714 1.         ... 1.         0.         0.93333333]
 [0.55384615 0.03571429 0.         ... 0.         1.         0.86666667]
 [0.84615385 0.64285714 1.         ... 1.         1.         0.4       ]
 ...
 [0.         0.10714286 1.         ... 1.         1.         1.        ]
 [0.58461538 0.16071429 1.         ... 1.         0.         0.66666667]
 [0.86153846 0.375      0.         ... 1.         1.         1.        ]]


In [4]:
# ==============================
# Number 3
# ==============================
# Step 4: build the k-nearest-neighbours classifier.
# The hyperparameters are collected in one place: 7 neighbours with
# distance-based vote weighting.
knn_params = {'n_neighbors': 7, 'weights': 'distance'}
knn = KNeighborsClassifier(**knn_params)

# Fit the classifier on the normalised features and their labels.
knn.fit(train_data, train_label)

In [5]:
# Step 5: Take user input for a single test sample.
# Each value is read as text; the prompts state the expected range.
ph = input('Masukkan pH (3 - 9.5): ')
temperature = input('Masukkan suhu (C) (34-90): ')
taste = input('Masukkan rasa (0 for Bad, 1 for Good): ')
odor = input('Masukkan bau (0 for Bad, 1 for Good): ')
fat = input('Masukkan lemak (0 for Low, 1 for High): ')
turbidity = input('Masukkan kekeruhan (0 for Low, 1 for High): ')
colour = input('Masukkan warna (240-255): ')

# Convert the answers into a numeric array.
# float() is used instead of int() because pH is fractional (the prompt allows
# 3 - 9.5 and the dataset contains values such as 6.6); int('6.6') would raise
# ValueError. float() also accepts the whole-number answers unchanged.
raw_answers = (ph, temperature, taste, odor, fat, turbidity, colour)
test_data = np.array([float(answer) for answer in raw_answers])

# Print the test data
print('\nTest data:\n', test_data)


Test data:
 [  7  50   1   1   1   1 250]


In [6]:
# Step 6: shape the sample as a single row (1 x n_features) and scale it with
# the MinMaxScaler that was fitted on the training data.
test_data = sc.transform(test_data.reshape(1, -1))

# Show the sample in the form that is passed to the classifier.
print('Reshaped and normalized test data:\n', test_data)

Reshaped and normalized test data:
 [[0.61538462 0.28571429 1.         1.         1.         1.
  0.66666667]]


In [7]:
# Step 7: ask the fitted k-NN classifier for the class of the prepared sample.
hasil = knn.predict(test_data)

# Show the predicted class.
print("Hasil dari kNN:\n", hasil)

Hasil dari kNN:
 ['high']


In [9]:
# ==============================
# Number 4
# ==============================
# Read the training split from 'milk_training.csv'.
dataset = pd.read_csv('../files/milk_training.csv')

# Convert the frame to a NumPy array once; the last column is assumed to be
# the label, everything before it is a feature.
training_values = np.array(dataset)
train_data, train_label = training_values[:, 0:-1], training_values[:, -1]

# Show the training features and labels.
print('Train data:\n', train_data)
print('\nTrain label:\n', train_label)

Train data:
 [[6.6 35 1 ... 1 0 254]
 [6.6 36 0 ... 0 1 253]
 [6.6 37 1 ... 1 1 255]
 ...
 [6.7 41 1 ... 0 0 247]
 [6.8 41 0 ... 0 0 255]
 [6.8 38 0 ... 0 0 255]]

Train label:
 ['high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high

In [10]:
# ==============================
# Number 5
# ==============================
# Create a fresh scaler targeting the range [0, 1] and fit it to the
# training features loaded from the training split.
sc = MinMaxScaler(feature_range=(0, 1)).fit(train_data)

# Keep the scaled features under a separate name so train_data stays raw.
train_data_normalized = sc.transform(train_data)

# Show the scaled training features.
print('Normalized train data:\n', train_data_normalized)

Normalized train data:
 [[0.55384615 0.01785714 1.         ... 1.         0.         0.93333333]
 [0.55384615 0.03571429 0.         ... 0.         1.         0.86666667]
 [0.55384615 0.05357143 1.         ... 1.         1.         1.        ]
 ...
 [0.56923077 0.125      1.         ... 0.         0.         0.46666667]
 [0.58461538 0.125      0.         ... 0.         0.         1.        ]
 [0.58461538 0.07142857 0.         ... 0.         0.         1.        ]]


In [12]:
# ==============================
# Number 6
# ==============================
# Read the test split from 'milk_testing.csv'.
test_dataset = pd.read_csv('../files/milk_testing.csv')

# Convert the frame to a NumPy array once; the last column is assumed to be
# the label, everything before it is a feature.
testing_values = np.array(test_dataset)
test_data, test_label = testing_values[:, 0:-1], testing_values[:, -1]

# Show the test features and labels.
print('Test data:\n', test_data)
print('\nTest label:\n', test_label)

Test data:
 [[6.8 45 1 ... 1 0 245]
 [6.6 37 1 ... 1 1 255]
 [6.7 38 1 ... 1 0 255]
 ...
 [6.5 37 0 ... 0 0 255]
 [6.5 40 1 ... 0 0 250]
 [6.7 45 1 ... 0 0 247]]

Test label:
 ['high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'low' 'low' 'low' 'low'
 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low'
 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low'
 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low'
 'low' 'low' 'low' 'low' 'low' 'lo

In [13]:
# ==============================
# Number 7
# ==============================
# Scale the test features with `sc`, the scaler already fitted on the
# training features. Only transform() is called here: fitting a new scaler on
# the test set would scale it with the test set's own min/max, so the same raw
# value could map to different normalised values in training and testing.
test_data_normalized = sc.transform(test_data)

# Print the normalized test data
print('Normalized test data:\n', test_data_normalized)

Normalized test data:
 [[0.58461538 0.19642857 1.         ... 1.         0.         0.33333333]
 [0.55384615 0.05357143 1.         ... 1.         1.         1.        ]
 [0.56923077 0.07142857 1.         ... 1.         0.         1.        ]
 ...
 [0.53846154 0.05357143 0.         ... 0.         0.         1.        ]
 [0.53846154 0.10714286 1.         ... 0.         0.         0.66666667]
 [0.56923077 0.19642857 1.         ... 0.         0.         0.46666667]]


In [14]:
# Refit the classifier on the training split before evaluating.
# Without this step `knn` is still the model fitted earlier on the full
# 'milk.csv' data, and train_data_normalized / train_label from
# 'milk_training.csv' would never be used. The hyperparameters match the
# classifier created earlier in the notebook.
knn = KNeighborsClassifier(n_neighbors=7, weights='distance')
knn.fit(train_data_normalized, train_label)

# Predict the class of every row in the normalised test split.
test_predictions = knn.predict(test_data_normalized)

# Print the prediction results
print("Hasil dari kNN:\n", test_predictions)

Hasil dari kNN:
 ['high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'low' 'low' 'low' 'low'
 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low'
 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low'
 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low'
 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low'
 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low'
 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low

In [15]:
# Element-wise check of each prediction against its ground-truth label;
# True marks a correct prediction.
comparison = (test_predictions == test_label)

# Report predictions, ground truth and the per-sample comparison, in that order.
print("Predicted labels using k-NN:\n", test_predictions)
print("\nActual labels (Ground Truth):\n", test_label)
print("\nComparison with Test label:\n", comparison)

Predicted labels using k-NN:
 ['high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'high'
 'high' 'high' 'high' 'high' 'high' 'high' 'high' 'low' 'low' 'low' 'low'
 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low'
 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low'
 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low'
 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low'
 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low' 'low'
 'low' 'low' 'low' 'low' 'low' 'lo