In [76]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
import joblib

# Load data
data = pd.read_csv('/content/drive/MyDrive/prefrensi makanan kantin mamih yani.csv')

# Preprocess data
# Drop unnecessary columns
data = data.drop(['Tanggal_order', 'Participant_ID'], axis=1)

# Encode categorical columns
le = LabelEncoder()
data['Jenis_kelamin'] = le.fit_transform(data['Jenis_kelamin'])
data['Kewarganegaraan'] = le.fit_transform(data['Kewarganegaraan'])
data['Grup_umur'] = le.fit_transform(data['Grup_umur'])
data['Jenis Makanan'] = le.fit_transform(data['Jenis Makanan'])
data['Minuman'] = le.fit_transform(data['Minuman'])
data['Dessert'] = le.fit_transform(data['Dessert'])

# Feature selection
# Select K best features
X = data.drop(['Jenis_kelamin', 'Grup_umur', 'Jenis Makanan', 'Minuman'], axis=1)
y = data['Dessert']

selector = SelectKBest(chi2, k='all')
X_new = selector.fit_transform(X, y)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train KNN model
knn = KNeighborsClassifier()

# Hyperparameter tuning
param_grid = {
    'n_neighbors': [3, 5, 7],
    'weights': ['uniform', 'distance']
}

grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

best_knn = grid_search.best_estimator_
y_pred_knn = best_knn.predict(X_test)

# Evaluate KNN model
accuracy_knn = accuracy_score(y_test, y_pred_knn)
print("Akurasi KNN:", accuracy_knn)
print(classification_report(y_test, y_pred_knn))
print(confusion_matrix(y_test, y_pred_knn))

# Train SVC model
svc = SVC()

# Hyperparameter tuning
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

grid_search = GridSearchCV(svc, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

best_svc = grid_search.best_estimator_
y_pred_svc = best_svc.predict(X_test)

# Evaluate SVC model
accuracy_svc = accuracy_score(y_test, y_pred_svc)
print("Akurasi SVC:", accuracy_svc)
print(classification_report(y_test, y_pred_svc))
print(confusion_matrix(y_test, y_pred_svc))

# Save the best model
if accuracy_knn > accuracy_svc:
    best_model = best_knn
else:
    best_model = best_svc

joblib.dump(best_model, 'best_model.pkl')

# Load the best model
best_model = joblib.load('best_model.pkl')


Akurasi KNN: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        27
           1       1.00      1.00      1.00         6
           2       1.00      1.00      1.00        24

    accuracy                           1.00        57
   macro avg       1.00      1.00      1.00        57
weighted avg       1.00      1.00      1.00        57

[[27  0  0]
 [ 0  6  0]
 [ 0  0 24]]
Akurasi SVC: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        27
           1       1.00      1.00      1.00         6
           2       1.00      1.00      1.00        24

    accuracy                           1.00        57
   macro avg       1.00      1.00      1.00        57
weighted avg       1.00      1.00      1.00        57

[[27  0  0]
 [ 0  6  0]
 [ 0  0 24]]
