# Tugas 3

### Dengan menggunakan dataset diabetes, buatlah ensemble voting dengan algoritma berikut:

1. Logistic Regression

2. SVM kernel polynomial

3. Decision Tree

In [43]:
# Import library yang diperlukan
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, classification_report

# Persiapan Data

In [44]:
# Read the diabetes dataset from a CSV file in the working directory.
DATA_PATH = 'diabetes.csv'
df = pd.read_csv(DATA_PATH)

# Leave the first rows as the cell's displayed value for a quick sanity check.
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


# Memisahkan Fitur dan Target

In [45]:
# Separate the label from the predictors: `y` is the 'Outcome' column and
# `X` is every other column of `df`.
# NOTE(review): the rendered df.head() preview lists a leading 'Unnamed: 0'
# column. That is probably just the row index as rendered, but if it is a
# real CSV column it ends up in X as a feature -- confirm.
TARGET_COLUMN = 'Outcome'
y = df[TARGET_COLUMN]
X = df.drop(columns=[TARGET_COLUMN])

## Split Data Training dan Testing

In [46]:
# Hold out 20% of the rows as the test set; the fixed random_state keeps the
# split reproducible between runs.
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Melakukan Standardisasi

In [47]:
# Standardise the features. The scaler is fitted on the training rows only and
# the same fitted scaler is then applied to both the training and test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Membuat model Logistic Regression, SVM kernel polynomial, dan Decision Tree

In [48]:
# The three base learners of the ensemble, all given the same seed.
shared_seed = 42

log_reg = LogisticRegression(random_state=shared_seed)

# Polynomial-kernel SVM; probability=True enables probability estimates,
# which the soft-voting ensemble built from these models relies on.
svm_poly = SVC(kernel='poly', probability=True, random_state=shared_seed)

tree_clf = DecisionTreeClassifier(random_state=shared_seed)

# Membuat Ensemble Voting

In [49]:
# Combine the three base learners with soft voting. The short names given
# here ('lr', 'svc', 'tree') are the prefixes used to address each model's
# hyperparameters in the grid search (e.g. 'lr__C').
base_estimators = [
    ('lr', log_reg),
    ('svc', svm_poly),
    ('tree', tree_clf),
]
voting_clf = VotingClassifier(estimators=base_estimators, voting='soft')

## Hyperparameter Tuning dengan GridSearchCV

In [50]:
# Search space for the ensemble: every key is '<estimator name>__<parameter>'.
# Logistic regression and the SVM are tried with the same C values.
c_values = [0.1, 1.0, 10]
param_grid = {
    'lr__C': c_values,
    'svc__C': c_values,
    'svc__degree': [2, 3, 4],
    'tree__max_depth': [3, 5, 10],
}

# Exhaustive 5-fold cross-validated search, scored by accuracy, using all
# available CPU cores.
# NOTE(review): X_train_scaled was produced by a scaler fitted on the whole
# training set, so each validation fold has already influenced the scaling
# statistics. Putting the scaler and the ensemble in one Pipeline and
# searching over that would remove this leakage; it would also require
# feeding unscaled data to the fitted estimator afterwards.
grid_search = GridSearchCV(
    voting_clf,
    param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train_scaled, y_train)

# Show the best hyperparameter combination found by the search.
print(f"Best Parameters: {grid_search.best_params_}")

Best Parameters: {'lr__C': 0.1, 'svc__C': 0.1, 'svc__degree': 3, 'tree__max_depth': 5}


# Evaluasi Model

In [51]:
# Predict the held-out test rows with the best estimator found by the search.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test_scaled)
acc_test = accuracy_score(y_test, y_pred)

# Print the test accuracy followed by the classification report.
print(f"Accuracy on test: {acc_test}")
print(classification_report(y_test, y_pred))

Accuracy on test: 0.7922077922077922
              precision    recall  f1-score   support

           0       0.81      0.88      0.84        99
           1       0.74      0.64      0.69        55

    accuracy                           0.79       154
   macro avg       0.78      0.76      0.77       154
weighted avg       0.79      0.79      0.79       154



In [52]:
# Predict the training rows with the same tuned model so the result can be
# compared with the test metrics (a large gap would point to overfitting).
tuned_ensemble = grid_search.best_estimator_
y_pred_train = tuned_ensemble.predict(X_train_scaled)
acc_train = accuracy_score(y_train, y_pred_train)

# Print the training accuracy followed by the classification report.
print(f"Accuracy on train: {acc_train}")
print(classification_report(y_train, y_pred_train))

Accuracy on train: 0.8208469055374593
              precision    recall  f1-score   support

           0       0.81      0.94      0.87       401
           1       0.84      0.60      0.70       213

    accuracy                           0.82       614
   macro avg       0.83      0.77      0.79       614
weighted avg       0.82      0.82      0.81       614

