In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score

# Read the diabetes dataset from disk (the CSV file must exist at this relative path).
df = pd.read_csv('../data/diabetes.csv')

# The 'Outcome' column is the label; every remaining column is used as a feature.
y = df['Outcome']
X = df.drop(columns=['Outcome'])

# Hold out 20% of the rows for testing, using a fixed seed for the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# The three base learners that will vote in the ensemble.
# NOTE(review): the features are passed to the models as loaded, without any
# scaling step; SVC and LogisticRegression are typically scale-sensitive, so
# a scaler may be worth evaluating -- confirm before changing, since it would
# alter the reported accuracy.
logistic_model = LogisticRegression(solver='liblinear', random_state=42)
svm_model = SVC(kernel='poly', degree=3, random_state=42)  # polynomial kernel, degree 3
decision_tree_model = DecisionTreeClassifier(random_state=42)

# Combine the three models into a voting ensemble. 'hard' voting is used here;
# 'soft' voting is an option only when the models output probabilities.
base_estimators = [
    ('lr', logistic_model),
    ('svm', svm_model),
    ('dt', decision_tree_model),
]
ensemble_model = VotingClassifier(estimators=base_estimators, voting='hard')

# Train the ensemble on the training split.
ensemble_model.fit(X_train, y_train)

# Predict on the held-out split and score the predictions against the true labels.
y_pred_ensemble = ensemble_model.predict(X_test)
accuracy_ensemble = accuracy_score(y_test, y_pred_ensemble)

# Report the ensemble accuracy as a percentage with two decimal places.
accuracy_percent = accuracy_ensemble * 100
print("Akurasi Ensemble Voting: {:.2f}%".format(accuracy_percent))


Akurasi Ensemble Voting: 77.92%
