# Kelompok 6 - DATASET PIMA (Pima-Indians-Diabetes)

Nama anggota kelompok :
*   Farhan Rangkuti - 1304202025
*   Bhagas Ade Pramono - 1304201018
*   Muhammad Meidy Mahardika - 1304202024

# Niapy Algoritma

Dataset

In [None]:
import pandas as pd

# Load the dataset
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
column_names = ["Pregnancies", "Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI", "DiabetesPedigreeFunction", "Age", "Outcome"]
data = pd.read_csv(url, header=None, names=column_names)
data.head()


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


## Pre-processing Data

In [20]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split the data into features and labels
X = data.drop("Outcome", axis=1)
y = data["Outcome"]

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [21]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Define the model
model = SVC()

## Install library Niapy yang diperlukan

In [None]:
pip install niapy

Collecting niapy
  Downloading niapy-2.3.1-py3-none-any.whl (183 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.9/183.9 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting matplotlib<4.0.0,>=3.8.0 (from niapy)
  Downloading matplotlib-3.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.3/8.3 MB[0m [31m31.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting numpy<2.0.0,>=1.26.1 (from niapy)
  Downloading numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.2/18.2 MB[0m [31m59.9 MB/s[0m eta [36m0:00:00[0m
Collecting pandas<3.0.0,>=2.1.1 (from niapy)
  Downloading pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.0/13.0 MB[0m [31m84.8 MB/s[0m eta [36m0:00:00[0m
Instal

## Model Niapy menggunakan pendekatan GeneticAlgorithm

In [25]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from niapy.task import Task
from niapy.problems import Problem
from niapy.algorithms.basic import GeneticAlgorithm
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
import numpy as np

# Define the optimization problem
class SVMHyperparameterOptimization(Problem):
    def __init__(self):
        super().__init__(dimension=4, lower=[0.1, 0.0001, 0, 1], upper=[100, 1, 2, 5])

    def _evaluate(self, solution):
        C, gamma, kernel_idx, degree = solution
        kernels = ['linear', 'rbf', 'poly']
        kernel = kernels[int(kernel_idx)]
        if kernel == 'poly':
            model = SVC(C=C, gamma=gamma, kernel=kernel, degree=int(degree))
        else:
            model = SVC(C=C, gamma=gamma, kernel=kernel)
        scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
        return -np.mean(scores)

# Create the optimization task
task = Task(problem=SVMHyperparameterOptimization(), max_evals=300)

# Perform the optimization using Genetic Algorithm
algo = GeneticAlgorithm(population_size=50)
best_solution = algo.run(task=task)

# Extract the best hyperparameters
best_C, best_gamma, best_kernel_idx, best_degree = best_solution[0]
best_kernel = ['linear', 'rbf', 'poly'][int(best_kernel_idx)]
print(f"Best C: {best_C}, Best gamma: {best_gamma}, Best kernel: {best_kernel}, Best degree: {best_degree}")

# Train the model with the best hyperparameters
if best_kernel == 'poly':
    optimized_model = SVC(C=best_C, gamma=best_gamma, kernel=best_kernel, degree=int(best_degree))
else:
    optimized_model = SVC(C=best_C, gamma=best_gamma, kernel=best_kernel)

optimized_model.fit(X_train, y_train)

# Predict and evaluate the model
y_pred = optimized_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of the optimized model: {accuracy}")

Best C: 1.1567437584303444, Best gamma: 0.185435851720627, Best kernel: rbf, Best degree: 4.122304955988525
Accuracy of the optimized model: 0.7337662337662337


## Model Niapy menggunakan pendekatan ParticleSwarmOptimization

In [22]:
from niapy.task import Task
from niapy.problems import Problem
from niapy.algorithms.basic import ParticleSwarmOptimization
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
import numpy as np

class SVMHyperparameterOptimization(Problem):
    def __init__(self):
        super().__init__(dimension=2, lower=[0.1, 0.0001], upper=[100, 1])

    def _evaluate(self, solution):
        C, gamma = solution
        model = SVC(C=C, gamma=gamma, kernel='rbf')
        scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
        return -np.mean(scores)

# Create the optimization task
task = Task(problem=SVMHyperparameterOptimization(), max_evals=200)

# Perform the optimization using Particle Swarm Optimization
algo = ParticleSwarmOptimization(population_size=30)
best_solution = algo.run(task=task)

# Extract the best hyperparameters
best_C, best_gamma = best_solution[0]
print(f"Best C: {best_C}, Best gamma: {best_gamma}")


Best C: 96.36801395535757, Best gamma: 0.0001


In [23]:
from sklearn.metrics import accuracy_score

# Train the model with the best hyperparameters
optimized_model = SVC(C=best_C, gamma=best_gamma, kernel='rbf')
optimized_model.fit(X_train, y_train)

# Predict and evaluate the model
y_pred = optimized_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of the optimized model: {accuracy}")


Accuracy of the optimized model: 0.7662337662337663


Dari dua eksperimen mendapatkan accuracy terbaik model yaitu: 0.7662337662337663 dengan Best C: 96.36801395535757 dan Best gamma: 0.0001