In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from scipy.stats import uniform
import matplotlib.pyplot as plt
import random

In [2]:
data = pd.read_csv('adult_cleaned_final.csv')

In [4]:
X = data.drop('income', axis=1)

In [6]:
y = data['income'].apply(lambda x: 1 if x == '>50K' else 0)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
numerical_features = ['age', 'fnlwgt', 'educational-num', 'capital-gain', 'capital-loss', 'hours-per-week']
categorical_features = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'gender', 'native-country']

In [10]:
numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

In [11]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ])

In [12]:
initial_clf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', SVC(kernel='rbf'))
])

In [13]:
initial_clf.fit(X_train, y_train)
y_pred_initial = initial_clf.predict(X_test)
print("Inicijalna preciznost:", accuracy_score(y_test, y_pred_initial))

Inicijalna preciznost: 0.8453721075672295


In [14]:
initial_best = [1.5227581792019662, 0.06450025916751723]

In [15]:
lb = [max(0.1, initial_best[0] - 0.5), max(0.001, initial_best[1] - 0.01)]
ub = [initial_best[0] + 0.5, initial_best[1] + 0.01]
random_seed = 68

In [16]:
def objective_function(params, iteration_seed):
    C, gamma = params
    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', SVC(kernel='rbf', C=C, gamma=gamma, random_state=iteration_seed))
    ])
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    return accuracy_score(y_test, y_pred)

In [17]:
def bat_algorithm(n_bats, iterations, lb, ub, pulse_freq, loudness, pulse_rate, alpha, gamma, initial_best, random_seed):
    np.random.seed(random_seed)
    random.seed(random_seed)
    dim = 2
    Qmin, Qmax = pulse_freq
    Q = np.zeros(n_bats)
    V = np.zeros((n_bats, dim))
    np.random.seed(random.randint(0, 123456789))
    X = np.random.uniform(lb, ub, (n_bats, dim))
    X[0] = initial_best
    print(X)
    fitness = np.array([objective_function(ind, random.randint(0, 10000)) for ind in X])
    best_bat = X[np.argmax(fitness)]
    best_fitness = np.max(fitness)

    history = []

    for t in range(iterations):
        for i in range(n_bats):
            Q[i] = Qmin + (Qmax - Qmin) * np.random.rand()
            V[i] += (X[i] - best_bat) * Q[i]
            X[i] = np.clip(X[i] + V[i], lb, ub)

            if np.random.rand() > pulse_rate:
                X[i] = np.clip(best_bat + 0.001 * np.random.randn(dim), lb, ub)
            iteration_seed = random.randint(0, 10000)
            if np.random.rand() < loudness and objective_function(X[i], iteration_seed) > fitness[i]:
                fitness[i] = objective_function(X[i], iteration_seed)
                if fitness[i] > best_fitness:
                    best_bat = X[i]
                    best_fitness = fitness[i]

        print(f"BAT Iteracija: {t+1}")
        history.append(best_fitness)

        loudness *= alpha
        pulse_rate *= (1 - np.exp(-gamma * t))

    return best_bat, best_fitness, history

In [18]:
n_bats = 10
iterations = 30
pulse_freq = [0, 2]
loudness = 0.5
pulse_rate = 0.5
alpha = 0.9
gamma = 0.9

In [19]:
best_params_bat, best_score_bat, history_bat = bat_algorithm(n_bats, iterations, lb, ub, pulse_freq, loudness, pulse_rate, alpha, gamma, initial_best, random_seed)

[[1.52275818 0.06450026]
 [1.76441665 0.06438944]
 [1.98784787 0.05686907]
 [1.8931667  0.07277934]
 [1.30734295 0.07087928]
 [1.6505671  0.06838754]
 [1.63917684 0.06855125]
 [1.74123146 0.06649116]
 [1.99115415 0.05569387]
 [1.21950118 0.06948251]]
BAT Iteracija: 1
BAT Iteracija: 2
BAT Iteracija: 3
BAT Iteracija: 4
BAT Iteracija: 5
BAT Iteracija: 6
BAT Iteracija: 7
BAT Iteracija: 8
BAT Iteracija: 9
BAT Iteracija: 10
BAT Iteracija: 11
BAT Iteracija: 12
BAT Iteracija: 13
BAT Iteracija: 14
BAT Iteracija: 15
BAT Iteracija: 16
BAT Iteracija: 17
BAT Iteracija: 18
BAT Iteracija: 19
BAT Iteracija: 20
BAT Iteracija: 21
BAT Iteracija: 22
BAT Iteracija: 23
BAT Iteracija: 24
BAT Iteracija: 25
BAT Iteracija: 26
BAT Iteracija: 27
BAT Iteracija: 28
BAT Iteracija: 29
BAT Iteracija: 30


In [20]:
print(f"BAT najbolje parametre: C = {best_params_bat[0]}, gamma = {best_params_bat[1]}")
print(f"BAT najbolji rezultat: {best_score_bat}")

BAT najbolje parametre: C = 1.022928653795048, gamma = 0.055679560530225235
BAT najbolji rezultat: 0.848968105065666
