In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from scipy.stats import uniform
import matplotlib.pyplot as plt
import random

In [2]:
data = pd.read_csv('adult_cleaned_final.csv')

In [4]:
X = data.drop('income', axis=1)

In [6]:
y = data['income'].apply(lambda x: 1 if x == '>50K' else 0)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
numerical_features = ['age', 'fnlwgt', 'educational-num', 'capital-gain', 'capital-loss', 'hours-per-week']
categorical_features = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'gender', 'native-country']

In [10]:
numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

In [11]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ])

In [12]:
initial_clf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', SVC(kernel='rbf'))
])

In [13]:
initial_clf.fit(X_train, y_train)
y_pred_initial = initial_clf.predict(X_test)
print("Inicijalna preciznost:", accuracy_score(y_test, y_pred_initial))

Inicijalna preciznost: 0.8453721075672295


In [14]:
initial_best = [1.5227581792019662, 0.06450025916751723]

In [15]:
lb = [max(0.1, initial_best[0] - 0.5), max(0.001, initial_best[1] - 0.01)]
ub = [initial_best[0] + 0.5, initial_best[1] + 0.01]
random_seed = 42

In [16]:
def objective_function(params, iteration_seed):
    C, gamma = params
    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', SVC(kernel='rbf', C=C, gamma=gamma, random_state=iteration_seed))
    ])
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    return accuracy_score(y_test, y_pred)

In [17]:
def pso(n_particles, iterations, w, c1, c2, lb, ub, initial_best, random_seed):
    np.random.seed(random_seed)
    random.seed(random_seed)
    dim = 2
    X = np.random.uniform(lb, ub, (n_particles, dim))
    X[0] = initial_best
    print(X)
    V = np.random.uniform(-1, 1, (n_particles, dim))
    pbest = X.copy()
    pbest_scores = np.array([objective_function(ind, random.randint(0, 10000)) for ind in X])
    gbest = X[np.argmax(pbest_scores)]
    gbest_score = np.max(pbest_scores)

    history = []

    for t in range(iterations):
        for i in range(n_particles):
            r1, r2 = np.random.rand(), np.random.rand()
            V[i] = w * V[i] + c1 * r1 * (pbest[i] - X[i]) + c2 * r2 * (gbest - X[i])
            X[i] = np.clip(X[i] + V[i], lb, ub)

            iteration_seed = random.randint(0, 10000)
            score = objective_function(X[i], iteration_seed)
            if score > pbest_scores[i]:
                pbest[i] = X[i]
                pbest_scores[i] = score
                if score > gbest_score:
                    gbest = X[i]
                    gbest_score = score

        print(f"PSO Iteracija: {t+1}")
        history.append(gbest_score)

    return gbest, gbest_score, history

In [18]:
n_particles = 10
iterations = 30
w = 0.5
c1 = 1.5
c2 = 1.5

In [19]:
best_params_pso, best_score_pso, history_pso = pso(n_particles, iterations, w, c1, c2, lb, ub, initial_best, random_seed)

[[1.52275818 0.06450026]
 [1.75475212 0.06647343]
 [1.17877682 0.05762015]
 [1.08084179 0.07182378]
 [1.62387319 0.06866171]
 [1.04334267 0.07389846]
 [1.85520082 0.05874704]
 [1.20458315 0.05816835]
 [1.32700042 0.06499539]
 [1.4547032  0.06032484]]
PSO Iteracija: 1
PSO Iteracija: 2
PSO Iteracija: 3
PSO Iteracija: 4
PSO Iteracija: 5
PSO Iteracija: 6
PSO Iteracija: 7
PSO Iteracija: 8
PSO Iteracija: 9
PSO Iteracija: 10
PSO Iteracija: 11
PSO Iteracija: 12
PSO Iteracija: 13
PSO Iteracija: 14
PSO Iteracija: 15
PSO Iteracija: 16
PSO Iteracija: 17
PSO Iteracija: 18
PSO Iteracija: 19
PSO Iteracija: 20
PSO Iteracija: 21
PSO Iteracija: 22
PSO Iteracija: 23
PSO Iteracija: 24
PSO Iteracija: 25
PSO Iteracija: 26
PSO Iteracija: 27
PSO Iteracija: 28
PSO Iteracija: 29
PSO Iteracija: 30


In [20]:
print(f"PSO najbolje parametre: C = {best_params_pso[0]}, gamma = {best_params_pso[1]}")
print(f"PSO najbolji rezultat: {best_score_pso}")

PSO najbolje parametre: C = 1.8390146516443038, gamma = 0.07450025916751722
PSO najbolji rezultat: 0.848968105065666
