In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from scipy.stats import uniform
import matplotlib.pyplot as plt
import random

In [2]:
# Load the cleaned dataset; the path is relative to the current working directory.
data = pd.read_csv(filepath_or_buffer='adult_cleaned_final.csv')

In [4]:
# Feature matrix: every column of `data` except the 'income' target column.
X = data.drop(columns=['income'])

In [6]:
# Binary target: 1 where 'income' is exactly the string '>50K', 0 for any other value.
# A vectorized comparison replaces the per-row Python lambda; the explicit
# int64 cast keeps the same integer dtype that the lambda-based version produced.
y = data['income'].eq('>50K').astype('int64')

In [8]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Columns routed to the numeric transformer.
numerical_features = [
    'age',
    'fnlwgt',
    'educational-num',
    'capital-gain',
    'capital-loss',
    'hours-per-week',
]
# Columns routed to the categorical transformer.
categorical_features = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'gender',
    'native-country',
]

In [10]:
# One-hot encode the categorical columns; handle_unknown='ignore' means a category
# not seen during fit does not raise an error at transform time.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
# Standardize the numeric columns.
numerical_transformer = StandardScaler()

In [11]:
# Send each feature group through its own transformer:
#   'num' -> numerical_transformer over numerical_features
#   'cat' -> categorical_transformer over categorical_features
preprocessor = ColumnTransformer([
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
])

In [12]:
# Baseline model: the shared preprocessor followed by an RBF-kernel SVC
# whose C and gamma are left at the library defaults.
initial_clf = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', SVC(kernel='rbf')),
])

In [13]:
# Fit the baseline on the training split and predict the held-out split.
# Pipeline.fit returns the pipeline itself, so fit and predict can be chained.
y_pred_initial = initial_clf.fit(X_train, y_train).predict(X_test)
# Report the fraction of test rows classified correctly.
print("Inicijalna preciznost:", accuracy_score(y_true=y_test, y_pred=y_pred_initial))

Inicijalna preciznost: 0.8453721075672295


In [14]:
# NOTE(review): not referenced anywhere else in the visible notebook; presumably a
# [C, gamma] pair like the other *_best lists -- confirm where these values came from.
initial_best = [
    1.5227581792019662,
    0.06450025916751723,
]

In [34]:
# Hard-coded SVC hyper-parameters consumed by the tuned pipelines below:
# index 0 is passed as C, index 1 as gamma.
# NOTE(review): the search runs that produced these numbers are not in this notebook.
pso_best = [
    1.971300576029094,
    0.07209545188564022,
]
ga_best = [
    1.8241394352184273,
    0.07443940842727098,
]
bat_best = [
    2.0743086463321316,
    0.054500259167517225,
]

In [23]:
# RBF-kernel SVC configured from pso_best (C = pso_best[0], gamma = pso_best[1]),
# behind the same preprocessor object used by the baseline.
pso_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', SVC(kernel='rbf', C=pso_best[0], gamma=pso_best[1])),
])
# Pipeline.fit returns the pipeline itself, so fit and predict can be chained.
y_pred_pso = pso_pipeline.fit(X_train, y_train).predict(X_test)

In [24]:
# Accuracy of the pso_best configuration on the held-out split.
print("PSO preciznost:", accuracy_score(y_true=y_test, y_pred=y_pred_pso))

PSO preciznost: 0.8492808005003127


In [25]:
# RBF-kernel SVC configured from ga_best (C = ga_best[0], gamma = ga_best[1]),
# behind the same preprocessor object used by the baseline.
ga_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', SVC(kernel='rbf', C=ga_best[0], gamma=ga_best[1])),
])
# Pipeline.fit returns the pipeline itself, so fit and predict can be chained.
y_pred_ga = ga_pipeline.fit(X_train, y_train).predict(X_test)

In [26]:
# Accuracy of the ga_best configuration on the held-out split.
print("GA preciznost:", accuracy_score(y_true=y_test, y_pred=y_pred_ga))

GA preciznost: 0.8488117573483427


In [35]:
# RBF-kernel SVC configured from bat_best (C = bat_best[0], gamma = bat_best[1]),
# behind the same preprocessor object used by the baseline.
bat_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', SVC(kernel='rbf', C=bat_best[0], gamma=bat_best[1])),
])
# Pipeline.fit returns the pipeline itself, so fit and predict can be chained.
y_pred_bat = bat_pipeline.fit(X_train, y_train).predict(X_test)

In [36]:
# Accuracy of the bat_best configuration on the held-out split.
print("BAT preciznost:", accuracy_score(y_true=y_test, y_pred=y_pred_bat))

BAT preciznost: 0.8470919324577861
