In [4]:
# 🔁 Step 1: Import libraries

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.spatial.distance import cdist


In [5]:
# 📥 Step 2: Load and clean Adult dataset

url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
column_names = [
    "age", "workclass", "fnlwgt", "education", "education-num", "marital-status",
    "occupation", "relationship", "race", "sex", "capital-gain", "capital-loss",
    "hours-per-week", "native-country", "income"
]

# The separator is a regular expression (comma plus any following whitespace),
# which is why the python parsing engine is requested. It is written as a raw
# string: in a plain literal "\s" is an invalid escape sequence and Python
# emits a warning for it.
df = pd.read_csv(url, names=column_names, sep=r',\s*', engine='python')

# Rows containing the '?' placeholder are treated as missing and dropped.
df = df.replace('?', np.nan).dropna()


  df = pd.read_csv(url, names=column_names, sep=',\s*', engine='python')


In [6]:
# 🔤 Step 3: Encode categorical columns

# Replace every object-dtype column with integer codes. A fresh encoder is
# used per column and is not kept, so the code -> label mapping is not
# available afterwards.
categorical_columns = df.select_dtypes(include='object').columns
for col in categorical_columns:
    encoder = LabelEncoder()
    df[col] = encoder.fit_transform(df[col])

df.head()


Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,39,5,77516,9,13,4,0,1,4,1,2174,0,40,38,0
1,50,4,83311,9,13,2,3,0,4,1,0,0,13,38,0
2,38,2,215646,11,9,0,5,1,4,1,0,0,40,38,0
3,53,2,234721,1,7,2,5,0,2,1,0,0,40,38,0
4,28,2,338409,9,13,2,9,5,2,0,0,0,40,4,0


In [7]:
# 🧮 Step 4: Scale data and split into train/test

# Target column and feature matrix.
y = df['income']  # 1 = >50K, 0 = <=50K
X = df.drop(columns=['income'])

# NOTE: the scaler is fit on every row before the split below, so the
# held-out rows contribute to the scaling statistics.
X_scaled = StandardScaler().fit_transform(X)

# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, random_state=42, test_size=0.2
)

# Per-class training points as plain Python lists (list of lists), the input
# format the K-Center functions operate on.
class0, class1 = (X_train[y_train == label].tolist() for label in (0, 1))


In [8]:
# 🔁 Step 5: Define your custom K-Center algorithm

def dist(p1, p2):
    """Return the Euclidean distance between two points.

    Both arguments are converted with ``np.array`` first, so plain lists or
    tuples of numbers are accepted.
    """
    difference = np.array(p1) - np.array(p2)
    return np.linalg.norm(difference)


def constructingG(p1, r, points):
    """Return the points whose distance to ``p1`` is at most ``r``.

    The input order is preserved and the returned list holds the same point
    objects that were passed in.
    """
    inside = []
    for candidate in points:
        if dist(candidate, p1) <= r:
            inside.append(candidate)
    return inside

def constructingE(p1, r, points):
    """Return the points whose distance to ``p1`` is at most ``3 * r``.

    Same contract as ``constructingG`` but with the radius tripled; input
    order is preserved.
    """
    return list(filter(lambda candidate: dist(candidate, p1) <= 3 * r, points))

def findingHeaviestDisk(r, points):
    """Find the point whose radius-``r`` disk contains the most points.

    Parameters
    ----------
    r : float
        Disk radius used to score each candidate centre.
    points : list
        Candidate points (each a sequence of numbers). Every point is tried
        as a centre.

    Returns
    -------
    list
        ``[heavy_disk, coordinate]`` where ``coordinate`` is the first point
        with the largest radius-``r`` neighbourhood and ``heavy_disk`` is the
        list of points within ``3 * r`` of it (in input order). Both entries
        are the original objects from ``points``, so callers can remove them
        from the list they passed in. ``[[], []]`` is returned when ``points``
        is empty or no disk contains any point.
    """
    if len(points) == 0:
        return [[], []]

    # Convert once instead of rebuilding two arrays for every pair of points.
    # The reshape gives one row per point and also accepts scalar points.
    coords = np.asarray(points, dtype=float).reshape(len(points), -1)

    heavy = 0
    best_index = None
    best_distances = None
    for index, row in enumerate(coords):
        # Distances from this candidate centre to every point, in one call.
        distances = np.linalg.norm(coords - row, axis=1)
        weight = int(np.count_nonzero(distances <= r))
        # Strict comparison: on ties the earliest candidate is kept.
        if weight > heavy:
            heavy = weight
            best_index = index
            best_distances = distances

    if best_index is None:
        return [[], []]

    # Build the expanded (3r) disk once, for the winning centre only.
    heavy_disk = [points[i] for i in np.flatnonzero(best_distances <= 3 * r)]
    return [heavy_disk, points[best_index]]

def algorithm(points, r, k):
    """Greedily pick up to ``k`` centres that cover ``points``.

    Each round takes the centre returned by ``findingHeaviestDisk`` for the
    still-uncovered points and removes the points of the returned disk from
    the uncovered set.

    Parameters
    ----------
    points : list
        Points to cover; the list is copied and not modified.
    r : float
        Radius passed to ``findingHeaviestDisk``.
    k : int
        Maximum number of centres to select.

    Returns
    -------
    list or str
        The list of chosen centres if every point was covered, otherwise the
        string ``"r not feasible"``. The list may hold fewer than ``k``
        centres when the points are covered early.
    """
    remaining_pts = points.copy()
    solution = []
    for _ in range(k):
        # Stop as soon as everything is covered. Without this, the remaining
        # rounds would run on an empty list and append `[]` as a "centre",
        # which breaks any later distance computation against the centres.
        if len(remaining_pts) == 0:
            break
        disk, center = findingHeaviestDisk(r, remaining_pts)
        # No disk could be formed, so further rounds cannot make progress.
        if len(disk) == 0:
            break
        solution.append(center)
        for x in disk:
            if x in remaining_pts:
                remaining_pts.remove(x)
    if len(remaining_pts) == 0:
        return solution
    else:
        return "r not feasible"


In [13]:
# ⚙️ Step 6: Apply your algorithm on both classes

# NOTE(review): r and k are assigned here but `algorithm` is not called in
# this cell; the centers below are hard-coded literals.
r = 2.5  # Radius — try different values
k = 10   # Number of centers

# Hard-coded class-0 centers: a list of 14-value points. The list holds fewer
# than k entries.
# NOTE(review): presumably pasted from an earlier run of
# `algorithm(class0, r, k)` — confirm, and regenerate if scaling or the split changes.
centers_0 = [[-0.4901540183921728, -0.20895463813743015, -0.011706764286022596, 0.17476277261377215, -0.4397381988619049, 0.9478469877358469, 0.009963981043775699, -0.261248926324518, 0.38504790219493434, 0.6928061582539681, -0.14744462210705175, -0.21858597514475184, -0.07773411061311615, 0.2649239426976268], [0.19506706269085577, -0.20895463813743015, -0.15083435777372717, -1.3991197098674213, -2.400558924753033, -0.3872747067529501, -0.9827151147195954, -0.8857369684706675, -3.2090767602778167, 0.6928061582539681, -0.14744462210705175, -0.21858597514475184, -0.07773411061311615, -5.631619965209495], [-0.642425369743957, -0.20895463813743015, 9.416154070283026, 1.2240177609345677, -0.04757405368367934, -0.3872747067529501, 1.747152398629675, -0.8857369684706675, -2.011035206120233, 0.6928061582539681, -0.14744462210705175, -0.21858597514475184, -0.4951038493472376, 0.2649239426976268], [1.717780576208697, -0.20895463813743015, -1.521372245346088, 0.17476277261377215, -0.4397381988619049, -1.7223964012417472, 1.2508128507479896, 1.6122152001139305, 0.38504790219493434, -1.4434051835223747, -0.14744462210705175, 6.5933278381445355, -1.3298433268154806, 0.2649239426976268], [1.8700519275604812, 1.887682020379259, 5.736196567917448, -1.923747204027819, -3.1848872151094842, 2.2829686822246438, 0.2581337549846185, 1.6122152001139305, 0.38504790219493434, -1.4434051835223747, -0.14744462210705175, -0.21858597514475184, -0.912473588081359, -4.812655533555728], [0.9564238194497765, 1.887682020379259, 0.7462778565509294, 1.2240177609345677, -0.04757405368367934, -0.3872747067529501, 0.754473302866304, 2.23670324226008, 0.38504790219493434, -1.4434051835223747, -0.14744462210705175, 4.869324959472556, 1.5917448443233697, -5.795412851540249], [-0.26174699136449664, -1.2572729673957748, 4.5292914333361365, -0.3498647215466256, 1.1289183818509974, -0.3872747067529501, -0.7345453407787527, -0.8857369684706675, 0.38504790219493434, 0.6928061582539681, -0.14744462210705175, 
4.085237567020593, 3.2612237992598554, 0.2649239426976268], [-0.18561131568860456, 1.887682020379259, -0.00777874528490705, -0.08755097446642673, 2.305410817385674, 1.6154078349802452, 0.754473302866304, 1.6122152001139305, 0.38504790219493434, -1.4434051835223747, -0.14744462210705175, -0.21858597514475184, -2.5819525430178447, -5.795412851540249], [-1.0992394237993095, 2.9360003496376037, 0.18014146977087134, -0.08755097446642673, 2.305410817385674, 0.9478469877358469, 0.754473302866304, -0.261248926324518, 0.38504790219493434, -1.4434051835223747, -0.14744462210705175, -0.21858597514475184, 4.847228806449517, -4.648862647224975]]

# Hard-coded class-1 centers, same layout as centers_0.
centers_1 = [[0.49960976539442403, -0.20895463813743015, -0.17930539670229478, 0.17476277261377215, -0.4397381988619049, -0.3872747067529501, 0.009963981043775699, -0.8857369684706675, 0.38504790219493434, 0.6928061582539681, -0.14744462210705175, -0.21858597514475184, 0.256161680374181, 0.2649239426976268], [0.8802881437738844, 0.8393636911209145, 0.00510326401754658, 0.9617040138543689, 1.9132466722074486, -0.3872747067529501, 0.754473302866304, -0.8857369684706675, 0.38504790219493434, 0.6928061582539681, 13.35457797222187, -0.21858597514475184, 1.1743751055892482, 0.2649239426976268], [-0.642425369743957, -0.20895463813743015, 1.1911473049158252, -0.08755097446642673, 2.305410817385674, -0.3872747067529501, 0.754473302866304, -0.8857369684706675, -3.2090767602778167, 0.6928061582539681, -0.14744462210705175, 4.671448015005184, -0.07773411061311615, -5.631619965209495], [0.34733841404263993, -0.20895463813743015, -0.7003026928406179, 0.43707651969397104, 1.521082527029223, -1.7223964012417472, -0.7345453407787527, 1.6122152001139305, 0.38504790219493434, -1.4434051835223747, -0.14744462210705175, 6.081321244335211, -0.07773411061311615, 0.2649239426976268], [0.27120273836674785, 0.8393636911209145, 8.591090243034252, -0.3498647215466256, 1.1289183818509974, -0.3872747067529501, 1.2508128507479896, -0.8857369684706675, 0.38504790219493434, 0.6928061582539681, -0.14744462210705175, -0.21858597514475184, 1.5917448443233697, 0.2649239426976268], [-0.03333996433682043, -0.20895463813743015, -0.8471159524220714, -1.3991197098674213, -2.400558924753033, -0.3872747067529501, 0.009963981043775699, 2.23670324226008, 0.38504790219493434, -1.4434051835223747, -0.14744462210705175, -0.21858597514475184, 0.7570053668551268, -5.795412851540249], [-0.03333996433682043, -2.305591296654119, -1.439745170971099, 0.9617040138543689, 1.9132466722074486, 0.9478469877358469, 0.754473302866304, -0.261248926324518, -3.2090767602778167, -1.4434051835223747, 13.35457797222187, 
-0.21858597514475184, 1.3413230010828967, -5.795412851540249]]



In [15]:
# Show the class-1 centers as plain text.
# NOTE(review): this cell is labelled In [15] but sits before In [14], so the
# cells were not executed top to bottom — confirm with Restart & Run All.
print(centers_1)

[[0.49960976539442403, -0.20895463813743015, -0.17930539670229478, 0.17476277261377215, -0.4397381988619049, -0.3872747067529501, 0.009963981043775699, -0.8857369684706675, 0.38504790219493434, 0.6928061582539681, -0.14744462210705175, -0.21858597514475184, 0.256161680374181, 0.2649239426976268], [0.8802881437738844, 0.8393636911209145, 0.00510326401754658, 0.9617040138543689, 1.9132466722074486, -0.3872747067529501, 0.754473302866304, -0.8857369684706675, 0.38504790219493434, 0.6928061582539681, 13.35457797222187, -0.21858597514475184, 1.1743751055892482, 0.2649239426976268], [-0.642425369743957, -0.20895463813743015, 1.1911473049158252, -0.08755097446642673, 2.305410817385674, -0.3872747067529501, 0.754473302866304, -0.8857369684706675, -3.2090767602778167, 0.6928061582539681, -0.14744462210705175, 4.671448015005184, -0.07773411061311615, -5.631619965209495], [0.34733841404263993, -0.20895463813743015, -0.7003026928406179, 0.43707651969397104, 1.521082527029223, -1.7223964012417472, 

In [14]:
# ✅ Utility function to safely check if centers are invalid
def is_invalid(centers):
    """Return True when ``centers`` cannot be used for prediction.

    A string (such as an error message returned in place of a solution),
    ``None``, or an empty collection all count as invalid.
    """
    if isinstance(centers, str) or centers is None:
        return True
    # len() is the first-axis length for numpy arrays as well, so this single
    # check covers empty lists and arrays with zero rows.
    return len(centers) == 0

# ✅ Step 7: Predict based on nearest center (only if both are valid)
def predict(x, centers_0, centers_1):
    """Classify ``x`` by its nearest center.

    Returns 0 when the closest center in ``centers_0`` is strictly nearer
    than the closest center in ``centers_1``; otherwise returns 1, so ties
    go to class 1.
    """
    nearest_0 = min(dist(x, center) for center in centers_0)
    nearest_1 = min(dist(x, center) for center in centers_1)
    if nearest_0 < nearest_1:
        return 0
    return 1

# ✅ Run prediction if centers are valid
# Score the test set only when both center lists are usable; otherwise report
# that no valid set of centers is available.
if not is_invalid(centers_0) and not is_invalid(centers_1):
    y_pred = [predict(sample, centers_0, centers_1) for sample in X_test]
    acc = accuracy_score(y_test, y_pred)
    print(f"✅ Accuracy using custom K-Center: {acc:.4f}")
else:
    print("❌ r not feasible for given k (or no centers found)")



✅ Accuracy using custom K-Center: 0.5974
