In [None]:
!pip install niapy



In [None]:
import numpy as np
import pandas as pd
from niapy.algorithms.basic import CuckooSearch
from niapy.problems import Problem
from niapy.task import Task
from sklearn.feature_selection import RFE
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [None]:
# File names of the train/test feature matrices and label vectors
features_file, labels_file = "X_train.txt", "y_train.txt"
features_test_file, labels_test_file = "X_test.txt", "y_test.txt"

# Parse each text file into a NumPy array
x_train, y_train = np.loadtxt(features_file), np.loadtxt(labels_file)
X_test, y_test = np.loadtxt(features_test_file), np.loadtxt(labels_test_file)

# Report the shape of every array that was loaded
for split_name, split_array in (
    ("x_train", x_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
):
    print(f"Size of {split_name}: {split_array.shape}")

Size of x_train: (7352, 561)
Size of y_train: (7352,)
Size of X_test: (2947, 561)
Size of y_test: (2947,)


In [None]:
# Wrap the feature matrices in DataFrames (default integer column names)
x_train, x_test = pd.DataFrame(x_train), pd.DataFrame(X_test)

# Wrap the label vectors in single-column DataFrames named "Label"
y_train, y_test = (
    pd.DataFrame(labels, columns=["Label"]) for labels in (y_train, y_test)
)

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so no test information is used in fitting.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [None]:
# Feature Selection
class FeatureSelectionProblem(Problem):
    """Feature-subset selection posed as a continuous minimisation problem.

    Each candidate solution is a vector in [0, 1] with one entry per feature;
    entries strictly greater than 0.5 mark the feature as selected. The
    fitness of a subset is the error (1 - accuracy) of a linear SVC that is
    fitted on one part of the data and scored on a held-out part, so that the
    search is not rewarded for merely memorising the rows it was fitted on.

    Args:
        x: 2-D array-like of shape (n_samples, n_features).
        y: 1-D array-like of class labels, one per row of ``x``.
        validation_size: Fraction (or absolute number) of rows held out for
            scoring candidate subsets; forwarded to ``train_test_split`` as
            ``test_size``.
        random_state: Seed for the hold-out split, fixed so that every
            candidate is scored on the same rows and fitness values are
            comparable across evaluations.

    Raises:
        ValueError: Propagated from ``train_test_split`` when the stratified
            split cannot be made (e.g. a class with a single sample).
    """

    def __init__(self, x, y, validation_size=0.25, random_state=0):
        super().__init__(dimension=x.shape[1], lower=0, upper=1)
        # Kept for backward compatibility with code that reads these attributes.
        self.x = x
        self.y = y
        # Split once up front: the same hold-out rows are reused by every
        # fitness evaluation, and stratifying keeps class proportions intact.
        self.x_fit, self.x_val, self.y_fit, self.y_val = train_test_split(
            np.asarray(x),
            np.asarray(y),
            test_size=validation_size,
            random_state=random_state,
            stratify=y,
        )

    def _evaluate(self, solution):
        """Return the hold-out error of a linear SVC on the selected features.

        Args:
            solution: 1-D array with one value per feature; values above 0.5
                select the corresponding column.

        Returns:
            ``1 - accuracy`` on the held-out rows, or ``1e6`` when no feature
            is selected.
        """
        selected_features = np.flatnonzero(solution > 0.5)
        if selected_features.size == 0:  # Avoid empty selection
            return 1e6  # Large penalty, far outside the [0, 1] error range
        model = SVC(kernel='linear')
        model.fit(self.x_fit[:, selected_features], self.y_fit)
        # Score on rows the model has not seen during fitting.
        predictions = model.predict(self.x_val[:, selected_features])
        return 1 - accuracy_score(self.y_val, predictions)  # Minimize 1 - accuracy

# Problem and task
problem = FeatureSelectionProblem(x_train_scaled, y_train.values.ravel())
task = Task(problem=problem, max_iters=100)  # Adjust max_iters as needed

# Run Cuckoo Search for feature selection.
# niapy 2.x documents the constructor as CuckooSearch(population_size, pa, alpha);
# the previous `n=20` / `beta=1.5` keywords are not part of that signature.
# NOTE(review): they appear to be swallowed by **kwargs, which would leave the
# population at its default size instead of 20 - confirm against the
# installed niapy version.
cs = CuckooSearch(population_size=20, alpha=0.01, seed=1234)
best_solution, best_fitness = cs.run(task=task)

# Extract selected features from Cuckoo Search
selected_features_cs = np.where(best_solution > 0.5)[0]
if selected_features_cs.size == 0:
    # Every candidate hit the empty-selection penalty; RFE cannot run on zero columns.
    raise ValueError("Cuckoo Search did not select any feature; increase max_iters or population_size.")
print("Selected Features after Cuckoo Search (Indices):", selected_features_cs)

# Apply RFE on the selected features
svc = SVC(kernel="linear")
# Never ask RFE for more features than survived the search.
n_final_features = min(10, selected_features_cs.size)  # Adjust number of features to select
rfe = RFE(estimator=svc, n_features_to_select=n_final_features)
rfe.fit(x_train_scaled[:, selected_features_cs], y_train.values.ravel())

# Final selected features after RFE (mapped back to original column indices)
final_selected_features = selected_features_cs[rfe.support_]
print("Final Selected Features after RFE (Indices):", final_selected_features)

Selected Features after Cuckoo Search (Indices): [  0   1   2   3   4   6   7   8   9  10  11  12  13  20  21  22  23  24
  25  26  27  30  32  33  34  35  36  37  38  39  41  43  45  47  52  53
  55  57  64  65  68  70  71  73  77  78  79  81  83  85  86  88  90  91
  92  95  97  98  99 103 104 108 110 115 116 117 118 120 121 122 123 126
 127 128 129 130 132 135 136 137 142 144 145 146 147 149 151 152 155 156
 157 159 161 162 163 164 166 167 168 170 171 172 177 179 180 181 182 183
 187 189 190 193 194 195 196 197 199 200 202 204 205 206 211 212 213 214
 215 216 217 221 223 224 225 226 230 231 232 234 242 247 248 250 251 254
 255 259 260 261 262 263 265 267 268 271 272 277 279 281 282 287 289 290
 292 295 296 297 299 300 301 302 304 307 308 309 310 311 313 315 317 323
 325 327 328 332 334 338 339 341 343 345 347 353 356 357 359 363 365 367
 368 369 370 373 374 375 376 377 379 380 381 382 383 387 389 390 393 396
 398 400 402 404 408 409 413 414 415 416 417 420 423 424 425 426 429 432
 4

In [None]:
# Restrict both scaled splits to the columns kept by the selection stage
x_train_final, x_test_final = (
    scaled_split[:, final_selected_features]
    for scaled_split in (x_train_scaled, x_test_scaled)
)

# Fit a linear SVM on the reduced training set and predict the test set
model = SVC(kernel="linear").fit(x_train_final, y_train.values.ravel())
y_pred = model.predict(x_test_final)

# Summarise test-set performance
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
print("Accuracy:", test_accuracy)
print("Classification Report:\n", test_report)

Accuracy: 0.8771632168306752
Classification Report:
               precision    recall  f1-score   support

         1.0       0.85      0.89      0.87       496
         2.0       0.87      0.85      0.85       471
         3.0       0.81      0.79      0.80       420
         4.0       0.92      0.78      0.84       491
         5.0       0.82      0.94      0.88       532
         6.0       1.00      1.00      1.00       537

    accuracy                           0.88      2947
   macro avg       0.88      0.87      0.87      2947
weighted avg       0.88      0.88      0.88      2947

