<a href="https://colab.research.google.com/github/hemchan-cyber/M.tech-Ai-Practical/blob/main/Sequential_Forward_Feature_Selection_Algorithm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Q4. Sequential Forward Feature Selection Algorithm

In [None]:
# Dataset: six sample identifiers and the class label (+1 or -1) of each one
ids = ["A1", "A2", "A3", "B1", "B2", "B3"]
labels = {
    "A1": 1,
    "A2": 1,
    "A3": 1,
    "B1": -1,
    "B2": -1,
    "B3": -1,
}

In [None]:
# Feature values per sample, in column order [f1, f2, f3, f4]
features = {
    "A1": [2, 3, 0, 1],
    "A2": [3, 2, 1, 2],
    "A3": [2, 2, 0, 0],
    "B1": [0, 1, 3, 2],
    "B2": [1, 0, 2, 1],
    "B3": [1, 2, 3, 3],
}
# Display name of each feature column; position i names features[...][i]
feature_names = ["f1", "f2", "f3", "f4"]

In [None]:
# LOOCV accuracy using nearest-centroid
def loo_accuracy(feature_indices, *, sample_ids=None, sample_labels=None,
                 sample_features=None):
    """Return the leave-one-out accuracy of a nearest-centroid classifier.

    Each sample is held out in turn. The centroids of class ``1`` and class
    ``-1`` are computed from the remaining samples, restricted to the chosen
    feature columns, and the held-out sample is assigned to the class whose
    centroid is nearer in squared Euclidean distance. Ties go to class ``-1``.

    Args:
        feature_indices: Column indices of the features to use.
        sample_ids: Sample identifiers to evaluate. Defaults to the
            module-level ``ids``.
        sample_labels: Mapping from sample id to class label (``1`` or
            ``-1``). Defaults to the module-level ``labels``.
        sample_features: Mapping from sample id to its feature vector.
            Defaults to the module-level ``features``.

    Returns:
        The fraction of held-out samples that were classified correctly.

    Raises:
        ZeroDivisionError: If there are no samples at all.
        KeyError: If a label other than ``1`` or ``-1`` is encountered.
    """
    # Fall back to the notebook's module-level data when no override is given.
    sample_ids = ids if sample_ids is None else sample_ids
    sample_labels = labels if sample_labels is None else sample_labels
    sample_features = features if sample_features is None else sample_features

    feature_indices = list(feature_indices)
    n_features = len(feature_indices)
    correct = 0

    for test_id in sample_ids:
        # Accumulate per-class feature sums over every sample but the held-out one.
        sums = {1: [0] * n_features, -1: [0] * n_features}
        counts = {1: 0, -1: 0}

        for train_id in sample_ids:
            if train_id == test_id:
                continue
            cls = sample_labels[train_id]
            vec = [sample_features[train_id][i] for i in feature_indices]
            counts[cls] += 1
            sums[cls] = [s + v for s, v in zip(sums[cls], vec)]

        test_vec = [sample_features[test_id][i] for i in feature_indices]

        distances = {}
        for cls in (1, -1):
            if counts[cls] == 0:
                # Holding out the only member of a class leaves it without a
                # centroid; make it unreachable instead of dividing by zero.
                distances[cls] = float("inf")
                continue
            centroid = [s / counts[cls] for s in sums[cls]]
            distances[cls] = sum(
                (t - c) ** 2 for t, c in zip(test_vec, centroid)
            )

        # Strict "<" means a tie is resolved in favour of class -1.
        pred = 1 if distances[1] < distances[-1] else -1

        if pred == sample_labels[test_id]:
            correct += 1

    return correct / len(sample_ids)

In [None]:
# Sequential Forward Selection (greedy), printing every candidate evaluation
selected = []
remaining = [0, 1, 2, 3]
best_so_far = 0.0
step = 1

print("\n Sequential Forward Selection Process \n")

while remaining:
    print(f"Step {step}: Current selected set = {[feature_names[i] for i in selected]}")

    # Score each unused feature added to the current subset. The strict ">"
    # keeps the earliest candidate when several share the top score.
    best_feature, best_score = None, -1
    for f in remaining:
        score = loo_accuracy(selected + [f])
        print(f"  Try adding {feature_names[f]} → LOOCV accuracy = {score:.4f}")
        if score > best_score:
            best_feature, best_score = f, score

    # Only a strict improvement over the previous round justifies another feature.
    if best_so_far >= best_score:
        print("\nNo further improvement. Stopping SFS.\n")
        break

    # Commit the winning feature and advance to the next round.
    remaining.remove(best_feature)
    selected.append(best_feature)
    best_so_far = best_score
    step += 1

    print(f">>Selected feature: {feature_names[best_feature]}")
    print(f">>Best accuracy so far: {best_so_far:.4f}\n")

print("Final selected features:", [feature_names[i] for i in selected])
print("Final LOOCV accuracy:", best_so_far)


 Sequential Forward Selection Process 

Step 1: Current selected set = []
  Try adding f1 → LOOCV accuracy = 1.0000
  Try adding f2 → LOOCV accuracy = 0.8333
  Try adding f3 → LOOCV accuracy = 1.0000
  Try adding f4 → LOOCV accuracy = 0.6667
>>Selected feature: f1
>>Best accuracy so far: 1.0000

Step 2: Current selected set = ['f1']
  Try adding f2 → LOOCV accuracy = 0.8333
  Try adding f3 → LOOCV accuracy = 1.0000
  Try adding f4 → LOOCV accuracy = 0.8333

No further improvement. Stopping SFS.

Final selected features: ['f1']
Final LOOCV accuracy: 1.0
