In [None]:
# Importing necessary libraries
import weka.core.jvm as jvm
from weka.attribute_selection import ASSearch, ASEvaluation, AttributeSelection
from weka.core.converters import Saver, Loader
from weka.filters import Filter
import pandas as pd
import numpy as np

# Start the Weka JVM
# The `jvm.started` guard keeps a re-run of this cell from requesting a second start.
# packages=True asks for Weka package support (presumably needed for add-on
# packages — confirm); max_heap_size="4G" is the heap limit handed to the start call.
if not jvm.started:
    jvm.start(packages=True, max_heap_size="4G")
    print("JVM Started")

In [None]:
# Input feature table (CSV) that is passed to load_dataset() further down.
otu_file_name = "normalized_feature_table_three_class.csv"

# Threshold meant to be varied between feature-selection runs.
# NOTE(review): no visible cell reads `thresh` — confirm it is wired into the
# selection calls, otherwise changing it here has no effect.
thresh = 0.01 #varying threshold for the feature selection

Load the dataset into Weka and mark the last column as the class attribute

In [19]:
def load_dataset(filename):
    """Read a CSV file through Weka's CSVLoader and mark its last column as the class.

    Args:
        filename: Path of the CSV file to load.

    Returns:
        The loaded dataset object, with the class attribute set to the last column.
    """
    csv_loader = Loader(classname="weka.core.converters.CSVLoader")
    instances = csv_loader.load_file(filename)
    # The class attribute must be declared before the dataset is handed to
    # the attribute-selection routines.
    instances.class_is_last()
    return instances

Feature selection with BestFirst search and CfsSubsetEval evaluator

In [None]:
def best_first_ranker(data, output_path='best_first_attributes.csv'):
    """Select features with BestFirst search + CfsSubsetEval and save them to CSV.

    The search runs bidirectionally and gives up after 5 consecutive
    non-improving nodes. The selected features (index and name) are written to
    ``output_path`` and also returned so they can be inspected in the notebook.

    Args:
        data: Weka dataset with its class attribute already set.
        output_path: Destination CSV file for the selected features.

    Returns:
        A DataFrame with columns ``Index`` and ``Name`` (one row per selected
        feature), or ``None`` if selection or writing failed. Failures are
        reported on stdout/stderr instead of being raised, so the remaining
        notebook steps (including the JVM shutdown) still run.
    """
    import traceback

    print("Best First Ranker Feature Selection")
    search = ASSearch(classname="weka.attributeSelection.BestFirst", options=[
        "-D", "2",  # Search direction: bidirectional (2)
        "-N", "5",  # Number of non-improving nodes to consider
    ])
    evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")

    attsel = AttributeSelection()
    attsel.search(search)
    attsel.evaluator(evaluator)

    try:
        attsel.select_attributes(data)
        print("Selected attributes:", attsel.selected_attributes)
        print("Number of attributes selected:", attsel.number_attributes_selected)

        # Weka reports the class index together with the chosen features; drop it
        # so the saved list holds predictors only and matches the printed count.
        class_index = data.class_index
        feature_indices = [
            int(att) for att in attsel.selected_attributes if int(att) != class_index
        ]
        attribute_info = [
            {'Index': idx, 'Name': data.attribute(idx).name}
            for idx in feature_indices
        ]

        # Explicit columns keep the CSV header even when nothing was selected.
        attributes_df = pd.DataFrame(attribute_info, columns=['Index', 'Name'])
        attributes_df.to_csv(output_path, index=False)
        return attributes_df

    except Exception as e:
        print(f"An error occurred during attribute selection: {e}")
        traceback.print_exc()
        return None

Feature selection with BestFirst search and Wrapper evaluator using a J48 decision tree as the base classifier

In [None]:
def best_first_wrapper(data, classifier="weka.classifiers.trees.J48", folds=10,
                       threshold=0.01, output_path='best_first_wrapper.csv'):
    """Select features with BestFirst search + WrapperSubsetEval and save them to CSV.

    The search runs forward and gives up after 5 consecutive non-improving
    nodes. Candidate subsets are scored by cross-validating ``classifier``.
    The defaults reproduce the previously hard-coded configuration
    (J48, 10 folds, threshold 0.01).

    Args:
        data: Weka dataset with its class attribute already set.
        classifier: Fully qualified Weka class name of the base learner (``-B``).
        folds: Number of cross-validation folds used to score a subset (``-F``).
        threshold: Value passed as WrapperSubsetEval's ``-T`` option (the
            threshold that triggers a repeat of the cross-validation).
        output_path: Destination CSV file for the selected features.

    Returns:
        A DataFrame with columns ``Index`` and ``Name`` (one row per selected
        feature), or ``None`` if selection or writing failed. Failures are
        reported on stdout/stderr instead of being raised, so the remaining
        notebook steps (including the JVM shutdown) still run.
    """
    import traceback

    print("Best First Search with Wrapper Evaluation")
    search = ASSearch(classname="weka.attributeSelection.BestFirst", options=[
        "-D", "1",  # Search direction: forward (1)
        "-N", "5",  # Number of non-improving nodes to consider
    ])
    evaluator = ASEvaluation(classname="weka.attributeSelection.WrapperSubsetEval", options=[
        "-B", classifier,
        "-F", str(int(folds)),
        "-T", str(threshold),
    ])

    attsel = AttributeSelection()
    attsel.search(search)
    attsel.evaluator(evaluator)

    try:
        attsel.select_attributes(data)
        print("Selected attributes:", attsel.selected_attributes)
        print("Number of attributes selected:", attsel.number_attributes_selected)

        # Weka reports the class index together with the chosen features; drop it
        # so the saved list holds predictors only and matches the printed count.
        class_index = data.class_index
        feature_indices = [
            int(att) for att in attsel.selected_attributes if int(att) != class_index
        ]
        attribute_info = [
            {'Index': idx, 'Name': data.attribute(idx).name}
            for idx in feature_indices
        ]

        # Explicit columns keep the CSV header even when nothing was selected.
        attributes_df = pd.DataFrame(attribute_info, columns=['Index', 'Name'])
        attributes_df.to_csv(output_path, index=False)
        return attributes_df

    except Exception as e:
        print(f"An error occurred during attribute selection: {e}")
        traceback.print_exc()
        return None

Perform feature selection

In [None]:
# Load the feature table once and reuse it for both selection strategies.
dataset = load_dataset(otu_file_name)
print("Dataset Loaded")
for run_selection in (best_first_ranker, best_first_wrapper):
    run_selection(dataset)
# Stop the JVM only after every selector has finished.
jvm.stop()