In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score,precision_score,recall_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Read the feature-ranking workbook. Later cells treat every column of this
# frame as one filter method and the row order as the ranking.
excel_file = './feature_ranking_3_filters.xlsx'
rankings_df = pd.read_excel(io=excel_file)

In [3]:
# Preview the first ten rows of each ranking column
rankings_df.head(n=10)

Unnamed: 0,CHI_2,Mutual Information,Pearson Correlation
0,f5,f5,f29
1,f6,f3,f33
2,f1,f6,f34
3,f33,f4,f12
4,f23,f29,f39
5,f32,f30,f38
6,f3,f33,f25
7,f4,f34,f26
8,f39,f12,f4
9,f26,f38,f23


In [4]:
# Read the training CSV and discard the 'level' column in one step
train_dataset_file = '../../data/BinaryClassify/train_nsl_kdd_binary_encoded.csv'
dataset = pd.read_csv(train_dataset_file).drop(columns='level')

# Replace the original headers with positional names f1..fN so that columns
# can be selected by the feature identifiers used in the rankings workbook
dataset.columns = [f'f{position}' for position in range(1, dataset.shape[1] + 1)]

In [5]:
# f42 is the column the models are trained to predict; every other column
# is kept as a candidate input feature.
y = dataset.loc[:, 'f42']
X = dataset.drop(columns=['f42'])

In [6]:
def calculate_accuracy(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training data and score it on the test data.

    Parameters
    ----------
    model
        Object exposing ``fit(X, y)`` and ``predict(X)``; it is fitted in place.
    X_train, y_train
        Features and labels passed to ``model.fit``.
    X_test, y_test
        Features passed to ``model.predict`` and the labels the predictions
        are compared against.

    Returns
    -------
    The value of ``accuracy_score(y_test, predictions)``.
    """
    model.fit(X_train, y_train)
    return accuracy_score(y_test, model.predict(X_test))


In [7]:
# Hold out 20% of the rows as a test set once, up front, so every classifier
# below is evaluated on the same split; the fixed seed keeps it repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [8]:
# Models compared on the selected features, keyed by the display name used
# when printing results.
# The forest and the tree use randomness while fitting, so they are seeded
# with the same value as the train/test split (42); without a seed the
# reported metrics change from run to run. KNeighborsClassifier has no
# random_state parameter.
classifiers = {
    'Random Forest': RandomForestClassifier(random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'KNN': KNeighborsClassifier()
}

In [10]:
import pandas as pd
from collections import Counter
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Number of top-ranked features taken from each filter method
n_val = 10

# Top-n features per filter method; every column of rankings_df is treated
# as one method and its first n_val rows as that method's best features.
top_features_by_method = {
    method_name: set(rankings_df[method_name][:n_val])
    for method_name in rankings_df.columns
}

# Union of the per-method selections
features_union = set().union(*top_features_by_method.values())

# A set of strings iterates in a different order on each interpreter run
# (hash randomisation). Fix the column order once so the models always see
# the features in the same positions; key=str keeps the sort safe even if a
# ranking column holds non-string labels.
selected_features = sorted(features_union, key=str)

print("Ishita Method")
print("-------------")
print(f"Union of top {n_val} features from each filter method:")
print(features_union)

# The selected columns do not depend on the classifier, so slice them once.
X_train_selected = X_train[selected_features]
X_test_selected = X_test[selected_features]

# One {metric: value} record per classifier. The results frame is built once
# after the loop instead of being grown with pd.concat on every iteration.
metrics_by_classifier = {}

for classifier_name, classifier in classifiers.items():
    print(f'\n{classifier_name}:\n{"-" * len(classifier_name)}')

    if not selected_features:
        print(f'No features selected from the union of top {n_val} features')
        continue

    classifier.fit(X_train_selected, y_train)
    y_pred = classifier.predict(X_test_selected)

    # Flat metric -> value mapping. Nesting the values under n_val and then
    # passing index=[classifier_name] to pd.DataFrame makes pandas align the
    # inner keys against the classifier name, which yields a row of NaN.
    metrics_results = {
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
    }

    print(f'Accuracy: {metrics_results["accuracy"]:.4f}, Precision: {metrics_results["precision"]:.4f}, Recall: {metrics_results["recall"]:.4f}')

    metrics_by_classifier[classifier_name] = metrics_results

# Rows are classifiers, columns are metrics
results_df = pd.DataFrame.from_dict(
    metrics_by_classifier,
    orient='index',
    columns=['accuracy', 'precision', 'recall'],
)



Ishita Method
-------------
Union of top 10 features from each filter method:
{'f25', 'f30', 'f38', 'f26', 'f32', 'f5', 'f33', 'f34', 'f39', 'f12', 'f4', 'f6', 'f29', 'f3', 'f23', 'f1'}

Random Forest:
-------------
Accuracy: 0.9987, Precision: 0.9979, Recall: 0.9996

Decision Tree:
-------------
Accuracy: 0.9980, Precision: 0.9982, Recall: 0.9981

KNN:
---
Accuracy: 0.9958, Precision: 0.9967, Recall: 0.9954


In [ ]:
# Bare string literal, not executable logic: a pasted copy of the text printed
# by the evaluation cell above (the union of selected features and the
# accuracy / precision / recall of each classifier), kept for reference.
'''
Ishita Method
-------------
Union of top 10 features from each filter method:
{'f25', 'f30', 'f38', 'f26', 'f32', 'f5', 'f33', 'f34', 'f39', 'f12', 'f4', 'f6', 'f29', 'f3', 'f23', 'f1'}

Random Forest:
-------------
Accuracy: 0.9987, Precision: 0.9979, Recall: 0.9996

Decision Tree:
-------------
Accuracy: 0.9980, Precision: 0.9982, Recall: 0.9981

KNN:
---
Accuracy: 0.9958, Precision: 0.9967, Recall: 0.9954


'''