In [21]:
import pandas as pd
from mrmr import mrmr_classif


df = pd.read_csv('data/table.csv')

X = df.drop('Label', axis=1)
y, _ = pd.factorize(df['Label'])

selected_feature = mrmr_classif(X, y, K=1)

print(selected_feature)

100%|██████████| 1/1 [00:00<?, ?it/s]

['zero_crossing_SPD_ML']





In [23]:
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

scaler = StandardScaler()
X = scaler.fit_transform(df[selected_feature])

classifiers = {
    'LogisticRegression': LogisticRegression(),
    'RandomForest': RandomForestClassifier(),
    'SVC': SVC(),
    'KNeighbors': KNeighborsClassifier(),
    'DecisionTree': DecisionTreeClassifier(),
    'GradientBoosting': GradientBoostingClassifier()
}

param_grids = {
    'LogisticRegression': {
        'C': [0.1, 1, 10],
        'solver': ['liblinear', 'lbfgs']
    },
    'RandomForest': {
        'n_estimators': [10, 50, 100, 200, 300],
        'max_depth': [None, 10, 20, 30]
    },
    'SVC': {
        'C': [0.1, 1, 10],
        'kernel': ['linear', 'rbf']
    },
    'KNeighbors': {
        'n_neighbors': [3, 5, 7],
        'weights': ['uniform', 'distance']
    },
    'DecisionTree': {
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10]
    },
    'GradientBoosting': {
        'n_estimators': [50, 100, 200],
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7]
    }
}

best_classifiers = {}
for name, clf in classifiers.items():
    grid_search = GridSearchCV(clf, param_grids[name], cv=5, scoring='accuracy')
    grid_search.fit(X, y)
    best_classifiers[name] = grid_search.best_estimator_


best_score = 0
best_classifier_name = None
for name, clf in best_classifiers.items():
    scores = cross_val_score(clf, X, y, cv=5, scoring='accuracy')
    mean_score = scores.mean()
    
    if mean_score > best_score:
        best_score = mean_score
        best_classifier_name = name

print(f"Best classifier: {best_classifier_name}")
print(f"Best  mean cross-validation score: {best_score}")

Best classifier: SVC
Best  mean cross-validation score: 0.7619047619047619
