In [1]:
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd
from sklearn.model_selection import GridSearchCV

# Load your dataset
df = pd.read_csv("data\data.csv") 


label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

X = df.drop(['label', 'filename'], axis=1)
y = df['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)




# Define your base classifiers
base_classifiers = [
    ('rf', RandomForestClassifier(random_state=42)),
    ('svc', SVC(probability=True, random_state=42)),
    ('xgb', XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')),
    ('mlp', MLPClassifier(random_state=42)),
    ('knn', KNeighborsClassifier()),
    ('log_clf', LogisticRegression(random_state=42))
]

# Meta-classifier
meta_classifier = LogisticRegression(random_state=42)

# Initialize the Stacking Classifier
stacking_clf = StackingClassifier(estimators=base_classifiers, final_estimator=meta_classifier, cv=5)

# Parameter grid for GridSearchCV
param_grid = {
    'rf__n_estimators': [100, 200, 300],
    'rf__max_depth': [10, None, 5],
    'svc__C': [0.1, 1, 10],
    'xgb__n_estimators': [50, 100, 150],
    'xgb__learning_rate': [0.01, 0.1, 0.2],
    'mlp__hidden_layer_sizes': [(50,), (100,), (50, 50)],
    'mlp__max_iter': [500, 1000, 1500],
    'knn__n_neighbors': [3, 5, 7],
    'log_clf__C': [0.1, 1, 10],
    'final_estimator__C': [0.1, 1, 10]
}

# Setup GridSearchCV
grid_search = GridSearchCV(estimator=stacking_clf, param_grid=param_grid, cv=3, scoring='accuracy', verbose=2, n_jobs=-1)

# Perform the search
grid_search.fit(X_train_scaled, y_train)

# Best parameters and score
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: {:.2f}".format(grid_search.best_score_))

# Evaluate on the test set
y_pred = grid_search.predict(X_test_scaled)
print("Accuracy on test set: {:.2f}".format(accuracy_score(y_test, y_pred)))
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


Fitting 3 folds for each of 59049 candidates, totalling 177147 fits


KeyboardInterrupt: 

In [2]:
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd
from sklearn.model_selection import GridSearchCV

# Load your dataset
df = pd.read_csv("data\data.csv") 


label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

X = df.drop(['label', 'filename'], axis=1)
y = df['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [3]:
from sklearn.ensemble import StackingClassifier
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint
from scipy.stats import uniform

# Assuming the imports and data preparation steps are done as before

# Base classifiers with example classifiers
base_classifiers = [
    ('rf', RandomForestClassifier(random_state=42)),
    ('svc', SVC(probability=True, random_state=42)),
    ('xgb', XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')),
    ('mlp', MLPClassifier(random_state=42)),
    ('knn', KNeighborsClassifier()),
    ('log_clf', LogisticRegression(random_state=42))
]


# Meta-classifier
meta_classifier = LogisticRegression(random_state=42)

# Initialize the Stacking Classifier
stacking_clf = StackingClassifier(estimators=base_classifiers, final_estimator=meta_classifier, cv=5)

# Parameter distributions
param_dist = {
    'rf__n_estimators': sp_randint(100, 500),  # Number of trees in random forest
    'rf__max_depth': [None, 10, 20, 30],  # Maximum depth of trees
    'svc__C': uniform(0.1, 10),  # Penalty parameter C of the SVC
    'xgb__n_estimators': sp_randint(50, 150),  # Number of gradient boosted trees
    'xgb__learning_rate': uniform(0.01, 0.2),  # Boosting learning rate
    'mlp__hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 50)],  # MLP architecture
    'mlp__alpha': uniform(0.0001, 0.001),  # L2 penalty (regularization term) parameter
    'knn__n_neighbors': sp_randint(3, 10),  # Number of neighbors to use
    'log_clf__C': uniform(0.1, 10),  # Inverse of regularization strength in LogisticRegression
    'final_estimator__C': uniform(0.1, 10),  # Regularization in the final meta-classifier
}


# Initialize RandomizedSearchCV with the Stacking Classifier and parameter distribution
random_search = RandomizedSearchCV(stacking_clf, param_distributions=param_dist, n_iter=100, cv=5, verbose=2, random_state=42, n_jobs=-1)

# Perform the search
random_search.fit(X_train_scaled, y_train)

# Best parameters and score
print("Best parameters found: ", random_search.best_params_)
print("Best cross-validation score: {:.2f}".format(random_search.best_score_))

# Evaluate on the test set
y_pred = random_search.predict(X_test_scaled)
print("Accuracy on test set: {:.2f}".format(accuracy_score(y_test, y_pred)))
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


Fitting 5 folds for each of 100 candidates, totalling 500 fits
