In [None]:
# Candidate classifiers keyed by display name, each created with library
# defaults. The same names are used as keys of `param_grid` and are embedded in
# the filename of the saved model, so keep them in sync when adding an entry.
models = {}
models["Decision Tree Classifier"] = DecisionTreeClassifier()
models["Random Forest"] = RandomForestClassifier()
models["XGBoost"] = xgb.XGBClassifier()

# Hyperparameter search spaces, keyed by the same display names as `models`.
# Each inner dict maps an estimator constructor argument to the list of
# candidate values that the randomized search samples from.
#
# NOTE: "auto" is intentionally absent from the `max_features` lists. It was
# deprecated in scikit-learn 1.1 and removed in 1.3, so sampling it makes the
# candidate fail to fit on current versions; for classifiers it was an alias
# of "sqrt", which is still listed.
param_grid = {
    "Decision Tree Classifier": {
        "criterion": ["gini", "entropy"],
        "max_depth": [None, 10, 15, 23, 35, 50],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
        "max_features": ["sqrt", "log2", None],
    },
    "Random Forest": {
        # 10 evenly spaced tree counts from 100 to 2000 (truncated to int).
        "n_estimators": [int(x) for x in np.linspace(start=100, stop=2000, num=10)],
        "max_features": ["sqrt", "log2"],
        # 10 evenly spaced depth limits from 10 to 1000 (truncated to int).
        "max_depth": [int(x) for x in np.linspace(10, 1000, num=10)],
        "min_samples_split": [2, 5, 10, 14],
        "min_samples_leaf": [1, 2, 4, 6, 8],
        "bootstrap": [True, False],
    },
    "XGBoost": {
        "learning_rate": [0.01, 0.1, 0.2, 0.3],
        # 10 evenly spaced boosting-round counts from 50 to 1000 (truncated to int).
        "n_estimators": [int(x) for x in np.linspace(start=50, stop=1000, num=10)],
        "max_depth": [3, 4, 5, 6, 8, 10],
        "min_child_weight": [1, 3, 5, 7],
        "gamma": [0.0, 0.1, 0.2, 0.3, 0.4],
        "subsample": [0.6, 0.7, 0.8, 0.9],
        "colsample_bytree": [0.6, 0.7, 0.8, 0.9],
        "reg_alpha": [0, 0.001, 0.005, 0.01, 0.05],
    },
}

def train_algorithms(models, X_train, y_train, X_test, y_test):
    """Tune, evaluate, save and report on every estimator in ``models``.

    For each ``(name, estimator)`` pair:

    1. If ``name`` is a key of the module-level ``param_grid``, run a
       10-candidate ``RandomizedSearchCV`` (5-fold, accuracy) on the training
       data and keep its best estimator; otherwise fit the estimator as given.
    2. Compute 5-fold cross-validated accuracy on the training data.
    3. Predict on the training and test sets.
    4. Dump the fitted model to ``../models/<name>_best_joblib`` with joblib.
    5. Print the model, the mean cross-validation accuracy and the train/test
       accuracy.

    Args:
        models: Mapping of display name to an unfitted estimator.
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.

    Returns:
        None. Results are printed and the fitted models are written to disk.
    """
    # Local import so this function does not depend on the file's import block.
    import os

    # Create the output directory up front: otherwise a missing directory is
    # only discovered by joblib.dump after the first model has been tuned.
    model_dir = "../models"
    os.makedirs(model_dir, exist_ok=True)

    for model_name, model in models.items():
        # Hyperparameter tuning using Randomized Search
        if model_name in param_grid:
            random_search = RandomizedSearchCV(
                estimator=model,
                param_distributions=param_grid[model_name],
                n_iter=10,
                scoring='accuracy',
                n_jobs=-1,
                cv=5,
                verbose=2,
            )
            random_search.fit(X_train, y_train)
            # With the default refit=True the best estimator has already been
            # refit on the full training set, so it must not be fitted again:
            # a second fit only repeats the most expensive step and, for
            # randomized learners, swaps the selected model for a new draw.
            best_model = random_search.best_estimator_
        else:
            # No search space for this model: train it with its own settings.
            best_model = model
            best_model.fit(X_train, y_train)

        # Cross-validation (cross_val_score works on clones, so the fitted
        # best_model itself is left untouched).
        cross_val_scores = cross_val_score(
            best_model, X_train, y_train, cv=5, scoring='accuracy'
        )

        # Training and test set predictions
        y_train_pred = best_model.predict(X_train)
        y_test_pred = best_model.predict(X_test)

        # Save best model
        joblib.dump(best_model, f"{model_dir}/{model_name}_best_joblib")

        # Print results
        print(model_name)
        print("Best Model:")
        print(best_model)

        print(f"Cross-Validation Mean Accuracy: {np.mean(cross_val_scores):.4f}")

        print("Model performance for Training set")
        print(f"- Accuracy Score: {accuracy_score(y_train, y_train_pred):.4f}")
        # Include other performance metrics here

        print("Model performance for Test set")
        print(f"- Accuracy Score: {accuracy_score(y_test, y_test_pred):.4f}")
        # Include other performance metrics here

        print("=" * 35)