In [2]:
from EnsembleModels import MyRandomForestClassifier, MyRandomForestRegressor
from EnsembleModels import MyGradientBoostingClassifier, MyGradientBoostingRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.datasets import make_regression, make_classification
from ModelEvaluation import ModelEvaluation
from Metrics import *

In [3]:
# The OOB score for the random forest can be checked separately.
# Because random_state is not implemented in my RF, its score occasionally differs slightly due to differences in sampling.
def main_regression(random_state=42):
    """Compare the custom ensemble regressors with scikit-learn's on synthetic data.

    Generates a regression dataset with ``make_regression``, evaluates four
    models through ``ModelEvaluation`` using the ``MSE`` and ``R2`` metrics,
    and prints the table returned by ``get_result()``.

    Args:
        random_state: Seed passed to ``make_regression`` and to the
            scikit-learn estimators so the baseline rows do not vary with
            estimator randomness. The default (42) generates the same
            dataset as the previously hard-coded seed.

    Returns:
        None. The comparison table is written to stdout.
    """
    metrics = [MSE(), R2()]
    X, y = make_regression(
        n_samples=10_000,
        n_features=10,
        n_informative=4,
        noise=50,
        random_state=random_state,
        coef=False,
    )

    # Named `evaluation` (not `eval`) so the Python builtin is not shadowed.
    evaluation = ModelEvaluation(X, y)

    # The custom models are constructed exactly as before; no seed is passed
    # to them here, so their rows may still differ slightly between runs.
    evaluation.evaluate_model(
        MyRandomForestRegressor(criterion="squared_error"),
        model_name="My RF",
        metrics=metrics,
    )
    evaluation.evaluate_model(
        RandomForestRegressor(oob_score=True, max_features="sqrt", random_state=random_state),
        model_name="Sklearn RF",
        metrics=metrics,
    )
    evaluation.evaluate_model(
        MyGradientBoostingRegressor(criterion="squared_error"),
        model_name="My GBM",
        metrics=metrics,
    )
    evaluation.evaluate_model(
        GradientBoostingRegressor(criterion="squared_error", random_state=random_state),
        model_name="Sklearn GBM",
        metrics=metrics,
    )
    print(evaluation.get_result())

# Run the regression benchmark only when this code executes as the main module.
if __name__ == "__main__":
    main_regression()

         model          MSE        R2
0        My RF  2891.608788  0.794382
1   Sklearn RF  2908.263845  0.793197
2       My GBM  2584.267117  0.816236
3  Sklearn GBM  2584.101726  0.816248


In [4]:
def main_classification(random_state=42):
    """Compare the custom ensemble classifiers with scikit-learn's on synthetic data.

    Generates a binary classification dataset with ``make_classification``,
    evaluates four models through ``ModelEvaluation`` using the ``Accuracy``
    and ``ROCAUC`` metrics, and prints the table returned by ``get_result()``.

    Args:
        random_state: Seed passed to ``make_classification`` and to the
            scikit-learn estimators so the baseline rows do not vary with
            estimator randomness. The default (42) generates the same
            dataset as the previously hard-coded seed.

    Returns:
        None. The comparison table is written to stdout.
    """
    metrics = [Accuracy(), ROCAUC()]
    # Forwarded unchanged to every evaluate_model call below.
    predict_proba = True
    X, y = make_classification(
        n_samples=10_000,
        n_features=10,
        n_informative=4,
        n_redundant=0,
        n_classes=2,
        random_state=random_state,
        flip_y=0.2,
    )

    # Named `evaluation` (not `eval`) so the Python builtin is not shadowed.
    evaluation = ModelEvaluation(X, y)

    # The custom models are constructed exactly as before; no seed is passed
    # to them here, so their rows may still differ slightly between runs.
    evaluation.evaluate_model(
        MyRandomForestClassifier(criterion="gini"),
        model_name="My RF",
        metrics=metrics,
        predict_proba=predict_proba,
    )
    evaluation.evaluate_model(
        RandomForestClassifier(max_features="sqrt", random_state=random_state),
        model_name="Sklearn RF",
        metrics=metrics,
        predict_proba=predict_proba,
    )
    evaluation.evaluate_model(
        MyGradientBoostingClassifier(n_estimators=200, criterion="squared_error"),
        model_name="My GBM",
        metrics=metrics,
        predict_proba=predict_proba,
    )
    evaluation.evaluate_model(
        GradientBoostingClassifier(
            n_estimators=200, criterion="squared_error", random_state=random_state
        ),
        model_name="Sklearn GBM",
        metrics=metrics,
        predict_proba=predict_proba,
    )
    print(evaluation.get_result())

# Run the classification benchmark only when this code executes as the main module.
if __name__ == "__main__":
    main_classification()

         model  Accuracy   ROC AUC
0        My RF  0.832667  0.878958
1   Sklearn RF  0.836000  0.879543
2       My GBM  0.827000  0.871876
3  Sklearn GBM  0.827000  0.871741
