In [1]:
from factories.selectors_factory import SelectorsFactory
from factories.extractors_factory import ExtractorsFactory
from factories.classifiers_factory import ClassifiersFactory
from sklearn.preprocessing import Normalizer, StandardScaler
from data_loading import load_data
from logging_utils import serialize_model, save_resutls, get_model_name, read_best_scores
from tuning import make_pipe, tune_model
from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold

In [2]:
def run_model_test(pipe, hyperparams, datasets=["artificial", "digits"], random_search_iters=100):
    """Tune ``pipe`` on every requested dataset, persist each result and print a summary.

    For each name in ``datasets`` the data is loaded with ``load_data``, the
    pipeline is tuned with ``tune_model``, and the tuned model is handed to
    ``serialize_model`` and ``save_resutls`` before its cross-validation score
    and best parameters are printed.

    Args:
        pipe: Pipeline object, forwarded unchanged to ``tune_model``.
        hyperparams: Search space, forwarded unchanged to ``tune_model``.
        datasets: Iterable of dataset names accepted by ``load_data``.
            The default list is only iterated, never mutated.
        random_search_iters: Forwarded to ``tune_model`` (presumably the
            number of random-search iterations -- confirm in ``tuning``).

    Returns:
        The tuned model for the last dataset processed, or ``None`` when
        ``datasets`` is empty.
    """
    # Stays None when there is nothing to iterate over.
    model = None
    for ds in datasets:
        print(r"\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\")
        print(f"dataset: {ds}")
        print("//////////////////////////////////////////////////////////////////////////")
        X_train, X_test, y_train = load_data(ds)
        model = tune_model(X_train, y_train, pipe, hyperparams, ds, random_search_iters)
        serialize_model(model, ds)
        save_resutls(model, ds, X_test)
        print("==========================================================================")
        print(f'{get_model_name(model, ds)}: cross val scor = {model.best_score_}')
        print(f"best params: {model.best_params_}")
        print("==========================================================================")
    # The return sits outside the loop on purpose: returning from inside the
    # loop body would stop after the first dataset and skip the rest.
    return model

## Artificial

In [3]:
# Candidate pipelines for the "artificial" dataset.
# Every entry is the value returned by make_pipe; the tuning loop reads it as
# trial[0] (pipeline) and trial[1] (hyperparameters).

# Short aliases for the factory lookups used in the list below.
_selector = SelectorsFactory.get_tuple_for_pipe
_classifier = ClassifiersFactory.get_tuple_for_pipe

training_trails = [
    # Univariate selection straight into a classifier.
    make_pipe(steps=[_selector('kbest_fs'), _classifier('RF')]),
    make_pipe(steps=[_selector('kbest_fs'), _classifier('XGB')]),

    # "longtuning" trials. Original note on each of the next three entries:
    # enlarge the hyperparameters space in factories.
    make_pipe(steps=[
        ('longtuning_standardization', StandardScaler(), {}),
        _selector('rf_fs'),
        ('pca_fe', PCA(0.99), {}),
        _classifier('XGB'),
    ]),
    make_pipe(steps=[
        ('longtuning_standardization', StandardScaler(), {}),
        _selector('XGB_fs'),
        ('pca_fe', PCA(0.99), {}),
        _classifier('RF'),
    ]),
    make_pipe(steps=[
        ('longtuning_standardization', StandardScaler(), {}),
        _selector('XGB_fs'),
        ('pca_fe', PCA(0.99), {}),
        _classifier('XGB'),
    ]),

    # Normalizer-based trials.
    make_pipe(steps=[
        ('normalization', Normalizer(), {}),
        _selector('kbest_fs'),
        _classifier('RF'),
    ]),
    make_pipe(steps=[
        ('normalization', Normalizer(), {}),
        ('variance_thr_fs', VarianceThreshold(), {}),
        _selector('boruta_fs'),
        _selector('kbest_fs'),
        ('pca_fe', PCA(0.99), {}),
        _classifier('LGBM'),
    ]),
    make_pipe(steps=[
        ('normalization', Normalizer(), {}),
        ('variance_thr_fs', VarianceThreshold(), {}),
        _selector('boruta_fs'),
        ('pca_fe', PCA(0.99), {}),
        _classifier('LGBM'),
    ]),
    make_pipe(steps=[
        ('normalization', Normalizer(), {}),
        ('variance_thr_fs', VarianceThreshold(), {}),
        _selector('kbest_fs'),
        _classifier('LGBM'),
    ]),
    make_pipe(steps=[
        ('normalization', Normalizer(), {}),
        ('variance_thr_fs', VarianceThreshold(), {}),
        _selector('kbest_fs'),
        ('pca_fe', PCA(0.99), {}),
        _classifier('LGBM'),
    ]),
    make_pipe(steps=[
        ('normalization', Normalizer(), {}),
        ('variance_thr_fs', VarianceThreshold(), {}),
        _selector('kbest_fs'),
        _classifier('RF'),
    ]),
    make_pipe(steps=[
        ('normalization', Normalizer(), {}),
        ('variance_thr_fs', VarianceThreshold(), {}),
        _selector('rf_fs'),
        _selector('kbest_fs'),
        _classifier('LGBM'),
    ]),
    make_pipe(steps=[
        ('normalization', Normalizer(), {}),
        ('variance_thr_fs', VarianceThreshold(), {}),
        _selector('rf_fs'),
        _selector('kbest_fs'),
        ('pca_fe', PCA(0.99), {}),
        _classifier('LGBM'),
    ]),
    make_pipe(steps=[
        ('normalization', Normalizer(), {}),
        ('variance_thr_fs', VarianceThreshold(), {}),
        _selector('rf_fs'),
        _selector('kbest_fs'),
        ('pca_fe', PCA(0.99), {}),
        _classifier('RF'),
    ]),
    make_pipe(steps=[
        ('normalization', Normalizer(), {}),
        ('variance_thr_fs', VarianceThreshold(), {}),
        _selector('rf_fs'),
        _selector('kbest_fs'),
        _classifier('RF'),
    ]),

    # Model-based selection, with and without StandardScaler + PCA.
    make_pipe(steps=[_selector('rf_fs'), _classifier('XGB')]),
    make_pipe(steps=[
        ('standardization', StandardScaler(), {}),
        _selector('rf_fs'),
        ('pca_fe', PCA(0.99), {}),
        _classifier('XGB'),
    ]),
    make_pipe(steps=[
        ('standardization', StandardScaler(), {}),
        _selector('XGB_fs'),
        ('pca_fe', PCA(0.99), {}),
        _classifier('RF'),
    ]),
    make_pipe(steps=[_selector('XGB_fs'), _classifier('XGB')]),

    # Disabled trials ("the same classifier for comparing feature selection"):
    #   ('fs_comparison_standardization', StandardScaler(), {})
    #     -> one of XGB_fs / rf_fs / boruta_fs / kbest_fs
    #     -> ('pca_fe', PCA(0.99), {})
    #     -> XGB classifier
]



In [None]:
# Tune every candidate pipeline on the "artificial" dataset only.
for trial in training_trails:
    # Index explicitly rather than unpack: only the first two items are used.
    pipe, hyperparams = trial[0], trial[1]
    run_model_test(
        pipe,
        hyperparams,
        datasets=["artificial"],
        random_search_iters=100,
    )

## Digits

In [None]:
# Candidate pipelines for the "digits" dataset.
# Every entry is the value returned by make_pipe; the tuning loop reads it as
# trial[0] (pipeline) and trial[1] (hyperparameters).
#
# NOTE(review): several trials place 'kbest_chi2_fs' after StandardScaler.
# If that selector wraps sklearn's chi2 scorer, it needs non-negative inputs
# and would fail on standardized data -- confirm in SelectorsFactory.

# Short aliases for the factory lookups used in the list below.
_selector = SelectorsFactory.get_tuple_for_pipe
_extractor = ExtractorsFactory.get_tuple_for_pipe
_classifier = ClassifiersFactory.get_tuple_for_pipe

training_trails = [
    # Disabled XGB_fs trials (kept here as a record of what was tried):
    #   standardization -> XGB_fs -> pca_fe -> RF
    #   standardization -> XGB_fs -> RF
    #   normalization   -> XGB_fs -> pca_fe -> RF
    #   normalization   -> XGB_fs -> RF
    #   normalization   -> XGB_fs -> XGB
    #   standardization -> XGB_fs -> XGB

    # rf_fs selector with an RF classifier.
    make_pipe(steps=[_selector('rf_fs'), _classifier('RF')]),
    make_pipe(steps=[
        ('normalization', Normalizer(), {}),
        _selector('rf_fs'),
        _classifier('RF'),
    ]),
    make_pipe(steps=[
        ('normalization', Normalizer(), {}),
        _selector('rf_fs'),
        _extractor('pca_fe'),
        _classifier('RF'),
    ]),
    make_pipe(steps=[
        ('standardization', StandardScaler(), {}),
        _selector('rf_fs'),
        _extractor('pca_fe'),
        _classifier('RF'),
    ]),

    # rf_fs selector with an LGBM classifier.
    make_pipe(steps=[_selector('rf_fs'), _classifier('LGBM')]),
    make_pipe(steps=[
        ('normalization', Normalizer(), {}),
        _selector('rf_fs'),
        _extractor('pca_fe'),
        _classifier('LGBM'),
    ]),

    # rf_fs selector with an XGB classifier.
    make_pipe(steps=[_selector('rf_fs'), _classifier('XGB')]),
    make_pipe(steps=[
        ('normalization', Normalizer(), {}),
        _selector('rf_fs'),
        _extractor('pca_fe'),
        _classifier('XGB'),
    ]),
    make_pipe(steps=[
        ('standardization', StandardScaler(), {}),
        _selector('rf_fs'),
        _extractor('pca_fe'),
        _classifier('XGB'),
    ]),

    # lgbm_fs selector after Normalizer.
    make_pipe(steps=[
        ('normalization', Normalizer(), {}),
        _selector('lgbm_fs'),
        _extractor('pca_fe'),
        _classifier('XGB'),
    ]),
    make_pipe(steps=[
        ('normalization', Normalizer(), {}),
        _selector('lgbm_fs'),
        _classifier('XGB'),
    ]),
    make_pipe(steps=[
        ('normalization', Normalizer(), {}),
        _selector('lgbm_fs'),
        _extractor('pca_fe'),
        _classifier('RF'),
    ]),
    make_pipe(steps=[
        ('normalization', Normalizer(), {}),
        _selector('lgbm_fs'),
        _classifier('RF'),
    ]),

    # lgbm_fs selector after StandardScaler.
    make_pipe(steps=[
        ('standardization', StandardScaler(), {}),
        _selector('lgbm_fs'),
        _extractor('pca_fe'),
        _classifier('XGB'),
    ]),
    make_pipe(steps=[
        ('standardization', StandardScaler(), {}),
        _selector('lgbm_fs'),
        _classifier('XGB'),
    ]),
    make_pipe(steps=[
        ('standardization', StandardScaler(), {}),
        _selector('lgbm_fs'),
        _extractor('pca_fe'),
        _classifier('RF'),
    ]),
    make_pipe(steps=[
        ('standardization', StandardScaler(), {}),
        _selector('lgbm_fs'),
        _classifier('RF'),
    ]),
    make_pipe(steps=[
        ('standardization', StandardScaler(), {}),
        _selector('lgbm_fs'),
        _extractor('pca_fe'),
        _classifier('LGBM'),
    ]),
    make_pipe(steps=[
        ('standardization', StandardScaler(), {}),
        _selector('lgbm_fs'),
        _classifier('LGBM'),
    ]),

    # kbest_fs / kbest_chi2_fs selectors with an XGB classifier.
    make_pipe(steps=[
        ('standardization', StandardScaler(), {}),
        _selector('kbest_fs'),
        _extractor('pca_fe'),
        _classifier('XGB'),
    ]),
    make_pipe(steps=[
        ('standardization', StandardScaler(), {}),
        _selector('kbest_chi2_fs'),
        _extractor('pca_fe'),
        _classifier('XGB'),
    ]),
    make_pipe(steps=[
        ('standardization', StandardScaler(), {}),
        _selector('kbest_fs'),
        _classifier('XGB'),
    ]),
    make_pipe(steps=[
        ('standardization', StandardScaler(), {}),
        _selector('kbest_chi2_fs'),
        _classifier('XGB'),
    ]),
    make_pipe(steps=[_selector('kbest_chi2_fs'), _classifier('XGB')]),
    make_pipe(steps=[_selector('kbest_fs'), _classifier('XGB')]),

    # Same six kbest trials, each preceded by the variance-threshold selector.
    make_pipe(steps=[
        _selector('variance_thr_fs'),
        ('standardization', StandardScaler(), {}),
        _selector('kbest_fs'),
        _extractor('pca_fe'),
        _classifier('XGB'),
    ]),
    make_pipe(steps=[
        _selector('variance_thr_fs'),
        ('standardization', StandardScaler(), {}),
        _selector('kbest_chi2_fs'),
        _extractor('pca_fe'),
        _classifier('XGB'),
    ]),
    make_pipe(steps=[
        _selector('variance_thr_fs'),
        ('standardization', StandardScaler(), {}),
        _selector('kbest_fs'),
        _classifier('XGB'),
    ]),
    make_pipe(steps=[
        _selector('variance_thr_fs'),
        ('standardization', StandardScaler(), {}),
        _selector('kbest_chi2_fs'),
        _classifier('XGB'),
    ]),
    make_pipe(steps=[
        _selector('variance_thr_fs'),
        _selector('kbest_chi2_fs'),
        _classifier('XGB'),
    ]),
    make_pipe(steps=[
        _selector('variance_thr_fs'),
        _selector('kbest_fs'),
        _classifier('XGB'),
    ]),
]



In [None]:
# Tune every candidate pipeline on the "digits" dataset only.
for trial in training_trails:
    # Index explicitly rather than unpack: only the first two items are used.
    pipe, hyperparams = trial[0], trial[1]
    run_model_test(
        pipe,
        hyperparams,
        datasets=["digits"],
        random_search_iters=10,
    )