## Avoid running repeated experiments with remayn

Sometimes a few experiments fail and the code has to be run again. With remayn, we can make sure that the experiments that already have saved results are skipped instead of being run a second time.

### 1. Running some experiments with GridSearchCV and saving the results

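A Logistic Regression model and a Ridge Classifier are trained using a grid-search cross-validation procedure (`GridSearchCV`), and each experiment is repeated with 10 different seeds. Then, the results of every experiment are saved, including the best parameters found and the best model.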

In [None]:
from remayn.result import make_result
from remayn.result_set import ResultFolder
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
import time

# Clean up the results folder if it exists, so that the first call to
# run_experiments() starts without any stored results.
# ignore_errors=True keeps this from failing when the folder is missing.
from shutil import rmtree
rmtree('./results', ignore_errors=True)

def run_experiments():
    """Run every (seed, model) experiment that has no saved result yet.

    For each of 10 seeds and each of two classifiers (LogisticRegression and
    RidgeClassifier), a ``GridSearchCV`` is built and its configuration is
    looked up in the ``./results`` folder. Experiments whose config is already
    stored are skipped; the remaining ones are fitted, evaluated on the
    train and test splits, and saved with ``make_result``.
    """
    # Load the results folder to check if the experiments exist
    rf = ResultFolder('./results')

    # Generate the sample dataset once: its random_state is fixed, so it is the
    # same for every experiment. Only the train/test split depends on the seed.
    X, y = make_classification(n_samples=1000, n_features=20, n_informative=2, n_redundant=2, n_clusters_per_class=2, random_state=0)

    # Repeat the experiment 10 times with different random seeds
    for seed in range(10):
        # The grids are rebuilt for every seed so that each experiment config
        # holds its own param_grid dictionary.
        for model, param_grid in [(LogisticRegression, {'C': [0.1, 1, 10], 'max_iter': [50, 100, 150]}),
                                (RidgeClassifier, {'alpha': [0.1, 1, 10], 'max_iter': [50, 100, 150]})]:

            # First, create the estimator
            gs = GridSearchCV(model(), param_grid=param_grid, cv=5)

            # Then, construct the experiment config that we are going to look for.
            estimator_config = gs.get_params()
            # Remove the 'estimator' key from the config, as it is not serializable
            estimator_config.pop('estimator')

            experiment_config = {
                "estimator_config": estimator_config,
                "estimator_name": model.__name__,
                "seed": seed,
            }

            # Now, we can simply check if our results folder contains the experiment.
            # Note that the 'in' operator will look for an experiment whose config exactly
            # matches the experiment_config. If you want to check for a subset of the config,
            # you can use the filter function.
            if experiment_config in rf:
                continue

            print(f"Running {experiment_config}")

            # Split the dataset into training and test sets
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

            # Train the model. perf_counter() is used instead of time() because it is
            # monotonic, so the measured duration is not affected by clock adjustments.
            start_time = time.perf_counter()
            gs.fit(X_train, y_train)
            train_time = time.perf_counter() - start_time

            # Make predictions
            y_train_pred = gs.predict(X_train)
            y_test_pred = gs.predict(X_test)

            # Save the results of the experiment
            make_result(
                base_path='./results',
                config=experiment_config,
                targets=y_test,
                predictions=y_test_pred,
                train_targets=y_train,
                train_predictions=y_train_pred,
                time=train_time,

                # Save the best hyperparameters and the best model
                best_params=gs.best_params_,
                best_model=gs.best_estimator_
            ).save()

# First run: the results folder was removed above, so the experiments are
# executed and a "Running ..." line is printed for each of them.
run_experiments()

Running {'estimator_config': {'cv': 5, 'error_score': nan, 'estimator__C': 1.0, 'estimator__class_weight': None, 'estimator__dual': False, 'estimator__fit_intercept': True, 'estimator__intercept_scaling': 1, 'estimator__l1_ratio': None, 'estimator__max_iter': 100, 'estimator__multi_class': 'auto', 'estimator__n_jobs': None, 'estimator__penalty': 'l2', 'estimator__random_state': None, 'estimator__solver': 'lbfgs', 'estimator__tol': 0.0001, 'estimator__verbose': 0, 'estimator__warm_start': False, 'n_jobs': None, 'param_grid': {'C': [0.1, 1, 10], 'max_iter': [50, 100, 150]}, 'pre_dispatch': '2*n_jobs', 'refit': True, 'return_train_score': False, 'scoring': None, 'verbose': 0}, 'estimator_name': 'LogisticRegression', 'seed': 0}
Running {'estimator_config': {'cv': 5, 'error_score': nan, 'estimator__alpha': 1.0, 'estimator__class_weight': None, 'estimator__copy_X': True, 'estimator__fit_intercept': True, 'estimator__max_iter': None, 'estimator__positive': False, 'estimator__random_state': No

### 2. Run the experiments again

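`run_experiments` checks whether each experiment config already exists in our results folder before executing it. Therefore, calling it a second time should skip every experiment, because all of them already have saved results.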

In [10]:
# It should not run any experiments because they are already done:
# every config is found in the results folder, so nothing is printed.
run_experiments()

Now, if we delete one of the saved results (for example, the first one returned by the results folder), only that experiment will be run again.

In [14]:
# Open the results folder that holds the saved experiments
rf = ResultFolder('./results')
# Take the first result in the folder's iteration order and delete it.
# NOTE(review): this is not necessarily the first experiment that was run;
# the output below shows which config actually gets executed again.
first_result = list(rf)[0]
first_result.delete()

# Only the experiment whose result was deleted should run again
run_experiments()

Running {'estimator_config': {'cv': 5, 'error_score': nan, 'estimator__alpha': 1.0, 'estimator__class_weight': None, 'estimator__copy_X': True, 'estimator__fit_intercept': True, 'estimator__max_iter': None, 'estimator__positive': False, 'estimator__random_state': None, 'estimator__solver': 'auto', 'estimator__tol': 0.0001, 'n_jobs': None, 'param_grid': {'alpha': [0.1, 1, 10], 'max_iter': [50, 100, 150]}, 'pre_dispatch': '2*n_jobs', 'refit': True, 'return_train_score': False, 'scoring': None, 'verbose': 0}, 'estimator_name': 'RidgeClassifier', 'seed': 6}
