In [36]:
import numpy as np 
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn import datasets

In [106]:
def tune_bb(algo, X, y, 
            regularization="Dropout", M=10, 
            c=None, K=5, criterion="MSE"):

    """Tune the ``alpha`` setting of a black-box regression model by K-fold CV.

    For every candidate value on a fixed grid, ``algo.alpha`` is set to that
    value and the model is scored with ``cross_val_score``. The value with
    the smallest mean absolute CV score is selected, the model is refit on
    all of ``(X, y)`` with it, and the model is returned.

    Parameters:
    -----------

    algo : object
        A learning algorithm exposing ``.fit()`` and ``.predict()``. It is
        passed to ``cross_val_score`` and is modified in place: its ``alpha``
        attribute is overwritten during the search and left at the selected
        value on return.
    X : array-like of shape (n,p)
        training data X in R nxp
    y : array-like of shape (n,)
        training labels, Y, in Rn
    regularization : str, default="Dropout"
        regularization method, intended to be one of "Dropout",
        "NoiseAddition", or "Robust". Accepted but not used yet.
    M : int, default=10
        Number of Monte Carlo replicates intended for "Dropout" or
        "NoiseAddition". Accepted but not used yet.
    c : default=None
        A vector of column bounds intended for "Robust". Accepted but not
        used yet.
    K : int, default=5
        Number of CV folds, forwarded as ``cv=K``, e.g., K = 5 indicates
        five-fold CV
    criterion : str, default="MSE"
        Evaluation criterion: "MSE" (mean square error) or "MAE" (mean
        absolute error). "MAD" (mean absolute deviation) is accepted as an
        alias for "MAE".

    Returns:
    -----------
    tuned_model : object
        The same ``algo`` object, with ``alpha`` set to the selected value
        and ``.fit(X, y)`` called on the full data.

    Raises:
    -----------
    AssertionError
        If ``algo`` lacks a ``.fit()`` or ``.predict()`` attribute.
    ValueError
        If ``criterion`` is not one of "MSE", "MAE" or "MAD".

    Example:
    -----------
    >>> X, y = datasets.load_iris(return_X_y=True)
    >>> tuned = tune_bb(Ridge(), X, y, criterion="MAE")
    """

    # statements here to ensure model has the methods we need to tune it
    assert hasattr(algo, "fit"), "model object must have .fit() method"
    assert hasattr(algo, "predict"), "model object must have .predict() method"

    # Translate the public criterion name into the scorer name handed to
    # cross_val_score. "MAD" is the spelling used in the documentation, so it
    # is treated as a synonym for "MAE" rather than rejected.
    if criterion == "MSE":
        scoring = "neg_mean_squared_error"
    elif criterion in ("MAE", "MAD"):
        scoring = "neg_mean_absolute_error"
    else:
        raise ValueError("Please input either MAE or MSE for criterion.")

    # ridge tuning parameter, for example
    # expand this later
    parameter_set = np.linspace(0,10,11) # 0 to 10

    cv_score_set = []
    # begin cross validation
    for alpha in parameter_set:

        algo.alpha = alpha
        cv = cross_val_score(algo,
                             X,
                             y,
                             scoring=scoring,
                             cv=K)

        # the "neg_*" scorers return negated losses, so abs() recovers the loss
        cv_score = np.mean(np.absolute(cv))
        cv_score_set.append(cv_score)

    # argmin picks the first smallest score and, unlike list.index(min(...)),
    # does not raise when a score is NaN
    best_index = int(np.argmin(cv_score_set))
    alpha_value = parameter_set[best_index]

    print("CV Scores : ",cv_score_set, 
          "Alpha value for minimum : ", alpha_value)

    # The loop leaves algo.alpha at the last grid value; reset it to the
    # selected one and refit on the full data before handing the model back.
    algo.alpha = alpha_value
    algo.fit(X, y)
    return algo


In [107]:
# Full iris data (deliberately no train/test split) used as a quick input
# for exercising the tuner
X, y = datasets.load_iris(return_X_y=True)

# Smoke test: tune a Ridge estimator using the MAE criterion
tune_bb(Ridge(), X=X, y=y, criterion="MAE")

CV Scores :  [0.20580793485737062, 0.21064739059696858, 0.2135713484277618, 0.21544921198881067, 0.216755711162185, 0.21776526175405958, 0.21860265955701436, 0.21925132023297816, 0.21980899618115401, 0.22045671304505232, 0.22101445754354873] Alpha value for minimum :  0.0


In [103]:
 # Preview the 11-point grid from 0 to 10 (same call tune_bb uses for its grid)
 np.linspace(0, 10, num=11)

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

### Scratchwork below

In [15]:
# maybe something like this if they all follow the .fit() syntax? 
def model_test(model,X,y):
    """Call ``model.fit(X, y)`` and return whatever that call returns.

    Parameters
    ----------
    model : object
        Any object exposing a ``.fit(X, y)`` method.
    X, y :
        Passed through to ``.fit()`` unchanged, in that order.

    Returns
    -------
    The return value of ``model.fit(X, y)``.
    """
    fit_result = model.fit(X, y)
    return fit_result

In [None]:
# Hand a LinearRegression instance to the helper and read the coefficients
# off the object that comes back
model_test(model=LinearRegression(), X=X, y=y).coef_

In [18]:
# Same check with Ridge: the helper call works here too
model_test(model=Ridge(), X=X, y=y).coef_

array([[ 0.67567568, -0.05405405, -0.32432432]])

In [20]:
# And once more with Lasso
model_test(model=Lasso(), X=X, y=y).coef_

array([ 0.57894737, -0.        , -0.        ])

### Checking everything on Iris data


In [38]:
# Iris features and targets as separate arrays
X_iris, y_iris = datasets.load_iris(return_X_y=True)

# 10-fold splitter; shuffling is seeded so the folds are repeatable
cv = KFold(
    n_splits=10,
    shuffle=True,
    random_state=1,
)

KFold(n_splits=10, random_state=1, shuffle=True)

### Resources and Notes: 

1. https://www.statology.org/k-fold-cross-validation-in-python/

In [67]:
# IPython help syntax: shows the docstring of tune_bb in the notebook.
# Development aid only -- this line is not valid plain Python.
?tune_bb