In [3]:
import numpy as np 
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn import datasets
import scipy.stats as st

In [22]:
def tune_bb(algo, X, y,
            regularization="Dropout", M=10,
            c=None, K=5, criterion="MSE",
            n_candidates=10000):

    """Validate inputs and build the regularized design for a blackbox regression model.

    Work in progress: the function currently validates its arguments and
    constructs the perturbation belonging to the chosen regularization
    method. The cross-validated tuning step is not implemented yet, so
    nothing is fitted and nothing is returned.

    Parameters:
    -----------

    algo : object
        A learning algorithm exposing .fit() and .predict() (only the
        presence of these attributes is checked here).
    X : array-like of shape (n,p)
        training data X in R nxp; converted with np.asarray, the caller's
        object is never modified
    y : array-like of shape (n,)
        training labels, Y, in Rn (not used yet)
    regularization : str, default="Dropout"
        regularization method, can be any of "Dropout",
        "NoiseAddition", or "Robust"
    M : int, default=10
        A positive integer indicating the number of Monte Carlo
        replicates to be used if the method specified is Dropout or
        NoiseAddition (not used yet)
    c : array-like of shape (p,), default=None
        A vector of column bounds, one per column of X. Required when the
        method specified is "Robust"; ignored otherwise.
    K : int, default=5
        A positive integer indicating the number of CV-folds to be used to
        tune the amount of regularization, e.g., K = 5 indicates five-fold CV
        (not used yet)
    criterion : str, default="MSE"
        A criterion to be used to evaluate the method that belongs to the set
        {MSE, MAE} where MSE encodes mean square error and MAE encodes mean
        absolute error.
    n_candidates : int, default=10000
        Number of random perturbation matrices sampled for the "Robust"
        method; the one with the largest spectral norm is kept.

    Returns:
    -----------
    None
        TODO: return the tuned predictive model once the CV step exists.

    Raises:
    -----------
    AssertionError
        If algo lacks a .fit or .predict attribute.
    ValueError
        If criterion or regularization is not one of the accepted strings,
        if X is not 2-dimensional, if "Robust" is requested without a
        length-p vector c, or if n_candidates < 1.

    Example:
    -----------
    >>> from sklearn import datasets
    >>> from sklearn.linear_model import Ridge
    >>> X, y = datasets.load_iris(return_X_y=True)
    >>> tune_bb(Ridge(), X, y, regularization="Robust",
    ...         c=[2, 2, 1, 1.5], criterion="MAE")
    """

    # statements here to ensure model has the methods we need to tune it
    assert hasattr(algo, "fit"), "model object must have .fit() method"
    assert hasattr(algo, "predict"), "model object must have .predict() method"

    # Translate the short criterion name into the scikit-learn scorer name
    # that the (future) cross-validation step will pass as `scoring=`.
    if criterion == "MSE":
        scoring = "neg_mean_squared_error"  # noqa: F841 - consumed by the CV step (TODO)
    elif criterion == "MAE":
        scoring = "neg_mean_absolute_error"  # noqa: F841 - consumed by the CV step (TODO)
    else:
        raise ValueError("Please input either MAE or MSE for criterion.")

    # X is documented as array-like, so accept lists / nested sequences too.
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError("X must be a 2-dimensional array-like of shape (n, p).")

    if regularization == "Dropout":
        # dropout notes: topic 1 p. 30
        # draw Z matrix: independent Bernoulli(0.5) keep/drop mask per entry
        Z = np.random.binomial(1, 0.5, size=X.shape)
        dropout_matrix = Z * X  # noqa: F841 - TODO: feed into the M Monte Carlo fits
    elif regularization == "NoiseAddition":
        # TODO: placeholder, noise addition is not implemented yet
        print('do something')
    elif regularization == "Robust":
        # Without c the column scaling below has nothing to scale to; fail
        # with a clear message instead of "'NoneType' is not subscriptable".
        if c is None:
            raise ValueError('c (column bounds) must be provided when regularization="Robust"')
        bounds = np.asarray(c, dtype=float)
        if bounds.ndim != 1 or bounds.shape[0] != X.shape[1]:
            raise ValueError("c must be a vector with one bound per column of X")
        if n_candidates < 1:
            raise ValueError("n_candidates must be a positive integer")

        #Assertion: we don't actually have any reasonable bounds to argue for robust regression here
        #Claim: We could take a sample of many matrix M and then choose 1 based on a criteria to fit our regression
        maxmatrix = None
        maxnorm = -np.inf
        for _ in range(n_candidates):
            matrix = np.random.rand(*X.shape)
            # Rescale every column so that its Euclidean norm equals its bound c[m].
            matrix *= bounds / np.linalg.norm(matrix, axis=0)
            # Selection criterion: the matrix 2-norm (largest singular value).
            spectral_norm = np.linalg.norm(matrix, 2)
            if spectral_norm > maxnorm:
                maxnorm = spectral_norm
                maxmatrix = matrix
        # Add the selected perturbation to a local copy of the design matrix.
        # TODO: the perturbed X is not used further until the fit/CV step exists.
        X = X + maxmatrix
    else:
        raise ValueError('Please input one of of "Dropout", "NoiseAddition", or "Robust"')



In [24]:
# Smoke-test inputs for tune_bb: the complete iris data set, with no
# train/test split applied.
X, y = datasets.load_iris(return_X_y=True)

# Exercise the "Robust" branch with Ridge() as the black-box learner,
# supplying one column bound per iris feature.
tune_bb(Ridge(), X, y, regularization="Robust", c=[2, 2, 1, 1.5], criterion="MAE")

[[7.12551497e-03 1.13427964e-01 7.39207125e-03 1.43744299e-02]
 [1.75473326e-01 5.31664536e-03 3.23546289e-02 5.57539481e-02]
 [1.89467565e-01 1.94843397e-01 1.25883542e-01 1.97285651e-01]
 [2.00948194e-01 2.15572600e-01 8.63625500e-02 1.24398802e-01]
 [2.50763481e-01 2.20964802e-01 4.76317666e-02 9.13935405e-02]
 [2.32529461e-01 1.52743246e-01 8.04447065e-02 1.37429811e-01]
 [2.55910178e-01 2.37575200e-01 3.29946526e-02 1.94133067e-01]
 [1.57049224e-01 2.22069843e-01 1.32681283e-01 1.01567575e-01]
 [1.01958745e-01 2.41827677e-02 7.89866953e-02 1.77225729e-01]
 [1.19339999e-01 1.27001391e-01 1.12908779e-01 1.23090526e-01]
 [9.49961877e-02 1.66959942e-01 1.05269635e-01 6.62043927e-02]
 [1.05643821e-01 1.08000110e-01 6.27223129e-02 1.93561782e-01]
 [7.32899934e-02 8.00431770e-02 3.36562958e-02 9.24492185e-03]
 [6.55545659e-02 6.32533859e-02 1.94484776e-02 1.98991893e-01]
 [1.81075963e-01 8.56480470e-02 1.29836454e-01 1.83827549e-01]
 [1.55563341e-01 7.06164327e-02 1.31038601e-01 1.060351

### Scratchwork below

In [15]:
# maybe something like this if they all follow the .fit() syntax? 
def model_test(model, X, y):
    """Call ``model.fit(X, y)`` and hand back whatever that call returns."""
    fit_result = model.fit(X, y)
    return fit_result

In [None]:
# Pass a freshly constructed LinearRegression instance into model_test and
# display the `coef_` attribute of the object it returns.
model_test(LinearRegression(), X, y).coef_

In [18]:
# Same check with Ridge() constructed with its default arguments.
# NOTE(review): the recorded output below is a 2-D array with 3 coefficients,
# while the iris X loaded earlier has 4 features - the output looks stale
# (produced with a different X/y); re-run on a fresh kernel to confirm.
model_test(Ridge(), X, y).coef_

array([[ 0.67567568, -0.05405405, -0.32432432]])

In [20]:
# Same check with Lasso() constructed with its default arguments; in the
# recorded output below two of the three coefficients are (signed) zero.
model_test(Lasso(), X, y).coef_

array([ 0.57894737, -0.        , -0.        ])

In [None]:
# ridge tuning parameter, for example
# expand this later
#
# `algo`, `criterion`, and `K` mirror tune_bb's parameters. They are defined
# here so this scratch cell runs on a fresh kernel instead of raising
# NameError; X and y come from the iris-loading cell.
algo = Ridge()
criterion = "neg_mean_squared_error"  # scikit-learn scorer name for MSE
K = 5                                 # number of CV folds

parameter_set = np.linspace(0,10,11) # 0 to 10

cv_score_set = []
# begin cross validation
for alpha in parameter_set:

    # set_params raises for estimators that have no `alpha` hyper-parameter,
    # whereas plain attribute assignment would silently do nothing useful.
    algo.set_params(alpha=alpha)
    cv = cross_val_score(algo,
                         X,
                         y,
                         scoring=criterion,
                         cv=K)

    # "neg_*" scorers return negated errors, need abs() to get the error back
    cv_score = np.mean(np.absolute(cv))
    cv_score_set.append(cv_score)

# Position of the smallest mean CV error (first occurrence on ties) and the
# alpha that produced it. NOTE: `minimum_score` is an index, not a score.
minimum_score = int(np.argmin(cv_score_set))
alpha_value = parameter_set[minimum_score]

In [16]:
# Load the iris features and labels into X_iris / y_iris (names distinct from
# the X, y globals used by the other cells).
# NOTE(review): the recorded output "(150, 4)" suggests a `.shape` expression
# used to be the last line of this cell - confirm and restore or clear the output.
X_iris, y_iris = datasets.load_iris(return_X_y=True)

(150, 4)

### Resources and Notes: 

1. https://www.statology.org/k-fold-cross-validation-in-python/

In [67]:
# View tune_bb's docstring with IPython's `?` help syntax (development aid; remove before sharing)
?tune_bb