In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import sklearn
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

  from pandas import Int64Index as NumericIndex


In [None]:
def blackbox(algo, X, Y, method = "Dropout", M = 100, K = 5, criteria = "MSE"):
    """Fit ``algo`` on noise-augmented data, choosing the noise level by K-fold CV.

    For every candidate dropout rate ``phi`` in ``np.arange(0, 1, 0.1)`` the
    training fold is replicated ``M`` times, each entry is zeroed independently
    with probability ``phi`` and the survivors are rescaled by ``1 / (1 - phi)``;
    the augmented design is then standardized column-wise and handed to ``algo``.
    The rate with the smallest mean held-out error is used to refit on all of
    ``X``/``Y`` and the resulting model is returned.

    Parameters
    ----------
    algo : callable
        Called as ``algo(X_aug, Y_aug)``; must return an object exposing
        ``predict(X)``.
    X, Y : array-like
        Features and targets; converted with ``np.asarray`` and indexed by row.
    method : str
        Accepted for backward compatibility but not consulted: only dropout
        noise is implemented in this function.
        NOTE(review): presumably meant to select the noise type - confirm.
    M : int
        Number of noisy replicates generated per observation.
    K : int
        Number of cross-validation folds.
    criteria : {"MSE", "MAD"}
        Held-out error metric (mean squared / mean absolute error).

    Returns
    -------
    The object returned by ``algo`` when fitted on the augmented full data at
    the selected rate. It was trained on standardized features.

    Raises
    ------
    ValueError
        If ``criteria`` is neither ``"MSE"`` nor ``"MAD"``.

    Notes
    -----
    The masks come from NumPy's global RNG and ``KFold`` is created without a
    ``random_state``, so results vary between calls unless the caller seeds
    NumPy beforehand.
    """
    # Validate up front so a typo fails before any model is fitted.
    if criteria == "MSE":
        loss = mean_squared_error
    elif criteria == "MAD":
        loss = mean_absolute_error
    else:
        raise ValueError('Please input either MSE or MAD!')

    X = np.asarray(X)
    Y = np.asarray(Y)

    def augment(features, targets, phi):
        # Replicate every row M times and apply an independent dropout mask.
        tiled = np.repeat(features, M, axis=0)
        # Keep probability is 1 - phi: together with the 1 / (1 - phi) rescale
        # this leaves the expected value of every entry unchanged, and phi = 0
        # means "no noise" instead of "drop everything".
        keep = np.random.binomial(1, 1 - phi, size=tiled.shape)
        noisy = tiled * keep / (1 - phi)
        center = np.mean(noisy, axis=0)
        spread = np.std(noisy, axis=0)
        # Constant columns would otherwise produce 0 / 0 = NaN.
        spread = np.where(spread > 0, spread, 1.0)
        return (noisy - center) / spread, np.repeat(targets, M, axis=0)

    phi_list = np.arange(0, 1, 0.1)
    cv_error = []

    for phi in phi_list:
        kf = KFold(n_splits = K, shuffle=True)
        error = []
        for train_index, test_index in kf.split(X):
            X_new, Y_new = augment(X[train_index], Y[train_index], phi)
            reg = algo(X_new, Y_new)
            # NOTE(review): the held-out fold is scored on its raw scale while
            # the model was fitted on standardized features - kept as in the
            # original; confirm this is intended.
            pred = reg.predict(X[test_index])
            error.append(loss(Y[test_index], pred))

        cv_error.append(np.mean(error))

    phi_opt = phi_list[np.argmin(cv_error)]

    # Refit on the full data at the selected rate, mirroring `dropout`.
    X_new, Y_new = augment(X, Y, phi_opt)
    return algo(X_new, Y_new)

In [None]:
def dropout(algo, X, Y, M = 100, K = 5, criteria = "MSE"):
    """Fit ``algo`` on dropout-augmented data, tuning the dropout rate by K-fold CV.

    For every candidate rate ``phi`` in ``np.arange(0, 1, 0.1)`` the training
    fold is replicated ``M`` times, each entry is zeroed independently with
    probability ``phi`` and the survivors are rescaled by ``1 / (1 - phi)``; the
    augmented design is standardized column-wise and passed to ``algo``. The
    rate with the smallest mean held-out error is then used to refit on all of
    ``X``/``Y``.

    Parameters
    ----------
    algo : callable
        Called as ``algo(X_aug, Y_aug)``; must return an object exposing
        ``predict(X)``.
    X, Y : array-like
        Features and targets; converted with ``np.asarray`` and indexed by row.
    M : int
        Number of noisy replicates generated per observation.
    K : int
        Number of cross-validation folds.
    criteria : {"MSE", "MAD"}
        Held-out error metric (mean squared / mean absolute error).

    Returns
    -------
    The object returned by ``algo`` when fitted on the augmented full data at
    the selected rate. It was trained on standardized features.

    Raises
    ------
    ValueError
        If ``criteria`` is neither ``"MSE"`` nor ``"MAD"``.

    Notes
    -----
    The masks come from NumPy's global RNG and ``KFold`` is created without a
    ``random_state``, so results vary between calls unless the caller seeds
    NumPy beforehand.
    """
    # Validate up front so a typo fails before any model is fitted.
    if criteria == "MSE":
        loss = mean_squared_error
    elif criteria == "MAD":
        loss = mean_absolute_error
    else:
        raise ValueError('Please input either MSE or MAD!')

    X = np.asarray(X)
    Y = np.asarray(Y)

    def augment(features, targets, phi):
        # Replicate every row M times and apply an independent dropout mask.
        tiled = np.repeat(features, M, axis=0)
        # Keep probability is 1 - phi: together with the 1 / (1 - phi) rescale
        # this leaves the expected value of every entry unchanged, and phi = 0
        # means "no noise" instead of "drop everything".
        keep = np.random.binomial(1, 1 - phi, size=tiled.shape)
        noisy = tiled * keep / (1 - phi)
        center = np.mean(noisy, axis=0)
        spread = np.std(noisy, axis=0)
        # Constant columns would otherwise produce 0 / 0 = NaN.
        spread = np.where(spread > 0, spread, 1.0)
        return (noisy - center) / spread, np.repeat(targets, M, axis=0)

    phi_list = np.arange(0, 1, 0.1)
    cv_error = []

    for phi in phi_list:
        kf = KFold(n_splits = K, shuffle=True)
        error = []
        for train_index, test_index in kf.split(X):
            X_new, Y_new = augment(X[train_index], Y[train_index], phi)
            reg = algo(X_new, Y_new)
            # NOTE(review): the held-out fold is scored on its raw scale while
            # the model was fitted on standardized features - kept as in the
            # original; confirm this is intended.
            pred = reg.predict(X[test_index])
            error.append(loss(Y[test_index], pred))

        cv_error.append(np.mean(error))

    phi_opt = phi_list[np.argmin(cv_error)]

    # Final fit on the full data at the selected rate.
    X_new, Y_new = augment(X, Y, phi_opt)
    reg = algo(X_new, Y_new)

    return reg