In [1]:
%run "./metrics/metrics"

In [2]:
import numpy as np

class Bootstrap():
    """Draw bootstrap resamples (rows sampled with replacement) from paired data.

    Parameters
    ----------
    X : array-like
        Observations; the first axis indexes rows. May have any number of
        trailing dimensions (including none).
    y : array-like
        Targets with the same number of rows as ``X``. May be 1-D or n-D.
    n_samples : int
        Number of resamples to draw.
    len_sample : int
        Number of rows in each resample.

    Attributes
    ----------
    sample_X : numpy.ndarray
        Float array of shape ``(n_samples, len_sample) + X.shape[1:]``.
    sample_y : numpy.ndarray
        Float array of shape ``(n_samples, len_sample) + y.shape[1:]``.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not have the same number of rows.
    """

    def __init__(self, X, y, n_samples, len_sample):
        self.n_samples = n_samples
        self.len_sample = len_sample
        self.X = np.array(X)
        self.y = np.array(y)

        n_rows = len(self.X)
        if len(self.y) != n_rows:
            raise ValueError(
                "X and y must have the same number of rows, got %d and %d"
                % (n_rows, len(self.y))
            )

        # Use the trailing shape of each array instead of a hard-coded
        # ``shape[1]`` so that 1-D targets (and 1-D X) are supported.
        self.sample_X = np.zeros((n_samples, len_sample) + self.X.shape[1:])
        self.sample_y = np.zeros((n_samples, len_sample) + self.y.shape[1:])

        row_ids = np.arange(n_rows)
        for i in range(n_samples):
            # The same indices are applied to X and y so pairs stay aligned.
            idx = np.random.choice(row_ids, len_sample, replace=True)
            self.sample_X[i] = self.X[idx]
            self.sample_y[i] = self.y[idx]

    def values(self):
        """Return the tuple ``(sample_X, sample_y)`` of drawn resamples."""
        return self.sample_X, self.sample_y
        


In [309]:
# Smoke test for Bootstrap: a 3-row, 2-column dataset is used as both the
# features and the targets; 5 resamples of 3 rows each are drawn.
# NOTE(review): this leaves a global `X` in the kernel namespace, so any code
# that refers to a bare `X` will silently pick up this toy list.
X = [[1, 2], [3, 4], [5, 6]]
b = Bootstrap(X, X, 5, 3)


In [636]:
class Model:
    """Interface stub listing the methods a model object exposes here.

    Every method is a no-op that returns ``None``.
    """

    def __init__(self):
        """Create the stub; no attributes are set."""
        return None

    def fit(self, X, y):
        """Placeholder for training on ``X`` and ``y``; does nothing."""
        return None

    def predict(self, X):
        """Placeholder for prediction on ``X``; does nothing."""
        return None

    def predict_params(self, X):
        """Placeholder taking ``X``; does nothing."""
        return None
        

In [3]:
%run "./metrics/metrics"

In [4]:
class FunctionEvaluator():
    """Base class that stores a callable in ``self.function``.

    Parameters
    ----------
    func : callable, optional
        Function to store. When omitted (``None``), a two-argument function
        that always returns ``0`` is stored instead.
    """

    def __init__(self, func=None):
        # Identity check: ``func == None`` would dispatch to ``func.__eq__``,
        # which may be overridden (or element-wise for array-like objects).
        if func is None:
            self.function = lambda a, b: 0
        else:
            self.function = func

In [5]:
class Metric(FunctionEvaluator):
    """Evaluator that scores a model's predictions against targets."""

    def evaluate(self, model, X, y):
        """Return ``self.function(y, model.predict(X))``."""
        predictions = model.predict(X)
        return self.function(y, predictions)
    

In [6]:
class Characteristic(FunctionEvaluator):
    """Evaluator whose ``evaluate`` takes only a model."""

    def evaluate(self, model):
        """Base implementation: does nothing and returns ``None``."""
        return None
    

In [7]:
class Comparison(FunctionEvaluator):
    """Evaluator whose ``evaluate`` takes a full model and a second model.

    NOTE(review): the ``Cp`` subclass in this notebook overrides ``evaluate``
    with the positional order ``(full_model, model, full_X, reduced_X, y)``,
    which differs from the parameter list declared here.
    """

    def evaluate(self, full_model, model, full_X, y, mask):
        """Base implementation: does nothing and returns ``None``."""
        return None

In [8]:
class RSS(Metric):
    """Metric bound to ``residual_square_sum``.

    ``residual_square_sum`` is not defined in this notebook; presumably it is
    provided by the ``./metrics/metrics`` script loaded with ``%run``.
    """

    def __init__(self):
        super().__init__(residual_square_sum)

In [9]:
class Adjusted_Determination_Coefficient(Metric):
    """Metric bound to ``determination_coefficient``, called as ``f(y, y_pred)``.

    Uses the inherited ``Metric.evaluate``, so any further parameters of
    ``determination_coefficient`` keep their defaults (presumably the
    adjusted variant; confirm against the metrics module).
    """

    def __init__(self):
        super().__init__(determination_coefficient)
        
class Determination_Coefficient(Metric):
    """Metric bound to ``determination_coefficient`` with a ``False`` flag."""

    def __init__(self):
        super().__init__(determination_coefficient)

    def evaluate(self, model, X, y):
        """Return ``self.function(y, model.predict(X), False)``.

        The third positional argument presumably switches off the adjustment;
        confirm against the metrics module.
        """
        predictions = model.predict(X)
        return self.function(y, predictions, False)

In [10]:
class VIF(Metric):
    """Metric bound to ``variance_inflation_factor``.

    Evaluated through the inherited ``Metric.evaluate``, i.e. the function is
    called with ``(y, model.predict(X))``.
    """

    def __init__(self):
        super().__init__(variance_inflation_factor)

In [11]:
class BIC(Metric):
    """Metric bound to ``bayesian_information_criterion``."""

    def __init__(self):
        super().__init__(bayesian_information_criterion)

    def evaluate(self, model, X, y):
        """Return ``self.function(y, model.predict(X), X.shape[1])``.

        The third argument is the number of columns of ``X``.
        """
        n_columns = X.shape[1]
        predictions = model.predict(X)
        return self.function(y, predictions, n_columns)

In [42]:
class Cp(Comparison):
    """Comparison bound to ``mallows_Cp``."""

    def __init__(self):
        super().__init__(mallows_Cp)

    def evaluate(self, full_model, model, full_X, reduced_X, y):
        """Compare a full model with a model fitted on fewer columns.

        Returns ``self.function(y, full_pred, reduced_pred, p)`` where
        ``full_pred = full_model.predict(full_X)``,
        ``reduced_pred = model.predict(reduced_X)`` and ``p`` is the number
        of columns of ``reduced_X``.
        """
        full_pred = full_model.predict(full_X)
        reduced_pred = model.predict(reduced_X)
        n_reduced_columns = reduced_X.shape[1]
        return self.function(y, full_pred, reduced_pred, n_reduced_columns)
        
        

In [43]:
class Condition_Number(Characteristic):
    """Characteristic bound to ``condition_number_xtx``."""

    def __init__(self):
        super().__init__(condition_number_xtx)

    def evaluate(self, model):
        """Return ``self.function(model.coef_)``.

        NOTE(review): the wrapped function's name suggests it expects a
        design matrix, while the fitted coefficients are passed here; confirm
        this is intended.
        """
        return self.function(model.coef_)

In [44]:
# NOTE(review): no variable named `model` is assigned in any cell above, so this
# cell fails with NameError (see the recorded output). It needs a fitted model
# exposing `coef_` before it can run.
c = Condition_Number()
c.evaluate(model)

NameError: name 'model' is not defined

In [45]:
class EvaluateInfo():
    """Shared state for evaluating a model with a set of evaluators.

    Parameters
    ----------
    model : object
        Estimator with a ``fit(X, y)`` method.
    metrics : list, optional
        Evaluators that compare ``y`` with the model's predictions.
    characteristics : list, optional
        Evaluators that describe the fitted model itself.
    comparisons : list, optional
        Evaluators that compare ``y``, full-model predictions and
        reduced-feature predictions.

    Each list defaults to a fresh empty list per instance.
    """

    def __init__(self, model, metrics=None, characteristics=None, comparisons=None):
        self.model = model
        # ``None`` sentinels avoid sharing one default list between instances.
        self.characteristics = [] if characteristics is None else characteristics
        self.metrics = [] if metrics is None else metrics
        self.comparisons = [] if comparisons is None else comparisons

    def fit(self, X_train, y_train, X_test, y_test, mask=None, n_samples=20, len_sample=None):
        """Store the data and settings, then fit the full reference model.

        Parameters
        ----------
        X_train, y_train : training data passed to ``self.model.fit``.
        X_test, y_test : test data, stored for use by subclasses.
        mask : array-like, optional
            Per-column selector, converted to a boolean array. Defaults to
            all columns of ``X_train`` selected.
        n_samples : int
            Stored as ``self.n_samples``.
        len_sample : int, optional
            Stored as ``self.len_sample``. Defaults to ``X_train.shape[1]``.

        After the call, ``self.full`` holds a deep copy of the model fitted on
        the complete ``X_train``.
        """
        # Imported here so the class does not rely on a later notebook cell
        # having already imported ``deepcopy``.
        from copy import deepcopy

        if mask is None:
            # Use the training matrix itself (not a notebook-global ``X``) and
            # the builtin ``bool`` (the ``np.bool`` alias no longer exists).
            mask = np.ones(X_train.shape[1], dtype=bool)
        self.mask = np.array(mask, dtype=bool)
        if len_sample is None:
            # NOTE(review): the default is the number of columns of X_train,
            # not the number of rows; confirm this is the intended length.
            len_sample = X_train.shape[1]
        self.len_sample = len_sample
        self.n_samples = n_samples

        self.X_train = X_train
        self.y_train = y_train

        self.X_test = X_test
        self.y_test = y_test

        # Some estimators return ``self`` from ``fit`` and others return
        # ``None``; snapshot the fitted estimator in either case.
        fitted = self.model.fit(self.X_train, self.y_train)
        self.full = deepcopy(self.model if fitted is None else fitted)

In [77]:
from copy import deepcopy
class EvaluateStaticInfo(EvaluateInfo):
    """Fit the reduced model once and score it on bootstrap resamples.

    The model is fitted a single time on the masked training columns; each
    metric and comparison is then evaluated on every bootstrap resample drawn
    from the test data. No characteristics are used.
    """

    def __init__(self, model, metrics=[], comparisons=[]):
        super().__init__(model, metrics, [], comparisons)

    def __eval(self, b):
        """Score the reduced model on each resample held by ``b``.

        Returns ``(metric_scores, comparison_scores)``, two arrays with one
        row per evaluator and one column per resample.
        """
        estimator = self.model
        boot_X, boot_y = b.values()

        n_draws = self.n_samples
        metric_scores = np.zeros((len(self.metrics), n_draws))
        comparison_scores = np.zeros((len(self.comparisons), n_draws))

        # Single fit on the training columns selected by the mask.
        train_reduced = (self.X_train.T[self.mask]).T
        estimator.fit(train_reduced, self.y_train)

        for draw in range(n_draws):
            X_draw = boot_X[draw]
            y_draw = boot_y[draw]
            X_draw_reduced = (X_draw.T[self.mask]).T

            for row, metric in enumerate(self.metrics):
                metric_scores[row, draw] = metric.evaluate(estimator, X_draw_reduced, y_draw)

            for row, comparison in enumerate(self.comparisons):
                comparison_scores[row, draw] = comparison.evaluate(
                    self.full, estimator, X_draw, X_draw_reduced, y_draw
                )

        return metric_scores, comparison_scores

    def fit(self, X_train, y_train, X_test, y_test, mask=None, n_samples=20, len_sample=None):
        """Store the data, bootstrap the test set, and return the scores.

        Returns the ``(metric_scores, comparison_scores)`` tuple produced by
        the private evaluation routine.
        """
        super().fit(X_train, y_train, X_test, y_test, mask, n_samples, len_sample)
        # Resamples come from the test data stored by the parent ``fit``.
        self.b = Bootstrap(self.X_test, self.y_test, self.n_samples, self.len_sample)
        return self.__eval(self.b)
        

In [84]:
from copy import deepcopy
class EvaluateDynamicInfo(EvaluateInfo):
    """Refit the reduced model on each bootstrap resample and score it.

    For every resample the model is refitted on the masked resample columns
    and then scored on the complete test set.

    NOTE(review): the resamples are drawn from the test data, so the model is
    trained on rows of the set it is scored on; confirm whether bootstrapping
    the training data was intended.
    """

    def __init__(self, model, metrics=[], characteristics=[], comparisons=[]):
        super().__init__(model, metrics, characteristics, comparisons)

    def __eval(self, b):
        """Refit on each resample held by ``b`` and collect all scores.

        Returns ``(metric_scores, characteristic_scores, comparison_scores)``,
        three arrays with one row per evaluator and one column per resample.
        """
        estimator = self.model
        boot_X, boot_y = b.values()

        n_draws = self.n_samples
        metric_scores = np.zeros((len(self.metrics), n_draws))
        characteristic_scores = np.zeros((len(self.characteristics), n_draws))
        comparison_scores = np.zeros((len(self.comparisons), n_draws))

        # Initial fit on the masked training columns; when at least one
        # resample is processed this fit is replaced inside the loop.
        train_reduced = (self.X_train.T[self.mask]).T
        estimator.fit(train_reduced, self.y_train)

        for draw in range(n_draws):
            draw_reduced = (boot_X[draw].T[self.mask]).T
            estimator.fit(draw_reduced, boot_y[draw])

            for row, metric in enumerate(self.metrics):
                metric_scores[row, draw] = metric.evaluate(
                    estimator, self.reduced_X_test, self.y_test
                )

            for row, characteristic in enumerate(self.characteristics):
                characteristic_scores[row, draw] = characteristic.evaluate(estimator)

            for row, comparison in enumerate(self.comparisons):
                comparison_scores[row, draw] = comparison.evaluate(
                    self.full, estimator, self.X_test, self.reduced_X_test, self.y_test
                )

        return metric_scores, characteristic_scores, comparison_scores

    def fit(self, X_train, y_train, X_test, y_test, mask=None, n_samples=20, len_sample=None):
        """Store the data, bootstrap the test set, and return the scores.

        Returns the ``(metric_scores, characteristic_scores,
        comparison_scores)`` tuple produced by the private evaluation routine.
        """
        super().fit(X_train, y_train, X_test, y_test, mask, n_samples, len_sample)
        # Test columns selected by the mask, reused for every resample.
        self.reduced_X_test = (self.X_test.T[self.mask]).T
        self.b = Bootstrap(self.X_test, self.y_test, self.n_samples, self.len_sample)
        return self.__eval(self.b)

In [85]:
from sklearn.linear_model import LinearRegression
# Unfitted linear-regression estimator; passed as `model` to the evaluator below.
mo = LinearRegression()

In [86]:
import scipy.stats as sps

# Synthetic multi-output regression data: two independent 10x5 standard-normal
# design matrices (XX is used as the training set and X2 as the test set in the
# fit call below), sharing one 5x3 coefficient matrix drawn from U(0, 1).
# NOTE(review): no random seed is set, so the recorded outputs are not
# reproducible on re-run.
XX = sps.norm.rvs(size=(10, 5))
X2 = sps.norm.rvs(size=(10, 5))
zz = sps.uniform.rvs(size=(5, 3))
# Targets are the linear response plus Gaussian noise with scale 0.1.
yy = XX @ zz + sps.norm.rvs(scale=0.1, size=(10, 3))
y2 = X2 @ zz + sps.norm.rvs(scale=0.1, size=(10, 3))
# Feature mask as floats (1 = keep, 0 = drop): columns 1, 2 and 3 are dropped,
# leaving columns 0 and 4.
maskk = np.ones(XX.shape[1])
maskk[2]=0
maskk[1] = 0
maskk[3] = 0
print(maskk)

[ 1.  0.  0.  0.  1.]


In [87]:
# One instance of each evaluator defined above: five metrics, one model
# characteristic, and one full-vs-reduced comparison.
metric1 = RSS()
metric2 = Adjusted_Determination_Coefficient()
metric3 = Determination_Coefficient()
metric4 = VIF()
metric5 = BIC()
char1 = Condition_Number()
comp1 = Cp()

In [88]:
# Evaluator that refits `mo` on each bootstrap resample; the order of `metrics`
# determines the row order of the first array returned by `info.fit`.
info = EvaluateDynamicInfo(model=mo, metrics = [metric1, metric2, metric3, metric4, metric5], characteristics=[char1],
                           comparisons=[comp1])

In [89]:
# Train on (XX, yy), test on (X2, y2), keeping only the columns selected by
# maskk. Returns three arrays (metrics, characteristics, comparisons), each
# with one row per evaluator and one column per resample (n_samples defaults
# to 20).
info.fit(XX, yy, X2, y2, mask=maskk)

(array([[  1.88912046e+02,   1.19515280e+02,   1.74089869e+01,
           1.02188939e+02,   1.94580945e+02,   3.31004416e+01,
           1.23173146e+01,   1.75225123e+01,   1.37729678e+02,
           4.42695688e+01,   5.88742332e+01,   1.70182971e+01,
           2.77492672e+02,   1.54155709e+01,   1.84514062e+01,
           3.25685432e+01,   1.51212134e+01,   1.14674478e+02,
           1.22826028e+01,   9.83819404e+01],
        [ -2.63650982e+00,  -1.30063937e+00,   6.64881339e-01,
          -9.67111623e-01,  -2.74563472e+00,   3.62824746e-01,
           7.62894763e-01,   6.62696003e-01,  -1.65126200e+00,
           1.47821828e-01,  -1.33314323e-01,   6.72402020e-01,
          -4.34166480e+00,   7.03254100e-01,   6.44815028e-01,
           3.73063656e-01,   7.08920409e-01,  -1.20745514e+00,
           7.63562956e-01,  -8.93827844e-01],
        [ -3.67551263e+00,  -1.95796490e+00,   5.69133151e-01,
          -1.52914352e+00,  -3.81581607e+00,   1.80774673e-01,
           6.95150410e-01,