In [1]:
%run "./metrics/metrics"

In [2]:
import numpy as np

class Bootstrap():
    """Draw bootstrap resamples (rows sampled with replacement) of a dataset.

    Parameters
    ----------
    X : array-like
        Feature rows; converted with ``np.array``.
    y : array-like
        Targets aligned with ``X`` along the first axis. May be 1-D
        (``(n_rows,)``) or carry trailing dimensions (``(n_rows, k)``).
    n_samples : int
        Number of resamples to draw.
    len_sample : int
        Number of rows in every resample.

    After construction ``sample_X`` has shape
    ``(n_samples, len_sample) + X.shape[1:]`` and ``sample_y`` has shape
    ``(n_samples, len_sample) + y.shape[1:]``. Both are float arrays
    (allocated with ``np.zeros``) and share the same row indices, so each
    resampled feature row stays paired with its target.
    """
    def __init__(self, X, y, n_samples, len_sample):
        self.n_samples = n_samples
        self.len_sample = len_sample
        self.X = np.array(X)
        self.y = np.array(y)
        # Use the trailing shape instead of shape[1] so that 1-D targets
        # (the usual single-output regression case) are accepted as well.
        self.sample_X = np.zeros((n_samples, len_sample) + self.X.shape[1:])
        self.sample_y = np.zeros((n_samples, len_sample) + self.y.shape[1:])
        row_ids = np.arange(len(X))
        for i in range(n_samples):
            # One index draw per resample, reused for X and y to keep rows aligned.
            idx = np.random.choice(row_ids, len_sample, replace=True)
            self.sample_X[i] = self.X[idx]
            self.sample_y[i] = self.y[idx]

    def values(self):
        """Return the tuple ``(sample_X, sample_y)`` of resampled arrays."""
        return self.sample_X, self.sample_y
        


In [3]:
# Smoke check of Bootstrap: 5 resamples of 3 rows each, using the same toy
# 3x2 list as both features and targets.
X = [[1, 2], [3, 4], [5, 6]]
b = Bootstrap(X, X, 5, 3)


In [4]:
class Model:
    """Placeholder describing the estimator interface used in this notebook.

    Every method is a no-op that returns ``None``; concrete models are
    expected to provide real implementations with the same signatures.
    """

    def __init__(self):
        """Create the placeholder; nothing is initialised."""

    def fit(self, X, y):
        """Train on features ``X`` and targets ``y`` (no-op here)."""

    def predict(self, X):
        """Predict targets for ``X`` (no-op here, returns ``None``)."""

    def predict_params(self, X):
        """Counterpart of ``predict`` for parameters (no-op here, returns ``None``)."""
        

In [5]:
%run "./metrics/metrics"

In [6]:
class FunctionEvaluator():
    """Base class that stores the callable used by an evaluator.

    Parameters
    ----------
    func : callable, optional
        Function kept on the instance as ``self.function``. When omitted
        (``None``), a two-argument function that always returns ``0`` is used.
    """
    def __init__(self, func=None):
        # Identity check: `== None` would dispatch to a custom __eq__ and could
        # wrongly throw away a perfectly valid callable object.
        if func is None:
            self.function = lambda a, b: 0
        else:
            self.function = func

In [7]:
class Metric(FunctionEvaluator):
    """Evaluator that scores a model's predictions against the true targets."""

    def evaluate(self, model, X, y):
        """Return ``self.function(y, model.predict(X))``."""
        predictions = model.predict(X)
        return self.function(y, predictions)
    

In [8]:
class Characteristic(FunctionEvaluator):
    """Evaluator that looks only at a fitted model (no data arguments)."""

    def evaluate(self, model):
        """Placeholder; does nothing and returns ``None``. Subclasses override it."""
    

In [9]:
class Comparison(FunctionEvaluator):
    """Evaluator that compares a full model with a reduced-feature model."""

    def evaluate(self, full_model, model, full_X, y, mask):
        """Placeholder; does nothing and returns ``None``. Subclasses override it.

        NOTE(review): the ``Cp`` subclass and the callers in this notebook pass
        ``(full_model, model, full_X, reduced_X, y)`` positionally, which does
        not match the ``(..., y, mask)`` names declared here -- confirm which
        signature is intended.
        """

In [10]:
class RSS(Metric):
    """Metric backed by the ``residual_square_sum`` function."""

    def __init__(self):
        super().__init__(residual_square_sum)

In [11]:
class Adjusted_Determination_Coefficient(Metric):
    """Metric backed by ``determination_coefficient`` with its default options.

    Uses the inherited ``evaluate``, i.e. the function is called with only
    ``(y, predictions)``. Presumably the function's default third argument
    selects the adjusted variant -- TODO confirm against the metrics module.
    """

    def __init__(self):
        super().__init__(determination_coefficient)
        
class Determination_Coefficient(Metric):
    """Metric backed by ``determination_coefficient`` with its third argument set to ``False``."""

    def __init__(self):
        super().__init__(determination_coefficient)

    def evaluate(self, model, X, y):
        """Return ``self.function(y, model.predict(X), False)``."""
        predictions = model.predict(X)
        return self.function(y, predictions, False)

In [12]:
class VIF(Metric):
    """Metric backed by the ``variance_inflation_factor`` function.

    Relies on the inherited ``evaluate``, so the function receives
    ``(y, model.predict(X))``.
    """

    def __init__(self):
        super().__init__(variance_inflation_factor)

In [13]:
class BIC(Metric):
    """Metric backed by the ``bayesian_information_criterion`` function."""

    def __init__(self):
        super().__init__(bayesian_information_criterion)

    def evaluate(self, model, X, y):
        """Score ``model`` on ``(X, y)``, also passing the column count of ``X``.

        ``X`` must expose ``.shape``; its second dimension is forwarded as the
        third argument of the wrapped function.
        """
        n_columns = X.shape[1]
        predictions = model.predict(X)
        return self.function(y, predictions, n_columns)

In [14]:
class Cp(Comparison):
    """Comparison backed by the ``mallows_Cp`` function."""

    def __init__(self):
        super().__init__(mallows_Cp)

    def evaluate(self, full_model, model, full_X, reduced_X, y):
        """Compare the full model with a reduced-feature model.

        ``full_model`` predicts from ``full_X`` and ``model`` from
        ``reduced_X``. Both predictions, the targets ``y`` and the column
        count of ``reduced_X`` are handed to the wrapped function, whose
        result is returned.
        """
        full_prediction = full_model.predict(full_X)
        reduced_prediction = model.predict(reduced_X)
        n_reduced_columns = reduced_X.shape[1]
        return self.function(y, full_prediction, reduced_prediction, n_reduced_columns)
        
        

In [15]:
class Condition_Number(Characteristic):
    """Characteristic backed by the ``condition_number_xtx`` function."""

    def __init__(self):
        super().__init__(condition_number_xtx)

    def evaluate(self, model):
        """Return the wrapped function applied to ``model.coef_``.

        NOTE(review): the function name suggests it expects a design matrix,
        yet it is given the fitted coefficients -- confirm this is intended.
        """
        return self.function(model.coef_)

In [16]:
# NOTE(review): `model` is not defined anywhere above this cell, so the call
# below raises NameError (see the recorded output). Define or fit a model
# first, or remove this scratch cell.
c = Condition_Number()
c.evaluate(model)

NameError: name 'model' is not defined

In [88]:
class EvaluateInfo():
    """Shared set-up for evaluating a model under several feature masks.

    Parameters
    ----------
    model : object
        Estimator exposing ``fit(X, y)`` (and whatever the evaluators call).
    metrics : list, optional
        Evaluators that compare ``y`` with the model's predictions.
    comparisons : list, optional
        Evaluators that compare the full model with a reduced-feature model.
    characteristics : list, optional
        Evaluators that inspect the fitted model itself.
    """
    def __init__(self, model, metrics = [], comparisons = [], characteristics = []):
        self.model = model
        # Copy the lists: the `[]` defaults are created once and would otherwise
        # be shared (and mutated) across every instance built without arguments.
        self.characteristics = list(characteristics) # characteristic of the fitted model itself
        self.metrics = list(metrics) # compares y and y_pred
        self.comparisons = list(comparisons) # compares y, y_pred, y_pred_with_reduced_features

    def fit(self, X_train, y_train, X_test, y_test, masks = None, n_samples=20, len_sample=None):
        """Store the data, normalise the masks, fit the full model, allocate results.

        Parameters
        ----------
        X_train, y_train : training data; ``X_train`` must expose ``.shape``.
        X_test, y_test : held-out data, stored for use by subclasses.
        masks : array-like of bool-convertible values, optional
            Either a single 1-D feature mask or a 2-D array with one mask per
            row. Defaults to a single mask selecting every column of ``X_train``.
        n_samples : int
            Number of bootstrap samples the result arrays are sized for.
        len_sample : int, optional
            Rows per bootstrap sample; defaults to ``X_train.shape[1]``.

        Sets ``masks``, ``len_sample``, ``n_samples``, the four data
        attributes, ``full`` (a deep copy of the model fitted on all training
        features) and zero-filled ``result_metrics`` / ``result_comparisons``
        of shape ``(n_evaluators, n_masks, n_samples)``.
        """
        if masks is None:
            # Derive the width from the argument, not from a notebook-global `X`.
            masks = np.ones((1, X_train.shape[1]), dtype=bool)
        masks = np.array(masks, dtype=bool)
        if masks.ndim == 1:
            # A single mask becomes one row of length n_features.
            masks = masks.reshape((1, -1))
        self.masks = masks

        if len_sample is None:
            # NOTE(review): this default is the number of *columns* of X_train;
            # a bootstrap sample size would normally be a row count -- confirm.
            len_sample = X_train.shape[1]
        self.len_sample = len_sample

        self.n_samples = n_samples

        self.X_train = X_train
        self.y_train = y_train

        self.X_test = X_test
        self.y_test = y_test

        # Some estimators return None from fit(); in that case snapshot the
        # estimator itself rather than storing a copy of None.
        fitted = self.model.fit(self.X_train, self.y_train)
        self.full = deepcopy(self.model if fitted is None else fitted)

        self.result_metrics = np.zeros((len(self.metrics), len(self.masks), self.n_samples))
        self.result_comparisons = np.zeros((len(self.comparisons), len(self.masks), self.n_samples))

    def returnMetrics(self):
        """Return the metric results array (available after ``fit``)."""
        return self.result_metrics

    def returnComparisons(self):
        """Return the comparison results array (available after ``fit``)."""
        return self.result_comparisons
    

In [89]:
from copy import deepcopy
class EvaluateStaticInfo(EvaluateInfo):
    """Fit one model per mask on the training data, score it on bootstrap samples.

    The models are trained once ("static"); only the evaluation data varies:
    each metric/comparison is computed on ``n_samples`` bootstrap resamples of
    the test set.
    """
    def __init__(self, model, metrics = [], comparisons = []):
        # Pass copies so the shared default lists are never stored on an instance.
        super(EvaluateStaticInfo, self).__init__(model, list(metrics), list(comparisons), [])

    def __eval(self):
        """Fill ``result_metrics`` / ``result_comparisons`` and return them."""
        model = self.model

        sample_X, sample_y = self.boot.values()

        # One fitted snapshot per mask, trained on the masked training columns.
        self.models = []
        for mask in self.masks:
            fitted = model.fit((self.X_train.T[mask]).T, self.y_train)
            # fit() may return None; snapshot the estimator itself in that case.
            self.models.append(deepcopy(model if fitted is None else fitted))

        for (m, mask) in enumerate(self.masks):
            for it in range(self.n_samples):
                reduced_X_cur = (sample_X[it].T[mask]).T
                for (i, metric) in enumerate(self.metrics):
                    self.result_metrics[i][m][it] = metric.evaluate(self.models[m], reduced_X_cur, sample_y[it])
                for (i, comp) in enumerate(self.comparisons):
                    self.result_comparisons[i][m][it] = comp.evaluate(self.full, self.models[m], sample_X[it],
                                                                      reduced_X_cur, sample_y[it])

        # The original returned bare, undefined names here, which made every
        # call end in NameError; the results live on the instance.
        return self.result_metrics, self.result_comparisons

    def fit(self, X_train, y_train, X_test, y_test, masks = None, n_samples=20, len_sample=None):
        """Run the base set-up, bootstrap the test set and evaluate every mask.

        Arguments are forwarded unchanged to ``EvaluateInfo.fit``. Results are
        read back through ``returnMetrics`` / ``returnComparisons``.
        """
        super(EvaluateStaticInfo, self).fit(X_train, y_train, X_test, y_test, masks, n_samples, len_sample)
        self.boot = Bootstrap(self.X_test, self.y_test, self.n_samples, self.len_sample)
        self.__eval()
        

In [90]:
from copy import deepcopy
class EvaluateDynamicInfo(EvaluateInfo):
    """Refit the model on every bootstrap sample and score it on the test set.

    For each mask and each bootstrap resample the model is fitted anew
    ("dynamic") on the masked resample, then every metric, characteristic and
    comparison is evaluated against the (masked) test data.
    """
    def __init__(self, model, metrics = [], comparisons = [], characteristics = []):
        # Pass copies so the shared default lists are never stored on an instance.
        super(EvaluateDynamicInfo, self).__init__(model, list(metrics), list(comparisons),
                                                  list(characteristics))

    def __eval(self):
        """Fill the metric, characteristic and comparison result arrays."""
        model = self.model

        sample_X, sample_y = self.boot.values()

        # Indexed as (characteristic, mask, bootstrap sample), like the other results.
        self.result_characteristics = np.zeros((len(self.characteristics), len(self.masks), self.n_samples))

        for (m, mask) in enumerate(self.masks):
            reduced_X_test = (self.X_test.T[mask]).T
            for it in range(self.n_samples):
                # Refit in place on the masked columns of this resample.
                model.fit((sample_X[it].T[mask]).T, sample_y[it])
                for (i, metric) in enumerate(self.metrics):
                    self.result_metrics[i][m][it] = metric.evaluate(model, reduced_X_test, self.y_test)

                for (i, char) in enumerate(self.characteristics):
                    self.result_characteristics[i][m][it] = char.evaluate(model)

                for (i, comp) in enumerate(self.comparisons):
                    self.result_comparisons[i][m][it] = comp.evaluate(self.full, model, self.X_test,
                                                                      reduced_X_test, self.y_test)

    def fit(self, X_train, y_train, X_test, y_test, masks = None, n_samples=20, len_sample=None):
        """Run the base set-up, bootstrap the test set and evaluate every mask.

        Arguments are forwarded unchanged to ``EvaluateInfo.fit``.
        NOTE(review): the resamples used for refitting are drawn from
        ``X_test`` / ``y_test``, not from the training data -- confirm intended.
        """
        super(EvaluateDynamicInfo, self).fit(X_train, y_train, X_test, y_test, masks, n_samples, len_sample)
        self.boot = Bootstrap(self.X_test, self.y_test, self.n_samples, self.len_sample)
        self.__eval()

    def returnCharacteristics(self):
        """Return the characteristic results array (available after ``fit``)."""
        return self.result_characteristics

In [91]:
from sklearn.linear_model import LinearRegression
# Unfitted estimator handed to the evaluator in the cells below.
mo = LinearRegression()

In [92]:
import scipy.stats as sps

# Synthetic linear data: two 10x5 Gaussian design matrices sharing one 5x3
# coefficient matrix, with Gaussian noise (scale 0.1) added to the targets.
XX = sps.norm.rvs(size=(10, 5))
X2 = sps.norm.rvs(size=(10, 5))
zz = sps.uniform.rvs(size=(5, 3))
yy = np.matmul(XX, zz) + sps.norm.rvs(scale=0.1, size=(10, 3))
y2 = np.matmul(X2, zz) + sps.norm.rvs(scale=0.1, size=(10, 3))

# Feature mask keeping only the first and last columns.
maskk = np.ones(XX.shape[1])
maskk[1:4] = 0
print(maskk)

[ 1.  0.  0.  0.  1.]


In [93]:
# One instance of every evaluator defined above: five metrics,
# one model characteristic and one full-vs-reduced comparison.
metric1 = RSS()
metric2 = Adjusted_Determination_Coefficient()
metric3 = Determination_Coefficient()
metric4 = VIF()
metric5 = BIC()
char1 = Condition_Number()
comp1 = Cp()

In [94]:
# Argument order is (model, metrics, comparisons, characteristics).
info = EvaluateDynamicInfo(mo, [metric1, metric2, metric3, metric4, metric5],
                           [comp1], [char1])

In [95]:
# Train on (XX, yy), use (X2, y2) as the test set, evaluate the single mask
# `maskk` with the default 20 bootstrap samples.
info.fit(XX, yy, X2, y2, masks=[maskk])

In [96]:
# Result is indexed as (comparison, mask, bootstrap sample).
info.returnComparisons()

array([[[  1.99595725e+01,   1.76834590e+03,   3.59855593e+01,
           3.16836763e+02,   5.91651541e+01,   5.20435897e+03,
           8.14823974e+04,   1.69382504e+01,   3.94164590e+01,
           5.81383293e+03,   1.47671362e+04,   3.98990714e+01,
           2.90024117e+02,   2.01017844e+04,   2.02453433e+01,
           1.80025596e+01,   2.21682228e+01,   3.21612057e+01,
           2.06164804e+02,   2.25273143e+01]]])