In [2]:
%run "./metrics/metrics"

In [3]:
import numpy as np

class Bootstrap():
    def __init__(self, X, y, n_samples, len_sample):
        self.n_samples = n_samples
        self.len_sample = len_sample
        self.X = np.array(X)
        self.y = np.array(y)
        self.sample_X = np.zeros((n_samples, len_sample, self.X.shape[1]))
        self.sample_y = np.zeros((n_samples, len_sample, self.y.shape[1]))
        for i in range(n_samples):
            idx = np.random.choice(np.arange(len(X)), len_sample, replace=True)
            self.sample_X[i] = self.X[idx]
            self.sample_y[i] = self.y[idx]

    def values(self):
        return self.sample_X, self.sample_y
        


In [4]:
X = [[1, 2], [3, 4], [5, 6]]
b = Bootstrap(X, X, 5, 3)


In [185]:
class Model():
    def __init__(self):
        pass
    def fit(self, X, y):
        pass
    def predict(self, X):
        pass
    def predict_params(self, X):
        pass
        

In [186]:
%run "./metrics/metrics"

In [187]:
class FunctionEvaluator():
    def __init__(self, func=None):
        if func == None:
            self.function = lambda a, b: 0
        else:
            self.function = func

In [204]:
class Metric(FunctionEvaluator):
    def evaluate(self, model, X, y):
        z = self.function(y, model.predict(X))
        #print(y, model.predict(X))
        #print(z)
        return z
    

In [205]:
class Characteristic(FunctionEvaluator):
    def evaluate(self, model):
        pass
    

In [206]:
class Comparison(FunctionEvaluator):
    def evaluate(self, full_model, model, full_X, y, mask):
        pass

In [207]:
class RSS(Metric):
    def __init__(self):
        super(RSS, self).__init__(residual_square_sum)

In [208]:
class Adjusted_Determination_Coefficient(Metric):
    def __init__(self):
        super(Adjusted_Determination_Coefficient, self).__init__(determination_coefficient)
        
class Determination_Coefficient(Metric):
    def __init__(self):
        super(Determination_Coefficient, self).__init__(determination_coefficient)
    def evaluate(self, model, X, y):
        z = self.function(y, model.predict(X), False)
        #print("Z", z)
        return z

In [209]:
class VIF(Metric):
    def __init__(self):
        super(VIF, self).__init__(variance_inflation_factor)

In [210]:
class BIC(Metric):
    def __init__(self):
        super(BIC, self).__init__(bayesian_information_criterion)
    def evaluate(self, model, X, y):
        num_features = X.shape[1]
        z = self.function(y, model.predict(X), num_features)
        #print("Z", z)
        return z

In [211]:
class Cp(Comparison):
    def __init__(self):
        super(Cp, self).__init__(mallows_Cp)
    def evaluate(self, full_model, model, full_X, reduced_X, y):
        y_full = full_model.predict(full_X)
        y_p = model.predict(reduced_X)
        p = reduced_X.shape[1]
        z = self.function(y, y_full, y_p, p)
        #print(*mask)
        #print(*full_model.coef_)
        #print(*model.coef_)
        #print("A", y_full, y_p, z)
        return z
        
        

In [212]:
class Condition_Number(Characteristic):
    def __init__(self):
        super(Condition_Number, self).__init__(condition_number_xtx)
    def evaluate(self, model):
        X = model.coef_
        return self.function(X)

In [213]:
c = Condition_Number()
c.evaluate(model)

NameError: name 'model' is not defined

In [247]:
class EvaluateInfo():
    def __init__(self, model, metrics = [], comparisons = [], characteristics = []):
        self.model = model
        self.characteristics = characteristics # characteristic of the fitted model itself
        self.metrics = metrics # compares y and y_pred
        self.comparisons = comparisons #compares y, y_pred, y_pred_with_reduced_features
        
    def _initResult(self):
        self.result_metrics = np.zeros((len(self.metrics), len(self.masks), self.n_samples))
        self.result_comparisons = np.zeros((len(self.comparisons), len(self.masks), self.n_samples))
        self.result_characteristics = np.zeros((len(self.characteristics), len(self.masks), self.n_samples))
    
    def _calcQuality(self):     
        self.quality_metrics = np.zeros((len(self.metrics), len(self.masks)))
        self.quality_comparisons = np.zeros((len(self.comparisons), len(self.masks)))
        self.quality_characteristics = np.zeros((len(self.characteristics), len(self.masks)))
        model = self.model
        for (m, mask) in enumerate(self.masks):
            print(mask)
            reduced_X_test = self.X_test[:, mask]
            print(reduced_X_test.shape)
            model.fit(self.X_train.T[mask].T, self.y_train)
            for (i, metric) in enumerate(self.metrics):
                print("E", str(metric))
                self.quality_metrics[i][m] = metric.evaluate(model, reduced_X_test, self.y_test)

            for (i, char) in enumerate(self.characteristics):
                self.quality_characteristics[i][m] = char.evaluate(model)

            for (i, comp) in enumerate(self.comparisons):
                self.quality_comparisons[i][m] = comp.evaluate(self.full, model, self.X_test,
                                                          reduced_X_test, self.y_test)
        
        
    def fit(self, X_train, y_train, X_test, y_test, masks = None, n_samples=20, len_sample=None):
        if masks is None:
            masks = np.ones((1, X.shape[1]), dtype=bool)
        masks = np.array(masks, dtype=bool)
        if len(masks.shape) == 1:
            masks = masks.reshape((1, len(masks.shape)))
        self.masks = masks
        
        if len_sample is None:
            len_sample = X_train.shape[1]
        self.len_sample = len_sample
        
        self.n_samples = n_samples

        self.X_train = X_train
        self.y_train = y_train

        self.X_test = X_test
        self.y_test = y_test
        
        self.full = deepcopy(self.model.fit(self.X_train, self.y_train))
        
        self._initResult()
        self._calcQuality()
    
    def qualityMetrics(self):
        return self.quality_metrics
    
    def qualityComparisons(self):
        return self.quality_comparisons
    
    def qualityCharacteritics(self):
        return self.quality_characteristics
        
    def resultMetrics(self):
        return self.result_metrics
    
    def resultComparisons(self):
        return self.result_comparisons

    def resultCharacteristics(self):
        return self.result_comparisons
    

In [248]:
from copy import deepcopy
class EvaluateStaticInfo(EvaluateInfo):
    def __init__(self, model, metrics = [], comparisons = []):
        super(EvaluateStaticInfo, self).__init__(model, metrics, comparisons, [])
    
    def __eval(self):
        model = self.model
        
        sample_X, sample_y = self.boot.values()
        
        self.models = []
        for mask in self.masks:
            self.models += [deepcopy(model.fit(self.X_train[:, mask], self.y_train))]
        
        for (m, mask) in enumerate(self.masks):
            for it in range(self.n_samples):
                reduced_X_cur = (sample_X[it].T[mask]).T
                for (i, metric) in enumerate(self.metrics):
                    self.result_metrics[i][m][it] = metric.evaluate(self.models[m], reduced_X_cur, sample_y[it])
                for(i, comp) in enumerate(self.comparisons):
                    self.result_comparisons[i][m][it] = comp.evaluate(self.full, self.models[m], sample_X[it], 
                                                              reduced_X_cur, sample_y[it])
                
        return result_metrics, result_comparisons

    def fit(self, X_train, y_train, X_test, y_test, masks = None, n_samples=20, len_sample=None):
        super(EvaluateStaticInfo, self).fit(X_train, y_train, X_test, y_test, masks, n_samples, len_sample)
        self.boot = Bootstrap(self.X_test, self.y_test, self.n_samples, self.len_sample)
        self.__eval()
        

In [249]:
from copy import deepcopy
class EvaluateDynamicInfo(EvaluateInfo):
    def __init__(self, model, metrics = [], comparisons = [], characteristics = []):
        super(EvaluateDynamicInfo, self).__init__(model, metrics, comparisons, characteristics)
    
    def __eval(self):
        model = self.model
        
        sample_X, sample_y = self.boot.values()
            
        for (m, mask) in enumerate(self.masks):
            reduced_X_test = (self.X_test.T[mask]).T
            for it in range(self.n_samples):
                model.fit((sample_X[it].T[mask]).T, sample_y[it])
                for (i, metric) in enumerate(self.metrics):
                    self.result_metrics[i][m][it] = metric.evaluate(model, reduced_X_test, self.y_test)

                for (i, char) in enumerate(self.characteristics):
                    self.result_characteristics[i][m][it] = char.evaluate(model)

                for (i, comp) in enumerate(self.comparisons):
                    self.result_comparisons[i][m][it] = comp.evaluate(self.full, model, self.X_test,
                                                              reduced_X_test, self.y_test)
                

    def fit(self, X_train, y_train, X_test, y_test, masks = None, n_samples=20, len_sample=None):
        super(EvaluateDynamicInfo, self).fit(X_train, y_train, X_test, y_test, masks, n_samples, len_sample)
        self.boot = Bootstrap(self.X_test, self.y_test, self.n_samples, self.len_sample)
        self.__eval()
    

In [250]:
from sklearn.linear_model import LinearRegression
mo = LinearRegression()

In [251]:
import scipy.stats as sps

XX = sps.norm.rvs(size=(10, 5))
X2 = sps.norm.rvs(size=(10, 5))
zz = sps.uniform.rvs(size=(5, 3))
yy = XX @ zz + sps.norm.rvs(scale=0.1, size=(10, 3))
y2 = X2 @ zz + sps.norm.rvs(scale=0.1, size=(10, 3))
maskk = np.ones(XX.shape[1])
maskk[2]=0
maskk[1] = 0
maskk[3] = 0
print(maskk)

[ 1.  0.  0.  0.  1.]


In [252]:
metric1 = RSS()
metric2 = Adjusted_Determination_Coefficient()
metric3 = Determination_Coefficient()
metric4 = VIF()
metric5 = BIC()
char1 = Condition_Number()
comp1 = Cp()

In [253]:
info = EvaluateDynamicInfo(mo, [metric1, metric2, metric3, metric4, metric5],
                           [comp1], [char1])

In [254]:
info.fit(XX, yy, X2, y2, masks=[np.ones(len(XX.T)), maskk])

[ True  True  True  True  True]
(10, 5)
E <__main__.RSS object at 0x7f39a5c12898>
[[ 0.61043289  0.42940392  0.30208243]
 [-0.51260997 -1.05712176 -0.19615899]
 [-1.06669633 -0.51003652 -0.73184268]
 [ 0.24999018 -0.20660187  0.90761798]
 [-2.31407292 -2.72069739 -1.65572524]
 [-0.72806366 -1.00912611  0.19320815]
 [-1.9850471  -1.64866793 -1.34278288]
 [ 0.41849935 -0.52536574  0.31202744]
 [-0.05716382  0.19365284  0.11948029]
 [ 0.10978794  0.29124653 -0.42642579]] [[ 0.53944451  0.47704835  0.29275162]
 [-0.4331041  -1.18479855 -0.09837907]
 [-1.29367093 -0.59439851 -0.95479911]
 [ 0.48991548 -0.35653476  0.74962713]
 [-1.99610116 -2.58627124 -1.72228596]
 [-0.46770367 -1.10796795  0.08437397]
 [-1.88149821 -1.65809712 -1.28923566]
 [ 0.45451316 -0.68221561  0.21394022]
 [ 0.19450988  0.27403349  0.39666803]
 [ 0.25607192  0.3977288  -0.39190902]]
0.695698837322
E <__main__.Adjusted_Determination_Coefficient object at 0x7f39a5c12a20>
[[ 0.61043289  0.42940392  0.30208243]
 [-0.5126

In [255]:
info.resultComparisons()

array([[[  28.28827876,    2.44252138,   15.6082432 ,   26.34142527,
            1.14826606,   37.49813849,    7.771687  ,    3.15307612,
           39.66553646,    1.17931302,   19.58739932,   24.38826303,
           24.81187679,    8.95238011,   23.43012211,   65.24380347,
           15.41600477,  145.16310535,   25.30422979,   25.05471643],
        [  22.56001391,   48.65172046,   24.40923774,   14.68192206,
           18.92194688,   30.66344822,   49.68942155,   23.8948502 ,
          705.9606675 ,   39.42464372,   28.34470398,   20.39648395,
           45.60116789,   62.28217161,  178.04552246,   41.80142262,
           71.0938821 ,   27.73607313,   15.30179027,   31.34350093]]])

In [256]:
info.resultMetrics()


array([[[  1.96801226e+01,   1.69925929e+00,   1.08586366e+01,
           1.83256989e+01,   7.98847362e-01,   2.60874113e+01,
           5.40675361e+00,   2.19359139e+00,   2.75952676e+01,
           8.20446700e-01,   1.36269309e+01,   1.69668862e+01,
           1.72615938e+01,   6.22816043e+00,   1.63003087e+01,
           4.53900382e+01,   1.07248966e+01,   1.00989804e+02,
           1.76041232e+01,   1.74305371e+01],
        [  1.98691685e+01,   3.80211384e+01,   2.11556713e+01,
           1.43883891e+01,   1.73381695e+01,   2.55067183e+01,
           3.87430658e+01,   2.07978125e+01,   4.95310209e+02,
           3.16018718e+01,   2.38935706e+01,   1.83640032e+01,
           3.58988725e+01,   4.75038274e+01,   1.28040256e+02,
           3.32553941e+01,   5.36341241e+01,   2.34701469e+01,
           1.48196307e+01,   2.59798302e+01]],

       [[  3.69946203e-01,   9.45598674e-01,   6.52363688e-01,
           4.13307711e-01,   9.74425118e-01,   1.64818590e-01,
           8.26904247e-0

In [257]:
info.qualityMetrics()

array([[  0.69569884,  28.83147153],
       [  0.97772739,   0.07696825],
       [  0.97136379,  -0.18675511],
       [ 44.89819581,   1.08338635],
       [ 12.2086243 ,  33.43664172]])

In [258]:
info.qualityCharacteritics()

array([[ 72.27478848,   6.1855962 ]])

In [259]:
info.qualityComparisons()

array([[  1.        ,  35.44246043]])