In [1]:
from metrics import *
from bootstrap import *
from metric_classes import *

<function condition_number_xtx at 0x7f1c540d6048>


In [2]:
str(Cp())

"Mallow's Cp"

In [3]:
c = Condition_Number()
#c.evaluate(model)

In [27]:
class EvaluateInfo():
    def __init__(self, model, metrics = [], comparisons = [], characteristics = []):
        self.model = model
        self.characteristics = characteristics # characteristic of the fitted model itself
        self.metrics = metrics # compares y and y_pred
        self.comparisons = comparisons #compares y, y_pred, y_pred_with_reduced_features
        
        all_names = [str(el) for el in metrics] +\
                     [str(el) for el in comparisons] +\
                     [str(el) for el in characteristics]
        
        self.names = dict(zip(all_names, [i for i in range(len(all_names))]))
        print(self.names)
        
    def _calcQuality(self):     
        model = self.model
        for (m, mask) in enumerate(self.masks):
            reduced_X_test = self.X_test[:, mask]
            model.fit(self.X_train.T[mask].T, self.y_train)
            for (i, metric) in enumerate(self.metrics):
                self.quality[i][m] = metric.evaluate(model, reduced_X_test, self.y_test)

            for (i, comp) in enumerate(self.comparisons):
                ind = i + len(self.metrics)
                self.quality[ind][m] = comp.evaluate(self.full, model, self.X_test,
                                                                 reduced_X_test, self.y_test)
            for (i, char) in enumerate(self.characteristics):
                ind = i + len(self.metrics) + len(self.comparisons)
                self.quality[ind][m] = char.evaluate(model)
        
    def fit(self, X_train, y_train, X_test, y_test, masks = None, n_samples=20, len_sample=None):
        if masks is None:
            masks = np.ones((1, X.shape[1]), dtype=bool)
        masks = np.array(masks, dtype=bool)
        if len(masks.shape) == 1:
            masks = masks.reshape((1, len(masks.shape)))
        self.masks = masks
        self.len_sample = len_sample
        
        self.n_samples = n_samples

        self.X_train = X_train
        self.y_train = y_train

        self.X_test = X_test
        self.y_test = y_test
        
        self.full = deepcopy(self.model.fit(self.X_train, self.y_train))
        
        self.result = np.zeros((len(self.metrics) + len(self.comparisons) 
                                + len(self.characteristics), len(self.masks), self.n_samples))
        
        self.quality = np.zeros((len(self.metrics) + len(self.comparisons) 
                                + len(self.characteristics), len(self.masks)))
        self._calcQuality()
        
    
    
    def get_quality(self):
        return self.quality
    
    def get_result(self):
        return self.result
    def _get_mapped(self, array):
        return dict(zip(self.names.keys(), list(array)))
    
    def get_mapped_quality(self):
        return self._get_mapped(self.quality)
    
    def get_mapped_result(self):
        return self._get_mapped(self.result)
    
    def _by_name(self, name, array):
        if name in self.names:
            return array[self.names[name]]
        else:
            raise NameError('Invalid argument')
        
    def quality_by_name(self, name):
        return self._by_name(name, self.quality)
            
    def result_by_name(self, name):
        return self._by_name(name, self.result)
            
    

In [28]:
from copy import deepcopy
class EvaluateStaticInfo(EvaluateInfo):
    def __init__(self, model, metrics = [], comparisons = []):
        super(EvaluateStaticInfo, self).__init__(model, metrics, comparisons, [])
    
    def __eval(self):
        model = self.model
        
        sample_X, sample_y = self.boot.values()
        
        self.models = []
        for mask in self.masks:
            self.models += [deepcopy(model.fit(self.X_train[:, mask], self.y_train))]
        
        for (m, mask) in enumerate(self.masks):
            for it in range(self.n_samples):
                reduced_X_cur = (sample_X[it].T[mask]).T
                for (i, metric) in enumerate(self.metrics):
                    self.result[i][m][it] = metric.evaluate(self.models[m], reduced_X_cur, sample_y[it])
                for(i, comp) in enumerate(self.comparisons):
                    ind = i + len(self.metrics)
                    self.result[ind][m][it] = comp.evaluate(self.full, self.models[m], sample_X[it], 
                                                              reduced_X_cur, sample_y[it])

    def fit(self, X_train, y_train, X_test, y_test, masks = None, n_samples=20, len_sample=None):
        super(EvaluateStaticInfo, self).fit(X_train, y_train, X_test, y_test, masks, n_samples, len_sample)
        if self.len_sample is None:
            self.len_sample = len(X_test)
        self.boot = Bootstrap(self.X_test, self.y_test, self.n_samples, self.len_sample)
        self.__eval()
        

In [50]:
from copy import deepcopy
class EvaluateDynamicInfo(EvaluateInfo):
    def __init__(self, model, metrics = [], comparisons = [], characteristics = []):
        super(EvaluateDynamicInfo, self).__init__(model, metrics, comparisons, characteristics)
    
    def __eval(self):
        model = self.model
        
        sample_X, sample_y = self.boot.values()
            
        for (m, mask) in enumerate(self.masks):
            reduced_X_test = (self.X_test.T[mask]).T
            for it in range(self.n_samples):
                model.fit((sample_X[it].T[mask]).T, sample_y[it])
                for (i, metric) in enumerate(self.metrics):
                    #print(i)
                    self.result[i][m][it] = metric.evaluate(model, reduced_X_test, self.y_test)
                
                for (i, comp) in enumerate(self.comparisons):
                    ind = i + len(self.metrics) 
                    #print(i)
                    self.result[ind][m][it] = comp.evaluate(self.full, model, self.X_test,
                                                              reduced_X_test, self.y_test)
                for (i, char) in enumerate(self.characteristics):
                    ind = i + len(self.metrics) + len(self.comparisons)
                    #print(i)
                    self.result[ind][m][it] = char.evaluate(model)

                
                

    def fit(self, X_train, y_train, X_test, y_test, masks = None, n_samples=20, len_sample=None):
        super(EvaluateDynamicInfo, self).fit(X_train, y_train, X_test, y_test, masks, n_samples, len_sample)
        if self.len_sample is None:
            self.len_sample = len(X_test)
        self.boot = Bootstrap(self.X_test, self.y_test, self.n_samples, self.len_sample)
        self.__eval()
    

In [51]:
from sklearn.linear_model import LinearRegression
mo = LinearRegression()

In [52]:
import scipy.stats as sps

XX = sps.norm.rvs(size=(10, 5))
X2 = sps.norm.rvs(size=(10, 5))
zz = sps.uniform.rvs(size=(5, 3))
yy = XX @ zz + sps.norm.rvs(scale=0.1, size=(10, 3))
y2 = X2 @ zz + sps.norm.rvs(scale=0.1, size=(10, 3))
maskk = np.ones(XX.shape[1])
maskk[2]=0
maskk[1] = 0
maskk[3] = 0
print(maskk)

metric1 = RSS()
metric2 = Adjusted_Determination_Coefficient()
metric3 = Determination_Coefficient()
metric4 = VIF()
metric5 = BIC()
char1 = Condition_Number()
comp1 = Cp()

[ 1.  0.  0.  0.  1.]


In [53]:
info = EvaluateDynamicInfo(mo, [metric1, metric2, metric3, metric4, metric5],
                           [comp1])#, [char1])

{'Adj Det Coef': 1, "Mallow's Cp": 5, 'Det Coef': 2, 'RSS': 0, 'VIF': 3, 'BIC': 4}


In [54]:
info.fit(XX, yy, X2, y2, masks=[np.ones(len(XX.T)), maskk])

In [55]:
info.get_result()

array([[[  5.83169687e+00,   3.90827961e+00,   2.40073626e+00,
           5.24101326e+00,   2.84350862e-01,   7.63214112e-01,
           1.63806976e+00,   1.31195148e-01,   4.13042515e-01,
           6.39928177e+00,   3.36361780e+00,   2.57514549e-01,
           2.62735282e+01,   9.68538040e-02,   8.59152430e-01,
           1.65832923e-01,   1.26038410e-01,   4.13430726e+00,
           6.89977831e-01,   2.65776272e-01],
        [  2.13529473e+01,   4.97153127e+01,   1.80775265e+02,
           2.00827438e+01,   2.27988409e+01,   1.79851162e+01,
           2.13808536e+01,   1.70478065e+01,   1.83013894e+01,
           3.38263252e+01,   4.39351852e+01,   1.94861223e+01,
           2.60001972e+01,   1.68744087e+01,   2.97064027e+01,
           1.93756006e+01,   1.80588326e+01,   1.99846142e+01,
           2.29526241e+01,   2.02492978e+01]],

       [[  8.13140885e-01,   8.74770983e-01,   9.23075656e-01,
           8.32067557e-01,   9.90888835e-01,   9.75545109e-01,
           9.47513001e-0

In [56]:
info.get_quality()

array([[  3.24678562,  52.53182276],
       [  0.89596656,  -0.68322362],
       [  0.86624272,  -1.16414465],
       [  9.61229387,   0.59409813],
       [ 14.75971109,  57.13699294],
       [  1.        ,  10.17964007]])

In [57]:
info.result_by_name("Mallow's Cp")

array([[  1.79614473e+00,   1.20373812e+00,   7.39419396e-01,
          1.61421599e+00,   8.75791922e-02,   2.35067602e-01,
          5.04520456e-01,   4.04077028e-02,   1.27215826e-01,
          1.97095913e+00,   1.03598395e+00,   7.93136902e-02,
          8.09216600e+00,   2.98306742e-02,   2.64616310e-01,
          5.10760308e-02,   3.88194433e-02,   1.27335394e+00,
          2.12511053e-01,   8.18582754e-02],
       [  5.76642176e-01,   9.31216362e+00,   4.96782263e+01,
          1.85423410e-01,   1.02197298e+00,  -4.60639459e-01,
          5.85237248e-01,  -7.49327968e-01,  -3.63228267e-01,
          4.41840426e+00,   7.53190209e+00,   1.66583043e-03,
          2.00798090e+00,  -8.02733950e-01,   3.14948080e+00,
         -3.23745366e-02,  -4.37935019e-01,   1.55199784e-01,
          1.06933772e+00,   2.36721544e-01]])

In [58]:
info.get_mapped_result()

{'Adj Det Coef': array([[  5.83169687e+00,   3.90827961e+00,   2.40073626e+00,
           5.24101326e+00,   2.84350862e-01,   7.63214112e-01,
           1.63806976e+00,   1.31195148e-01,   4.13042515e-01,
           6.39928177e+00,   3.36361780e+00,   2.57514549e-01,
           2.62735282e+01,   9.68538040e-02,   8.59152430e-01,
           1.65832923e-01,   1.26038410e-01,   4.13430726e+00,
           6.89977831e-01,   2.65776272e-01],
        [  2.13529473e+01,   4.97153127e+01,   1.80775265e+02,
           2.00827438e+01,   2.27988409e+01,   1.79851162e+01,
           2.13808536e+01,   1.70478065e+01,   1.83013894e+01,
           3.38263252e+01,   4.39351852e+01,   1.94861223e+01,
           2.60001972e+01,   1.68744087e+01,   2.97064027e+01,
           1.93756006e+01,   1.80588326e+01,   1.99846142e+01,
           2.29526241e+01,   2.02492978e+01]]),
 'BIC': array([[  1.79614473e+00,   1.20373812e+00,   7.39419396e-01,
           1.61421599e+00,   8.75791922e-02,   2.35067602e-01,
 