In [1]:
from metrics import *
from bootstrap import *
from metric_classes import *

<function condition_number_xtx at 0x7f1c540d6048>


In [2]:
str(Cp())

"Mallow's Cp"

In [3]:
c = Condition_Number()
#c.evaluate(model)

In [27]:
class EvaluateInfo():
    def __init__(self, model, metrics = [], comparisons = [], characteristics = []):
        self.model = model
        self.characteristics = characteristics # characteristic of the fitted model itself
        self.metrics = metrics # compares y and y_pred
        self.comparisons = comparisons #compares y, y_pred, y_pred_with_reduced_features
        
        all_names = [str(el) for el in metrics] +\
                     [str(el) for el in comparisons] +\
                     [str(el) for el in characteristics]
        
        self.names = dict(zip(all_names, [i for i in range(len(all_names))]))
        print(self.names)
        
    def _calcQuality(self):     
        model = self.model
        for (m, mask) in enumerate(self.masks):
            reduced_X_test = self.X_test[:, mask]
            model.fit(self.X_train.T[mask].T, self.y_train)
            for (i, metric) in enumerate(self.metrics):
                self.quality[i][m] = metric.evaluate(model, reduced_X_test, self.y_test)

            for (i, comp) in enumerate(self.comparisons):
                ind = i + len(self.metrics)
                self.quality[ind][m] = comp.evaluate(self.full, model, self.X_test,
                                                                 reduced_X_test, self.y_test)
            for (i, char) in enumerate(self.characteristics):
                ind = i + len(self.metrics) + len(self.comparisons)
                self.quality[ind][m] = char.evaluate(model)
        
    def fit(self, X_train, y_train, X_test, y_test, masks = None, n_samples=20, len_sample=None):
        if masks is None:
            masks = np.ones((1, X.shape[1]), dtype=bool)
        masks = np.array(masks, dtype=bool)
        if len(masks.shape) == 1:
            masks = masks.reshape((1, len(masks.shape)))
        self.masks = masks
        self.len_sample = len_sample
        
        self.n_samples = n_samples

        self.X_train = X_train
        self.y_train = y_train

        self.X_test = X_test
        self.y_test = y_test
        
        self.full = deepcopy(self.model.fit(self.X_train, self.y_train))
        
        self.result = np.zeros((len(self.metrics) + len(self.comparisons) 
                                + len(self.characteristics), len(self.masks), self.n_samples))
        
        self.quality = np.zeros((len(self.metrics) + len(self.comparisons) 
                                + len(self.characteristics), len(self.masks)))
        self._calcQuality()
        
    
    
    def get_quality(self):
        return self.quality
    
    def get_result(self):
        return self.result
    def _get_mapped(self, array):
        return dict(zip(self.names.keys(), list(array)))
    
    def get_mapped_quality(self):
        return self._get_mapped(self.quality)
    
    def get_mapped_result(self):
        return self._get_mapped(self.result)
    
    def _by_name(self, name, array):
        if name in self.names:
            return array[self.names[name]]
        else:
            raise NameError('Invalid argument')
        
    def quality_by_name(self, name):
        return self._by_name(name, self.quality)
            
    def result_by_name(self, name):
        return self._by_name(name, self.result)
            
    

In [28]:
from copy import deepcopy
class EvaluateStaticInfo(EvaluateInfo):
    def __init__(self, model, metrics = [], comparisons = []):
        super(EvaluateStaticInfo, self).__init__(model, metrics, comparisons, [])
    
    def __eval(self):
        model = self.model
        
        sample_X, sample_y = self.boot.values()
        
        self.models = []
        for mask in self.masks:
            self.models += [deepcopy(model.fit(self.X_train[:, mask], self.y_train))]
        
        for (m, mask) in enumerate(self.masks):
            for it in range(self.n_samples):
                reduced_X_cur = (sample_X[it].T[mask]).T
                for (i, metric) in enumerate(self.metrics):
                    self.result[i][m][it] = metric.evaluate(self.models[m], reduced_X_cur, sample_y[it])
                for(i, comp) in enumerate(self.comparisons):
                    ind = i + len(self.metrics)
                    self.result[ind][m][it] = comp.evaluate(self.full, self.models[m], sample_X[it], 
                                                              reduced_X_cur, sample_y[it])

    def fit(self, X_train, y_train, X_test, y_test, masks = None, n_samples=20, len_sample=None):
        super(EvaluateStaticInfo, self).fit(X_train, y_train, X_test, y_test, masks, n_samples, len_sample)
        if self.len_sample is None:
            self.len_sample = len(X_test)
        self.boot = Bootstrap(self.X_test, self.y_test, self.n_samples, self.len_sample)
        self.__eval()
        

In [41]:
from copy import deepcopy
class EvaluateDynamicInfo(EvaluateInfo):
    def __init__(self, model, metrics = [], comparisons = [], characteristics = []):
        super(EvaluateDynamicInfo, self).__init__(model, metrics, comparisons, characteristics)
    
    def __eval(self):
        model = self.model
        
        sample_X, sample_y = self.boot.values()
            
        for (m, mask) in enumerate(self.masks):
            reduced_X_test = (self.X_test.T[mask]).T
            for it in range(self.n_samples):
                model.fit((sample_X[it].T[mask]).T, sample_y[it])
                for (i, metric) in enumerate(self.metrics):
                    #print(i)
                    self.result[i][m][it] = metric.evaluate(model, reduced_X_test, self.y_test)
                
                for (i, comp) in enumerate(self.comparisons):
                    ind = i + len(self.metrics) 
                    #print(i)
                    self.result[ind][m][it] = comp.evaluate(self.full, model, self.X_test,
                                                              reduced_X_test, self.y_test)
                for (i, char) in enumerate(self.characteristics):
                    ind = i + len(self.metrics) + len(self.comparisons)
                    #print(i)
                    self.result[ind][m][it] = char.evaluate(model)

                
                

    def fit(self, X_train, y_train, X_test, y_test, masks = None, n_samples=20, len_sample=None):
        super(EvaluateDynamicInfo, self).fit(X_train, y_train, X_test, y_test, masks, n_samples, len_sample)
        if self.len_sample is None:
            self.len_sample = len(X_test)
        self.boot = Bootstrap(self.X_test, self.y_test, self.n_samples, self.len_sample)
        self.__eval()
    

In [42]:
from sklearn.linear_model import LinearRegression
mo = LinearRegression()

In [43]:
import scipy.stats as sps

XX = sps.norm.rvs(size=(10, 5))
X2 = sps.norm.rvs(size=(10, 5))
zz = sps.uniform.rvs(size=(5, 3))
yy = XX @ zz + sps.norm.rvs(scale=0.1, size=(10, 3))
y2 = X2 @ zz + sps.norm.rvs(scale=0.1, size=(10, 3))
maskk = np.ones(XX.shape[1])
maskk[2]=0
maskk[1] = 0
maskk[3] = 0
print(maskk)

metric1 = RSS()
metric2 = Adjusted_Determination_Coefficient()
metric3 = Determination_Coefficient()
metric4 = VIF()
metric5 = BIC()
char1 = Condition_Number()
comp1 = Cp()

[ 1.  0.  0.  0.  1.]


In [44]:
info = EvaluateDynamicInfo(mo, [metric1, metric2, metric3, metric4, metric5],
                           [comp1])#, [char1])

{'Adj Det Coef': 1, "Mallow's Cp": 5, 'Det Coef': 2, 'RSS': 0, 'VIF': 3, 'BIC': 4}


In [45]:
info.fit(XX, yy, X2, y2, masks=[np.ones(len(XX.T)), maskk])

In [46]:
info.get_result()

array([[[  2.56151786e-01,   1.87339522e-01,   1.29345312e-01,
           4.74087355e-01,   3.12626804e+00,   1.50161789e+00,
           6.62864679e+01,   1.63049500e-01,   1.17217541e-01,
           9.47947895e-01,   1.02369264e+00,   1.93949082e-01,
           1.05053845e+02,   1.54932808e+01,   1.50932009e+00,
           5.73883204e-01,   7.68882913e-01,   6.94143258e-01,
           7.37548218e-01,   3.58174232e-01],
        [  1.79723933e+01,   1.48991675e+01,   1.48639541e+01,
           1.60443712e+01,   1.47889163e+01,   4.66682332e+01,
           1.04700625e+02,   1.44686337e+01,   1.40718230e+01,
           1.80298138e+01,   3.54543243e+01,   1.57430972e+01,
           1.72745406e+01,   1.22085738e+02,   2.45762260e+01,
           1.43665416e+01,   1.51558977e+01,   1.53588505e+01,
           1.44160628e+01,   1.49140891e+01]],

       [[  9.92819700e-01,   9.94748606e-01,   9.96374267e-01,
           9.86710656e-01,   9.12366251e-01,   9.57907510e-01,
          -8.58104172e-0

In [47]:
info.get_quality()

array([[  0.44157362,  20.4642971 ],
       [  0.98762206,   0.42635659],
       [  0.98408551,   0.26245847],
       [ 80.78889996,   1.74324323],
       [ 11.95449909,  25.06946728],
       [  1.        ,  40.34402042]])

In [48]:
info.result_by_name("Mallow's Cp")

array([[  5.80088510e-01,   4.24254329e-01,   2.92919017e-01,
          1.07363150e+00,   7.07983418e+00,   3.40060593e+00,
          1.50114192e+02,   3.69246465e-01,   2.65454128e-01,
          2.14674936e+00,   2.31828302e+00,   4.39222524e-01,
          2.37907879e+02,   3.50865177e+01,   3.41804854e+00,
          1.29963198e+00,   1.74123378e+00,   1.57197627e+00,
          1.67027235e+00,   8.11131398e-01],
       [  3.47007854e+01,   2.77410721e+01,   2.76613269e+01,
          3.03345324e+01,   2.74913940e+01,   9.96861881e+01,
          2.31107969e+02,   2.67660733e+01,   2.58674445e+01,
          3.48308215e+01,   7.42908559e+01,   2.96522590e+01,
          3.31204084e+01,   2.70478782e+02,   4.96560099e+01,
          2.65348726e+01,   2.83224705e+01,   2.87820830e+01,
          2.66470195e+01,   2.77748640e+01]])

In [49]:
info.get_mapped_result()

{'Adj Det Coef': array([[  2.56151786e-01,   1.87339522e-01,   1.29345312e-01,
           4.74087355e-01,   3.12626804e+00,   1.50161789e+00,
           6.62864679e+01,   1.63049500e-01,   1.17217541e-01,
           9.47947895e-01,   1.02369264e+00,   1.93949082e-01,
           1.05053845e+02,   1.54932808e+01,   1.50932009e+00,
           5.73883204e-01,   7.68882913e-01,   6.94143258e-01,
           7.37548218e-01,   3.58174232e-01],
        [  1.79723933e+01,   1.48991675e+01,   1.48639541e+01,
           1.60443712e+01,   1.47889163e+01,   4.66682332e+01,
           1.04700625e+02,   1.44686337e+01,   1.40718230e+01,
           1.80298138e+01,   3.54543243e+01,   1.57430972e+01,
           1.72745406e+01,   1.22085738e+02,   2.45762260e+01,
           1.43665416e+01,   1.51558977e+01,   1.53588505e+01,
           1.44160628e+01,   1.49140891e+01]]),
 'BIC': array([[  5.80088510e-01,   4.24254329e-01,   2.92919017e-01,
           1.07363150e+00,   7.07983418e+00,   3.40060593e+00,
 