In [1]:
from metrics import *
from bootstrap import *
from metric_classes import *

<function condition_number_xtx at 0x7fb0600d9048>


In [2]:
# Sanity check: str() of a metric object yields its display name, which is the
# key the Evaluate* classes use to look results up by name.
str(Cp())

"Mallow's Cp"

In [3]:
# Instantiate the condition-number characteristic on its own.
c = Condition_Number()
# TODO(review): leftover commented-out call; no `model` is defined in the
# cells visible above, so this would fail if re-enabled here. Remove it or
# move it below the cell that creates the estimator.
#c.evaluate(model)

In [134]:
class EvaluateInfo():
    """Base evaluator that scores one model on several feature subsets.

    Three kinds of scorer objects are supported; each must provide ``__str__``
    (used as its lookup name) and an ``evaluate`` method:

    * metrics          -- ``evaluate(model, X_reduced, y)``
    * comparisons      -- ``evaluate(full_model, model, X, X_reduced, y)``
    * characteristics  -- ``evaluate(model)``

    Rows of ``quality`` and ``result`` are always ordered
    metrics, then comparisons, then characteristics; ``names`` maps a
    scorer name to its row index.
    """

    def __init__(self, model, metrics = None, comparisons = None, characteristics = None):
        """Store the model and scorers and build the name -> row index map.

        Parameters
        ----------
        model : estimator with a ``fit(X, y)`` method
        metrics, comparisons, characteristics : iterable or None
            Scorer objects; ``None`` means "no scorers of this kind".
        """
        self.model = model
        # Copy into fresh lists: avoids sharing a mutable default between
        # instances and keeps `names` in sync with what is stored here.
        self.characteristics = list(characteristics) if characteristics is not None else []  # characteristic of the fitted model itself
        self.metrics = list(metrics) if metrics is not None else []  # compares y and y_pred
        self.comparisons = list(comparisons) if comparisons is not None else []  # compares y, y_pred, y_pred_with_reduced_features

        all_names = [str(el) for el in self.metrics] +\
                    [str(el) for el in self.comparisons] +\
                    [str(el) for el in self.characteristics]

        # Row index of every scorer in `quality` / `result`.
        self.names = {name: row for row, name in enumerate(all_names)}
        print(self.names)

    def _calcQuality(self):
        """Fill ``self.quality[row][mask_index]`` for every scorer and mask.

        For each mask the model is refitted on the masked training columns and
        scored on the (masked) test set. Note that this refits ``self.model``
        in place; ``self.full`` is an independent copy made in ``fit``.
        """
        model = self.model
        n_metrics = len(self.metrics)
        n_comparisons = len(self.comparisons)
        for (m, mask) in enumerate(self.masks):
            reduced_X_test = self.X_test[:, mask]
            model.fit(self.X_train[:, mask], self.y_train)

            for (i, metric) in enumerate(self.metrics):
                self.quality[i][m] = metric.evaluate(model, reduced_X_test, self.y_test)

            for (i, comp) in enumerate(self.comparisons):
                self.quality[n_metrics + i][m] = comp.evaluate(self.full, model, self.X_test,
                                                               reduced_X_test, self.y_test)

            for (i, char) in enumerate(self.characteristics):
                self.quality[n_metrics + n_comparisons + i][m] = char.evaluate(model)

    def fit(self, X_train, y_train, X_test, y_test, masks = None, n_samples=20, len_sample=None):
        """Store the data, fit the full model and compute ``quality``.

        Parameters
        ----------
        X_train, y_train, X_test, y_test : array-like
            Train / test data; X arrays are indexed as ``X[:, mask]``.
        masks : array-like of bool-convertible values, optional
            Either one mask (1-D, one entry per feature) or a sequence of
            masks (2-D). Defaults to a single mask selecting every feature.
        n_samples : int
            Size of the last axis of ``result`` (filled by subclasses).
        len_sample : int, optional
            Stored as ``self.len_sample`` for subclasses. Defaults to
            ``X_train.shape[1]``.
            NOTE(review): that default is the number of features, which looks
            unintended for a sample length -- confirm against Bootstrap.
        """
        # Local import so the class does not depend on a later notebook cell
        # having imported deepcopy already.
        from copy import deepcopy

        if masks is None:
            # Was `X.shape[1]`, an undefined name -> NameError.
            masks = np.ones((1, X_train.shape[1]), dtype=bool)
        masks = np.array(masks, dtype=bool)
        if masks.ndim == 1:
            # Promote a single mask to a one-row 2-D array. The previous
            # target shape (1, len(masks.shape)) == (1, 1) was wrong for any
            # mask with more than one entry.
            masks = masks.reshape((1, -1))
        self.masks = masks

        if len_sample is None:
            len_sample = X_train.shape[1]
        self.len_sample = len_sample

        self.n_samples = n_samples

        self.X_train = X_train
        self.y_train = y_train

        self.X_test = X_test
        self.y_test = y_test

        # Relies on model.fit returning the fitted estimator; the copy keeps
        # the full-feature fit intact while self.model is refitted per mask.
        self.full = deepcopy(self.model.fit(self.X_train, self.y_train))

        n_rows = len(self.metrics) + len(self.comparisons) + len(self.characteristics)

        # Allocated here, left as zeros; subclasses fill it per sample.
        self.result = np.zeros((n_rows, len(self.masks), self.n_samples))

        self.quality = np.zeros((n_rows, len(self.masks)))
        self._calcQuality()

    def get_quality(self):
        """Return the (n_scorers, n_masks) array of scores."""
        return self.quality

    def get_result(self):
        """Return the (n_scorers, n_masks, n_samples) array of per-sample scores."""
        return self.result

    def _by_name(self, name, array):
        """Return the row of ``array`` belonging to the scorer called ``name``.

        Raises
        ------
        NameError
            If ``name`` is not the ``str()`` of any registered scorer.
        """
        if name in self.names:
            return array[self.names[name]]
        else:
            raise NameError('Invalid argument')

    def quality_by_name(self, name):
        """Return the ``quality`` row for the scorer called ``name``."""
        return self._by_name(name, self.quality)

    def result_by_name(self, name):
        """Return the ``result`` rows for the scorer called ``name``."""
        return self._by_name(name, self.result)
            
    

In [135]:
from copy import deepcopy
class EvaluateStaticInfo(EvaluateInfo):
    """Evaluator with fixed fits and resampled evaluation data.

    One model per mask is fitted once on the training data; every fitted model
    is then scored on each sample drawn by Bootstrap from the test data.
    Characteristics are not supported (an empty list is passed to the base).
    """

    def __init__(self, model, metrics = [], comparisons = []):
        super().__init__(model, metrics, comparisons, [])

    def __eval(self):
        """Fill ``self.result[row, mask_index, sample_index]``."""
        estimator = self.model

        boot_X, boot_y = self.boot.values()

        # One independent fitted copy per mask (fit is expected to return
        # the fitted estimator).
        self.models = []
        for mask in self.masks:
            fitted_copy = deepcopy(estimator.fit(self.X_train[:, mask], self.y_train))
            self.models.append(fitted_copy)

        n_metrics = len(self.metrics)
        for mask_idx, mask in enumerate(self.masks):
            for draw in range(self.n_samples):
                # Keep only the columns selected by the current mask.
                X_draw_reduced = (boot_X[draw].T[mask]).T

                for row, metric in enumerate(self.metrics):
                    score = metric.evaluate(self.models[mask_idx], X_draw_reduced, boot_y[draw])
                    self.result[row][mask_idx][draw] = score

                # Comparison rows follow the metric rows.
                for offset, comp in enumerate(self.comparisons):
                    score = comp.evaluate(self.full, self.models[mask_idx], boot_X[draw],
                                          X_draw_reduced, boot_y[draw])
                    self.result[n_metrics + offset][mask_idx][draw] = score

    def fit(self, X_train, y_train, X_test, y_test, masks = None, n_samples=20, len_sample=None):
        """Run the base fit, draw samples from the test data and score them."""
        super().fit(X_train, y_train, X_test, y_test, masks, n_samples, len_sample)
        self.boot = Bootstrap(self.X_test, self.y_test, self.n_samples, self.len_sample)
        self.__eval()
        

In [136]:
from copy import deepcopy
class EvaluateDynamicInfo(EvaluateInfo):
    """Evaluator that refits the model on every resampled data set.

    For each mask and each sample drawn by Bootstrap, the model is refitted on
    the masked sample and scored against the (masked) test set.

    NOTE(review): the samples are drawn from X_test / y_test, i.e. the model
    is fitted on resampled test data and scored on the test data itself.
    Confirm whether resampling the training data was intended.
    """

    def __init__(self, model, metrics = [], comparisons = [], characteristics = []):
        super().__init__(model, metrics, comparisons, characteristics)

    def __eval(self):
        """Fill ``self.result[row][mask_index][sample_index]``.

        Row layout must match ``EvaluateInfo.names``: metrics first, then
        comparisons, then characteristics. Previously the comparison and
        characteristic offsets were swapped, so ``result_by_name`` returned
        the wrong rows and rows could overlap when both kinds were present.
        """
        model = self.model

        sample_X, sample_y = self.boot.values()

        n_metrics = len(self.metrics)
        n_comparisons = len(self.comparisons)

        for (m, mask) in enumerate(self.masks):
            reduced_X_test = (self.X_test.T[mask]).T
            for it in range(self.n_samples):
                # Refit in place on the masked sample; self.full stays the
                # independent full-feature fit made by the base class.
                model.fit((sample_X[it].T[mask]).T, sample_y[it])

                for (i, metric) in enumerate(self.metrics):
                    self.result[i][m][it] = metric.evaluate(model, reduced_X_test, self.y_test)

                for (i, comp) in enumerate(self.comparisons):
                    ind = n_metrics + i
                    self.result[ind][m][it] = comp.evaluate(self.full, model, self.X_test,
                                                            reduced_X_test, self.y_test)

                for (i, char) in enumerate(self.characteristics):
                    ind = n_metrics + n_comparisons + i
                    self.result[ind][m][it] = char.evaluate(model)

    def fit(self, X_train, y_train, X_test, y_test, masks = None, n_samples=20, len_sample=None):
        """Run the base fit, draw samples and compute per-sample scores."""
        super().fit(X_train, y_train, X_test, y_test, masks, n_samples, len_sample)
        self.boot = Bootstrap(self.X_test, self.y_test, self.n_samples, self.len_sample)
        self.__eval()
    

In [137]:
from sklearn.linear_model import LinearRegression
# Unfitted estimator handed to the evaluator below; the Evaluate* classes call
# its fit() themselves, so it is not fitted here.
mo = LinearRegression()

In [138]:
import scipy.stats as sps

# Synthetic multi-output linear data: XX/yy act as the training set and
# X2/y2 as the test set; both share the coefficient matrix zz.
# NOTE: no random seed is set, so the numbers change on every run.
XX = sps.norm.rvs(size=(10, 5))
X2 = sps.norm.rvs(size=XX.shape)
zz = sps.uniform.rvs(size=(XX.shape[1], 3))
yy = XX @ zz + sps.norm.rvs(scale=0.1, size=(len(XX), zz.shape[1]))
y2 = X2 @ zz + sps.norm.rvs(scale=0.1, size=(len(X2), zz.shape[1]))

# Feature mask keeping only the first and last columns (1 = keep, 0 = drop).
maskk = np.ones(XX.shape[1])
maskk[1:4] = 0
print(maskk)

[ 1.  0.  0.  0.  1.]


In [139]:
# Scorer objects used in the demo below, created in the same order as before.
metric1, metric2, metric3, metric4, metric5 = (
    RSS(),
    Adjusted_Determination_Coefficient(),
    Determination_Coefficient(),
    VIF(),
    BIC(),
)
char1 = Condition_Number()
comp1 = Cp()

In [140]:
# EvaluateStaticInfo accepts no characteristics, so char1 is left out here.
info = EvaluateStaticInfo(
    mo,
    metrics=[metric1, metric2, metric3, metric4, metric5],
    comparisons=[comp1],
)

{"Mallow's Cp": 5, 'RSS': 0, 'Adj Det Coef': 1, 'BIC': 4, 'Det Coef': 2, 'VIF': 3}


In [141]:
# Two feature masks: every feature first, then the reduced subset in `maskk`.
info.fit(XX, yy, X_test=X2, y_test=y2, masks=[np.ones(XX.shape[1]), maskk])

In [142]:
# Per-sample scores, indexed as [scorer row][mask][sample]; scorer rows follow
# the name -> index mapping printed when `info` was constructed.
info.get_result()

array([[[  3.65303065e-01,   3.77542158e-01,   3.52642787e-01,
           4.44717293e-01,   3.40370354e-01,   4.08389367e-01,
           1.83898028e-01,   4.93958860e-01,   3.63469425e-01,
           4.18486901e-01,   3.21148288e-01,   2.12781777e-01,
           2.35032818e-01,   2.74819525e-01,   3.95773627e-01,
           4.86553596e-01,   2.55655849e-01,   4.35944837e-01,
           2.87268764e-01,   1.52029244e-01],
        [  5.72833926e+01,   5.04199209e+01,   3.81887928e+01,
           1.92035744e+01,   5.71282509e+01,   1.90567553e+01,
           1.90812871e+01,   6.62374156e+01,   7.59825121e+01,
           6.60007665e+01,   5.32848852e+01,   1.07863106e+01,
           9.50734507e+01,   2.29862751e+01,   5.42838099e+01,
           6.33042007e+01,   6.40912622e+01,   7.58829725e+01,
           5.98565568e+01,   6.29992975e+01]],

       [[  9.76804989e-01,   9.90541356e-01,   9.76662453e-01,
           9.80653888e-01,   9.89739241e-01,   9.68311673e-01,
           9.77947679e-0

In [143]:
# Scores on the unresampled test set, indexed as [scorer row][mask]:
# column 0 is the all-features mask, column 1 the reduced mask.
info.get_quality()

array([[   0.66515697,  100.08175463],
       [   0.99034927,   -0.45208135],
       [   0.98759192,   -0.86696173],
       [ 103.61910017,    0.68866665],
       [  12.17808244,  104.68692482],
       [   1.        ,  144.46336265]])

In [148]:
# Look up one scorer's per-sample results by its display name; the rows are in
# the order the masks were passed to fit (all features, then reduced).
info.result_by_name("Mallow's Cp")

array([[   6.        ,    6.        ,    6.        ,    6.        ,
           6.        ,    6.        ,    6.        ,    6.        ,
           6.        ,    6.        ,    6.        ,    6.        ,
           6.        ,    6.        ,    6.        ,    6.        ,
           6.        ,    6.        ,    6.        ,    6.        ],
       [ 155.81059944,  132.54779017,  107.29313458,   42.18153282,
         166.84144154,   45.66320137,  102.76015075,  133.09500471,
         208.04787815,  156.71286134,  164.91987934,   49.69189076,
         403.51138492,   82.64134646,  136.15873478,  129.10735342,
         249.69351024,  173.06553769,  207.36430647,  413.38933631]])