In [1]:
%run "./metrics/metrics"

In [2]:
import numpy as np

class Bootstrap():
    """Bootstrap resamples (drawn with replacement) of a paired data set.

    Parameters
    ----------
    X : array-like
        Observations; the first axis indexes observations.
    y : array-like
        Targets aligned with ``X`` along the first axis. May be 1-D
        (one value per observation) or have any trailing shape.
    n_samples : int
        Number of bootstrap resamples to draw.
    len_sample : int
        Number of observations in each resample.

    Attributes
    ----------
    sample_X : ndarray, shape ``(n_samples, len_sample) + X.shape[1:]``
    sample_y : ndarray, shape ``(n_samples, len_sample) + y.shape[1:]``
        Float arrays holding the resampled rows; row ``j`` of resample ``i``
        in ``sample_X`` and ``sample_y`` comes from the same observation.
    """

    def __init__(self, X, y, n_samples, len_sample):
        self.n_samples = n_samples
        self.len_sample = len_sample
        self.X = np.array(X)
        self.y = np.array(y)
        # Size the buffers from the trailing shape instead of shape[1] so that
        # a 1-D target vector (the usual regression case) is accepted too.
        self.sample_X = np.zeros((n_samples, len_sample) + self.X.shape[1:])
        self.sample_y = np.zeros((n_samples, len_sample) + self.y.shape[1:])
        for i in range(n_samples):
            # One index vector per resample, shared by X and y to keep pairs aligned.
            idx = np.random.choice(np.arange(len(X)), len_sample, replace=True)
            self.sample_X[i] = self.X[idx]
            self.sample_y[i] = self.y[idx]

    def values(self):
        """Return the tuple ``(sample_X, sample_y)`` of resampled arrays."""
        return self.sample_X, self.sample_y
        


In [309]:
# Smoke test: build 5 resamples of length 3 from a tiny 3x2 data set,
# passing the same nested list as both X and y.
X = [[1, 2], [3, 4], [5, 6]]
b = Bootstrap(X, X, 5, 3)


In [636]:
class Model:
    """Placeholder describing the model interface used in this notebook.

    Every method is a no-op that returns ``None``; the class only documents
    which methods a model is expected to expose.
    """

    def __init__(self):
        """Nothing to initialise."""

    def fit(self, X, y):
        """Placeholder for training on ``X`` and ``y``; does nothing."""
        return None

    def predict(self, X):
        """Placeholder for predicting on ``X``; does nothing."""
        return None

    def predict_params(self, X):
        """Placeholder; does nothing."""
        return None
        

In [3]:
%run "./metrics/metrics"

In [4]:
class FunctionEvaluator():
    """Base class that stores the callable an evaluator applies.

    Parameters
    ----------
    func : callable, optional
        Function kept in ``self.function``. When omitted, a two-argument
        function that always returns 0 is stored instead.
    """

    def __init__(self, func=None):
        # Identity test: `== None` would call the callable's own __eq__ and
        # could wrongly discard a valid function object.
        if func is None:
            self.function = lambda a, b: 0
        else:
            self.function = func

In [5]:
class Metric(FunctionEvaluator):
    """Evaluator that scores a model's predictions against the targets."""

    def evaluate(self, model, X, y):
        """Return ``self.function(y, model.predict(X))``."""
        predictions = model.predict(X)
        return self.function(y, predictions)
    

In [6]:
class Characteristic(FunctionEvaluator):
    """Evaluator that looks only at a model, without any data."""

    def evaluate(self, model):
        """Placeholder to be overridden by subclasses; returns ``None``."""
        return None
    

In [7]:
class Comparison(FunctionEvaluator):
    """Evaluator that takes a full model and a second model plus a feature mask."""

    def evaluate(self, full_model, model, full_X, y, mask):
        """Placeholder to be overridden by subclasses; returns ``None``."""
        return None

In [8]:
class RSS(Metric):
    """Metric wired to ``residual_square_sum`` (loaded via ``%run ./metrics/metrics``)."""

    def __init__(self):
        super().__init__(residual_square_sum)

In [9]:
class Adjusted_Determination_Coefficient(Metric):
    """Metric wired to ``determination_coefficient`` called with two arguments.

    NOTE(review): the inherited ``evaluate`` passes only ``(y, y_pred)``, so the
    adjustment presumably comes from that function's default third argument —
    confirm against ./metrics/metrics.
    """

    def __init__(self):
        super().__init__(determination_coefficient)
        
class Determination_Coefficient(Metric):
    """Metric wired to ``determination_coefficient`` with ``False`` as third argument.

    NOTE(review): the ``False`` flag presumably switches the adjustment off —
    confirm against ./metrics/metrics.
    """

    def __init__(self):
        super().__init__(determination_coefficient)

    def evaluate(self, model, X, y):
        """Return ``self.function(y, model.predict(X), False)``."""
        predictions = model.predict(X)
        return self.function(y, predictions, False)

In [10]:
class VIF(Metric):
    """Metric wired to ``variance_inflation_factor``.

    NOTE(review): through the inherited ``evaluate`` the function receives
    ``(y, y_pred)`` — confirm that this matches its expected inputs.
    """

    def __init__(self):
        super().__init__(variance_inflation_factor)

In [11]:
class BIC(Metric):
    """Metric wired to ``bayesian_information_criterion``."""

    def __init__(self):
        super().__init__(bayesian_information_criterion)

    def evaluate(self, model, X, y):
        """Return ``self.function(y, model.predict(X), X.shape[1])``.

        The third argument is the number of columns of ``X``.
        """
        n_columns = X.shape[1]
        predictions = model.predict(X)
        return self.function(y, predictions, n_columns)

In [12]:
class Cp(Comparison):
    """Comparison wired to ``mallows_Cp`` (loaded via ``%run ./metrics/metrics``).

    Compares predictions of a model fitted on all columns with predictions of
    a model fitted on the columns selected by ``mask``.
    """

    def __init__(self):
        super(Cp, self).__init__(mallows_Cp)

    def evaluate(self, full_model, model, full_X, y, mask):
        """Return ``self.function(y, y_full, y_p, p)``.

        Parameters
        ----------
        full_model : object with ``predict``
            Model applied to all columns of ``full_X``.
        model : object with ``predict``
            Model applied to the masked columns of ``full_X``.
        full_X : 2-D array-like
            Data with every column present.
        y : array-like
            Targets, passed through to the wrapped function.
        mask : array-like of bool or 0/1
            Truthy entries mark the columns given to ``model``.
        """
        # Coerce to boolean first: indexing with a 0/1 *integer* array would be
        # fancy indexing (columns 0 and 1 repeated) and a float array would
        # raise, while a plain list has no .sum().
        mask = np.asarray(mask, dtype=bool)
        y_full = full_model.predict(full_X)
        y_p = model.predict((np.asarray(full_X).T[mask]).T)
        p = mask.sum()  # number of selected columns
        return self.function(y, y_full, y_p, p)
        
        

In [13]:
class Condition_Number(Characteristic):
    """Characteristic wired to ``condition_number_xtx``.

    NOTE(review): the function is applied to ``model.coef_`` (the fitted
    coefficients), not to a design matrix — confirm this is intended.
    """

    def __init__(self):
        super().__init__(condition_number_xtx)

    def evaluate(self, model):
        """Return ``self.function(model.coef_)``."""
        return self.function(model.coef_)

In [847]:
# NOTE(review): `model` is not defined in any cell shown above this one; the
# cell presumably relied on a fitted model left in the kernel — confirm
# before re-running on a fresh kernel.
c = Condition_Number()
c.evaluate(model)

27.913870044034976

In [24]:
class EvaluateInfo():
    """Base class holding a model, its evaluators and the data they run on.

    Parameters
    ----------
    model : object with ``fit``
        Model to evaluate.
    metrics : list, optional
        Evaluators that compare ``y`` with the model's predictions.
    characteristics : list, optional
        Evaluators that look at the fitted model itself.
    comparisons : list, optional
        Evaluators that compare ``y``, the full-model predictions and the
        predictions of a model restricted to the masked features.
    """

    def __init__(self, model, metrics=None, characteristics=None, comparisons=None):
        self.model = model
        # A fresh list per instance: a literal [] default would be one shared
        # object, so appending on one evaluator would leak into all others.
        self.characteristics = [] if characteristics is None else characteristics
        self.metrics = [] if metrics is None else metrics
        self.comparisons = [] if comparisons is None else comparisons

    def fit(self, X_train, y_train, X_test, y_test, mask=None, n_samples=20, len_sample=None):
        """Store the data and settings and fit the full-feature model.

        Parameters
        ----------
        X_train, y_train : array-like
            Training data; ``X_train`` is 2-D (observations x features).
        X_test, y_test : array-like
            Held-out data, stored for use by subclasses.
        mask : array-like of bool or 0/1, optional
            Feature selector, stored as a boolean array in ``self.mask``.
            Defaults to all features of ``X_train``.
        n_samples : int, default 20
            Stored in ``self.n_samples``.
        len_sample : int, optional
            Stored in ``self.len_sample``. Defaults to the number of rows of
            ``X_test``.

        Sets ``self.full`` to a deep copy of the model fitted on all of
        ``X_train``. Returns ``None``.
        """
        # Imported here because the notebook only imports deepcopy in a later cell.
        from copy import deepcopy

        if mask is None:
            # Use this call's own data (not a notebook-global `X`) and the
            # builtin bool (`np.bool` was removed in NumPy 1.24).
            mask = np.ones(np.shape(X_train)[1], dtype=bool)
        self.mask = np.array(mask, dtype=bool)
        if len_sample is None:
            # Default: each resample is as long as the test set. The earlier
            # default read the column count of an unrelated global.
            len_sample = np.shape(X_test)[0]
        self.len_sample = len_sample
        self.n_samples = n_samples

        self.X_train = X_train
        self.y_train = y_train

        self.X_test = X_test
        self.y_test = y_test

        # Keep a snapshot of the full model, because subclasses refit
        # `self.model` afterwards. Fall back to the model itself when `fit`
        # does not return the estimator.
        fitted = self.model.fit(self.X_train, self.y_train)
        self.full = deepcopy(self.model if fitted is None else fitted)

In [25]:
from copy import deepcopy
class EvaluateStaticInfo(EvaluateInfo):
    """Evaluate a feature-masked model on bootstrap resamples of the test set.

    NOTE(review): the same class is defined again in the next cell; the later
    definition is the one in effect after a full run.

    Parameters
    ----------
    model : object with ``fit`` and ``predict``
    metrics : list, optional
        Evaluators called as ``evaluate(model, X, y)``.
    comparisons : list, optional
        Evaluators called as ``evaluate(full_model, model, full_X, y, mask)``.
    """

    def __init__(self, model, metrics=None, comparisons=None):
        # A fresh list per instance: a literal [] default would be one shared
        # object reused by every evaluator created without arguments.
        metrics = [] if metrics is None else metrics
        comparisons = [] if comparisons is None else comparisons
        super(EvaluateStaticInfo, self).__init__(model, metrics, [], comparisons)

    def __eval(self, b):
        """Score every metric and comparison on every resample held by ``b``.

        Refits ``self.model`` on the masked training columns, then returns
        ``(result_metrics, result_comparisons)`` with shapes
        ``(len(self.metrics), n_samples)`` and ``(len(self.comparisons), n_samples)``.
        """
        model = self.model

        sample_X, sample_y = b.values()
        result_metrics = np.zeros((len(self.metrics), self.n_samples))
        result_comparisons = np.zeros((len(self.comparisons), self.n_samples))

        # Refit on the selected columns only; the full-feature fit is kept in self.full.
        model.fit((self.X_train.T[self.mask]).T, self.y_train)

        for it in range(self.n_samples):
            for (i, metric) in enumerate(self.metrics):
                result_metrics[i][it] = metric.evaluate(model, (sample_X[it].T[self.mask]).T, sample_y[it])
            for (i, comp) in enumerate(self.comparisons):
                result_comparisons[i][it] = comp.evaluate(self.full, model, sample_X[it], sample_y[it], self.mask)

        return result_metrics, result_comparisons

    def fit(self, X_train, y_train, X_test, y_test, mask = None, n_samples=20, len_sample=None):
        """Fit on the training data, resample the test data and score it.

        Returns the ``(result_metrics, result_comparisons)`` tuple of arrays.
        """
        super(EvaluateStaticInfo, self).fit(X_train, y_train, X_test, y_test, mask, n_samples, len_sample)
        self.b = Bootstrap(self.X_test, self.y_test, self.n_samples, self.len_sample)
        return (self.__eval(self.b))
        

In [26]:
from copy import deepcopy
class EvaluateStaticInfo(EvaluateInfo):
    """Evaluate a feature-masked model on bootstrap resamples of the test set.

    NOTE(review): this cell re-defines the class from the preceding cell and
    replaces it; one of the two cells can be dropped.

    Parameters
    ----------
    model : object with ``fit`` and ``predict``
    metrics : list, optional
        Evaluators called as ``evaluate(model, X, y)``.
    comparisons : list, optional
        Evaluators called as ``evaluate(full_model, model, full_X, y, mask)``.
    """

    def __init__(self, model, metrics=None, comparisons=None):
        # A fresh list per instance: a literal [] default would be one shared
        # object reused by every evaluator created without arguments.
        metrics = [] if metrics is None else metrics
        comparisons = [] if comparisons is None else comparisons
        super(EvaluateStaticInfo, self).__init__(model, metrics, [], comparisons)

    def __eval(self, b):
        """Score every metric and comparison on every resample held by ``b``.

        Refits ``self.model`` on the masked training columns, then returns
        ``(result_metrics, result_comparisons)`` with shapes
        ``(len(self.metrics), n_samples)`` and ``(len(self.comparisons), n_samples)``.
        """
        model = self.model

        sample_X, sample_y = b.values()
        result_metrics = np.zeros((len(self.metrics), self.n_samples))
        result_comparisons = np.zeros((len(self.comparisons), self.n_samples))

        # Refit on the selected columns only; the full-feature fit is kept in self.full.
        model.fit((self.X_train.T[self.mask]).T, self.y_train)

        for it in range(self.n_samples):
            for (i, metric) in enumerate(self.metrics):
                result_metrics[i][it] = metric.evaluate(model, (sample_X[it].T[self.mask]).T, sample_y[it])
            for (i, comp) in enumerate(self.comparisons):
                result_comparisons[i][it] = comp.evaluate(self.full, model, sample_X[it], sample_y[it], self.mask)

        return result_metrics, result_comparisons

    def fit(self, X_train, y_train, X_test, y_test, mask = None, n_samples=20, len_sample=None):
        """Fit on the training data, resample the test data and score it.

        Returns the ``(result_metrics, result_comparisons)`` tuple of arrays.
        """
        super(EvaluateStaticInfo, self).fit(X_train, y_train, X_test, y_test, mask, n_samples, len_sample)
        self.b = Bootstrap(self.X_test, self.y_test, self.n_samples, self.len_sample)
        return (self.__eval(self.b))

In [27]:
from sklearn.linear_model import LinearRegression
# Unfitted linear regression; passed as the model to EvaluateStaticInfo below.
mo = LinearRegression()

In [28]:
import scipy.stats as sps

# Synthetic regression problem: two 10x5 normal design matrices (train / test),
# one shared 5x3 uniform coefficient matrix, and responses with small
# Gaussian noise (scale 0.1).
# NOTE(review): no random seed is set in the visible cells, so these values
# (and the results below) change on every run.
XX = sps.norm.rvs(size=(10, 5))
X2 = sps.norm.rvs(size=(10, 5))
zz = sps.uniform.rvs(size=(5, 3))
yy = XX @ zz + sps.norm.rvs(scale=0.1, size=(10, 3))
y2 = X2 @ zz + sps.norm.rvs(scale=0.1, size=(10, 3))
# Feature mask as a float 0/1 array: columns 1, 2 and 3 are switched off,
# leaving columns 0 and 4.
maskk = np.ones(XX.shape[1])
maskk[2]=0
maskk[1] = 0
maskk[3] = 0
print(maskk)

[ 1.  0.  0.  0.  1.]


In [29]:
# One instance of each evaluator defined above. Only `comp1` is passed to
# EvaluateStaticInfo in the next cell; metric1..metric5 are not used there.
metric1 = RSS()
metric2 = Adjusted_Determination_Coefficient()
metric3 = Determination_Coefficient()
metric4 = VIF()
metric5 = BIC()
comp1 = Cp()

In [30]:
# Evaluator with no metrics and a single comparison (Cp).
info = EvaluateStaticInfo(model=mo, metrics = [], comparisons=[comp1])

In [854]:
# Train on (XX, yy), bootstrap (X2, y2) and score. Returns
# (metric results, comparison results); the first array is empty because
# `info` was built with metrics=[].
info.fit(XX, yy, X2, y2, mask=maskk)

(array([], shape=(0, 20), dtype=float64),
 array([[  56.77635917,   11.20013717,   43.16441014,    5.27139937,
          108.07203138,  198.31122074,  126.02825798,  183.80257228,
           58.63601802,   44.87887878,   61.79998661,  161.0235449 ,
           55.7222075 ,   36.30970869,   57.03498073,   80.04724727,
           32.63343492,  117.85588343,   39.83289258,  162.69393357]]))