In [1]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.linear_model import Lasso, Ridge
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [2]:
# Fixed seed so the train/test split is identical after every
# Restart Kernel -> Run All instead of being redrawn each time.
RANDOM_SEED = 42

dataset = load_iris()
# X / y hold the training split, X_t / y_t the held-out 30 % test split.
X, X_t, y, y_t = train_test_split(
    dataset['data'],
    dataset['target'],
    test_size=0.30,
    random_state=RANDOM_SEED,
)

In [3]:
# Declarative description of the ensemble.  Each entry under "components"
# maps a free-form label to a one-item dict {registry_key: constructor kwargs}.
model_descr = {
    "components": {
        # Plain estimators: the kwargs are passed straight to the constructor.
        "Lasso_component": {"lasso": {"alpha": 0.1}},
        "Ridge_component": {"ridge": {"alpha": 0.1}},
        "Forrest_component": {"forrest": {"n_estimators": 100}},
        # Wrapped estimators: the nested registry key describes the inner
        # estimator, every other key is a kwarg of the wrapper itself.
        "RFE_SVM_component": {
            "rfe": {
                "svm": {"gamma": "scale", "kernel": "linear"},
                "verbose": 1,
            },
        },
        "RFE_Ridge_component": {"rfe": {"ridge": {"alpha": 0.1}}},
    },
}

In [4]:
# Estimators that can be used directly as a component, keyed by the name
# used in a model description.
available_components = {
    "lasso": Lasso,
    "ridge": Ridge,
    "forrest": RandomForestClassifier,
}

# Meta-estimators that take another estimator as their `estimator` argument.
available_wrappers = {
    "rfe": RFE,
}

# Estimators that are only accepted as the inner estimator of a wrapper.
available_classifiers = {
    "svm": SVC,
}

# Fitted attributes a per-feature score can be read from.
extraction_methods = [
    "coef_",
    "feature_importances_",
    "ranking_",
]

In [5]:
class Model:
    """A set of independently fitted estimators built from a declarative description.

    The description is a dict with a ``"components"`` key that maps a label
    to a one-item dict ``{registry_key: params}``.  ``registry_key`` is looked
    up in ``available_wrappers`` first and in ``available_components``
    otherwise.  For a wrapper, ``params`` must contain exactly one key naming
    the inner estimator (from ``available_components`` or
    ``available_classifiers``); the remaining keys are passed to the wrapper.
    """

    def __init__(self, model_descr):
        """Instantiate every component listed in ``model_descr["components"]``.

        Raises:
            ValueError: if a component description is empty.
            KeyError: if a component key is in neither ``available_wrappers``
                nor ``available_components``.

        Warns:
            UserWarning: if a wrapper does not name exactly one inner
                estimator; that component is left out of the model.
        """
        self.components = {}
        # Everything a wrapper may receive as its inner estimator.
        available_wrapable = {**available_components, **available_classifiers}
        for name, description in model_descr["components"].items():
            if not description:
                raise ValueError(
                    "Component {!r} has an empty description.".format(name)
                )
            # Only the first entry of a description is interpreted.
            component, cmp_params = next(iter(description.items()))

            if component in available_wrappers:
                wrapable_keys = cmp_params.keys() & available_wrapable.keys()
                if len(wrapable_keys) != 1:
                    # Zero or several candidates: the inner estimator is
                    # ambiguous, so the wrapper cannot be built.
                    warnings.warn(
                        "Component {!r} skipped: wrapper {!r} needs exactly one "
                        "inner estimator, found {}.".format(
                            name, component, sorted(wrapable_keys)
                        )
                    )
                    continue
                wrapable_key = next(iter(wrapable_keys))
                # Every key except the inner-estimator one is a wrapper kwarg.
                parsed_params = {
                    key: value
                    for key, value in cmp_params.items()
                    if key != wrapable_key
                }
                parsed_params["estimator"] = available_wrapable[wrapable_key](
                    **cmp_params[wrapable_key]
                )
                self.components[name] = available_wrappers[component](**parsed_params)
            elif component in available_components:
                self.components[name] = available_components[component](**cmp_params)
            else:
                raise KeyError(
                    "Unknown component {!r} in {!r}; expected one of {}.".format(
                        component,
                        name,
                        sorted(set(available_components) | set(available_wrappers)),
                    )
                )

    def fit(self, X, y):
        """Fit every component on ``(X, y)``."""
        for component in self.components.values():
            component.fit(X, y)

    def predict(self, X):
        """Return an array with one row of predictions per component."""
        return np.array([component.predict(X) for component in self.components.values()])

    @staticmethod
    def _misclassification_rate(y_pred, y_true):
        """Fraction of predictions that differ from ``y_true`` after rounding.

        Predictions are rounded first so that continuous outputs are compared
        as the nearest integer label.
        """
        return np.sum(np.round(y_pred) != y_true) / y_pred.shape[0]

    def validate(self, X, y):
        """Return the misclassification rate of each component on ``(X, y)``.

        The labels passed as ``y`` are the ones scored against; no
        notebook-level variable is consulted.
        """
        y_true = np.asarray(y)
        predictions = self.predict(X)
        return np.array(
            [self._misclassification_rate(pred, y_true) for pred in predictions]
        )

    def feature_ranking(self):
        """Return one row of per-feature scores for each fitted component.

        For every component the first attribute listed in
        ``extraction_methods`` that the component exposes is used, so the
        choice is deterministic.

        Raises:
            AttributeError: if a component exposes none of the attributes
                (for example because it has not been fitted yet).
        """
        missing = object()

        def _get_proper_attribute(component):
            for attribute in extraction_methods:
                value = getattr(component, attribute, missing)
                if value is not missing:
                    return value
            raise AttributeError(
                "{} exposes none of {}; has the model been fitted?".format(
                    type(component).__name__, list(extraction_methods)
                )
            )

        return np.array(
            [_get_proper_attribute(component) for component in self.components.values()]
        )

    def __repr__(self):
        """List the ``(label, estimator)`` pairs, one per paragraph."""
        listing = "\n\n".join(str(item) for item in self.components.items())
        return "Components:\n\n" + listing + "\n\nEnsemble method:\n\n[TODO]"

In [6]:
# Build the ensemble from the declarative description; the bare `model`
# expression displays its repr (one entry per component).
model = Model(model_descr)
model

Components:

('Lasso_component', Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False))

('Ridge_component', Ridge(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001))

('Forrest_component', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False))

('RFE_SVM_component', RFE(estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
     

In [7]:
# Fit every component on the training split.
model.fit(X, y)

Fitting estimator with 4 features.
Fitting estimator with 3 features.


In [8]:
# Misclassification rate of each component on the held-out split.
model.validate(X_t, y_t)

array([0.08888889, 0.04444444, 0.08888889, 0.06666667, 0.08888889])

In [9]:
# One row per component: whichever of coef_, feature_importances_ or ranking_ the fitted component exposes.
model.feature_ranking()

array([[ 0.        , -0.        ,  0.41497871,  0.        ],
       [-0.13847884,  0.00695683,  0.2958312 ,  0.50294318],
       [ 0.09266584,  0.00651644,  0.46567182,  0.4351459 ],
       [ 3.        ,  2.        ,  1.        ,  1.        ],
       [ 2.        ,  3.        ,  1.        ,  1.        ]])