In [1]:
import sys
import warnings

sys.path.append("../src") # relative path to src dir

from model.model import * 

In [2]:
# import numpy as np
# import matplotlib.pyplot as plt
from sklearn.datasets import load_iris 
from sklearn.model_selection import train_test_split
# from sklearn.linear_model import Lasso, Ridge
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.svm import SVC
# from sklearn.feature_selection import RFE

In [26]:
# Load the iris data and hold out 30 % of the samples.
# X / y are the training part, X_t / y_t the held-out part.
dataset = load_iris()
X, X_t, y, y_t = train_test_split(
    dataset['data'],
    dataset['target'],
    test_size=0.30,
    random_state=42,  # fixed seed: without it every run produces a different split
)

In [325]:
# Registries mapping the short names used in model descriptions to classes.
available_components = dict(lasso=Lasso, ridge=Ridge, forrest=RandomForestClassifier)
available_wrappers = dict(rfe=RFE)
available_classifiers = dict(svm=SVC)
# Attribute names that Model.feature_ranking looks for on a component.
extraction_methods = [
    "coef_",
    "feature_importances_",
    "ranking_",
]

In [32]:
class MissclassificationRate:
    """Validation metric: share of predictions that miss the label after rounding."""

    @staticmethod
    def validate(X_pred, y_t):
        """Return the fraction of entries where ``round(X_pred) != y_t``.

        Declared as a static method so that it works both on the class itself
        (``MissclassificationRate.validate(...)``) and on an instance; without
        the decorator an instance call would pass the instance as ``X_pred``.

        Parameters
        ----------
        X_pred : numpy array of predictions; rounded before comparison.
        y_t : array of reference labels, compared element-wise with ``X_pred``.

        Returns
        -------
        Number of mismatches divided by ``X_pred.shape[0]``.
        """
        return np.sum(np.round(X_pred) != y_t)/X_pred.shape[0]

class JsonIOMixin:
    """Mixin converting ``self.components`` to and from a plain-dict description.

    The description has the shape
    ``{"components": {name: {type_key: {param: value, ...}}}}`` where
    ``type_key`` is a key of ``available_components`` or ``available_wrappers``.
    For a wrapper, the parameter dict additionally holds exactly one entry whose
    key names the wrapped estimator (a key of ``available_components`` or
    ``available_classifiers``) and whose value is that estimator's parameters.
    """

    def from_json(self, model_descr):
        """Rebuild ``self.components`` from a description dict and return ``self``.

        Any previously held components are discarded. Only the first
        ``type_key`` of each component description is used.

        A wrapper whose parameters do not name exactly one known estimator is
        skipped with a ``UserWarning``.

        Raises
        ------
        KeyError
            If a non-wrapper ``type_key`` is not in ``available_components``.
        """
        self.components = {}
        available_wrapable = {**available_components, **available_classifiers}
        for name, description in model_descr["components"].items():
            component, cmp_params = next(iter(description.items()))
            if component in available_wrappers:
                wrapper = available_wrappers[component]
                wrapable_keys = cmp_params.keys() & available_wrapable.keys()
                if len(wrapable_keys) != 1:
                    warnings.warn(
                        "Component {0!r}: wrapper {1!r} needs exactly one wrapped "
                        "estimator but {2} were given; component skipped.".format(
                            name, component, len(wrapable_keys)))
                    continue
                wrapable_key = wrapable_keys.pop()
                wrapable = available_wrapable[wrapable_key](**cmp_params[wrapable_key])
                # Everything except the nested estimator description is passed
                # straight to the wrapper's constructor.
                parsed_params = {k: v for k, v in cmp_params.items() if k != wrapable_key}
                parsed_params["estimator"] = wrapable
                self.components[name] = wrapper(**parsed_params)
            else:
                self.components[name] = available_components[component](**cmp_params)
        return self

    def to_json(self):
        """Return a description dict for ``self.components``.

        The result is a plain ``dict``, not a JSON string. Each component is
        described by the instance attributes that share a name with a declared
        parameter of its ``__init__``.

        A component whose class is in neither registry gets an empty entry, and
        a wrapper around an unregistered estimator gets an empty parameter dict;
        both cases emit a ``UserWarning``.
        """
        def reverse_dict(mapping):
            return {value: key for key, value in mapping.items()}

        def get_arguments(component):
            init_code = component.__init__.__code__
            # co_varnames lists the declared parameters first and the locals of
            # __init__ after them; only the declared parameters may be fed back
            # to the constructor by from_json.
            n_declared = init_code.co_argcount + init_code.co_kwonlyargcount
            constructor_argument_names = set(init_code.co_varnames[:n_declared])
            return {k: v for k, v in component.__dict__.items() if k in constructor_argument_names}

        rev_components = reverse_dict(available_components)
        rev_wrappers = reverse_dict(available_wrappers)
        rev_wrappable = reverse_dict({**available_components, **available_classifiers})
        output = {'components': {}}
        for name, component in self.components.items():
            entry = {}
            output['components'][name] = entry
            component_key = rev_components.get(component.__class__)
            if component_key is not None:
                entry[component_key] = get_arguments(component)
                continue
            component_key = rev_wrappers.get(component.__class__)
            if component_key is None:
                warnings.warn(
                    "Component {0!r}: unsupported component/wrapper class {1}; "
                    "left empty in the description.".format(
                        name, component.__class__.__name__))
                continue
            serialised = {}
            entry[component_key] = serialised
            wrapper_params = get_arguments(component)
            wrapped = wrapper_params["estimator"]
            wrapped_key = rev_wrappable.get(wrapped.__class__)
            if wrapped_key is None:
                warnings.warn(
                    "Component {0!r}: unsupported wrapped estimator class {1}; "
                    "wrapper parameters left empty.".format(
                        name, wrapped.__class__.__name__))
                continue
            serialised[wrapped_key] = get_arguments(wrapped)
            for k, v in wrapper_params.items():
                # Compare by value: identity comparison against a string
                # literal only works by accident of interning.
                if k != 'estimator':
                    serialised[k] = v
        return output
    
class Model(JsonIOMixin):
    """Named collection of estimators that are fitted and queried together.

    Attributes
    ----------
    components : dict mapping a component name to an estimator object.
    ensemble : ensemble method; stored only, not used by any method here.
    validation : object with a ``validate(predictions, y_t)`` callable, or None.
    """

    def __init__(self, components=None, ensemble=None, validation=None):
        """Create a model.

        Parameters
        ----------
        components : dict, list, tuple or None
            A dict is stored as given (by reference). A list or tuple is turned
            into a dict keyed ``"Component_0"``, ``"Component_1"``, ...
            ``None`` means no components.
        ensemble : stored as ``self.ensemble``.
        validation : stored as ``self.validation``.

        Raises
        ------
        TypeError
            If ``components`` is of any other type.
        """
        self.ensemble = ensemble
        self.validation = validation
        if components is None:
            self.components = {}
        elif isinstance(components, dict):
            self.components = components
        elif isinstance(components, (list, tuple)):
            self.components = {"Component_{0}".format(index): component
                               for index, component in enumerate(components)}
        else:
            raise TypeError(
                "components must be a dict, list or tuple, got {0}".format(
                    type(components).__name__))

    @property
    def ensemble_method(self):
        """Alias of ``ensemble``, the attribute name ``set_ensemble`` used to write."""
        return self.ensemble

    @ensemble_method.setter
    def ensemble_method(self, value):
        self.ensemble = value

    def add_component(self, new_component, name=None):
        """Register ``new_component`` under ``name``.

        When ``name`` is omitted, the first unused ``"Component_<n>"`` key
        starting at ``n = len(self.components)`` is generated. An explicitly
        given name that already exists is replaced.
        """
        if name is None:
            index = len(self.components)
            name = "Component_{0}".format(index)
            # Skip generated names that are already taken by user-named entries.
            while name in self.components:
                index += 1
                name = "Component_{0}".format(index)
        self.components[name] = new_component

    def set_ensemble(self, ensemble_method):
        """Store the ensemble method in ``self.ensemble``, the attribute set by ``__init__``."""
        self.ensemble = ensemble_method

    def set_validation(self, validation):
        """Store the validation object used by ``validate``."""
        self.validation = validation

    def fit(self, X, y):
        """Call ``fit(X, y)`` on every component."""
        for component in self.components.values():
            component.fit(X, y)

    def predict(self, X):
        """Return an array holding each component's ``predict(X)``, one entry per component."""
        return np.array([component.predict(X) for component in self.components.values()])

    def validate(self, X_t, y_t):
        """Score every component's predictions on ``X_t`` against ``y_t``.

        Returns an array with one ``self.validation.validate(...)`` result per
        component.

        Raises
        ------
        ValueError
            If no validation object has been set.
        """
        if self.validation is None:
            raise ValueError(
                "No validation specified; pass validation=... or call set_validation() first.")
        predictions = self.predict(X_t)
        return np.array([self.validation.validate(pred, y_t) for pred in predictions])

    def feature_ranking(self):
        """Return an array with one feature-score entry per component.

        For each component the first attribute named in ``extraction_methods``
        that can be read is used, so the choice is deterministic when a
        component exposes more than one of them.

        Raises
        ------
        KeyError
            If a component exposes none of the attributes.
        """
        def _get_proper_attribute(component):
            for attribute_name in extraction_methods:
                try:
                    return getattr(component, attribute_name)
                except AttributeError:
                    continue
            raise KeyError(
                "{0} exposes none of {1}".format(
                    component.__class__.__name__, extraction_methods))
        return np.array([_get_proper_attribute(component) for component in self.components.values()])

    def __repr__(self):
        """Return a listing of the (name, component) pairs followed by an ensemble placeholder."""
        return "Components:\n\n" + "\n\n".join([str(c) for c in self.components.items()]) + "\n\nEnsemble method:\n\n[TODO]"

In [27]:
# Five components: three plain estimators and two RFE wrappers.
model = Model(components=[
    Lasso(alpha=0.1),
    Ridge(alpha=0.1),
    RandomForestClassifier(n_estimators=100),
    RFE(estimator=SVC(gamma="scale", kernel="linear"), verbose=1),
    RFE(estimator=Ridge(alpha=0.1)),
])
model

Components:

('Component_0', Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False))

('Component_1', Ridge(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001))

('Component_2', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False))

('Component_3', RFE(estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                  decisio

In [28]:
# Describe the model's components as a plain dict, then display it.
json_test = model.to_json()
json_test

{'components': {'Component_0': {'lasso': {'alpha': 0.1,
    'fit_intercept': True,
    'normalize': False,
    'precompute': False,
    'max_iter': 1000,
    'copy_X': True,
    'tol': 0.0001,
    'warm_start': False,
    'positive': False,
    'random_state': None,
    'selection': 'cyclic'}},
  'Component_1': {'ridge': {'alpha': 0.1,
    'fit_intercept': True,
    'normalize': False,
    'copy_X': True,
    'max_iter': None,
    'tol': 0.001,
    'solver': 'auto',
    'random_state': None}},
  'Component_2': {'forrest': {'n_estimators': 100,
    'bootstrap': True,
    'oob_score': False,
    'n_jobs': None,
    'random_state': None,
    'verbose': 0,
    'warm_start': False,
    'class_weight': None,
    'criterion': 'gini',
    'max_depth': None,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'min_weight_fraction_leaf': 0.0,
    'max_features': 'auto',
    'max_leaf_nodes': None,
    'min_impurity_decrease': 0.0,
    'min_impurity_split': None}},
  'Component_3': {'rfe':

In [29]:
# Round trip: rebuild the components from the description.
# Note that this rebinds `model` to the newly built object.
model = Model().from_json(json_test)
model

Components:

('Component_0', Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False))

('Component_1', Ridge(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001))

('Component_2', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False))

('Component_3', RFE(estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                  decisio

In [30]:
# Fit every component on the training split.
model.fit(X, y)

Fitting estimator with 4 features.
Fitting estimator with 3 features.


In [31]:
# The metric class itself (not an instance) is registered as the validation object;
# validate() then scores each component's predictions on the held-out split.
model.set_validation(MissclassificationRate)
model.validate(X_t, y_t)

NameError: name 'y_t' is not defined

In [322]:
# One row per component, read from an attribute named in extraction_methods.
model.feature_ranking()

array([[ 0.        , -0.        ,  0.40912833,  0.        ],
       [-0.16387339,  0.01387292,  0.25413989,  0.62046622],
       [ 0.12863782,  0.02991082,  0.42897123,  0.41248013],
       [ 3.        ,  2.        ,  1.        ,  1.        ],
       [ 2.        ,  3.        ,  1.        ,  1.        ]])