# A More General Approach

In [2]:
import warnings  # Ignore deprecation warnings in sklearn (clutters results)
warnings.filterwarnings('ignore')

import pandas as pd
from sklearn import model_selection
from sklearn.datasets import load_iris

# Bring in all the models we're willing to consider:

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

In [3]:
# Using classes, we can abstract out much of the ugliness we touched on in our iris example.
# While it looks ugly compared to the imperative code we used earlier, there is no additional
# functionality here; this is purely an exercise in iteration.

In [4]:
class Selector():
        
    '''Non-operational bag of Methods for selecting between model types and feature inputs given a model.  Parent of the
    operant classes GenericClassifier and GenericRegressor'''    
    
    def ModelSelection(self, folds=10, rstate=420):
        
        cv_scores, cv_summary = {}, {}
        
        
        for name, model in self.Models.items():
            
            try:
            
                kfold = model_selection.KFold(n_splits=folds, random_state=rstate) 
                cv_result = model_selection.cross_val_score(model, self.X, self.y, cv=kfold, scoring='accuracy')
                cv_summary = "%s: %f (%f)" % (name, cv_result.mean(), cv_result.std())
                cv_scores[name] = cv_result       
                
            
            except Exception as e:
                
                cv_scores[name] = e
                cv_summary[name] = e
        
        self.cv_scores = cv_scores
        
        # Print Summary
        print('Model |', 'MSE |', 'Standard Deviation', '\n')
        for k, v in self.cv_scores.items():
    
            msg = "%s: %f (%f)" % (k, v.mean(), v.std())
            print(msg)

In [5]:
class GenericClassifier(Selector):
    
    def __init__(self, X, y):
        
        self.X = X
        self.y = y
        
        self.Models = {
                       
            'LR': LogisticRegression(),
            'KNN': KNeighborsClassifier(),
            'GBT': GradientBoostingClassifier(),
            'NB': GaussianNB(),
            'SVM': SVC(),
            'DT': DecisionTreeClassifier()
        
        }

In [6]:
# Read in the iris data
iris = load_iris()

# Create X (features) and Y (targets)
X = iris.data
Y, y = iris.target, iris.target

Classifier = GenericClassifier(X, y) # Create an abstract classifier that holds several algorithm types

In [7]:
Classifier.ModelSelection() # Show me the results of a standard cross validation on all discrete models I'm considering

Model | MSE | Standard Deviation 

LR: 0.880000 (0.148474)
KNN: 0.933333 (0.084327)
GBT: 0.926667 (0.096379)
NB: 0.946667 (0.058119)
SVM: 0.953333 (0.052068)
DT: 0.953333 (0.052068)


In [8]:
# That's it; all we have to do is spin up a classifier of the instance that reported highest accuracy (SVM in my case),
# retrain it on all our data (if appropriate) and we're done. 

In [9]:
# We could even write our helper classes to a library of their own, say 'HubDataTools', so that future users would 
# get all of the functionality listed above by simply entering: 

    # import HubDataTools as hub
    
    # classifier = hub.GenericClassifier()
    # classifier.fit(X, y)
    # etc...

In [10]:
# In principle, we can apply these methods directly to Hub problems and see if the out of the box solutions
# are superior to more manual methods, or do a longer term business analysis with data transformations and formal priors
# to get an intellectual defensible model.   

# A handful of off-the-cuff ideas for grins:

# Predicting the liklihood of types of OTM exceptions for orders before an order is released
# Regression analysis to infer maximum willingness to pay for specific services
# Assigning value to contracts based on historic profitability and service level data
# etc.

# There's *alot* more to model(and feature selection) than what's here, and I'm by no means an expert, but I think
# this is the general idea.