In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%%file utils.py

import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split

pd.options.mode.chained_assignment = None

error_messages = {
    "No clear target in training data": 
        ("The training data must have " 
         "exactly one more column than " 
         "the test data."),
    "Training data has too many columns":
        ("The training data has more "
         "than one column different than "
         "the testing data: %s"),
    "Column names inconsistent":
        ("The training columns and the "
         "test columns must have "
         "identical names excepts for "
         "the target variables. "
         "Different columns: %s")
    }

def X_y_split(X_train, X_test):
    """
    Determines which variables are the target
    and which are the features. Returns just
    The X and y data in the training dataset
    as a tuple.
    
    Example usage:
    X, y = learn.X_y_split(X_train, X_test)
    
    Parameters
    ----------
    X_train: pandas dataframe
        The data that has the target in it.
    
    X_test: pandas dataframe
        The data that does not have the target in it.
    """
    X_train = X_train.copy()
    n_train_cols = X_train.shape[1]
    n_test_cols = X_test.shape[1]
    
    if n_train_cols != n_test_cols + 1:
        msg = error_messages["No clear target in training data"]
        raise ValueError(msg)
        
    test_columns = set(X_test.columns)
    train_columns = set(X_train.columns)
    target_columns = train_columns - test_columns
    if len(target_columns) > 1:
        key = "Training data has too many columns"
        msg_ = error_messages[key]
        msg = msg_ % str(target_columns)
        raise ValueError(msg)

    extra_columns_in_test = test_columns - train_columns
    if extra_columns_in_test:
        key = "Column names inconsistent"
        msg_ = error_messages[key]
        msg = msg_ % str(extra_columns_in_test)
        raise ValueError(msg)     

    y_name = target_columns.pop()
    y = X_train.pop(y_name)
    return X_train, y


def X_to_train_test(X, target_name, test_size=.05):
    X = X.copy()
    y = X.pop(target_name)
    X_train, X_test, y_train, _ = train_test_split(X, 
                                                   y, 
                                                   test_size=test_size,
                                                   random_state=42)
    X_train[target_name] = y_train
    return X_train, X_test


def make_data(source):
    """
    Utility function to assist in loading different 
    sample datasets. Returns training data (that 
    contains the target) and testing data (that
    does not contain the target).
    
    Parameters
    ----------
    source: string, optional (default="boston")
        The specific dataset to load. Options:
        - Regression: "boston", "diabetes"
        - Classification: "cancer", "digits", "iris", "titanic"
    """
    if source == "boston":
        data = datasets.load_boston()
    elif source == "diabetes":
        data = datasets.load_diabetes()
        data["feature_names"] = ["f{}".format(v) 
                                 for v in range(10)]
    elif source == "cancer":
        data = datasets.load_breast_cancer()
    elif source == "digits":
        data = datasets.load_digits()
        data["feature_names"] = ["f{}".format(v) 
                                 for v in range(64)]        
    elif source == "iris":
        data = datasets.load_iris()
    elif source == "titanic":
        train_data_path = "../tests/test_data/titanic/train.csv"
        test_data_path = "../tests/test_data/titanic/test.csv"

        X_train = pd.read_csv(train_data_path)
        X_test = pd.read_csv(test_data_path)
        return X_train, X_test
    elif source == "abalone":
        train_data_path = "../tests/test_data/abalone_age/abalone.data"
        col_names = ["Sex", "Length", "Diameter", "Height", 
                     "Whole_weight", "Shucked_weight", 
                     "Viscera_weight", "Shell_weight", "Rings"]
        X = pd.read_csv(train_data_path, header=None, names=col_names)
        X["Rings"] = (X.Rings >= 9).astype(int)
        return X_to_train_test(X, "Rings")
    elif source == "bank_marketing":
        train_data_path = "../tests/test_data/bank_marketing/bank-full.csv"
        X = pd.read_csv(train_data_path, sep=";")
        return X_to_train_test(X, "y")
    elif source == "car_evaluation":
        train_data_path = "../tests/test_data/car_evaluation/car.data"
        col_names = ["buying", "maint", "doors", 
                     "persons", "lug_boot", "safety", "car_evaluation"]
        X = pd.read_csv(train_data_path, header=None, names=col_names)
        return X_to_train_test(X, "car_evaluation")
    elif source == "income":
        train_data_path = "../tests/test_data/census_income/adult.data"
        col_names = ["age", "workclass", "fnlwgt", 
                     "education", "education-num", 
                     "marital-status", "occupation", 
                     "relationship", "race", "sex",
                     "capital-gain", "capital-loss", 
                     "hours-per-week", "native-country",
                     "income"]
        train = pd.read_csv(train_data_path, skiprows=[0], 
                            header=None, names=col_names)
        test_data_path = "../tests/test_data/census_income/adult.test"
        test = pd.read_csv(test_data_path, skiprows=[0], 
                           header=None, names=col_names)
        X = pd.concat([train,test])
        return X_to_train_test(X, "income")
    elif source == "chess":
        train_data_path = "../tests/test_data/chess/kr-vs-kp.data"
        X = pd.read_csv(train_data_path, header=None)
        return X_to_train_test(X, 36)
    elif source == "mushrooms":
        train_data_path = "../tests/test_data/mushroom/agaricus-lepiota.data"
        X = pd.read_csv(train_data_path, header=None)
        return X_to_train_test(X, 0)
    elif source == "tictactoe":
        train_data_path = "../tests/test_data/tictactoe/tic-tac-toe.data"
        X = pd.read_csv(train_data_path, header=None)
        return X_to_train_test(X, 9)
    elif source == "wine-origin":
        train_data_path = "../tests/test_data/wine_origin/wine.data"
        X = pd.read_csv(train_data_path, header=None)
        return X_to_train_test(X, 0)
    elif source == "wine-quality":
        train_data_path = "../tests/test_data/wine_quality/winequality-white.csv"
        X = pd.read_csv(train_data_path, sep=";")
        X["quality"] = (X.quality > 5).astype(int)
        return X_to_train_test(X, "quality")
    else:
        raise ValueError("Not a valid dataset.")
    X = pd.DataFrame(data=data.data, 
                     columns=data.feature_names)
    y = pd.Series(data=data.target)
    X_train, X_test, y_train, _ = train_test_split(X, 
                                                   y, 
                                                   test_size=.05,
                                                   random_state=42)
    X_train["target"] = y_train
    return X_train, X_test


def is_categorical(x, 
                   max_classes="auto", 
                   strings_are_categorical=True):
    """
    Check if a target variable is a classification
    problem or a regression problem. Returns True if
    classification and False if regression. On failure,
    raises a ValueError.
    
    Parameters
    ----------
    x: array-like
        This should be the target variable. Ideally, 
        you should convert it to be numeric before 
        using this function.
        
    max_classes: int or float, optional (default="auto")
        Determines the max number of unique values
        there can be for it being a categorical variable
        
        If "auto" - sets it equal to 10% of the dataset or
            100, whichever is smaller
        If float - interprets as percent of dataset size
        If int - interprets as number of classes
        
    strings_are_categorical: bool, optional (default=True)
        If a variable is a string and cannot be coerced
        to a number, returns True regardless of the number
        of unique values. 
    """
    x = pd.Series(x)
    n = len(x)
    n_unique = len(x.unique())
    if max_classes == "auto":
        auto_n_classes = .05
        n_max_classes = int(n*auto_n_classes)
        max_classes = min(n_max_classes, 100)
    if isinstance(max_classes, float):
        n_max_classes = int(n*max_classes)
        max_classes = min(n_max_classes, int(n/2))
    # If x is numeric
    if x.dtype.kind in 'bifc':
        # If there are more than max_classes
        # classify as a regression problem
        if n_unique > max_classes:
            return False
        # If there are floating point numbers
        # classify as a regression problem
        decimals = (x - x.astype(int)).mean()
        if decimals > .01:
            return False
    if n_unique <= max_classes:
        return True
    try:
        x.astype(float)
        return False
    except ValueError:
        if strings_are_categorical:
            return True
        msg = ("Malformed data. "
               "Variable is non-numeric "
               "and there are more "
               "unique values than allowed "
               "by max_classes")
        raise ValueError(msg)
        
        
def categorical_columns(X):
    """Returns a list of all categorical columns"""
    cats = X.apply(is_categorical, axis=0)
    categoricals = cats[cats].index.tolist()
    return categoricals

Overwriting utils.py


In [3]:
%%file ../tests/test_utils.py
import unittest
from learn import utils

class TestUtils(unittest.TestCase):
    def test_making_data_simple(self):
        for data in ["boston", "iris"]:
            X_train, X_test = utils.make_data(source=data)
            train_cols = X_train.columns
            test_cols = X_test.columns
            # Training data should have exactly one additional column
            self.assertEqual(len(train_cols), len(test_cols)+1)
            # Ensure only one column name is different
            n_diff_cols = len(set(X_train.columns) - set(X_test.columns))
            self.assertEqual(1, n_diff_cols)
        
    def test_is_classification_problem(self):
        # Shorten function name
        icp = utils.is_categorical
        # Regression because floats
        result = icp([1.1, 2.1])
        self.assertEqual(result, 0)
        # Regression because number of unique
        result = icp([1,2,3,4])
        self.assertEqual(result, 0)
        # Classification because words
        result = icp(["cat"]*20+["dog"]*20)
        self.assertEqual(result, 1)
        # Classification because number of uniques
        result = icp([0]*20+[1]*20)
        self.assertEqual(result, 1)
        # Real data tests - Regression
        for dataset in ["boston", "diabetes"]:
            data = utils.make_data(source=dataset)
            X, y = utils.X_y_split(*data)
            self.assertEqual(icp(y), 0)
        # Real data tests - Classification
        for dataset in ["cancer", "digits", "iris"]:
            data = utils.make_data(source=dataset)
            X, y = utils.X_y_split(*data)
            self.assertEqual(icp(y), 1)
            
# class TestXYSplit(unittest.TestCase):
#     pass

if __name__ == '__main__':
    unittest.main()

Overwriting ../tests/test_utils.py


In [13]:
%%file forall.py

import pandas as pd
import numpy as np
from sklearn.preprocessing import label_binarize
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.linear_model import Perceptron
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.base import BaseEstimator, TransformerMixin, ClassifierMixin, RegressorMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import r2_score, roc_auc_score
from sklearn import metrics
from learn import utils

def categorical_unique_counts(X):
    """
    Returns series of categorical columns
    and count of unique values in each 
    column
    """
    cats = utils.categorical_columns(X)
    return X[cats].apply(pd.Series.nunique, axis=0)

def small_categorical(X, large_class_threshold=10):
    counts = categorical_unique_counts(X)
    mask = counts < large_class_threshold
    return counts[mask].index.tolist()

def large_categorical(X, large_class_threshold=10):
    counts = categorical_unique_counts(X)
    mask = counts>=large_class_threshold
    return counts[mask].index.tolist()

def word_to_num(word, max_char=5):
    """
    Assigns a number to a word that
    is the approximate sort order of
    the word
    
    Words with the same first max_char
    will have the same value.
    """
    word_val = 0
    for n, char in enumerate(str(word)):
        if n > max_char:
            break
        num = ord(char)/130
        den = 10**n
        total = num/den
        word_val += total
    return word_val

def word_size(word):
    """
    Returns the length of the word
    """
    return len(str(word))

class CategoricalImputer(BaseEstimator, TransformerMixin):
    """
    Adds a new "Missing" category for missing values
    """
    def __init__(self, fill_value="NULL"):
        self.fill_value = fill_value
    
    def fit(self, X, y=None):
        self.cat_cols = utils.categorical_columns(X)
        return self
    
    def transform(self, X, y=None):
        fill_values = {c:self.fill_value for c in self.cat_cols}
        return X.fillna(fill_values, axis=0)

class NumericImputer(BaseEstimator, TransformerMixin):
    """
    TODO: Add option for indicator variable if NaN
    """
    def __init__(self, method="mean"):
        self.method = method
        
    def fit(self, X, y=None):
        if self.method == "mean":
            self.fill_values = X.mean()
        if self.method == "max":
            self.fill_values = X.max() + 1
        return self
    
    def transform(self, X, y=None):
        X[~pd.np.isfinite(X)] = 0 #TODO: Fix
        return X.fillna(self.fill_values)

    
class KeepNumeric(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        self.numeric_columns = X.dtypes[X.dtypes != "object"].index.tolist()
        return self
        
    def transform(self, X, y=None):
        return X[self.numeric_columns]
    
    
class Categoricals(BaseEstimator, TransformerMixin):
    def __init__(self, large_class_threshold=10):
        """
        Anything large_class_threshold and larger
        will be treated as a categorical features
        with a large number of categories.
        """
        self.large_class_threshold = large_class_threshold
        
    def fit(self, X, y=None):
        lct = self.large_class_threshold
        self.small = small_categorical(X, lct)
        self.large = large_categorical(X, lct)
        self.all = self.small + self.large
        # Save category unique value counts for feature
        # engineering
        self.value_counts = {}
        for col in self.large:
            self.value_counts[col] = X[col].value_counts()
        return self
    
    def transform(self, X, y=None):
        # Add sort value based features
        for col in self.all:
            new_col = X[col].apply(word_to_num)
            col_name = str(col)+"__sort"
            X[col_name] = new_col
        
        for col in self.large:
            # Add count based features
            counts = self.value_counts[col]
            X = X.join(counts, 
                       on=col, 
                       rsuffix="__counts")
            # Add word length features
            new_col = X[col].apply(word_size)
            col_name = str(col)+"__length"
            X[col_name] = new_col
        return X

    
class Standardize(BaseException, TransformerMixin):
    def __init__(self):
        pass
    
    def fit(self, X, y):
        self.mean = X.mean(axis=0)
        self.std = X.std(axis=0)
        return self
    
    def transform(self, X, y=None):
        return (X - self.mean)/self.std
    

class DropBadColumns(BaseEstimator, TransformerMixin):
    def __init__(self):
        """
        Drops columns with:
        * NaN standard deviation
        * Zero standard deviation
        """
        pass
    
    def fit(self, X, y=None):
        std = X.std(axis=0)
        null_std = std.isnull()
        zero_std = std == 0
        bad_std_cols = std[null_std | zero_std].index.values.tolist()
        self.to_drop = bad_std_cols
        return self
    
    def transform(self, X):
        return X.drop(self.to_drop, axis=1)
    
    
def regression_metrics(y, y_hat):
    exp_var = metrics.explained_variance_score(y, y_hat)
    mae = metrics.mean_absolute_error(y, y_hat)
    mse = metrics.mean_squared_error(y, y_hat)
    medae = metrics.median_absolute_error(y, y_hat)
    r2 = metrics.r2_score(y, y_hat)
    results = {
        "Explained variance score": exp_var,
        "Mean absolute error": mae,
        "Mean squared error": mse,
        "Root mean squared error": mse**.5,
        "Median absolute error": medae,
        "R^2 score": r2
    }
    return results


class RegressionPredict(BaseEstimator):
    def __init__(self, time_to_compute=100):
        self.time_to_compute = time_to_compute
        
    def fit(self, X, y):
        self.lr = LinearRegression()
        self.lr.fit(X, y)
        lr_pred = cross_val_predict(self.lr, X, y, cv=10, n_jobs=-1).reshape(-1, 1)
        
        self.rf = RandomForestRegressor(n_estimators=self.time_to_compute, 
                                        random_state=42, 
                                        oob_score=True, 
                                        n_jobs=-1)
        self.rf.fit(X, y)
        rf_pred = self.rf.oob_prediction_.reshape(-1, 1)

        layer_1 = np.hstack([
            lr_pred, 
            rf_pred
        ])

        self.lr_1 = LinearRegression()
        self.generalized_predictions = cross_val_predict(self.lr_1, 
                                                         layer_1, 
                                                         y, 
                                                         cv=10, 
                                                         n_jobs=-1, 
                                                         method="predict")
        self.lr_1.fit(layer_1, y)
        return self
    
    def predict(self, X):
        lr_pred = self.lr.predict(X).reshape(-1, 1)
        rf_pred = self.rf.predict(X).reshape(-1, 1)
        layer_1 = np.hstack([
            lr_pred, 
            rf_pred
        ])
        final_predictions = self.lr_1.predict(layer_1)
        return final_predictions
    

class Regression(BaseEstimator, RegressorMixin):
    def __init__(self, time_to_compute=100):
        self.time_to_compute = time_to_compute
        
    def fit(self, X, y):
        self.model = RegressionPredict(time_to_compute=self.time_to_compute)
        self.model.fit(X, y)
        self.oob_predictions = self.model.generalized_predictions
        
        self.all_metrics = regression_metrics(y, self.oob_predictions)
        self.score_type = "R^2*100"
        self.score = int(self.all_metrics["R^2 score"]*100)
        self.display_score = "%d/100" % self.score
        self.understandable_metric_name = "Average prediction error"
        self.understandable_metric_description = "On average, the predictions will be off by this amount"
        self.understandable_metric_value = self.all_metrics["Mean absolute error"]
        return self
        
    def predict(self, X):
        predictions = self.model.predict(X)
        return predictions
    

def classification_metrics(y, y_hat):
    results = {}
    y_prob = y_hat[:, 1]
    y_pred = (y_prob > .5).astype(int)
    y_bin = label_binarize(y, 
                           sorted(pd.Series(y).unique()))
    binary = y_bin.shape[1] == 1
    if binary:
        # Fix the binary case returning a column vector
        y_bin = np.hstack((-(y_bin - 1), y_bin))
    ave_precision = metrics.average_precision_score(y_bin, y_hat)
    auc = metrics.roc_auc_score(y_bin, y_hat)
    log_loss = metrics.log_loss(y_bin, y_hat)
    data = {
        "Accuracy": (y_hat.argmax(axis=1) == y).mean(),
        "Average precision score": ave_precision,
        "AUC": auc,
        "Log loss (cross-entropy loss)": log_loss
            }
    if binary:
        brier = metrics.brier_score_loss(y, y_prob)
        f1 = metrics.f1_score(y, y_pred)
        cks = metrics.cohen_kappa_score(y, y_pred)
        hamming = metrics.hamming_loss(y, y_pred)
        hinge = metrics.hinge_loss(y, y_pred)
        jaccard = metrics.jaccard_similarity_score(y, y_pred)
        matt = metrics.matthews_corrcoef(y, y_pred)
        precision = metrics.precision_score(y, y_pred)
        recall = metrics.recall_score(y, y_pred)
        binary_data = {
            "Brier score loss": brier,
            "F1 score": f1,
            "Cohen's kappa": cks,
            "Average Hamming loss": hamming,
            "Hinge loss": hinge,
            "Jaccard similarity coefficient": jaccard,
            "Matthews correlation coefficient": matt,
            "Precision": precision,
            "Recall": recall
            }
        data.update(binary_data)
    return data


class ClassificationPredict(BaseEstimator):
    def __init__(self, time_to_compute=100):
        self.time_to_compute = time_to_compute
        
    def fit(self, X, y):
        self.lr = LogisticRegression(C=1)
        self.lr.fit(X, y)
        lr_pred = cross_val_predict(self.lr, X, y, cv=10, n_jobs=-1, method="predict_proba")
        
        self.rf = RandomForestClassifier(n_estimators=self.time_to_compute, random_state=42, oob_score=True, n_jobs=-1)
        self.rf.fit(X, y)
        rf_pred = self.rf.oob_decision_function_

        layer_1 = np.hstack([
            lr_pred, 
            rf_pred
        ])

        self.lr_1 = LogisticRegression(C=1)
        self.generalized_predictions = cross_val_predict(self.lr_1, 
                                                         layer_1, 
                                                         y, 
                                                         cv=10, 
                                                         n_jobs=-1, 
                                                         method="predict_proba")
        self.lr_1.fit(layer_1, y)
        return self
    
    def predict_proba(self, X):
        lr_pred = self.lr.predict_proba(X)
        rf_pred = self.rf.predict_proba(X)
        layer_1 = np.hstack([
            lr_pred, 
            rf_pred
        ])
        final_predictions = self.lr_1.predict_proba(layer_1)
        return final_predictions

    def predict(self, X):
        predictions = self.predict_proba(X)
        return predictions.argmax(1)
    

class Classification(BaseEstimator, ClassifierMixin):
    def __init__(self, time_to_compute=100):
        """
        """
        self.time_to_compute = time_to_compute
        
    def fit(self, X, y):
        y = pd.Series(y)
        self.n_classes = len(y.unique())
        self.label_encoder = None
        if y.dtype == "object":
            self.label_encoder = LabelEncoder().fit(y)
            y = self.label_encoder.transform(y)

        self.model = ClassificationPredict(time_to_compute=self.time_to_compute)
        self.model.fit(X, y)
        self.oob_predictions = self.model.generalized_predictions
        
        self.all_metrics = classification_metrics(y, 
                                                  self.oob_predictions)
        self.score_type = "(AUC - .5)*200"
        self.score = int((self.all_metrics["AUC"] - .5)*200)
        self.display_score = "%d/100" % self.score
        self.understandable_metric_name = "Accuracy"
        self.understandable_metric_description = "Each prediction is expected to be correct this often"
        self.understandable_metric_value = self.all_metrics["Accuracy"]
        return self
        
    def predict(self, X):
        predictions = self.model.predict(X)
        if self.label_encoder is not None:
            predictions = self.label_encoder.inverse_transform(predictions)
        return predictions

    
class All():
    def __init__(self, time_to_compute=None):
        self.time_to_compute = time_to_compute or 100
        
    def fit(self, X, y):
        X.columns = [str(col) for col in X.columns]
        # Determine type of problem
        self.classification = utils.is_categorical(y, max_classes=.1)
        if self.classification:
            model = Classification(time_to_compute=self.time_to_compute)
        else:
            model = Regression(time_to_compute=self.time_to_compute)
        # Create pipeline
        steps = [("categorical_imputation", CategoricalImputer()),
                 ("make_categoricals_numeric", Categoricals()),
                 ("keep_only_numeric", KeepNumeric()),
                 ("numeric_imputation", NumericImputer()),
                 ("drop_bad_columns", DropBadColumns()),
                 ("scale", Standardize()),
                 ("model", model)]
        pipe = Pipeline(steps)
        pipe.fit(X, y)
        self.model = pipe
        self.score = pipe.named_steps["model"].score
        self.score_type = pipe.named_steps["model"].score_type
        self.display_score = pipe.named_steps["model"].display_score
        self.all_metrics = pipe.named_steps["model"].all_metrics
        self.understandable_metric_name = pipe.named_steps["model"].understandable_metric_name
        self.understandable_metric_description = pipe.named_steps["model"].understandable_metric_description
        self.understandable_metric_value = pipe.named_steps["model"].understandable_metric_value
        return self
        
    def predict(self, X):
        X.columns = [str(col) for col in X.columns]
        predictions = self.model.predict(X)
        return predictions

Overwriting forall.py


In [14]:
import os
import sys
from pprint import pprint

# Allows importing of local modules
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from learn import forall as fa
from learn import utils

In [15]:
%%time
total_score = 0

for dataset in ["boston", "diabetes", 
                "cancer", "digits", 
                "iris", "titanic", 
                "abalone", "bank_marketing",
                "car_evaluation", "income",
                "chess", "mushrooms",
                "tictactoe",  "wine-origin",
                "wine-quality"
               ]:
    # In the flask app:
    X_train, X_test = utils.make_data(source=dataset)
    X, y = utils.X_y_split(X_train=X_train, X_test=X_test)
    model = fa.All()
    model.fit(X, y)
    predictions = model.predict(X_test)
    print("%s: %s (%s)" % (dataset, model.display_score, model.score_type))
    
    # Benchmarking
    total_score += model.score
    
print(total_score)

boston: 87/100 (R^2*100)
diabetes: 47/100 (R^2*100)
cancer: 98/100 ((AUC - .5)*200)
digits: 99/100 ((AUC - .5)*200)
iris: 97/100 ((AUC - .5)*200)
titanic: 73/100 ((AUC - .5)*200)
abalone: 81/100 ((AUC - .5)*200)
bank_marketing: 85/100 ((AUC - .5)*200)
car_evaluation: 99/100 ((AUC - .5)*200)
income: 51/100 ((AUC - .5)*200)
chess: 99/100 ((AUC - .5)*200)
mushrooms: 100/100 ((AUC - .5)*200)
tictactoe: 99/100 ((AUC - .5)*200)
wine-origin: 100/100 ((AUC - .5)*200)
wine-quality: 81/100 ((AUC - .5)*200)
1296
CPU times: user 46.5 s, sys: 2.62 s, total: 49.1 s
Wall time: 1min 16s


In [16]:
%%time
total_score = 0

for dataset in ["boston", "diabetes", 
                "cancer", "digits", 
                "iris", "titanic", 
                "abalone", "bank_marketing",
                "car_evaluation", "income",
                "chess", "mushrooms",
                "tictactoe",  "wine-origin",
                "wine-quality"
               ]:
    # In the flask app:
    X_train, X_test = utils.make_data(source=dataset)
    X, y = utils.X_y_split(X_train=X_train, X_test=X_test)
    model = fa.All(time_to_compute=200)
    model.fit(X, y)
    predictions = model.predict(X_test)
    print("%s: %s (%s)" % (dataset, model.display_score, model.score_type))
    
    # Benchmarking
    total_score += model.score
    
print(total_score)

boston: 87/100 (R^2*100)
diabetes: 47/100 (R^2*100)
cancer: 98/100 ((AUC - .5)*200)
digits: 99/100 ((AUC - .5)*200)
iris: 97/100 ((AUC - .5)*200)
titanic: 75/100 ((AUC - .5)*200)
abalone: 81/100 ((AUC - .5)*200)
bank_marketing: 86/100 ((AUC - .5)*200)
car_evaluation: 99/100 ((AUC - .5)*200)
income: 51/100 ((AUC - .5)*200)
chess: 99/100 ((AUC - .5)*200)
mushrooms: 100/100 ((AUC - .5)*200)
tictactoe: 99/100 ((AUC - .5)*200)
wine-origin: 100/100 ((AUC - .5)*200)
wine-quality: 82/100 ((AUC - .5)*200)
1300
CPU times: user 1min 14s, sys: 3.61 s, total: 1min 17s
Wall time: 1min 27s


In [17]:
%%time
for dataset in ["boston", "diabetes", 
                "cancer", "digits", 
                "iris", "titanic", 
                "abalone", "bank_marketing",
                "car_evaluation", "income",
                "chess", "mushrooms",
                "tictactoe",  "wine-origin",
                "wine-quality"
               ]:
    # In the flask app:
    X_train, X_test = utils.make_data(source=dataset)
    X, y = utils.X_y_split(X_train=X_train, X_test=X_test)
    model = fa.All(time_to_compute=100)
    model.fit(X, y)
    predictions = model.predict(X_test)
    print(dataset)
    print("Score: %s" % model.display_score)
    print(model.understandable_metric_name, "=", model.understandable_metric_value)
    pprint(model.all_metrics)
    print()

boston
Score: 87/100
Average prediction error = 2.18994700093
{'Explained variance score': 0.87649582271482873,
 'Mean absolute error': 2.1899470009311672,
 'Mean squared error': 10.481110565137438,
 'Median absolute error': 1.5011250912032263,
 'R^2 score': 0.87649574580146905,
 'Root mean squared error': 3.2374543340620945}

diabetes
Score: 47/100
Average prediction error = 44.6849187927
{'Explained variance score': 0.4738710017729707,
 'Mean absolute error': 44.684918792690965,
 'Mean squared error': 3062.3986831492375,
 'Median absolute error': 39.832219870347757,
 'R^2 score': 0.47387075588059335,
 'Root mean squared error': 55.338943639621796}

cancer
Score: 98/100
Accuracy = 0.977777777778
{'AUC': 0.99214840253012226,
 'Accuracy': 0.97777777777777775,
 'Average Hamming loss': 0.022222222222222223,
 'Average precision score': 0.99223228995832868,
 'Brier score loss': 0.017105412294150812,
 "Cohen's kappa": 0.95235364185820792,
 'F1 score': 0.98235294117647076,
 'Hinge loss': 0.39

### Benchmarks