# Machine Learning Supervised

## Functions MLS

In [1]:
import math
from sklearn.model_selection import (cross_val_score, train_test_split)
from sklearn.metrics import mean_squared_error

#
def split_set(X, y, test_size=0.33, random_state_value=5):
    
    return train_test_split(X, y, test_size=test_size, 
                            random_state=random_state_value)

#
def ml_getting(model, param, X_train, y_train, X_test, y_test):
    
    ml_model = model(**param)
    
    ml_model.fit(X_train, y_train.values.ravel())
    ml_predict = ml_model.predict(X_test)
    ml_rmse = math.sqrt(
        mean_squared_error(ml_predict, y_test.values.ravel()))
    
    ml_scores = cross_val_score(ml_model, X_test, y_test.values.ravel()).mean() 
    
    return [model.__name__, param, ml_rmse, ml_scores]

#
def ml_run(models, parameters, X, y):
    
    X_train, X_test, y_train, y_test = split_set(X, y)
    
    outputs = []
    for i, model in enumerate(models):
        for param in parameters[i]:
            try:
                outputs.append(
                    ml_getting(model, param, X_train, y_train, X_test, y_test)
                )
            except ValueError:
                print('# ValueError:')
                print(model)
                print(param)
                print()
            
    return outputs

## Plot

In [9]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

def plot_sgdclassifier(X, y, clf_fit):
    
    plt.figure()
    
    h = .02  # step size in the mesh
    
    # create a mesh to plot in
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    Z = clf_fit.predict(np.c_[xx.ravel(), yy.ravel()])
    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
    plt.axis('tight')

    # Plot also the training points
    label_y = np.unique(y)
    for i, color in zip(clf.classes_, colors):
        idx = np.where(y == i)
        plt.scatter(X[idx, 0], X[idx, 1], c=color, label=label_y[i],
                    cmap=plt.cm.Paired, edgecolor='black', s=20)
    plt.title("Decision surface of multi-class SGD")
    plt.axis('tight')

    # Plot the three one-against-all classifiers
    xmin, xmax = plt.xlim()
    ymin, ymax = plt.ylim()
    coef = clf_fit.coef_
    intercept = clf_fit.intercept_

    def plot_hyperplane(c, color):
        def line(x0):
            return (-(x0 * coef[c, 0]) - intercept[c]) / coef[c, 1]

        plt.plot([xmin, xmax], [line(xmin), line(xmax)],
                 ls="--", color=color)

    for i, color in zip(clf_fit.classes_, colors):
        plot_hyperplane(i, color)
        
    plt.legend()
        
    plt.show();

## Example

In [14]:
import pandas as pd
import numpy as np

from sklearn.datasets import load_iris
from sklearn.linear_model import (SGDClassifier, LogisticRegression)

### Generation parameters for models

In [19]:
max_iteration = [100, 200, 500]
models = [LogisticRegression, SGDClassifier]

### SGDClassifier

In [12]:
# http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html

## Parameter names
sgd_param_name = ['loss', 'penalty']

## Diferents elements by parameter name
sgd_loss = ['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron', 'squared_loss', 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive']
sgd_penalty = ['none', 'l2', 'l1', 'elasticnet']

## Generate list with dict type with configurate paramenter
sgd_param = [{'loss' : i, 'penalty' : j} for i in sgd_loss for j in sgd_penalty]
[x.update({'max_iter' : y}) for y in max_iteration for x in sgd_param]

print()




### LogisticRegression

In [18]:
# http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html#sklearn.linear_model.LogisticRegression
## Parameter names
lr_param_name = ['penalty', 'solver']

## Diferents elements by parameter name
lr_penalty = ['l2', 'l1']
lr_solver = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']

## Generate list with dict type with configurate paramenter
lr_param = [{'solver' : i, 'penalty' : j} for i in lr_solver for j in lr_penalty]
[x.update({'max_iter' : y}) for y in max_iteration for x in lr_param]

print()




In [None]:
# Parameters config for LR and SGD
parameters = [lr_param, sgd_param]

In [20]:
# loading data

def change(number):
    try:
        return iris.target_names[number]
    except IndexError:
        return "error"

df_iris = pd.DataFrame(load_iris().data, columns = load_iris().feature_names)
df_iris.columns = ['sepal_length', 'sepal_width','petal_length', 'petalal_width']
iris = load_iris()
iris_target = pd.DataFrame(iris.target, columns=['target'])#.applymap(change)

df = pd.concat([df_iris, iris_target], axis=1)

In [21]:
# Run models
outputs = ml_run(models, parameters, df_iris, iris_target)

# ValueError:
<class 'sklearn.linear_model.logistic.LogisticRegression'>
{'solver': 'newton-cg', 'penalty': 'l1', 'max_iter': 500}

# ValueError:
<class 'sklearn.linear_model.logistic.LogisticRegression'>
{'solver': 'lbfgs', 'penalty': 'l1', 'max_iter': 500}

# ValueError:
<class 'sklearn.linear_model.logistic.LogisticRegression'>
{'solver': 'sag', 'penalty': 'l1', 'max_iter': 500}





In [22]:
# Create a DataFrame with all models and parameters as well as RMSE and R^2
df_ml = pd.DataFrame(
    outputs, columns=['model', 'parameters', 'rmse', 'r2']
)

df_ml.sort_values(by=['r2'], ascending=False).reset_index(drop=True)

Unnamed: 0,model,parameters,rmse,r2
0,SGDClassifier,"{'loss': 'perceptron', 'penalty': 'none', 'max...",0.2,0.924837
1,SGDClassifier,"{'loss': 'log', 'penalty': 'none', 'max_iter':...",0.2,0.924837
2,SGDClassifier,"{'loss': 'modified_huber', 'penalty': 'l1', 'm...",0.2,0.906318
3,SGDClassifier,"{'loss': 'squared_hinge', 'penalty': 'none', '...",0.2,0.906318
4,SGDClassifier,"{'loss': 'perceptron', 'penalty': 'l1', 'max_i...",0.2,0.906318
5,LogisticRegression,"{'solver': 'liblinear', 'penalty': 'l2', 'max_...",0.244949,0.905229
6,SGDClassifier,"{'loss': 'hinge', 'penalty': 'none', 'max_iter...",0.244949,0.9
7,LogisticRegression,"{'solver': 'lbfgs', 'penalty': 'l2', 'max_iter...",0.141421,0.896296
8,LogisticRegression,"{'solver': 'newton-cg', 'penalty': 'l2', 'max_...",0.141421,0.896296
9,LogisticRegression,"{'solver': 'liblinear', 'penalty': 'l1', 'max_...",0.282843,0.885621


In [None]:
plot_sgdclassifier(df_iris, iris_target)