In [2]:
import numpy as np
from sklearn import datasets

# Load the iris dataset and keep the feature matrix and the class labels
# under separate names for the cells below.
iris = datasets.load_iris()
iris_X, iris_y = iris.data, iris.target

# Show the distinct class labels present in the target vector.
np.unique(iris_y)

array([0, 1, 2])

In [18]:
# Hold out 10 randomly chosen iris samples for testing and train on the rest.
# Seeding NumPy's global RNG makes the shuffle reproducible.
np.random.seed(0)
indices = np.random.permutation(len(iris_X))
train_idx, test_idx = indices[:-10], indices[-10:]

iris_X_train, iris_y_train = iris_X[train_idx], iris_y[train_idx]
iris_X_test, iris_y_test = iris_X[test_idx], iris_y[test_idx]

# Fit a nearest-neighbor classifier (default settings) on the training split.
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(iris_X_train, iris_y_train)

# Predict the held-out samples and show predictions next to the true labels.
p = knn.predict(iris_X_test)
for label, values in (('Prediction: ', p), ('Actual:     ', iris_y_test)):
    print(label, values)

Prediction:  [1 2 1 0 0 0 2 1 2 0]
Actual:      [1 1 1 0 0 0 2 1 2 0]


In [26]:
# Linear model: ordinary least-squares regression on the diabetes dataset.

# The last 20 samples are held out for testing; everything before them is
# used for fitting.
diabetes = datasets.load_diabetes()
diabetes_X_train, diabetes_X_test = diabetes.data[:-20], diabetes.data[-20:]
diabetes_y_train, diabetes_y_test = diabetes.target[:-20], diabetes.target[-20:]

from sklearn import linear_model
regr = linear_model.LinearRegression()
regr.fit(diabetes_X_train, diabetes_y_train)
print(regr.coef_)

# m = mean squared error of the predictions on the held-out samples.
residuals = regr.predict(diabetes_X_test) - diabetes_y_test
m = np.mean(residuals ** 2)
print('Mean square error: ', m)

# vs = variance score on the held-out samples: 1 is a perfect prediction,
# 0 means there is no linear relationship between X and y.
vs = regr.score(diabetes_X_test, diabetes_y_test)
print('Variance score: ', vs)

[  3.03499549e-01  -2.37639315e+02   5.10530605e+02   3.27736980e+02
  -8.14131709e+02   4.92814588e+02   1.02848452e+02   1.84606489e+02
   7.43519617e+02   7.60951722e+01]
Mean square error:  2004.56760269
Variance score:  0.585075302269


In [39]:
# Shrinkage

# With few data points per dimension, noise in the observations induces
# high variance in the fit. To show this, an ordinary least-squares model is
# refitted on six noisy copies of the same two training points and each
# fitted line is drawn over the range spanned by `test`.
X = np.c_[.5, 1].T
y = [.5, 1]
test = np.c_[0, 2].T

regr = linear_model.LinearRegression()

import matplotlib.pyplot as plt
plt.figure()

# Fixed seed so the six noisy draws (and therefore the figure) are repeatable.
np.random.seed(0)

for _ in range(6):
    # Perturb the two training inputs with Gaussian noise scaled by 0.1.
    noise = np.random.normal(size=(2, 1))
    this_X = X + .1 * noise

    regr.fit(this_X, y)
    predicted = regr.predict(test)

    # Fitted line first, then the noisy training points it was fitted to.
    plt.plot(test, predicted)
    plt.scatter(this_X, y, s=3)

In [32]:
# A solution in high-dimensional statistical learning is to shrink the
# regression coefficients to zero: any two randomly chosen sets of
# observations are likely to be uncorrelated. This is called Ridge regression.
# The same six noisy fits as in the least-squares demo are repeated here
# with a Ridge estimator (alpha=.1).
regr = linear_model.Ridge(alpha=.1)

plt.figure()

# Same seed as the least-squares demo, so the noisy draws are identical.
np.random.seed(0)

for _ in range(6):
    # Perturb the two training inputs with Gaussian noise scaled by 0.1.
    noise = np.random.normal(size=(2, 1))
    this_X = X + .1 * noise

    regr.fit(this_X, y)
    predicted = regr.predict(test)

    # Fitted line first, then the noisy training points it was fitted to.
    plt.plot(test, predicted)
    plt.scatter(this_X, y, s=3)

In [34]:
# This is an example of bias/variance tradeoff: 
# the larger the ridge alpha parameter, the higher the bias and the lower the variance.
# We can choose alpha to minimize left out error, 
# this time using the diabetes dataset rather than our synthetic data:

alphas = np.logspace(-4, -1, 6)
from __future__ import print_function
print([regr.set_params(alpha=alpha
                    ).fit(diabetes_X_train, diabetes_y_train,
                    ).score(diabetes_X_test, diabetes_y_test) for alpha in alphas])

[0.58511106838835303, 0.58520730154446765, 0.58546775406984897, 0.58555120365039159, 0.58307170855541623, 0.57058999437280089]


In [40]:
# Sparsity
regr = linear_model.Lasso()

# Score a Lasso fit on the held-out diabetes samples for every candidate
# alpha (the `alphas` grid comes from the ridge sweep cell).
scores = []
for alpha in alphas:
    regr.set_params(alpha=alpha)
    regr.fit(diabetes_X_train, diabetes_y_train)
    scores.append(regr.score(diabetes_X_test, diabetes_y_test))

# Pick the alpha with the highest held-out score (first one on ties),
# refit with it and show the resulting coefficients.
best_index = scores.index(max(scores))
best_alpha = alphas[best_index]
regr.set_params(alpha=best_alpha)

regr.fit(diabetes_X_train, diabetes_y_train)

print(regr.coef_)

[   0.         -212.43764548  517.19478111  313.77959962 -160.8303982    -0.
 -187.19554705   69.38229038  508.66011217   71.84239008]


In [41]:
# Classification

# The C parameter controls the amount of regularization in the
# LogisticRegression object: a large value for C results in less
# regularization. penalty="l2" gives shrinkage (i.e. non-sparse
# coefficients), while penalty="l1" gives sparsity.
logistic = linear_model.LogisticRegression(C=1e5)

# fit() is kept as the last expression so the notebook displays the
# fitted estimator and its parameters.
logistic.fit(X=iris_X_train, y=iris_y_train)

LogisticRegression(C=100000.0, class_weight=None, dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l2', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False)

In [None]:
# SVMs: fit a support vector classifier with a linear kernel on the iris
# training split.
from sklearn import svm

svc = svm.SVC(kernel='linear')
svc.fit(X=iris_X_train, y=iris_y_train)