In [1]:
from sklearn.datasets import olivetti_faces as facedata
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.linear_model.logistic import LogisticRegression
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import matplotlib.gridspec as gridspec
from matplotlib import image
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from matplotlib.pyplot import imshow
from sklearn.decomposition import pca
from sklearn.utils import resample
from sklearn.datasets import fetch_california_housing
from time import time
import numpy as np
# Fetch the Olivetti faces dataset once; later cells read fd.images and fd.target.
fd = facedata.fetch_olivetti_faces()
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
def show_face_row(face_n, show=np.ones(10)):
    """Draw the ten images of one face side by side in a single row.

    face_n selects images face_n*10 .. face_n*10 + 9 of fd.images.
    show holds one flag per image of that row; wherever the flag is not 1,
    an all-ones 64x64 placeholder is drawn instead of the photo.
    """
    plt.figure()
    grid = gridspec.GridSpec(1, 10, wspace=0.0)
    panels = [plt.subplot(grid[col]) for col in range(10)]
    # Must run after the subplots exist so their positions pick up the new spacing.
    grid.update(hspace=0)
    print("face #%s" % face_n)
    first = face_n * 10
    for col, panel in enumerate(panels):
        picture = fd.images[first + col] if show[col] == 1 else np.ones((64, 64))
        panel.imshow(picture, cmap='gist_gray')
        panel.axis('off')


In [None]:
# Boston housing dataset, kept in `housing`; none of the other visible cells read it.
# NOTE(review): load_boston was deprecated and later removed from scikit-learn (1.2) --
# confirm the pinned version. fetch_california_housing is already imported in the first cell.
from sklearn.datasets import load_boston
housing = load_boston()
# Leftover exploration helpers (disabled):
#help(housing)
#print housing.data

In [None]:
# Person label of every image.
individuals = fd.target
# Boolean mask that is True at every tenth index (0, 10, 20, ...) and False elsewhere.
itraining = (np.arange(len(fd.target)) % 10) == 0

# Per-image glasses flag; starts all zero and is filled in by the fill_ones calls below.
has_glasses = np.zeros(fd.images.shape[0])
def fill_ones(arr, face_n, ind=range(10)):
    """Set arr[face_n*10 + k] = 1 for every offset k in ind, in place.

    arr    -- indexable, mutable sequence of flags (modified in place)
    face_n -- face number; its slots start at index face_n*10
    ind    -- offsets within that face's block of ten (defaults to all ten)

    Returns the same arr object so calls can be chained or reassigned.
    """
    base = face_n * 10
    for offset in ind:
        arr[base + offset] = 1
    return arr
# Hand-made glasses labels: (face number, offsets of that face's images to flag).
# None means "every image of the face", i.e. fill_ones' default offsets.
_GLASSES_BY_FACE = [
    (1, None),
    (3, [0,1,2,7,8]),
    (5, None),
    (6, [3,4,9]),
    (12, [0,1,4,5,6,7,8,9]),
    (13, None),
    (16, [0,1,4,5,6,7,8,9]),
    (18, [0,1,2,5,9]),
    (19, [0,1,2,4,6,7,8,9]),
    (26, None),
    (27, None),
    (30, None),
    (33, None),
    (35, [8,9]),
    (36, None),
]
for _face, _offsets in _GLASSES_BY_FACE:
    if _offsets is None:
        has_glasses = fill_ones(has_glasses, _face)
    else:
        has_glasses = fill_ones(has_glasses, _face, _offsets)

# Mask that is False for the images of individuals 16 and 18 and True for all others.
gtraining = np.logical_not(np.in1d(fd.target, [16, 18]))

# int8 flag per image: 1 where the individual is in the listed set, 0 otherwise.
has_beard = np.in1d(fd.target, [6, 10, 13, 15, 16, 24, 25, 27, 36]).astype(np.int8)


In [None]:
# Flatten every image into one row vector and copy the glasses flags alongside.
flattened_data = np.array([img.flatten() for img in fd.images])
target_glasses = np.array([has_glasses[idx] for idx in range(len(fd.images))])

In [None]:
def print_neurons(nn, layer=0):
    """Plot the incoming weights of every neuron in one layer of a fitted network.

    Each column of ``nn.coefs_[layer]`` is drawn as a grayscale image, one
    subplot per neuron, all on a single row.

    Parameters
    ----------
    nn : fitted model exposing ``coefs_`` as a sequence of 2-D weight arrays
        with shape (n_inputs, n_neurons). Presumably an sklearn
        MLPClassifier/MLPRegressor -- verify against callers.
    layer : int, default 0
        Index into ``nn.coefs_``.
    """
    # Read the weights from the model that was passed in, not from a notebook global.
    weights = nn.coefs_[layer]
    n_inputs, n_neurons = weights.shape

    plt.figure(figsize=(12,4))

    # A perfect-square input count is shown as a square image, anything else
    # as a single column of n_inputs pixels.
    side = np.sqrt(n_inputs)
    if side % 1 == 0:
        shape = (int(side), int(side))
    else:
        shape = (n_inputs, 1)

    gs = gridspec.GridSpec(1, n_neurons, wspace=0.0)
    ax = [plt.subplot(gs[i]) for i in range(n_neurons)]
    # Applied once, after the subplots exist, so their positions are refreshed.
    gs.update(hspace=0)

    for i in range(n_neurons):
        neuron = weights[:, i].reshape(shape)
        ax[i].imshow(neuron, cmap='gist_gray', interpolation="nearest")
        ax[i].set_yticks([])
        ax[i].set_xticks([])

In [None]:
def return_roc_curve(data, target, model, selection):
    """Compute the ROC curve of ``model`` on the selected rows.

    Parameters
    ----------
    data : array indexable by ``selection``; the result is fed to ``model.predict_proba``.
    target : array of true labels, indexable by ``selection``.
    model : fitted classifier exposing ``predict_proba``.
    selection : index array or boolean mask choosing the rows to evaluate.

    Returns
    -------
    The ``(fpr, tpr, thresholds)`` tuple produced by ``sklearn.metrics.roc_curve``
    called with ``drop_intermediate=False``.
    """
    # roc_curve wants the score of the POSITIVE class. Column 1 of predict_proba
    # is the second class in model.classes_, which is the positive label for a
    # binary 0/1 target (assumed here -- confirm for other label encodings).
    positive_scores = model.predict_proba(data[selection])[:, 1]
    return roc_curve(target[selection], positive_scores, drop_intermediate=False)

def plot_roc(data, target, model1, model2, selection):
    """Plot the ROC curves of two models on the same rows and print their AUCs.

    model1 is drawn in blue, model2 in red, and the chance diagonal in black.
    The arguments are forwarded unchanged to ``return_roc_curve``.
    """
    # roc_curve returns false-positive rates first, then true-positive rates.
    fpr1, tpr1, _thresholds1 = return_roc_curve(data, target, model1, selection)
    fpr2, tpr2, _thresholds2 = return_roc_curve(data, target, model2, selection)
    plt.plot(fpr1, tpr1, 'blue', fpr2, tpr2, 'red', np.array([0,1]), np.array([0,1]), "black", zorder=10)
    plt.ylabel("True Positive Rate")
    plt.xlabel("False Positive Rate")
    # Single-argument print(...) emits the same text under Python 2 and Python 3.
    print("AUC model 1: %s" % auc(fpr1, tpr1))
    print("AUC model 2: %s" % auc(fpr2, tpr2))

In [None]:
def learning_curve(model, data, target, test_set, step=20, rs=1000):
    """Fit ``model`` on growing random training samples and record its accuracy.

    For each sample size in ``range(19, n_train, step)``, where ``n_train`` is
    the number of rows outside ``test_set``, a sample of that size is drawn from
    the non-test rows, the model is fitted on it, and its score is taken on
    the sample itself and on the held-out test rows.

    Parameters
    ----------
    model : estimator with ``fit(X, y)`` (returning the fitted estimator) and ``score(X, y)``.
    data : 2-D array of features, one row per example.
    target : 1-D array of labels aligned with ``data``.
    test_set : boolean numpy mask over the rows; True marks held-out test rows.
    step : int, default 20
        Increment between consecutive sample sizes.
    rs : int, default 1000
        Seed given to numpy's global RNG before sampling.

    Returns
    -------
    (sample_sizes, in_samp_accs, out_samp_accs, benchmarks)
        ``sample_sizes`` is the ``range`` of sizes used; the three lists hold,
        per size, the in-sample score, the test-set score and the fit time in
        seconds.

    Notes
    -----
    ``np.random.choice`` is called without ``replace=``, so numpy's default
    applies and a row may appear more than once in a sample. The global
    numpy RNG is seeded with ``rs`` and re-seeded with ``None`` on exit.
    """
    # Only rows outside the test set may be drawn for training; otherwise the
    # out-of-sample score would be measured on rows the model has already seen.
    train_pool = np.flatnonzero(~test_set)
    sample_sizes = range(19, train_pool.size, step)

    in_samp_accs = []
    out_samp_accs = []
    benchmarks = []

    # The global RNG (not a private RandomState) is seeded on purpose: estimators
    # that fall back to numpy's global state then behave reproducibly as well.
    np.random.seed(rs)
    try:
        for sample_size in sample_sizes:
            selection = np.random.choice(train_pool, sample_size)
            started = time()
            m = model.fit(data[selection,:], target[selection])
            benchmark = time() - started  # time in seconds
            in_samp_acc = m.score(data[selection,:], target[selection])
            out_samp_acc = m.score(data[test_set,:], target[test_set])
            print("training round: %s sample size, in sample: %s, out sample: %s, benchmark: %s" % (
                target[selection].shape,
                in_samp_acc,
                out_samp_acc,
                benchmark
            ))
            in_samp_accs.append(in_samp_acc)
            out_samp_accs.append(out_samp_acc)
            benchmarks.append(benchmark)
    finally:
        # Un-seed the global RNG even when a fit/score call raises.
        np.random.seed(None)
    return (
        sample_sizes,
        in_samp_accs,
        out_samp_accs,
        benchmarks
    )