In [4]:
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.datasets import fetch_olivetti_faces
from sklearn.utils.validation import check_random_state

from sklearn.ensemble import ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV
import cv2
'''

def fetch_olivetti_faces(data_home=None, shuffle=False, random_state=0,
                         download_if_missing=True):
    """Loader for the Olivetti faces data-set from AT&T.
    Read more in the :ref:`User Guide <olivetti_faces>`.
    Parameters
    ----------
    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.
    shuffle : boolean, optional
        If True the order of the dataset is shuffled to avoid having
        images of the same person grouped.
    random_state : int, RandomState instance or None, optional (default=0)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.
    download_if_missing : optional, True by default
        If False, raise a IOError if the data is not locally available
        instead of trying to download the data from the source site.
    Returns
    -------
    An object with the following attributes:
    data : numpy array of shape (400, 4096)
        Each row corresponds to a ravelled face image of original size
        64 x 64 pixels.
    images : numpy array of shape (400, 64, 64)
        Each row is a face image corresponding to one of the 40 subjects
        of the dataset.
    target : numpy array of shape (400, )
        Labels associated to each face image. Those labels are ranging from
        0-39 and correspond to the Subject IDs.
    DESCR : string
        Description of the modified Olivetti Faces Dataset.
    Notes
    ------
    This dataset consists of 10 pictures each of 40 individuals. The original
    database was available from (now defunct)
        http://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html
    The version retrieved here comes in MATLAB format from the personal
    web page of Sam Roweis:
        http://www.cs.nyu.edu/~roweis/
    """
    data_home = get_data_home(data_home=data_home)
    if not exists(data_home):
        makedirs(data_home)
    filepath = _pkl_filepath(data_home, 'olivetti.pkz')
    if not exists(filepath):
        if not download_if_missing:
            raise IOError("Data not found and `download_if_missing` is False")

        print('downloading Olivetti faces from %s to %s'
              % (FACES.url, data_home))
        mat_path = _fetch_remote(FACES, dirname=data_home)
        mfile = loadmat(file_name=mat_path)
        # delete raw .mat data
        remove(mat_path)

        faces = mfile['faces'].T.copy()
        joblib.dump(faces, filepath, compress=6)
        del mfile
    else:
        faces = joblib.load(filepath)

    # We want floating point data, but float32 is enough (there is only
    # one byte of precision in the original uint8s anyway)
    faces = np.float32(faces)
    faces = faces - faces.min()
    faces /= faces.max()
    faces = faces.reshape((400, 64, 64)).transpose(0, 2, 1)
    # 10 images per class, 400 images total, each class is contiguous.
    target = np.array([i // 10 for i in range(400)])
    if shuffle:
        random_state = check_random_state(random_state)
        order = random_state.permutation(len(faces))
        faces = faces[order]
        target = target[order]
    return Bunch(data=faces.reshape(len(faces), -1),
                 images=faces,
                 target=target,
DESCR=MODULE_DOCS)
'''
def readJpgFile(dirname):
    jpg_list = []
    file_name_list=os.listdir(dirname)
    for filename in file_name_list:
        type(filename)
        if(filename.find('.jpg')!=-1):
            jpg_list.append(dirname+"//"+filename)
            
    return jpg_list
# Load the faces datasets


data = fetch_olivetti_faces()
targets = data.target
data = data.images.reshape((len(data.images),-1))

jpg_list = readJpgFile("ys_test_img")

test1 = np.zeros((len(jpg_list),4096))
print("Pppp",test1.shape)
for jpg in jpg_list:
    test_ys = cv2.imread(jpg)
    
    size = (64, 64)
    shrink = cv2.resize(test_ys, size, interpolation=cv2.INTER_AREA)
    im_gray = cv2.cvtColor(shrink, cv2.COLOR_BGR2GRAY)   #转换了灰度化
    print(im_gray.shape)
    cv2.imshow("gray_",im_gray)
    
    cv2.waitKey(10)
    imgray = im_gray.reshape((1, -1))
    test1[jpg,:] = imgray
    print("im_gray.shape",imgray.shape)

train = data[targets < 30]

test = data[targets >= 30]  # Test on independent people
print(test.shape)
print("original test.shape is ",test.shape)
# Test on a subset of people
n_faces = 5
rng = check_random_state(None)
size_=(n_faces, )
face_ids = rng.randint(test.shape[0], size=size_)
print(face_ids)
print(face_ids.shape)

test = test[face_ids, :]
print("test.shape",test.shape)
n_pixels = data.shape[1]
# Upper half of the faces
X_train = train[:, :(n_pixels + 1) // 2]
# Lower half of the faces
y_train = train[:, n_pixels // 2:]

X_test = test[:, :(n_pixels + 1) // 2]
y_test = test[:, n_pixels // 2:]

# Fit estimators
ESTIMATORS = {
    "Extra trees": ExtraTreesRegressor(n_estimators=10, max_features=32,
                                       random_state=0),
    "K-nn": KNeighborsRegressor(),
    
    "Linear regression": LinearRegression(),
    "Ridge": RidgeCV(),
}

y_test_predict = dict()
for name, estimator in ESTIMATORS.items():
    estimator.fit(X_train, y_train)
    y_test_predict[name] = estimator.predict(X_test)

# Plot the completed faces
image_shape = (64, 64)

n_cols = 1 + len(ESTIMATORS)
plt.figure(figsize=(2. * n_cols, 2.26 * n_faces))
plt.suptitle("Face completion with multi-output estimators", size=16)

for i in range(n_faces):
    true_face = np.hstack((X_test[i], y_test[i]))

    if i:
        sub = plt.subplot(n_faces, n_cols, i * n_cols + 1)
    else:
        sub = plt.subplot(n_faces, n_cols, i * n_cols + 1,
                          title="true faces")

    sub.axis("off")
    sub.imshow(true_face.reshape(image_shape),
               cmap=plt.cm.gray,
               interpolation="nearest")

    for j, est in enumerate(sorted(ESTIMATORS)):
        completed_face = np.hstack((X_test[i], y_test_predict[est][i]))

        if i:
            sub = plt.subplot(n_faces, n_cols, i * n_cols + 2 + j)

        else:
            sub = plt.subplot(n_faces, n_cols, i * n_cols + 2 + j,
                              title=est)

        sub.axis("off")
        sub.imshow(completed_face.reshape(image_shape),
                   cmap=plt.cm.gray,
                   interpolation="nearest")

plt.show()



def fetch_olivetti_faces(data_home=None, shuffle=False, random_state=0,
                         download_if_missing=True):
    """Loader for the Olivetti faces data-set from AT&T.
    Read more in the :ref:`User Guide <olivetti_faces>`.
    Parameters
    ----------
    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.
    shuffle : boolean, optional
        If True the order of the dataset is shuffled to avoid having
        images of the same person grouped.
    random_state : int, RandomState instance or None, optional (default=0)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.
    download_if_missing : optional, True by default
     

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [21]:
test = np.zeros((300, 64, 64))
print(test.shape)
a = test.reshape((400,-1))
print(a.shape)

(300, 64, 64)
(400, 3072)


In [41]:
from sklearn.utils.validation import check_random_state
for i in range(20):
    rng = check_random_state(None)
    size_=(5, 5)
    face_ids = rng.randint(20, size=size_)
    
    #face_ids = rng.randint(100, 200)
    print(face_ids,'\n')


[[13 17 19  8 15]
 [10  6  9  6  5]
 [ 6 10 10  7 15]
 [10 13 11 12 14]
 [13 12  1 10  6]] 

[[13  1  5 13  9]
 [ 1  2  6 16  9]
 [10  8  2 13 18]
 [ 6 16 16 12 14]
 [ 0  3 18 14  5]] 

[[15 18  4 15  6]
 [ 5  4 12 16  5]
 [ 1 15  3  0  0]
 [ 1 18 10 16  0]
 [11  4  3 17  6]] 

[[11  9  8  5  6]
 [ 1 12 10  7  1]
 [ 6 19  4 12 17]
 [ 7  1  0 14 13]
 [14 11  0  1 15]] 

[[11  4 19 19 18]
 [15  5  1  8 13]
 [ 4  9 13 14 12]
 [10 11  1 13 13]
 [ 1  6  1 16  8]] 

[[16  8  9  0 15]
 [ 4 13 16 19  5]
 [ 3 15  3  8  9]
 [18  9  5  2  4]
 [11 10  9 12  3]] 

[[14 15 15 12 16]
 [10  5 16 13 18]
 [ 3 18  2 17  3]
 [19 10  4 14 17]
 [13 12  5 17 11]] 

[[ 7 16 17  3  0]
 [ 4  4  1 12 18]
 [18  8 11 16 15]
 [15  2  0  2  4]
 [ 0 18 13 14  3]] 

[[ 2  4 17 15 14]
 [ 6 14 16  5 13]
 [11  4  4 10  6]
 [ 8  4 18  7  5]
 [ 9 17 19 17  9]] 

[[19 14 14 11 14]
 [ 3  7 12 11 18]
 [16  5 18  8  4]
 [17 14  8  0 12]
 [ 6 18 16 19 17]] 

[[ 0 16 18 11 12]
 [18  0  4 12  9]
 [ 8 19  3  6  4]
 [17 15  8 17  4

In [59]:
import os
import os.path
import string



ys_test_img\1.jpg
ys_test_img\2.jpg
ys_test_img\3.jpg
ys_test_img\4.jpg
ys_test_img\beauty2.jpg
