In [1]:
# Run some setup code for this notebook.
import random
import numpy as np
import matplotlib.pyplot as plt


# This is a bit of magic to make matplotlib figures appear inline in the notebook
# rather than in a new window.
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2



In [2]:
import os
import pickle

def unpickle(filename):
    """Load and return the object stored in one pickled CIFAR file.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        Whatever object the file contains, exactly as ``pickle.load`` yields it.
    """
    # NOTE(security): unpickling can execute arbitrary code -- only use this
    # on dataset files obtained from a trusted source.
    with open(filename, mode='rb') as handle:
        # encoding='latin1' tells pickle how to decode 8-bit strings written
        # by Python 2.
        return pickle.load(handle, encoding='latin1')


def load_CIFAR_batch(filename):
    """Load a single CIFAR batch file and return its images and labels.

    Args:
        filename: Path of a pickled batch whose dict has a 'data' entry
            (one flattened 3x32x32 image per row) and a 'labels' entry.

    Returns:
        A tuple ``(X, Y)``:
            X: float array of shape (N, 32, 32, 3), channels last.
            Y: array of the N labels.
    """
    datadict = unpickle(filename)
    # Each row is stored channel-first (3, 32, 32). The batch size is inferred
    # with -1 instead of being hard-coded to 10000, then axes are reordered to
    # (N, height, width, channel).
    X = np.asarray(datadict['data']).reshape(-1, 3, 32, 32)
    X = X.transpose(0, 2, 3, 1).astype("float")
    Y = np.array(datadict['labels'])
    return X, Y

def load_CIFAR10(ROOT):
    """Load the complete CIFAR dataset found in directory ``ROOT``.

    Reads the five training files ``data_batch_1`` .. ``data_batch_5`` and the
    ``test_batch`` file through ``load_CIFAR_batch``.

    Args:
        ROOT: Directory that contains the batch files.

    Returns:
        A tuple ``(Xtr, Ytr, Xte, Yte)``: the concatenated training images and
        labels, followed by the test images and labels.
    """
    train_files = [os.path.join(ROOT, 'data_batch_%d' % (b,)) for b in range(1, 6)]
    batches = [load_CIFAR_batch(path) for path in train_files]

    # Stack the per-batch arrays along the first (sample) axis.
    Xtr = np.concatenate([images for images, _ in batches])
    Ytr = np.concatenate([labels for _, labels in batches])

    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte

In [3]:
cifar10_dir = './data'

# Cleaning up variables to prevent loading data multiple times (which may cause memory issue)
try:
    del X_train, y_train
    del X_test, y_test
    print('Clear previously loaded data.')
except NameError:
    # Nothing (or only part of the data) was loaded before -- that is the only
    # failure expected here. A bare `except:` would also hide real errors and
    # swallow KeyboardInterrupt.
    pass

X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

# As a sanity check, we print out the size of the training and test data.
print('Training data shape: ', X_train.shape)
print('Training labels shape: ', y_train.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

Training data shape:  (50000, 32, 32, 3)
Training labels shape:  (50000,)
Test data shape:  (10000, 32, 32, 3)
Test labels shape:  (10000,)


In [4]:
# batches.meta is the dataset's metadata file; its 'label_names' entry is the
# list of class names (presumably ordered by integer label -- the
# visualization cell below relies on that).
meta = unpickle(os.path.join(cifar10_dir, 'batches.meta'))
classes = meta['label_names']
classes

['airplane',
 'automobile',
 'bird',
 'cat',
 'deer',
 'dog',
 'frog',
 'horse',
 'ship',
 'truck']

In [None]:
# Visualize some examples from the dataset.
# We show a few examples of training images from each class.
# The grid has one column per class and samples_per_class rows.
num_classes = len(classes)
samples_per_class = 7
for y, cls in enumerate(classes):
    idxs = np.flatnonzero(y_train == y)   # indices of the training samples whose label equals y
    idxs = np.random.choice(idxs, samples_per_class, replace=False)  # randomly draw samples_per_class of them, without replacement
    for i, idx in enumerate(idxs):
        # Subplot numbers are 1-based: this places the image in row i, column y.
        plt_idx = i * num_classes + y + 1
        plt.subplot(samples_per_class, num_classes, plt_idx)
        # The images were loaded as float; cast back to uint8 for display.
        plt.imshow(X_train[idx].astype('uint8'))
        plt.axis('off')
        if i == 0:
            # Only the top row carries the class name as its title.
            plt.title(cls)
plt.show()

In [None]:
# Subsample the data for more efficient code execution in this exercise.
# Lower num_training / num_test to work on a prefix of the data; the values
# below match the full shapes printed by the loading cell.
#
# A slice is used instead of a list of indices: indexing with
# list(range(n)) copies the whole array just to take a prefix, whereas a
# slice returns a view.
num_training = 50000
mask = slice(num_training)
X_train = X_train[mask]
y_train = y_train[mask]

num_test = 10000
mask = slice(num_test)
X_test = X_test[mask]
y_test = y_test[mask]

# A slice silently truncates when fewer samples are available, so check the
# sizes explicitly -- later cells allocate arrays from num_training / num_test.
assert X_train.shape[0] == y_train.shape[0] == num_training, \
    'fewer than num_training training samples are loaded'
assert X_test.shape[0] == y_test.shape[0] == num_test, \
    'fewer than num_test test samples are loaded'

print(X_train.shape, X_test.shape)

In [7]:
# NOTE: this cell is disabled. Its whole body is a bare string literal, so
# none of the statements inside run; the notebook only echoes the string as
# the cell output. The raw pixels are therefore not flattened here -- the LBP
# features are flattened in a later cell instead.
'''
# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)
'''

'\n# Reshape the image data into rows\nX_train = np.reshape(X_train, (X_train.shape[0], -1))\nX_test = np.reshape(X_test, (X_test.shape[0], -1))\nprint(X_train.shape, X_test.shape)\n'

In [8]:

def min_n(distance, n):
    """Return the indices of the ``n`` smallest entries of ``distance``.

    Unlike the previous implementation this does not overwrite entries of
    ``distance`` with a sentinel, so the caller's array is left untouched and
    values of any magnitude are handled correctly.

    Args:
        distance: 1-D array (or sequence) of comparable values.
        n: Number of entries to select. Values <= 0 give an empty list;
            values larger than ``len(distance)`` select every index once.

    Returns:
        A list of integer indices sorted in ascending index order (not by
        distance). Among equal distances the lower index is preferred.
    """
    values = np.asarray(distance)
    count = max(0, min(int(n), values.shape[0]))
    # A stable sort keeps equal values in index order, which reproduces the
    # "first minimum wins" tie-breaking of repeated argmin calls.
    nearest = np.argsort(values, kind='stable')[:count]
    return sorted(nearest.tolist())


In [9]:

from scipy import stats

class KNearestNeighbor(object):
    """k-nearest-neighbour classifier using Euclidean (L2) distance.

    ``train`` only stores the data; all computation happens in ``predict``.
    """

    def __init__(self):
        # Filled in by train(); None means the classifier is not trained yet.
        self.Xtr = None
        self.ytr = None

    def train(self, X, y):
        """Store the training set.

        Args:
            X: Array of shape (num_train, D), one flattened sample per row.
            y: Array of shape (num_train,), the label of each row of ``X``.
        """
        self.Xtr = np.asarray(X)
        self.ytr = np.asarray(y)

    def predict(self, X, k=1):
        """Predict a label for every row of ``X`` by majority vote.

        Args:
            X: Array of shape (num_test, D) with the samples to classify.
            k: Number of nearest training samples that vote (default 1).

        Returns:
            Array of shape (num_test,) with the predicted labels, using the
            dtype of the training labels. When several labels receive the
            same number of votes, the smallest label wins.

        Raises:
            RuntimeError: If ``train`` has not been called.
            ValueError: If ``k`` is not between 1 and the number of training
                samples.
        """
        if self.Xtr is None or self.ytr is None:
            raise RuntimeError('call train() before predict()')
        X = np.asarray(X)
        num_train = self.Xtr.shape[0]
        if not 1 <= k <= num_train:
            raise ValueError(
                'k must be between 1 and %d, got %r' % (num_train, k))

        num_test = X.shape[0]
        # Allocate the result here; the method must not depend on a
        # same-named global created by the calling cell.
        Y_predict = np.empty(num_test, dtype=self.ytr.dtype)
        for i in range(num_test):
            # Squared L2 distance to every training row. The square root is
            # omitted because it does not change the ordering.
            distance = np.sum(np.square(self.Xtr - X[i, :]), axis=1)
            # Stable sort: among equal distances the lower index is chosen.
            nearest = np.argsort(distance, kind='stable')[:k]
            # Majority vote. np.unique returns labels in ascending order, so
            # argmax picks the smallest label on ties.
            labels, votes = np.unique(self.ytr[nearest], return_counts=True)
            Y_predict[i] = labels[np.argmax(votes)]
        return Y_predict


In [10]:
# NOTE: this cell is disabled. Its whole body is a bare string literal, so
# the kNN run on the raw X_train / X_test arrays below never executes; the
# notebook only echoes the string as the cell output.
'''
Y_predict=np.zeros(y_test.shape[0]);
new = KNearestNeighbor()
new.train(X_train,y_train)
Y_predict=new.predict(X_test,15)
print ('accuracy: %f' % (np.mean(Y_predict==y_test)))   #np.mean求平均值
'''

"\nY_predict=np.zeros(y_test.shape[0]);\nnew = KNearestNeighbor()\nnew.train(X_train,y_train)\nY_predict=new.predict(X_test,15)\nprint ('accuracy: %f' % (np.mean(Y_predict==y_test)))   #np.mean求平均值\n"

In [11]:
def rgb2gray(img):
    """Convert RGB image data to grayscale with a weighted channel sum.

    Uses the standard luma weights (ITU-R BT.601):
    ``0.299 * R + 0.587 * G + 0.114 * B``.

    Args:
        img: Array whose last axis holds the R, G, B channels, e.g. a single
            image of shape (H, W, 3) or a batch of shape (N, H, W, 3).

    Returns:
        Array with the same leading shape as ``img`` and the channel axis
        removed.
    """
    # Indexing with `...` selects along the last axis regardless of how many
    # leading axes there are, so batches work as well as single images.
    r, g, b = img[..., 0], img[..., 1], img[..., 2]
    gray = 0.299 * r + 0.587 * g + 0.114 * b
    return gray

In [12]:
from skimage.feature import local_binary_pattern

# LBP neighbourhood: n_points sampling points on a circle of the given radius.
radius = 1
n_points = radius * 8


def _gray_and_lbp(images, n_points, radius):
    """Compute the grayscale version and the LBP map of every image.

    Args:
        images: Array of shape (N, H, W, 3) holding RGB images.
        n_points: Number of circularly symmetric neighbour points for LBP.
        radius: Radius of the LBP circle.

    Returns:
        A tuple ``(gray, lbp)`` of two float arrays of shape (N, H, W).
    """
    # Output sizes come from the data itself rather than from the
    # num_training / num_test globals, so they can never disagree.
    gray = np.zeros(images.shape[:3])
    lbp = np.zeros(images.shape[:3])
    for i in range(images.shape[0]):
        gray[i] = rgb2gray(images[i])
        lbp[i] = local_binary_pattern(gray[i], n_points, radius)
    return gray, lbp


# Same processing for both splits; previously two copy-pasted loops.
X_train_gray, X_train_lbp = _gray_and_lbp(X_train, n_points, radius)
X_test_gray, X_test_lbp = _gray_and_lbp(X_test, n_points, radius)

In [13]:
# Flatten every LBP map into one feature row: the first axis (samples) is
# kept and all remaining axes are merged into one.
X_train_lbp = X_train_lbp.reshape(X_train_lbp.shape[0], -1)
X_test_lbp = X_test_lbp.reshape(X_test_lbp.shape[0], -1)
print(X_train_lbp.shape, X_test_lbp.shape)


(40000, 1024) (10000, 1024)


In [None]:
# Pre-allocate the prediction buffer (one slot per test label) before the
# classifier is run.
Y_predict = np.zeros(y_test.shape[0])

# Fit on the LBP features and classify the test set with k = 15 neighbours.
new = KNearestNeighbor()
new.train(X_train_lbp, y_train)
Y_predict = new.predict(X_test_lbp, 15)

# Accuracy = fraction of predictions equal to the true label
# (np.mean over the boolean comparison).
print('accuracy: %f' % (np.mean(Y_predict == y_test)))