In [1]:
import matplotlib.pyplot as plt  
import numpy as np
import struct
from andylearn import mnist, KNNneighbor
from mpl_toolkits.mplot3d import Axes3D
#%matplotlib inline



In [7]:
def PCA(data,dimensions):
    '''
    Project data onto its leading principal components.

    Parameters
    ----------
    data : array-like, shape (m, n)
        Original data; rows are samples, columns are features.
    dimensions : int
        Number of principal components to keep.

    Returns
    -------
    low_data : np.matrix, shape (m, dimensions)
        Centered data expressed in the principal-component basis.
    principal_vec : np.matrix, shape (n, dimensions)
        Principal directions stored as columns, ordered by decreasing
        eigenvalue.
    average : per-feature mean that was subtracted from ``data``.
    '''
    # Center every feature on zero.
    average = np.mean(data, 0)
    centered = np.asarray(data - average)

    # Scatter matrix, i.e. the covariance without its 1/(m-1) factor.
    # The missing scale changes the eigenvalues but neither the
    # eigenvectors nor their ordering.
    scatter = np.dot(centered.T, centered)

    # The scatter matrix is symmetric, so use eigh: it guarantees real
    # eigenvalues and orthonormal eigenvectors. The general-purpose eig can
    # return complex results caused by round-off on rank-deficient input.
    eig_var, eig_vec = np.linalg.eigh(scatter)

    # Indices of the `dimensions` largest eigenvalues, largest first.
    order = np.argsort(-eig_var)[:dimensions]
    principal_vec = eig_vec[:, order]

    low_data = np.dot(centered, principal_vec)

    # np.matrix return types are kept so existing callers see the same
    # objects as before; np.asmatrix replaces the np.mat alias that was
    # removed in NumPy 2.0.
    return np.asmatrix(low_data), np.asmatrix(principal_vec), average

In [8]:
def SVD(data,dimensions):
    '''
    Reduce dimensionality via the singular value decomposition of the
    centered data.

    Parameters
    ----------
    data : array-like, shape (m, n)
        Original data set; rows are samples, columns are features.
    dimensions : int
        Number of principal components to keep.

    Returns
    -------
    low_data : np.matrix, shape (m, dimensions)
        Centered data projected onto the principal directions.
    principal_vec : np.ndarray, shape (n, dimensions)
        Right singular vectors of the centered data, stored as columns.
    average : per-feature mean that was subtracted from ``data``.
    '''
    # Center every feature on zero.
    average = np.mean(data, 0)
    data_zero = np.asarray(data - average)

    # The right singular vectors of the *centered* data are the principal
    # directions, so the decomposition must run on data_zero, not on the raw
    # data. No explicit covariance matrix is needed.
    # A thin SVD avoids building an m x m `u`; the full form is only used
    # when more components than min(m, n) are requested, so the returned
    # shape stays the same as with the default full decomposition.
    need_full = dimensions > min(data_zero.shape)
    _, _, v = np.linalg.svd(data_zero, full_matrices=need_full)

    # Singular values come back in descending order, so the leading rows of
    # v (columns of v.T) are the top components.
    principal_vec = v.T[:, :dimensions]
    low_data = np.asmatrix(data_zero) * np.asmatrix(principal_vec)

    return low_data, principal_vec, average


## MNIST data to 2D

In [9]:
def PCA_MNIST_2D():
    '''
    Scatter-plot MNIST training digits on their first two principal
    components, one colour per label.
    '''
    # The third argument (2000) is presumably the number of samples to
    # load -- confirm against mnist.load_mnist_data.
    data, labels = mnist.load_mnist_data("./data/mnist/train-images", "./data/mnist/train-labels", 2000)

    # Only two components are drawn, so only two are requested; the leading
    # columns do not depend on how many components are kept.
    low_data, _, _ = PCA(data, 2)
    points = np.array(low_data)

    # One colour per label value; labels are used directly as list indices.
    color = ['yellowgreen','yellow','chartreuse','turquoise','orange','coral','salmon','darkgray','skyblue','mediumpurple']
    point_colors = [color[labels[j]] for j in range(len(points))]

    fig = plt.figure()
    plotwindow = fig.add_subplot(111)

    # A single vectorised call instead of one scatter artist per sample.
    plotwindow.scatter(points[:, 0], points[:, 1], c=point_colors, s=10)

    plt.show()

In [10]:
def PCA_MNIST_acuracy():
    '''
    Print a KNN score for PCA projections of MNIST keeping 2 to 21
    components.

    For each component count the projected samples are split into rows
    0-999 and rows 1000-1999 and handed to ``neighbor.accuracy``.
    '''
    # NOTE(review): `neighbor` is not defined in the visible part of this
    # notebook (the import cell brings in `KNNneighbor`); it presumably came
    # from a cell that is no longer present -- confirm and bind it explicitly.
    data, labels = mnist.load_mnist_data("./data/mnist/train-images", "./data/mnist/train-labels", 2000)

    # The label halves do not depend on the number of components.
    first_labels = labels[0:1000]
    second_labels = labels[1000:2000]

    for n_components in range(2, 22):
        projected, _, _ = PCA(data, n_components)
        first_half = projected[0:1000]
        second_half = projected[1000:2000]
        # Argument order is presumably (train_x, train_y, test_x, test_y, k)
        # with k=1 -- verify against the accuracy() signature.
        score = neighbor.accuracy(first_half, first_labels, second_half, second_labels, 1)
        print("features", n_components)
        print("KNN score", score)

In [11]:
def PCA_MNIST_3D():
    '''
    Scatter-plot MNIST test digits on their first three principal
    components in a 3D axes, one colour per label.
    '''
    # The third argument (2000) is presumably the number of samples to
    # load -- confirm against mnist.load_mnist_data.
    data, labels= mnist.load_mnist_data("./data/mnist/test-images", "./data/mnist/test-labels", 2000)
    low_data, _, _ = PCA(data,3)
    points = np.array(low_data)

    # The palette is defined locally: this function previously read a
    # `color` name that only exists inside PCA_MNIST_2D, which raises
    # NameError on a fresh kernel.
    color = ['yellowgreen','yellow','chartreuse','turquoise','orange','coral','salmon','darkgray','skyblue','mediumpurple']
    point_colors = [color[labels[j]] for j in range(len(points))]

    ax = plt.figure().add_subplot(111, projection = '3d')

    # Draw every sample with a single vectorised call, coloured by label.
    ax.scatter(points[:, 0], points[:, 1], points[:, 2], c=point_colors, s=5)
    plt.show()
    