In [14]:
import numpy as np
import tensorflow as tf
from sklearn import datasets
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [15]:
# Load the iris dataset shipped with scikit-learn; later cells read its
# `.data` (feature matrix) and `.target` (class labels) attributes.
iris_dataset = datasets.load_iris()

In [16]:
class TF_PCA:
    """Dimensionality reduction built on a TensorFlow singular value decomposition.

    `fit` decomposes the data matrix with `tf.svd` and caches U, the singular
    values and the diagonal sigma matrix as NumPy arrays.  `reduce` then
    returns `U . sigma` truncated to the leading columns.

    Note: the data is passed to the SVD exactly as given -- this class does
    not mean-centre or scale it, so callers who want classical PCA have to
    centre the data themselves before constructing the object.
    """

    def __init__(self, data, target=None, dtype=tf.float32):
        """Store the inputs; nothing is computed until `fit` is called.

        Args:
            data: matrix to decompose; must expose `.shape` and be feedable
                to a TensorFlow placeholder (e.g. a 2-D NumPy array).
            target: optional labels; stored untouched and never read by this
                class (kept so callers can retrieve them alongside the data).
            dtype: TensorFlow dtype of the placeholder the data is fed into.
        """
        self.data = data
        self.target = target
        self.dtype = dtype

        self.graph = None            # tf.Graph built by fit()
        self.X = None                # placeholder the data is fed through
        self.u = None                # left singular vectors (NumPy array)
        self.singular_values = None  # singular values as a vector (NumPy array)
        self.sigma = None            # diagonal matrix of the singular values

    def fit(self):
        """Run the SVD of `self.data` and cache the results as NumPy arrays.

        Populates `self.u`, `self.singular_values` and `self.sigma`.
        """
        self.graph = tf.Graph()
        with self.graph.as_default():
            self.X = tf.placeholder(self.dtype, shape=self.data.shape)

            # tf.svd returns the singular values as a vector, followed by U
            # and V; V is not needed here.
            singular_values, u, _ = tf.svd(self.X)

            # tf.diag expands the vector back into a diagonal matrix.
            sigma = tf.diag(singular_values)

        with tf.Session(graph=self.graph) as sess:
            self.u, self.singular_values, self.sigma = sess.run(
                [u, singular_values, sigma], feed_dict={self.X: self.data})

    def reduce(self, n_dimensions=None, keep_info=None):
        """Return the fitted data expressed in its leading components.

        When `keep_info` is truthy it takes precedence over `n_dimensions`.

        Args:
            n_dimensions: number of leading components to keep, between 0 and
                the number of singular values.  -1 means "keep all", the
                `tf.slice` convention the previous implementation honoured.
            keep_info: fraction in (0, 1] of the sum of singular values to
                retain; the smallest number of leading components whose
                normalised cumulative sum reaches it is kept.

        Returns:
            NumPy array `U . sigma` restricted to the first `n_dimensions`
            columns.

        Raises:
            RuntimeError: if `fit` has not been called yet.
            ValueError: if neither argument is given, or one is out of range.
        """
        if self.singular_values is None:
            raise RuntimeError("fit() must be called before reduce()")

        n_available = len(self.singular_values)

        if keep_info:
            if not 0 < keep_info <= 1:
                raise ValueError(
                    "keep_info must lie in (0, 1], got {!r}".format(keep_info))

            # Share of the total singular-value sum carried by each component.
            normalized_singular_values = (
                self.singular_values / np.sum(self.singular_values))

            # np.cumsum without an axis accumulates element by element, so
            # ladder[i] is the share retained by keeping components 0..i.
            ladder = np.cumsum(normalized_singular_values)

            # First position reaching the threshold.  The default covers the
            # case where rounding leaves the final cumulative value a hair
            # below keep_info (e.g. keep_info=1.0): keep every component
            # instead of leaking StopIteration to the caller.
            index = next(
                (idx for idx, value in enumerate(ladder) if value >= keep_info),
                n_available - 1)
            n_dimensions = index + 1

        if n_dimensions is None:
            raise ValueError("either n_dimensions or keep_info must be given")
        if n_dimensions == -1:
            n_dimensions = n_available
        if not 0 <= n_dimensions <= n_available:
            raise ValueError(
                "n_dimensions must be between 0 and {}, got {!r}".format(
                    n_available, n_dimensions))

        # u and sigma are already NumPy arrays, so the product is computed
        # directly: this avoids adding new ops to the graph on every call.
        # All rows of sigma are used (it is square with one row per singular
        # value), which also works when the data has more columns than rows.
        return np.matmul(self.u, self.sigma[:, :n_dimensions])

In [17]:
# Decompose the iris measurements and keep as many leading components as are
# needed to retain 90% of the sum of singular values.
tf_pca = TF_PCA(iris_dataset.data, iris_dataset.target)
tf_pca.fit()
pca = tf_pca.reduce(keep_info=0.9)

# The scatter plot reads columns 0 and 1; how many columns `keep_info` yields
# depends on the data, so fail with a clear message rather than an IndexError.
assert pca.shape[1] >= 2, "need at least two retained components for a 2-D scatter plot"

# One colour per class label in the target vector.
color_mapping = {
    0: sns.xkcd_rgb['bright purple'],
    1: sns.xkcd_rgb['lime'],
    2: sns.xkcd_rgb['ochre'],
}
colors = [color_mapping[label] for label in tf_pca.target]

fig, ax = plt.subplots()
ax.set(xlabel='Component 1', ylabel='Component 2',
       title='Iris data on its two leading SVD components')
# Kept as the last expression so the cell still displays the PathCollection.
ax.scatter(pca[:, 0], pca[:, 1], c=colors)

<matplotlib.collections.PathCollection at 0x1ee6b08fb00>