In [1]:
######################## 机器学习对数几率回归类 ##############################
# Author: 杨玉奇
# email: yangyuqi@sina.com
# url: https://github.com/jerryyyq/tf_algorithm_example
# copyright yangyuqi
# 著作权归作者 杨玉奇 所有。商业转载请联系作者获得授权，非商业转载请注明出处。
# date: 2017-09-19
###################################################################

from ML_Model import ML_Model
import tensorflow as tf
    
class ML_Softmax_Regression( ML_Model ):
    """Softmax (multi-class) regression on top of the ML_Model base class.

    The model is a single linear layer: logits = features x W + b, where W has
    shape [feature_number, label_number] and b has shape [label_number].
    Class probabilities are the softmax of the logits.
    """

    def __init__(self, feature_number, label_number):
        """Create the zero-initialised weight and bias variables.

        Args:
            feature_number: number of input features per sample (rows of W).
            label_number: number of classes (columns of W, length of b).
        """
        ML_Model.__init__(self)

        self.__W = tf.Variable(tf.zeros([feature_number, label_number]), name = 'weights')
        self.__b = tf.Variable(tf.zeros([label_number]), name = 'bias')


    def combine_inputs(self, features):
        """Return the unnormalised class scores (logits): features x W + b."""
        return tf.matmul(features, self.__W) + self.__b

    def inference(self, features):
        """Return the per-class probabilities: softmax over the logits."""
        return tf.nn.softmax( self.combine_inputs(features) )

    def loss(self, features, label):
        """Return the mean sparse softmax cross-entropy over the batch.

        Args:
            features: batch of feature rows accepted by combine_inputs().
            label: integer class indices, one per row (not one-hot).
        """
        logits = self.combine_inputs(features)
        # Pass logits/labels by keyword: TensorFlow 0.x takes (logits, labels)
        # positionally, while TensorFlow >= 1.0 only accepts named arguments.
        # The keyword form is valid on both.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits = logits, labels = label)
        return tf.reduce_mean( cross_entropy )


    def inputs(self, file_name = [], batch_size = 10):
        """Read a batch of Iris rows and return (features, label_number).

        Args:
            file_name: list of CSV file names, forwarded to self._read_csv.
            batch_size: number of rows per batch, forwarded to self._read_csv.

        Returns:
            features: the four measurement columns stacked and transposed so
                that each row is one sample.
            label_number: int32 class index per sample
                (0 = Iris-setosa, 1 = Iris-versicolor, 2 = Iris-virginica).
        """
        # NOTE(review): _read_csv is defined on the base class; it is assumed
        # to return one tensor per CSV column in order - confirm in ML_Model.
        sepal_length, sepal_width, petal_length, petal_width, label = \
            self._read_csv(batch_size, file_name, [[0.0], [0.0], [0.0], [0.0], ['']])

        # tf.pack was renamed to tf.stack in TensorFlow 1.0; use whichever the
        # installed version provides.
        stack = getattr(tf, 'stack', None) or tf.pack

        # One boolean row per class; argmax over axis 0 picks the row that
        # matched, i.e. the class index of each sample.
        label_number = tf.to_int32(tf.argmax(tf.to_int32(stack([
            tf.equal(label, ['Iris-setosa']),
            tf.equal(label, ['Iris-versicolor']),
            tf.equal(label, ['Iris-virginica'])
        ])), 0))

        features = tf.transpose(stack([sepal_length, sepal_width, petal_length, petal_width]))

        return features, label_number


    def train(self, loss):
        """Return a gradient-descent op that minimises `loss` (learning rate 0.01)."""
        learning_rate = 0.01
        return tf.train.GradientDescentOptimizer( learning_rate ).minimize( loss )


    def evaluate(self, test_features, test_label):
        """Return the accuracy (fraction of correct predictions) as a tensor.

        Args:
            test_features: batch of feature rows accepted by inference().
            test_label: int32 class indices to compare the predictions with.
        """
        self._echo_tensor(self.__W, 'At evaluate, the __W')
        self._echo_tensor(self.__b, 'At evaluate, the __b')
        self._echo_tensor(test_features, 'At evaluate, test_features')
        self._echo_tensor(test_label, 'At evaluate, test_label')

        # tf.argmax is the spelling inputs() already uses; tf.arg_max is a
        # deprecated alias of the same op.
        label_predicted = tf.cast( tf.argmax(self.inference(test_features), 1), tf.int32 )

        return tf.reduce_mean(tf.cast(tf.equal(label_predicted, test_label), tf.float32))

    
if __name__ == '__main__':
    # Iris has 4 measurement columns and 3 species, hence (4, 3).
    one_ml = ML_Softmax_Regression(4, 3)
    # data from: https://archive.ics.uci.edu/ml/datasets/Iris
    # NOTE(review): the first argument is presumably the number of training
    # steps (the recorded output ends at step_9) - confirm in ML_Model.do_train.
    one_ml.do_train( 10, ['iris.csv'] )  # author's note: 10000 iterations reach about 80% accuracy

    one_ml.do_evaluate( ['iris.csv'] )

    

-------------- do_train: start -----------------
file_path =  ['/home/yangyuqi/work/tf_algorithm_example/iris.csv'] 

Tensor("ReaderRead:0", shape=(), dtype=string) Tensor("ReaderRead:1", shape=(), dtype=string)
features_0 tensor.shape = [10  4], tensor = [[ 5.5         3.5         1.29999995  0.2       ]
 [ 5.          3.          1.60000002  0.2       ]
 [ 5.5         2.5         4.          1.29999995]
 [ 5.          3.20000005  1.20000005  0.2       ]
 [ 6.4000001   3.20000005  4.5         1.5       ]
 [ 4.80000019  3.4000001   1.60000002  0.2       ]
 [ 4.80000019  3.          1.39999998  0.30000001]
 [ 6.5         3.          5.19999981  2.        ]
 [ 5.69999981  2.9000001   4.19999981  1.29999995]
 [ 5.69999981  3.          4.19999981  1.20000005]]

label_0 tensor.shape = [10], tensor = [1 0 0 2 2 0 2 0 0 1]

step_0 loss:  tensor.shape = [], tensor = 1.1516644954681396

training end. step_9 final loss:  tensor.shape = [], tensor = 1.0444961786270142

save_path is:  /tmp/ML_Soft

In [20]:
# Walkthrough of the intermediate tensors for four hand-picked Iris samples.
import tensorflow as tf

# tf.pack was renamed to tf.stack in TensorFlow 1.0; use whichever exists.
tf_stack = getattr(tf, 'stack', None) or tf.pack

# The four sample rows, in iris.csv format:
#   5.0,3.3,1.4,0.2,Iris-setosa
#   7.0,3.2,4.7,1.4,Iris-versicolor
#   5.7,2.8,4.1,1.3,Iris-versicolor
#   6.3,3.3,6.0,2.5,Iris-virginica
sepal_length = [5.0, 7.0, 5.7, 6.3]
sepal_width = [3.3, 3.2, 2.8, 3.3]
setal_length = [1.4, 4.7, 4.1, 6.0]
setal_width = [0.2, 1.4, 1.3, 2.5]

# Stack the columns (4 x N), then transpose so each row is one sample (N x 4).
features_pack = tf_stack([sepal_length, sepal_width, setal_length, setal_width])
features = tf.transpose( features_pack )


label = ['Iris-setosa', 'Iris-versicolor', 'Iris-versicolor', 'Iris-virginica']

# One boolean row per class; a True marks the samples belonging to that class.
label_pack = tf_stack([
            tf.equal(label, ['Iris-setosa']),
            tf.equal(label, ['Iris-versicolor']),
            tf.equal(label, ['Iris-virginica'])
        ])

# argmax over axis 0 turns the per-class rows into one class index per sample.
label_pack_int = tf.to_int32( label_pack )
label_pack_max = tf.argmax( label_pack_int, 0 )
test_label = tf.to_int32( label_pack_max )

# Values obtained after 10 training steps (kept for comparison):
# W = [[-0.02240583, -0.04530681, 0.06771266],
#  [ 0.00781024, -0.02450576, 0.01669552],
#  [-0.05717348, -0.03503608, 0.09220956],
#  [-0.02351422, -0.01420259, 0.03771682]]
#
# b = [-0.00052203, -0.00652796, 0.00704999]

# Values obtained after 10000 training steps
W = [[0.87548745, 0.80772996, -1.68321049],
 [2.02138662, -0.17534477, -1.84603953],
 [-2.75662613, -0.15486683, 2.91150331],
 [-1.28630328, -1.03182793, 2.31813335]]

b = [0.41992012, 0.57542503, -0.99534476]

with tf.Session() as sess:
    print( '---------- input value -----------\n' )
    print( 'features_pack: ', sess.run(features_pack) )
    print( 'features: ', sess.run( features ), '\n' )

    print( 'label: ', sess.run( label_pack ), '\n\n', sess.run(label_pack_int) )
    print( 'label_max: ', sess.run( label_pack_max ) )
    # The int32 class indices live in `test_label`; no `label_number` is
    # defined in this cell, so referencing it fails on a fresh kernel.
    print( 'label_number: ', sess.run( test_label ) )

    print( '\n---------- traning -----------\n' )
    label_combine = tf.matmul( features, W ) + b
    # Keyword arguments work on both TensorFlow 0.x and >= 1.0 (the latter
    # rejects positional logits/labels).
    label_softmax = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits = label_combine, labels = test_label )
    label_loss = tf.reduce_mean( label_softmax )

    print( 'W: ', W )
    print( 'b: ', b )
    print( 'label_combine: ', sess.run(label_combine) )
    print( 'label_softmax: ', sess.run(label_softmax) )
    print( 'label_loss: ', sess.run(label_loss) )

    print( '\n---------- evaluate -----------\n' )
    label_inference = tf.nn.softmax( label_combine )
    # tf.argmax matches the call used above; tf.arg_max is a deprecated alias.
    label_arg_max = tf.argmax( label_inference, 1 )
    label_predicted = tf.cast( label_arg_max, tf.int32 )

    print( 'label_inference: ', sess.run(label_inference) )
    print( 'label_arg_max: ', sess.run(label_arg_max) )
    print( 'label_predicted: ', sess.run(label_predicted) )

    # 1.0 where the prediction matches the true class, 0.0 otherwise.
    evaluate = tf.cast( tf.equal( label_predicted, test_label ), tf.float32 )
    accuracy_rate = tf.reduce_mean( evaluate )

    print( 'evaluate: ', sess.run(evaluate) )
    print( 'accuracy_rate: ', sess.run(accuracy_rate) )


---------- input value -----------

features_pack:  [[ 5.          7.          5.69999981  6.30000019]
 [ 3.29999995  3.20000005  2.79999995  3.29999995]
 [ 1.39999998  4.69999981  4.0999999   6.        ]
 [ 0.2         1.39999998  1.29999995  2.5       ]]
features:  [[ 5.          3.29999995  1.39999998  0.2       ]
 [ 7.          3.20000005  4.69999981  1.39999998]
 [ 5.69999981  2.79999995  4.0999999   1.29999995]
 [ 6.30000019  3.29999995  6.          2.5       ]] 

label:  [[ True False False False]
 [False  True  True False]
 [False False False  True]] 

 [[1 0 0 0]
 [0 1 1 0]
 [0 0 0 1]]
label_max:  [0 1 1 2]
label_number:  [0 1 1 2]

---------- traning -----------

W:  [[0.87548745, 0.80772996, -1.68321049], [2.02138662, -0.17534477, -1.84603953], [-2.75662613, -0.15486683, 2.91150331], [-1.28630328, -1.03182793, 2.31813335]]
b:  [0.41992012, 0.57542503, -0.99534476]
label_combine:  [[  7.35139561   3.61225796 -10.9635973 ]
 [ -1.74019706   3.49599838  -1.75569403]
 [ -1.904280

---------- traning -----------

W:  [[-0.02240583, -0.04530681, 0.06771266], [0.00781024, -0.02450576, 0.01669552], [-0.05717348, -0.03503608, 0.09220956], [-0.02351422, -0.01420259, 0.03771682]]
b:  [-0.00052203, -0.00652796, 0.00704999]
label_combine:  [[-0.17152311 -0.36582205  0.53734523]
 [-0.43400532 -0.58664727  1.02065277]
 [-0.37134629 -0.49550417  0.86685061]
 [-0.51773143 -0.61855286  1.13628435]]
label_softmax:  [ 1.34939766  1.96770382  1.79800224  0.31057963]
label_loss:  1.35642

---------- evaluate -----------

label_inference:  [[ 0.25939646  0.2135901   0.52701342]
 [ 0.16282785  0.13977745  0.69739473]
 [ 0.1875248   0.16562946  0.64684576]
 [ 0.1402126   0.12676543  0.73302197]]
label_arg_max:  [2 2 2 2]
label_predicted:  [2 2 2 2]
evaluate:  [ 0.  0.  0.  1.]
accuracy_rate:  0.25

In [14]:
import numpy
from tensorflow.python.platform import gfile

def _read32(bytestream):
    dt = numpy.dtype(numpy.uint32).newbyteorder('>')
    return numpy.frombuffer(bytestream.read(4), dtype=dt)[0]

def _dense_to_one_hot(labels_dense, num_classes):
    """Convert class labels from scalars to one-hot vectors."""
    num_labels = labels_dense.shape[0]
    index_offset = numpy.arange(num_labels) * num_classes
    labels_one_hot = numpy.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot

def input_mnist_data(file_path):
    """Load an MNIST image file into a uint8 array.

    The file starts with four big-endian 32-bit header words (magic number
    2051, image count, rows, cols) followed by one byte per pixel.

    Args:
        file_path: path of the uncompressed image file.

    Returns:
        A numpy uint8 array of shape (num_images, rows, cols, 1).

    Raises:
        ValueError: if the magic number is not 2051 or the pixel data is
            shorter than the header announces.
    """
    with gfile.Open(file_path, 'rb') as bytestream:
        magic = _read32(bytestream)
        if magic != 2051:
            raise ValueError('Invalid magic number %d in MNIST image file: %s' % (magic, file_path))

        # Convert the header words to Python ints: multiplying numpy.uint32
        # scalars can wrap around for large files.
        num_images = int(_read32(bytestream))
        rows = int(_read32(bytestream))
        cols = int(_read32(bytestream))

        expected_size = rows * cols * num_images
        buf = bytestream.read(expected_size)
        if len(buf) != expected_size:
            raise ValueError(
                'Truncated MNIST image file %s: expected %d bytes of pixel data, got %d'
                % (file_path, expected_size, len(buf)))

        data = numpy.frombuffer(buf, dtype=numpy.uint8)
        # Trailing axis of size 1 is the (single) channel dimension.
        data = data.reshape(num_images, rows, cols, 1)
        return data
    
def input_mnist_label(file_path):
    """Load an MNIST label file and return the labels one-hot encoded.

    The header is two big-endian 32-bit words (magic number 2049, item count),
    followed by one byte per label.

    Returns:
        The result of _dense_to_one_hot(labels, 10): one row of 10 entries per label.

    Raises:
        ValueError: if the magic number is not 2049.
    """
    with gfile.Open(file_path, 'rb') as label_stream:
        header_magic = _read32(label_stream)
        if header_magic != 2049:
            raise ValueError( 'Invalid magic number %d in MNIST label file: %s' % (header_magic, file_path) )

        label_count = _read32(label_stream)
        raw_labels = label_stream.read(label_count)
        dense_labels = numpy.frombuffer(raw_labels, dtype=numpy.uint8)
        return _dense_to_one_hot(dense_labels, 10)
   

class ML_Softmax_Regression_Mnist( ML_Softmax_Regression ):
    """Softmax regression variant whose inputs() serves MNIST batches from memory.

    The training images and labels are loaded once in the constructor and
    handed out in fixed-size batches by a generator.
    """

    def __init__(self, feature_number, label_number):
        """Build the model variables and load the MNIST training set.

        Args:
            feature_number: number of input features, forwarded to the base class.
            label_number: number of classes, forwarded to the base class.
        """
        ML_Softmax_Regression.__init__(self, feature_number, label_number)
        self.__train_data = input_mnist_data('./MNIST_data/train-images.idx3-ubyte')
        self.__train_label = input_mnist_label('./MNIST_data/train-labels.idx1-ubyte')

        # Batches of 2 samples; the generator is consumed by inputs().
        self.__generator = self.__get_next_data( 2 )

    def __get_next_data(self, batch_size):
        """Yield (data, label) slices of `batch_size` consecutive samples.

        Only full batches are produced; a trailing remainder of fewer than
        `batch_size` samples is not yielded.
        """
        sample_count = len(self.__train_data)
        for step in range( sample_count // batch_size ):
            begin = step * batch_size
            # Slice ends are exclusive and begin + batch_size never exceeds
            # sample_count here, so no clamping is needed. Clamping to
            # sample_count - 1 would drop the final sample of the last batch.
            end = begin + batch_size

            yield self.__train_data[begin : end], self.__train_label[begin : end]


    def inputs(self, file_name = [], batch_size = 10):
        """Return the next (features, labels) batch from the in-memory generator.

        `file_name` and `batch_size` are accepted for signature compatibility
        with the base class but are not used. Raises StopIteration once all
        batches have been consumed.
        """
        return next( self.__generator )
 

    
# Smoke test of the MNIST input pipeline: build the model and print the first
# ten batches returned by inputs().
# NOTE(review): 784 is presumably 28 x 28 pixels, but input_mnist_data returns
# images shaped (num_images, rows, cols, 1), i.e. not flattened - confirm
# before feeding these batches to the model.
one_ml = ML_Softmax_Regression_Mnist(784, 10)

for i in range(10):
    features, labels = one_ml.inputs()
    
    # _echo_tensor is inherited from the base classes; the recorded output
    # shows it printing the value after the given prefix.
    one_ml._echo_tensor( features, '{}_features: '.format(i) )
    one_ml._echo_tensor( labels, '{}_labels: '.format(i) )

0_features:  not_tensor = [[[[0]
   [0]
   [0]
   ..., 
   [0]
   [0]
   [0]]

  [[0]
   [0]
   [0]
   ..., 
   [0]
   [0]
   [0]]

  [[0]
   [0]
   [0]
   ..., 
   [0]
   [0]
   [0]]

  ..., 
  [[0]
   [0]
   [0]
   ..., 
   [0]
   [0]
   [0]]

  [[0]
   [0]
   [0]
   ..., 
   [0]
   [0]
   [0]]

  [[0]
   [0]
   [0]
   ..., 
   [0]
   [0]
   [0]]]


 [[[0]
   [0]
   [0]
   ..., 
   [0]
   [0]
   [0]]

  [[0]
   [0]
   [0]
   ..., 
   [0]
   [0]
   [0]]

  [[0]
   [0]
   [0]
   ..., 
   [0]
   [0]
   [0]]

  ..., 
  [[0]
   [0]
   [0]
   ..., 
   [0]
   [0]
   [0]]

  [[0]
   [0]
   [0]
   ..., 
   [0]
   [0]
   [0]]

  [[0]
   [0]
   [0]
   ..., 
   [0]
   [0]
   [0]]]]

0_labels:  not_tensor = [[ 0.  0.  0.  0.  0.  1.  0.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]]

1_features:  not_tensor = [[[[0]
   [0]
   [0]
   ..., 
   [0]
   [0]
   [0]]

  [[0]
   [0]
   [0]
   ..., 
   [0]
   [0]
   [0]]

  [[0]
   [0]
   [0]
   ..., 
   [0]
   [0]
   [0]]

  ..., 
  [[0]
   [0]