In [1]:
######################## 机器学习对数几率回归类 ##############################
# Author: 杨玉奇
# email: yangyuqi@sina.com
# url: https://github.com/jerryyyq/tf_algorithm_example
# copyright yangyuqi
# 著作权归作者 杨玉奇 所有。商业转载请联系作者获得授权，非商业转载请注明出处。
# date: 2017-09-13
###################################################################

from ML_Model import ML_Model
import tensorflow as tf
import time
    
class ML_Sigma_Regression( ML_Model ):
    """Logistic (sigmoid) regression model for the Kaggle Titanic CSV.

    Holds a 5x1 weight matrix and a scalar bias, and supplies the
    inputs / inference / loss / train / evaluate steps. The helpers
    `_read_csv` and `_echo_tensor` used below are not defined in this
    class; presumably they are inherited from ML_Model.
    """

    def __init__(self):
        """Initialise the base class and create zero-valued weights and bias."""
        ML_Model.__init__(self)

        # One weight per feature column; `inputs` stacks exactly 5 columns.
        self.__W = tf.Variable(tf.zeros([5, 1]), name = 'weights')
        self.__b = tf.Variable(0., name = 'bias')

    def combine_inputs(self, features):
        """Return the linear combination (logits): features x W + b."""
        return tf.matmul(features, self.__W) + self.__b

    def inference(self, features):
        """Return the sigmoid of the logits for `features`."""
        return tf.sigmoid( self.combine_inputs(features) )

    def loss(self, features, label):
        """Return the mean sigmoid cross-entropy between the logits and `label`."""
        label_predicted = self.combine_inputs(features)
        # NOTE(review): arguments are passed positionally as (logits, labels).
        # TensorFlow 1.x requires keyword arguments here -- revisit when upgrading.
        return tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(label_predicted, label) )

    def inputs(self, file_name = None, batch_size = 10):
        """Read one batch of the Titanic CSV and build the feature/label tensors.

        Args:
            file_name: list of CSV file names forwarded to `_read_csv`.
                Defaults to an empty list (a fresh one per call, so no list
                object is shared between calls).
            batch_size: number of rows per batch; also fixes the first
                dimension of the returned label tensor.

        Returns:
            (features, label): `features` is the transpose of the five stacked
            columns [is_first_class, is_second_class, is_third_class, gender,
            age]; `label` is `survived` reshaped to [batch_size, 1].
        """
        if file_name is None:
            file_name = []

        # The third argument gives one default per CSV column (12 columns);
        # presumably it also fixes each column's dtype -- confirm in _read_csv.
        passenger_id, survived, pclass, name, sex, age, sibsp, parch, ticket, fare, cabin, embarked = \
        self._read_csv(batch_size, file_name, [[0.0], [0.0], [0], [''], [''], [0.0], [0.0], [0.0], [''], [0.0], [''], ['']])

        # Passenger class is categorical: encode it as three 0/1 indicator columns.
        is_first_class = tf.to_float(tf.equal(pclass, [1]))
        is_second_class = tf.to_float(tf.equal(pclass, [2]))
        is_third_class = tf.to_float(tf.equal(pclass, [3]))

        # 1.0 for 'female', 0.0 for any other value.
        gender = tf.to_float(tf.equal(sex, ['female']))

        # Stack the five columns, then transpose so that each row is one passenger.
        features = tf.transpose(tf.pack([is_first_class, is_second_class, is_third_class, gender, age]))
        label = tf.reshape(survived, [batch_size, 1])
        return features, label

    def train(self, loss, learning_rate = 0.01):
        """Return the gradient-descent op that minimises `loss`.

        Args:
            loss: scalar loss tensor to minimise.
            learning_rate: gradient-descent step size (default 0.01, the value
                that was previously hard-coded).
        """
        return tf.train.GradientDescentOptimizer( learning_rate ).minimize( loss )

    def evaluate(self, test_features, test_label):
        """Echo the parameters and test data, then return the accuracy tensor.

        A prediction counts as 1.0 when the sigmoid output exceeds 0.5; the
        result is the mean of the element-wise equality between predictions
        and `test_label`.
        """
        self._echo_tensor(self.__W, 'At evaluate, the __W')
        self._echo_tensor(self.__b, 'At evaluate, the __b')
        self._echo_tensor(test_features, 'At evaluate, test_features')
        self._echo_tensor(test_label, 'At evaluate, test_label')

        label_predicted = tf.cast( self.inference(test_features) > 0.5, tf.float32 )

        return tf.reduce_mean(tf.cast(tf.equal(label_predicted, test_label), tf.float32))

    
if __name__ == '__main__':
    # data from: https://www.kaggle.com/c/titanic/data
    titanic_csv = 'Titanic_train.csv'

    one_ml = ML_Sigma_Regression()

    # First argument is presumably the number of training steps -- confirm in ML_Model.do_train.
    # A fresh list is built for every call so the two calls never share a list object.
    one_ml.do_train( 10, [titanic_csv] )

    # NOTE(review): evaluation runs on the same file that was used for training.
    one_ml.do_evaluate( [titanic_csv] )

    

-------------- do_train: start -----------------
file_path =  ['/home/yangyuqi/work/tf_algorithm_example/Titanic_train.csv'] 

Tensor("ReaderRead:0", shape=(), dtype=string) Tensor("ReaderRead:1", shape=(), dtype=string)
step_0 loss:  tensor.shape = Tensor("Shape:0", shape=(0,), dtype=int32), tensor = 0.7069069147109985

step_9 final loss:  tensor.shape = Tensor("Shape_1:0", shape=(0,), dtype=int32), tensor = 0.6950401663780212

save_path is:  /tmp/ML_Sigma_Regression.vari
At evaluate, the __W tensor.shape = Tensor("Shape_2:0", shape=(2,), dtype=int32), tensor = [[ 0.00149437]
 [ 0.00232796]
 [-0.00893528]
 [ 0.01217323]
 [-0.09827359]]

At evaluate, the __b tensor.shape = Tensor("Shape_3:0", shape=(0,), dtype=int32), tensor = -0.005112948827445507

At evaluate, test_features tensor.shape = Tensor("Shape_4:0", shape=(2,), dtype=int32), tensor = [[  0.    1.    0.    0.   23. ]
 [  0.    0.    1.    1.   47. ]
 [  1.    0.    0.    1.   19. ]
 [  0.    1.    0.    0.   42. ]
 [  0.    1

In [4]:
# Worked example: show the intermediate values of one gradient-descent step
import tensorflow as tf

# Three hand-written feature rows with 5 columns each, matching the [5, 1]
# weight matrix created below.
f = [[1., 0., 0., 1., 35.],
 [0., 0., 1., 1., 26.],
 [0., 0., 1., 0., 0.]]

# One label per feature row.
l = [[ 1.], [ 0.], [ 0.]]

tf_f = tf.convert_to_tensor(f)
tf_l = tf.convert_to_tensor(l)

with tf.Session() as sess:
    print( sess.run(tf.shape(f)) )
    # Note: sess.run(f) on the raw list fails with
    # "Can not convert a list into a Tensor or Operation."
    print(f)
    print( sess.run(tf.shape(tf_f)) )
    print( sess.run(tf_f) )

    print( sess.run([tf_f, tf_l]) )

    W = tf.Variable(tf.zeros([5, 1]), name = 'weights')
    b = tf.Variable(0., name = 'bias')
    sess.run( tf.initialize_all_variables() )

    # Works with either the tensor tf_f or the plain list f.
    label_predicted = tf.matmul(f, W) + b
    # Likewise works with either tf_l or the plain list l.
    sig = tf.nn.sigmoid_cross_entropy_with_logits(label_predicted, l)
    loss = tf.reduce_mean( sig )
    print( sess.run([label_predicted, sig, loss]) )

    learning_rate = 0.01
    # Despite its name, `cost` is the training op returned by minimize();
    # running it once applies a single gradient-descent update to W and b.
    cost = tf.train.GradientDescentOptimizer( learning_rate ).minimize( loss )
    sess.run(cost)
    print(sess.run([W, b]))
    print(sess.run(loss))

    # No explicit sess.close() is needed: leaving the `with` block closes the session.
    

[3 5]
[[1.0, 0.0, 0.0, 1.0, 35.0], [0.0, 0.0, 1.0, 1.0, 26.0], [0.0, 0.0, 1.0, 0.0, 0.0]]
[3 5]
[[  1.   0.   0.   1.  35.]
 [  0.   0.   1.   1.  26.]
 [  0.   0.   1.   0.   0.]]
[array([[  1.,   0.,   0.,   1.,  35.],
       [  0.,   0.,   1.,   1.,  26.],
       [  0.,   0.,   1.,   0.,   0.]], dtype=float32), array([[ 1.],
       [ 0.],
       [ 0.]], dtype=float32)]
[array([[ 0.],
       [ 0.],
       [ 0.]], dtype=float32), array([[ 0.69314718],
       [ 0.69314718],
       [ 0.69314718]], dtype=float32), 0.69314718]
[array([[ 0.00166667],
       [ 0.        ],
       [-0.00333333],
       [ 0.        ],
       [ 0.015     ]], dtype=float32), -0.0016666667]
0.686475


In [5]:
# IPython help lookup: the trailing `?` displays the docstring of tf.train.Saver.
# Interactive exploration only; it does not contribute to any computation in this notebook.
tf.train.Saver?