In [1]:
import tensorflow as tf
import numpy as np
import os


In [2]:
#in interactive mode, there is no __file__ defined
#os.getcwd() is a workaround
modelpath=os.path.dirname(__file__) if('__file__') in dir() else os.getcwd()

In [3]:
def read_csv(batch_size,file_name,record_defaults):
    filename_queue = tf.train.string_input_producer ( 
        [os.path.join(modelpath,file_name)])
    reader = tf.TextLineReader(skip_header_lines=1)
    key,value=reader.read(filename_queue)
    #decode-csv will convert a Tensor from type string (the text line) 
    #in a tuple of tensor columns with the specified defaults, 
    #which also sets the data type for each column
    decoded=tf.decode_csv(value,record_defaults=record_defaults)
    
    #batch actually reads the file and loads "batch_size" rows in a single tensor
    
    return tf.train.shuffle_batch(decoded,batch_size=batch_size,
                                 capacity=batch_size*50, 
                                 min_after_dequeue=batch_size)
    

In [4]:
#demo of logistic to answer Yes/No question
#initialize variables/model parameters
W=tf.Variable(tf.zeros([5,1]),dtype=tf.float32,name="weights")
b = tf.Variable(0,name="bias",dtype=tf.float32)

def combine_inputs(X):
    return tf.matmul(X,W)+b
#define the training loop operations
def inference(X):
    #compute the inference model over data X and return the result
    return tf.sigmoid(combine_inputs(X))

def loss(X,Y):
    #compute loss over training data X and expected output Y
    Y_predicted = inference(X)
    
    #applying cross entropy reduction
    err=tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits (logits=inference(X),labels=Y))
    return err
def inputs():
    batch_size=100
    passenger_id, survived, pclass, name, sex, age, sibsp, parch, ticket, fare, cabin, embarked = \
     read_csv(batch_size,'titanicData/train.csv', [[0.0], [0.0], [0], [""], [""], [0.0], [0.0], [0.0], [""], [0.0], [""], [""]])
    #convert categorical data
    is_first_class = tf.to_float(tf.equal(pclass,[1]))
    is_second_class = tf.to_float(tf.equal(pclass,[2]))
    is_third_class = tf.to_float(tf.equal(pclass,[3]))
    
    gender = tf.to_float(tf.equal(sex,["female"]))
    
    #finally we pack all the features in a single matrix;
    #We then transpose to have a matrix with one example
    #per row and one feature per column
    features = tf.transpose(tf.stack(
    [is_first_class,is_second_class,is_third_class,gender,age]
    ))
    
    survived = tf.reshape(survived,[batch_size,1])
    #survived = tf.transpose(survived)
    return features, survived
def train(total_loss):
    #train/adjust model parameters according to computed total loss
    learning_rate = 0.01
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss)
    return optimizer
def evaluate(sess,X,Y):
    #evaluate the resulting trained model
    predicted = tf.cast(inference (X) > 0.5,tf.float32)
    result=sess.run(tf.reduce_mean(tf.cast (tf.equal(predicted,Y),tf.float32)))
    print(result)
    return
    

    

In [5]:
#Create a saver
saver = tf.train.Saver()

In [6]:
#Launch the graph in a session, setup boilerplate
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    X,Y = inputs()
    total_loss = loss(X,Y)
    train_op = train(total_loss)
    coord = tf.train.Coordinator()
    threads=tf.train.start_queue_runners(sess=sess,coord=coord)
    
    initial_step=0
    
    #verify if we don't have a checkpoint saved already
    ckpt = tf.train.get_checkpoint_state(modelpath)
    
    if ckpt and ckpt.model_checkpoint_path:
        #Restores from checkpoints
        saver.restore(sess,ckpt.model_checkpoint_path)
        initial_step = int(ckpt.model_checkpoint_path.rsplit('-',1)[1])
    
    #actual training loop
    training_steps=2000
    
    for step in range(initial_step,training_steps):
        sess.run([train_op])
        if step%1000 ==0:
            saver.save(sess,"my-model",global_step=step)
        #for debugging and learning purposes, see how the loss gets decremented through training steps
        if step%10 ==0:
            print("loss: {0}".format(sess.run([total_loss])))
            evaluate(sess,X,Y)
            
    coord.request_stop()
    coord.join(threads)
    saver.save(sess,"my-model",global_step=training_steps)
    sess.close()

loss: [0.71457791]
0.52
loss: [0.721066]
0.65
loss: [0.71222895]
0.74
loss: [0.71749192]
0.61
loss: [0.72772002]
0.62
loss: [0.72040725]
0.63
loss: [0.70911407]
0.51
loss: [0.72066557]
0.66
loss: [0.71487617]
0.64
loss: [0.7055797]
0.58
loss: [0.7228412]
0.65
loss: [0.69297892]
0.71
loss: [0.69356424]
0.62
loss: [0.71059746]
0.64
loss: [0.70414209]
0.66
loss: [0.71013242]
0.6
loss: [0.71170652]
0.67
loss: [0.71716768]
0.66
loss: [0.70107305]
0.64
loss: [0.7117433]
0.7
loss: [0.70200467]
0.57
loss: [0.71637315]
0.55
loss: [0.70654511]
0.65
loss: [0.71763837]
0.57
loss: [0.72588503]
0.66
loss: [0.72286844]
0.64
loss: [0.71919024]
0.62
loss: [0.71646607]
0.65
loss: [0.7132715]
0.68
loss: [0.71061707]
0.59
loss: [0.71524435]
0.61
loss: [0.70339769]
0.66
loss: [0.71043336]
0.56
loss: [0.72513354]
0.59
loss: [0.71449494]
0.61
loss: [0.70070767]
0.55
loss: [0.70964873]
0.59
loss: [0.70167845]
0.7
loss: [0.6980958]
0.59
loss: [0.71789354]
0.68
loss: [0.71245456]
0.63
loss: [0.68398744]
0.71
lo