In [1]:
import tensorflow as tf
import numpy as np
import os


In [2]:
# Directory used to locate the data file and the checkpoint state.
# In interactive mode (e.g. a notebook) __file__ is not defined, so the
# current working directory is used as a fallback.
if '__file__' in dir():
    modelpath = os.path.dirname(__file__)
else:
    modelpath = os.getcwd()

In [3]:
def read_csv(batch_size,file_name,record_defaults):
    """Build a queue-based pipeline producing shuffled batches of CSV columns.

    Args:
        batch_size: number of rows loaded into each batch tensor.
        file_name: path of the CSV file, relative to the module-level
            ``modelpath`` directory.
        record_defaults: one default per CSV column, passed to
            ``tf.decode_csv``; it also sets the data type of each column.

    Returns:
        The result of ``tf.train.shuffle_batch`` applied to the decoded
        columns (one batched tensor per column).
    """
    csv_path = os.path.join(modelpath, file_name)
    filename_queue = tf.train.string_input_producer([csv_path])

    # The reader is configured to skip one header line.
    line_reader = tf.TextLineReader(skip_header_lines=1)
    _, text_line = line_reader.read(filename_queue)

    # Convert the string tensor holding one text line into a tuple of
    # column tensors typed according to record_defaults.
    columns = tf.decode_csv(text_line, record_defaults=record_defaults)

    # shuffle_batch is what actually reads the file and groups
    # "batch_size" rows into a single tensor per column.
    return tf.train.shuffle_batch(
        columns,
        batch_size=batch_size,
        capacity=batch_size * 50,
        min_after_dequeue=batch_size)
    

In [4]:
# Demo of logistic regression used to answer a yes/no question.
# Model parameters: a [5, 1] weight matrix and a scalar bias,
# both float32 and both starting at zero.
W = tf.Variable(initial_value=tf.zeros(shape=[5, 1]),
                dtype=tf.float32,
                name="weights")
b = tf.Variable(initial_value=0,
                dtype=tf.float32,
                name="bias")

def combine_inputs(X):
    """Return the linear combination ``X . W + b`` (the pre-sigmoid logits)."""
    weighted = tf.matmul(X, W)
    return weighted + b
#define the training loop operations
def inference(X):
    """Run the model over data X: the sigmoid of the linear combination of the inputs."""
    logits = combine_inputs(X)
    return tf.sigmoid(logits)

def loss(X,Y):
    """Compute the mean sigmoid cross-entropy loss over a batch.

    Args:
        X: input features, fed to ``combine_inputs`` to obtain the logits.
        Y: expected outputs (labels) matching the logits.

    Returns:
        A scalar tensor: the mean over the batch of the per-example
        sigmoid cross entropy.
    """
    # The cross-entropy op takes the raw logits and applies the sigmoid
    # itself, so the sigmoid output of inference() is not needed here.
    logits = combine_inputs(X)
    return tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y))
def inputs(batch_size=100):
    """Read the Titanic training data and build the feature/label batches.

    Args:
        batch_size: number of rows per batch (defaults to 100).

    Returns:
        A ``(features, survived)`` tuple. ``features`` has one example per
        row and one feature per column (first class, second class, third
        class, is-female, age); ``survived`` is reshaped to
        ``[batch_size, 1]``.
    """
    # One default per CSV column; the default also fixes the column dtype
    # (pclass is an int, the text columns are strings, the rest floats).
    record_defaults = [[0.0], [0.0], [0], [""], [""], [0.0],
                       [0.0], [0.0], [""], [0.0], [""], [""]]
    passenger_id, survived, pclass, name, sex, age, sibsp, parch, ticket, fare, cabin, embarked = \
        read_csv(batch_size, 'titanicData/train.csv', record_defaults)

    # Convert categorical data into 0/1 float indicators.
    is_first_class = tf.cast(tf.equal(pclass, [1]), tf.float32)
    is_second_class = tf.cast(tf.equal(pclass, [2]), tf.float32)
    is_third_class = tf.cast(tf.equal(pclass, [3]), tf.float32)

    gender = tf.cast(tf.equal(sex, ["female"]), tf.float32)

    # Pack all the features in a single matrix, then transpose so that
    # there is one example per row and one feature per column.
    features = tf.transpose(tf.stack(
        [is_first_class, is_second_class, is_third_class, gender, age]
    ))

    # Turn the label vector into a column so it lines up with the
    # [batch_size, 1] output of the model.
    survived = tf.reshape(survived, [batch_size, 1])
    return features, survived
def train(total_loss, learning_rate=0.01):
    """Create the op that adjusts the model parameters to reduce the loss.

    Args:
        total_loss: scalar loss tensor to minimize.
        learning_rate: step size for gradient descent (defaults to 0.01).

    Returns:
        The op returned by ``GradientDescentOptimizer.minimize``; running
        it performs one training step.
    """
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    return optimizer.minimize(total_loss)
def evaluate(sess,X,Y):
    """Print the accuracy of the current model on the batch (X, Y).

    A prediction counts as positive when the model output exceeds 0.5;
    the printed value is the fraction of predictions equal to Y.

    NOTE: every call adds new ops to the graph (inference, casts,
    comparison and mean), so calling this repeatedly grows the graph.

    Args:
        sess: session used to run the accuracy computation.
        X: input features.
        Y: expected outputs.
    """
    probabilities = inference(X)
    predicted = tf.cast(probabilities > 0.5, tf.float32)
    matches = tf.cast(tf.equal(predicted, Y), tf.float32)
    accuracy = sess.run(tf.reduce_mean(matches))
    print(accuracy)
    

    

In [None]:
#Create a saver, used below to write checkpoints and to restore from them.
#NOTE(review): it is constructed with no arguments, so presumably it tracks the
#variables that already exist at this point (W and b) -- confirm against the
#tf.train.Saver documentation before adding variables after this cell.
saver = tf.train.Saver()

In [None]:
#Launch the graph in a session, setup boilerplate
with tf.Session() as sess:
    # Build the complete graph first so that the initializer created
    # afterwards covers every variable the graph defines.
    X,Y = inputs()
    total_loss = loss(X,Y)
    train_op = train(total_loss)
    sess.run(tf.global_variables_initializer())

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess,coord=coord)

    # Checkpoints are written to the same directory they are looked up in
    # (modelpath); a bare relative name would go to the current working
    # directory, which may be a different place.
    checkpoint_prefix = os.path.join(modelpath,"my-model")

    try:
        initial_step = 0

        #verify if we don't have a checkpoint saved already
        ckpt = tf.train.get_checkpoint_state(modelpath)

        if ckpt and ckpt.model_checkpoint_path:
            #Restores from checkpoints; the step number is the suffix
            #after the last '-' of the checkpoint path
            saver.restore(sess,ckpt.model_checkpoint_path)
            initial_step = int(ckpt.model_checkpoint_path.rsplit('-',1)[1])

        #actual training loop
        training_steps = 2000

        for step in range(initial_step,training_steps):
            sess.run([train_op])
            if step%1000 == 0:
                saver.save(sess,checkpoint_prefix,global_step=step)
            #for debugging and learning purposes, see how the loss gets decremented through training steps
            if step%10 == 0:
                print("loss: {0}".format(sess.run([total_loss])))
                evaluate(sess,X,Y)
    finally:
        # Always stop the queue-runner threads, even if training failed,
        # so they are not left running.
        coord.request_stop()
        coord.join(threads)

    # Final checkpoint; the session is closed by the enclosing "with".
    saver.save(sess,checkpoint_prefix,global_step=training_steps)

loss: [0.75339079]
0.59
loss: [0.68174118]
0.62
loss: [0.85530418]
0.67
loss: [0.88676226]
0.61
loss: [0.87536913]
0.54
loss: [0.68709624]
0.63
loss: [0.65691394]
0.56
loss: [0.77134734]
0.62
loss: [0.63297492]
0.65
loss: [0.6712808]
0.77
loss: [0.73701036]
0.5
loss: [0.72535247]
0.72
loss: [0.64167649]
0.61
loss: [0.96485502]
0.42
loss: [0.65268934]
0.81
loss: [0.64984512]
0.51
loss: [0.68836701]
0.55
loss: [0.63678545]
0.69
loss: [0.6363709]
0.68
loss: [0.63003778]
0.79
loss: [0.64692986]
0.71
loss: [0.63846725]
0.69
loss: [0.61399126]
0.69
loss: [0.60540533]
0.61
loss: [0.64728242]
0.67
loss: [0.67267835]
0.5
loss: [0.59518212]
0.67
loss: [0.62957311]
0.71
loss: [0.55984879]
0.6
loss: [0.58209401]
0.56
loss: [0.6194368]
0.8
loss: [0.60013819]
0.72
loss: [0.61256754]
0.56
loss: [0.66808236]
0.61
loss: [0.70263994]
0.65
loss: [0.61218959]
0.62
loss: [0.74367279]
0.65
loss: [0.77111787]
0.34
loss: [0.63299596]
0.65
loss: [0.70304459]
0.62
loss: [0.59161186]
0.76
loss: [0.59384286]
0.75