In [3]:
"""Input and output helpers to load in data.
"""
import numpy as np

def read_dataset(path_to_dataset_folder,index_filename):
    """ Read dataset into numpy arrays with preprocessing included
    Args:
        path_to_dataset_folder(str): path to the folder containing samples and indexing.txt
        index_filename(str): indexing.txt
    Returns:
        A(numpy.ndarray): sample feature matrix A = [[1, x1], 
                                                     [1, x2], 
                                                     [1, x3],
                                                     .......] 
                                where xi is the 16-dimensional feature of each sample
            
        T(numpy.ndarray): class label vector T = [y1, y2, y3, ...] 
                             where yi is +1/-1, the label of each sample 
    """
    with open(path_to_dataset_folder+'/'+index_filename, 'r') as f:
        label_sample_path = f.readlines()
    T = np.array([max(0,float(label_sample_path[i].split(' ')[0])) for i in range(len(label_sample_path))])
    sample_path = [label_sample_path[i].split(' ')[1].replace('\n','') for i in range(len(label_sample_path))]
    
    A = []
    for i in range(len(sample_path)):
        with open(path_to_dataset_folder+'/'+sample_path[i], 'r') as f:
            row_data = f.read().strip().split('  ')
            A.append([1.  if i ==0 else float(row_data[i-1]) for i in range(len(row_data)+1)])
    A = np.array(A)
    
        
    
    return A, T

In [4]:
X, Y_true = read_dataset('C:/Users/PIxel/CS446/mp3/data/trainset','indexing.txt')

In [5]:
print(Y_true)

[ 0.  0.  0. ...,  1.  1.  1.]


In [6]:
"""logistic model class for binary classification."""
import tensorflow as tf
import numpy as np

class LogisticModel_TF(object):
    
    def __init__(self, ndims, W_init='zeros'):
        """Initialize a logistic model.

        This function prepares an initialized logistic model.
        It will initialize the weight vector, self.W, based on the method
        specified in W_init.

        We assume that the FIRST index of Weight is the bias term, 
            Weight = [Bias, W1, W2, W3, ...] 
            where Wi correspnds to each feature dimension

        W_init needs to support:
          'zeros': initialize self.W with all zeros.
          'ones': initialze self.W with all ones.
          'uniform': initialize self.W with uniform random number between [0,1)
          'gaussian': initialize self.W with gaussion distribution (0, 0.1)

        Args:
            ndims(int): feature dimension
            W_init(str): types of initialization.
        """
        self.ndims = ndims
        self.W_init = W_init
        self.W0 = None
        ###############################################################
        # Fill your code below
        ###############################################################
        if W_init == 'zeros':
            # Hint: self.W0 = tf.zeros([self.ndims+1,1])
            self.W0 = tf.zeros([self.ndims, 1])
        elif W_init == 'ones':
            self.W0 = tf.ones([self.ndims, 1])
        elif W_init == 'uniform':
            self.W0 = tf.random_uniform([self.ndims, 1], maxval=1)
        elif W_init == 'gaussian':
            self.W0 = tf.random_normal([self.ndims, 1],mean=0.0,stddev=0.1)
        else:
            print ('Unknown W_init ', W_init) 
        #self.graph = tf.Graph()
        
    def build_graph(self, learn_rate, Y_true, X):
        """ build tensorflow training graph for logistic model.
        Args:
            learn_rate: learn rate for gradient descent
            ......: append as many arguments as you want
        """
        ###############################################################
        # Fill your code in this function
        ###############################################################
        # Hint: self.W = tf.Variable(self.W0)
        self.W = tf.Variable(self.W0)
        self.lr = learn_rate
        self.X_TF = X 
        self.y_TF = np.array([Y_true]).T.astype(int) 
        self.predictions = tf.sigmoid(tf.matmul(tf.cast(self.X_TF,tf.float32), self.W))
        self.classify = tf.cond(tf.less(0.5, self.predictions),0,1)
        self.cost = tf.reduce_mean(tf.square(tf.subtract(tf.cast(self.y_TF,tf.float32), self.predictions)))
        self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.lr).minimize(self.cost)        

        pass
        
    def fit(self, Y_true, X, max_iters, learn_rate):
        """ train model with input dataset using gradient descent. 
        Args:
            Y_true(numpy.ndarray): dataset labels with a dimension of (# of samples,1)
            X(numpy.ndarray): input dataset with a dimension of (# of samples, ndims+1)
            max_iters: maximal number of training iterations
            ......: append as many arguments as you want
        Returns:
            (numpy.ndarray): sigmoid output from well trained logistic model, used for classification
                             with a dimension of (# of samples, 1)
        """
        ###############################################################
        # Fill your code in this function
        ###############################################################
        def accuracy(Y_t, Y_p):
            acc_vec = np.array([1 if Y_t[i] == Y_p[i] else 0 for i in range(len(Y_p))])
            acc_val = np.mean(acc_vec)
            return acc_vec, acc_val
        
        self.build_graph(learn_rate, Y_true, X)
        init = tf.global_variables_initializer()
        with tf.Session() as sess:
            sess.run(init)
            Y = np.array([Y_true]).T.astype(int)    
            for epoch in range(max_iters):
                sess.run(self.optimizer)
                if epoch % 200 == 0:
                    classify = sess.run(self.classify)
                    acc_vec, acc_val = accuracy(Y, classify)
                    cost = sess.run(self.cost)
                    
                    print(epoch, '..', cost,acc_val)
                if epoch+1 == max_iters:
                    classify = sess.run(self.classify)
                    acc_vec, acc_val = accuracy(Y, classify)
                    print("Final step accuracy:", acc_val)
                    return acc_vec
                    
    
    

In [7]:
model =LogisticModel_TF(17,'zeros')

In [8]:
acc_ve = model.fit(Y_true, X,2000, 0.1)

TypeError: true_fn must be callable.

In [142]:
print(acc_ve[10:50])

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1
 1 1 1]


In [143]:
print(1290/2)
print(Y_true[770:800])

645.0
[ 0.  0.  0.  0.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]
