In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.python.client import device_lib
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import datetime, os

In [1]:
# Mount Google Drive into the Colab filesystem so the CSV files stored there
# can be opened through ordinary file paths (prompts for authorization).
from google.colab import drive
drive.mount(mountpoint='/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
#import data - all data is prenormalized and split
# Build every path from a single directory constant instead of repeating the
# prefix in four literals. os.path.join also removes the doubled slashes of the
# hand-written paths (a leading "//" is implementation-defined under POSIX).
DATA_DIR = os.path.join("/content", "drive", "My Drive", "data")
dTrainPath = os.path.join(DATA_DIR, "DTrain.csv")
dTestPath = os.path.join(DATA_DIR, "DTest.csv")
tTestPath = os.path.join(DATA_DIR, "tTest.csv")
tTrainPath = os.path.join(DATA_DIR, "tTrain.csv")

# The feature CSVs are transposed right after loading so that each row of
# dTrain / dTest is one sample (the shape check in the next cell reports
# (samples, 9)). The target files are used exactly as read.
dTrain = np.transpose(np.genfromtxt(dTrainPath, delimiter=","))
dTest = np.transpose(np.genfromtxt(dTestPath, delimiter=","))
tTrain = np.genfromtxt(tTrainPath, delimiter=",")
tTest = np.genfromtxt(tTestPath, delimiter=",")

# Fail here, next to the load, if features and targets do not line up,
# rather than later inside a model's fit/predict call.
assert dTrain.shape[0] == tTrain.shape[0], "train features/targets row count mismatch"
assert dTest.shape[0] == tTest.shape[0], "test features/targets row count mismatch"

In [5]:
# Check shapes for debugging: features (train, test) followed by targets (train, test)
tuple(arr.shape for arr in (dTrain, dTest, tTrain, tTest))

((2000, 9), (1781, 9), (2000,), (1781,))

In [8]:
# Baseline: scikit-learn logistic regression with default settings.
# fit() returns the estimator itself, so construction and training are chained.
log_clf = LogisticRegression().fit(dTrain, tTrain)

# Score the held-out split: predicted labels against the true test labels.
tPred = log_clf.predict(dTest)
acc = accuracy_score(y_true=tTest, y_pred=tPred)
print('acc = %g' % acc)

acc = 0.532285




In [0]:
#Using functions from class TF example. Own comments added to better understand what's going on
class TF_Logistic_Classifier_SGD:
    '''
    Binary logistic regression with an L2 (Gaussian prior) penalty, written
    against the TensorFlow 1.x graph API (placeholders + Session).

    Note on the name: every training step in ``fit`` feeds the complete arrays
    it was given and the update rule is Adam, so training is full-batch rather
    than stochastic. The model has a weight vector only (no bias term).

    Attributes set after training:
        train_hist: list with the MAP loss recorded at each ``fit`` iteration.
    '''

    def __init__(self, D, zero_init = False, reg = 1.0, lr = 0.05):
        '''
        :param D: length of feature vector
        :param zero_init: whether to use all-zero initialization
                          (Glorot-normal initialization otherwise)
        :param reg: regularization strength (precision of prior)
        :param lr: learning rate passed to the Adam optimizer
        '''
        self.D = D
        self.zero_init = zero_init
        self.reg = reg
        self.lr = lr

        if self.zero_init:
            self.initializer = tf.initializers.zeros( )
        else:
            self.initializer = tf.initializers.glorot_normal()

        self._build_graph()

    def _build_graph(self):
        '''Create variables, placeholders, losses, the optimizer step and the session.'''
        #model parameters( weights): a single D x 1 column
        self.w = tf.Variable( self.initializer( shape = [ self.D,1] ))

        #Design Matrix and target label
        self.X = tf.placeholder( tf.float32, shape = [None, self.D,])
        self.t = tf.placeholder( tf.float32, shape = [None])

        #pre-activation scores, flattened from N x 1 to N so they line up with t
        self.logits = tf.reshape( self.X @ self.w, shape = [-1] )
        #predicted probability of the positive class
        self.y = tf.sigmoid( self.logits)

        #Negative Bernoulli log-likelihood, summed over samples. Computed from the
        #logits instead of tf.log( sigmoid( ...)): once the sigmoid saturates to
        #exactly 0 or 1 the explicit log becomes -inf and the loss turns inf/NaN.
        self.mle_loss = tf.reduce_sum(
            tf.nn.sigmoid_cross_entropy_with_logits( labels = self.t, logits = self.logits) )
        self.prior_loss = tf.reduce_sum( self.w * self.w * self.reg)
        self.map_loss = self.mle_loss + self.prior_loss #MAP Loss = MLE loss + prior loss

        #minimizer
        self.min_opt = tf.train.AdamOptimizer(self.lr)

        #minimizing step
        self.min_step = self.min_opt.minimize( self.map_loss) #minimize MAP loss

        #GPU settings: grow GPU memory on demand instead of reserving it all upfront
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        #kept as an attribute; it is not passed to any sess.run call in this class
        self.run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)
        self.sess.run(tf.global_variables_initializer())

    def fit(self,X, t, num_iter = 50, verbose = True):
        '''
        Run ``num_iter`` optimizer steps, each on the full (X, t) passed in.

        :param X: Design Matrix, N by D
        :param t: target labels, 1-D of length N
        :param num_iter: number of optimizer steps to run
        :param verbose: print the loss and current weights after every step
        :return: self
        '''

        #history is reset on every call, so it always describes the latest fit
        self.train_hist = []
        for i in range( num_iter):
            train_feed_dict = { self.X:X, self.t:t}
            _, map_loss,w = self.sess.run( [ self.min_step, self.map_loss, self.w], feed_dict=train_feed_dict)
            if verbose:
                print("iter %3d: map loss = %f" % ( i + 1, map_loss), " w = ", w.reshape( -1))

            self.train_hist.append( map_loss)
        return self

    def predict(self,X): #predict tags
        '''
        :param X: Design Matrix, N by D
        :return: integer array of length N; 1 where the predicted probability
                 is strictly greater than 0.5, else 0
        '''
        test_feed_dict = { self.X:X}

        y_pred = self.sess.run( self.y, feed_dict=test_feed_dict)
        #np.int was only an alias of the builtin int and has been removed from
        #NumPy (1.24+); the builtin yields the same integer dtype
        y_pred = ( y_pred > 0.5).astype( int)

        return y_pred

    def close(self):
        '''Close the TensorFlow session owned by this classifier.'''
        self.sess.close()

In [0]:
# Train the TensorFlow classifier (adapted from the in-class example).
# lr and num_iter are the two knobs that were varied here to look for the best values.
SGD_clf = TF_Logistic_Classifier_SGD(
    D=dTrain.shape[1],  # one weight per feature column
    zero_init=False,
    reg=1.0,
    lr=0.05,
)
_ = SGD_clf.fit(X=dTrain, t=tTrain, num_iter=10, verbose=True)


In [56]:
# Accuracy of the TensorFlow classifier: true labels vs. predicted labels,
# first on the training split, then on the held-out test split.
SGD_train_acc = accuracy_score(y_true=tTrain, y_pred=SGD_clf.predict(dTrain))
SGD_test_acc = accuracy_score(y_true=tTest, y_pred=SGD_clf.predict(dTest))
print('SGD train acc = %g, test acc =%g' % (SGD_train_acc, SGD_test_acc))
# Author's note from a 500-iteration run: training accuracy of 52.65%, test accuracy of 51.881%.
# (NOTE(review): those figures come from a different run than the output captured below - confirm.)
# This is mildly better than a coin flip but worse than predicting the return will be positive.

SGD train acc = 0.523, test acc =0.535654
