# When reading through this notebook, please also read the Python comments carefully.
# First, we import TensorFlow and other commonly used libraries.

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.python.client import device_lib
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Report which numpy / tensorflow builds this notebook is running against.
for lib in (np, tf):
    print(lib.__version__)

In [None]:
# List the names of the local compute devices reported by tensorflow.
device_list = device_lib.list_local_devices()
for name in (dev.name for dev in device_list):
    print(name)


# Part 1 Arithmetic and basic tensor operations

## We compute the result of the following computation graph:
## a = 2, b = 3 (constants)
## c = a + b
## d = a * b
## e = c - d

In [None]:
# The toy graph evaluated with numpy:
#   a = 2, b = 3 (constants)
#   c = a + b
#   d = a * b
#   e = c - d
a, b = np.array(2.0), np.array(3.0)

c = np.add(a, b)
d = np.multiply(a, b)
e = np.subtract(c, d)

In [None]:

# numpy has already computed the values, so they print as plain numbers
for value in (c, d, e):
    print(value)


In [None]:
# The same computation expressed in tensorflow.
a, b = tf.constant(2.0), tf.constant(3.0)

# These statements only define operations, i.e. the compute graph.
c = a + b
d = a * b
e = c - d

In [None]:
# Up to this point we have only defined the graph; it has not been executed yet.
for node in (c, d, e):
    print(node)

In [None]:
# To execute a graph we need a tensorflow "engine" called a session.
sess = tf.Session()

In [None]:
# Execute the graph. The session only returns the values of the nodes that were passed to it.

e_val = sess.run( e)
print( e_val)

In [None]:
# Several nodes can be fetched in one run by passing them as a list.
fetched_vals = sess.run([c, d, e])
c_val, d_val, e_val = fetched_vals
print(*fetched_vals)

## Rule 1: First define the graph, then execute it. The graph does not change during execution.
## TensorFlow only executes the computations (nodes) that are necessary.

In [None]:
# Matrix computation in numpy: C is the matrix product of A and B.
A = np.array([[1, 2], [3, 4]])
B = np.array([[3, 4], [5, 6]])
C = np.matmul(A, B)

for matrix in (A, B, C):
    print(matrix, '\n')

In [None]:
# Wrap the numpy matrices as tensorflow constants and define their matrix product.
tf_A, tf_B = tf.constant(A), tf.constant(B)
tf_C = tf_A @ tf_B

print(tf_C)

In [None]:
# Run the graph to obtain the value of the matrix product node.
tf_C_val = sess.run( tf_C)
print( tf_C_val)


### Placeholders: used to represent data points
### feed_dict: replaces (feeds) placeholders with actual data


In [None]:
# Two 2x2 float32 placeholders; their values are supplied when the graph is run.
tf_A, tf_B = (tf.placeholder(tf.float32, shape=[2, 2]) for _ in range(2))

tf_C = tf_A @ tf_B
print(tf_C)

In [None]:
# Feed the numpy matrices into the placeholders for this run.
# Calling sess.run(tf_C) without a feed_dict would be a wrong call.
placeholder_values = {tf_A: A, tf_B: B}
tf_C_val = sess.run(tf_C, feed_dict=placeholder_values)
print(tf_C_val)

### Variables: tensors whose values can change, i.e., be assigned new values. They usually represent parameters such as weights, biases, means and variances.
### Note that you can also feed a value to a variable to temporarily overwrite its current value. This is useful in debugging.

In [None]:
# Two scalar variables with initial value 0.0, plus an op that adds 1.0 to tf_w.
tf_w, tf_b = tf.Variable(0.0), tf.Variable(0.0)
add_op = tf.assign_add(tf_w, 1.0)

for variable in (tf_w, tf_b):
    print(variable)

In [None]:
# Variables have to be initialized before they are used.
init_op = tf.global_variables_initializer()
sess.run(init_op)

tf_w_val = sess.run(tf_w)
print(tf_w_val)

In [None]:
# Run the increment op (adds 1.0 to tf_w), then read the variable back.
_ = sess.run( add_op)
print( sess.run( tf_w))

In [None]:
# Overwrite the value of w with -2 by feeding it through feed_dict.
print( sess.run(tf_w, feed_dict={ tf_w:-2}))

In [None]:
# Read w again, this time without a feed_dict, to show its stored value.
print( sess.run( tf_w))

### Rule 2: Initialize variables before executing the graph. Note that you only need to initialize variables once.

## Summary
### Placeholders: data points
### Variables: parameters
### Constants: other fixed values 

# Part 2 Bayesian Logistic Regression( MAP )

In [None]:
# Using the banknote data set, as in homework 2

In [None]:
# Load the train/test CSV files; the last column is the label, the rest are features.
train_dat = np.genfromtxt("train.csv", delimiter=",")
test_dat = np.genfromtxt("test.csv", delimiter=',')

x_train, y_train = train_dat[:, :-1], train_dat[:, -1]
x_test, y_test = test_dat[:, :-1], test_dat[:, -1]


In [None]:
# Display the shapes of the four data arrays.
tuple(arr.shape for arr in (x_train, y_train, x_test, y_test))

In [None]:
# Display the dtypes of the four data arrays.
tuple(arr.dtype for arr in (x_train, y_train, x_test, y_test))

In [None]:
# Attribute normalization: cast features to float32 and labels to int32, then
# standardize every feature with the mean/variance of the TRAINING split and
# apply the same transform to the test split.
x_train = x_train.astype(np.float32)
x_test = x_test.astype(np.float32)
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)

mean_train = np.average(x_train, axis=0)
var_train = np.var(x_train, axis=0)

# A constant feature has zero variance; dividing by its standard deviation
# would fill that column with NaN/inf, so such columns are only centred.
std_train = np.sqrt(var_train)
std_train[std_train == 0] = 1.0

x_train -= mean_train
x_train /= std_train

x_test -= mean_train
x_test /= std_train

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [None]:
# Baseline: scikit-learn logistic regression with its default settings.
log_clf = LogisticRegression()

In [None]:
# Fit the baseline classifier on the training split.
log_clf.fit( x_train, y_train)

In [None]:
# Predict labels for the test split with the fitted baseline.
y_pred = log_clf.predict( x_test)

In [None]:
# Test accuracy of the scikit-learn baseline.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print('acc = %g' % acc)

In [None]:
class TF_Logistic_Classifier_NR:
    '''
    Bayesian logistic regression (MAP estimate) fitted with Newton-Raphson
    updates, expressed as a TensorFlow 1.x graph that is run in a session
    owned by the instance.

    With ``reg`` acting as the precision of the prior on the weights, each
    update uses
        gradient  g = X^T (y - t) + reg * w
        Hessian   H = X^T R X + reg * I,   R = diag(y * (1 - y))
    and sets  w <- w - H^{-1} g.
    '''

    def __init__(self, D, zero_init = False, reg = 1.0):
        '''
        :param D: length of feature vector
        :param zero_init: whether to use all-zero initialization
                          (otherwise Glorot-normal initialization is used)
        :param reg: regularization strength (precision of the prior)
        '''
        self.D = D
        self.zero_init = zero_init
        self.reg = reg

        if self.zero_init:
            self.initializer = tf.initializers.zeros()
        else:
            self.initializer = tf.initializers.glorot_normal()

        self._build_graph()

    def _build_graph(self):
        '''
        Define the model, the Newton-Raphson update op and the session.
        '''
        # model parameters (weights), a D x 1 column vector
        self.w = tf.Variable(self.initializer(shape=[self.D, 1]))

        # design matrix (N x D), target labels (N x 1) and predicted probabilities (N x 1)
        self.X = tf.placeholder(tf.float32, shape=[None, self.D])
        self.t = tf.placeholder(tf.float32, shape=[None, 1])
        self.y = tf.sigmoid(self.X @ self.w)

        self.grad = tf.transpose(self.X) @ (self.y - self.t) + self.w * self.reg

        # X^T R X with R = diag(r) equals X^T (r * X): scaling the rows of X by
        # r avoids materializing the N x N diagonal matrix.
        r = self.y * (1 - self.y)
        H = tf.transpose(self.X) @ (r * self.X) + tf.eye(self.D) * self.reg

        # key step: Newton-Raphson update. Solving H d = grad is cheaper and
        # numerically safer than forming inv(H) explicitly.
        newton_step = tf.linalg.solve(H, self.grad)
        self.update_op = tf.assign(self.w, self.w - newton_step)

        # GPU settings: allocate GPU memory on demand
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        # NOTE: run_options is created here but not passed to any run call in this class.
        self.run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)
        self.sess.run(tf.global_variables_initializer())

    def fit(self, X, t, num_iter = 10, verbose = False):
        '''
        Run ``num_iter`` Newton-Raphson updates on the full data set.

        :param X: Design Matrix, N by D
        :param t: target, N labels (reshaped internally to N by 1)
        :param num_iter: number of update steps
        :param verbose: print the weights after every step
        :return: self
        '''
        # The feed does not change between iterations, so build it once.
        train_feed_dict = {self.X: X, self.t: np.asarray(t).reshape(-1, 1)}

        # Reset on every fit; this classifier does not record a loss history.
        self.train_hist = []
        for i in range(num_iter):
            # tf.assign yields the value after the assignment, so the fetched
            # weights are always the updated ones. Fetching self.w next to
            # update_op in the same run would leave the read order undefined.
            w = self.sess.run(self.update_op, feed_dict=train_feed_dict)

            if verbose:
                print('iter %3d: w = ' % (i + 1), w.reshape(-1))
        return self

    def predict(self, X):
        '''
        Predict 0/1 labels by thresholding the predicted probability at 0.5.

        :param X: Design Matrix, N by D
        :return: integer array of N labels
        '''
        test_feed_dict = {self.X: X}

        y_pred = self.sess.run(self.y, feed_dict=test_feed_dict).reshape(-1)
        # np.int was only an alias of the builtin int and was removed in NumPy 1.24.
        y_pred = (y_pred > 0.5).astype(int)

        return y_pred


class TF_Logistic_Classifier_SGD:
    '''
    Bayesian logistic regression (MAP estimate) fitted by gradient-based
    minimization of the MAP loss, expressed as a TensorFlow 1.x graph that is
    run in a session owned by the instance.

    Despite the class name, the optimizer is Adam and every step in ``fit``
    uses the full data set that was passed in.
    '''

    def __init__(self, D, zero_init = False, reg = 1.0, lr = 0.1):
        '''
        :param D: length of feature vector
        :param zero_init: whether to use all-zero initialization
                          (otherwise Glorot-normal initialization is used)
        :param reg: regularization strength (precision of the prior)
        :param lr: learning rate passed to the Adam optimizer
        '''
        self.D = D
        self.zero_init = zero_init
        self.reg = reg
        self.lr = lr

        if self.zero_init:
            self.initializer = tf.initializers.zeros()
        else:
            self.initializer = tf.initializers.glorot_normal()

        self._build_graph()

    def _build_graph(self):
        '''
        Define the model, the MAP loss, the minimization step and the session.
        '''
        # model parameters (weights), a D x 1 column vector
        self.w = tf.Variable(self.initializer(shape=[self.D, 1]))

        # design matrix (N x D) and target labels (N)
        self.X = tf.placeholder(tf.float32, shape=[None, self.D])
        self.t = tf.placeholder(tf.float32, shape=[None])

        logits = tf.reshape(self.X @ self.w, shape=[-1])
        self.y = tf.sigmoid(logits)

        # Negative log-likelihood. Computing it from the logits is the stable
        # form of -sum(t*log(y) + (1-t)*log(1-y)), which yields inf/NaN as soon
        # as the sigmoid saturates to exactly 0 or 1.
        self.mle_loss = tf.reduce_sum(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.t, logits=logits))
        # Negative log-prior for a prior with precision reg: (reg / 2) * ||w||^2.
        # The 1/2 factor makes its gradient reg * w, matching the gradient used
        # by the Newton-Raphson classifier in this file.
        self.prior_loss = 0.5 * self.reg * tf.reduce_sum(self.w * self.w)
        self.map_loss = self.mle_loss + self.prior_loss

        # minimizer
        self.min_opt = tf.train.AdamOptimizer(self.lr)

        # minimizing step
        self.min_step = self.min_opt.minimize(self.map_loss)

        # GPU settings: allocate GPU memory on demand
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        # NOTE: run_options is created here but not passed to any run call in this class.
        self.run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)
        self.sess.run(tf.global_variables_initializer())

    def fit(self, X, t, num_iter = 10, verbose = True):
        '''
        Run ``num_iter`` optimizer steps on the full data set and record the
        MAP loss of every step in ``self.train_hist``.

        :param X: Design Matrix, N by D
        :param t: target, N labels (flattened internally)
        :param num_iter: number of optimizer steps
        :param verbose: print the loss and the weights after every step
        :return: self
        '''
        # The feed does not change between iterations, so build it once.
        train_feed_dict = {self.X: X, self.t: np.asarray(t).reshape(-1)}

        self.train_hist = []
        for i in range(num_iter):
            _, map_loss = self.sess.run([self.min_step, self.map_loss],
                                        feed_dict=train_feed_dict)
            if verbose:
                # Read the weights in a separate run so the printed values are
                # the ones after the step. Fetching self.w together with
                # min_step would leave the read order undefined.
                w = self.sess.run(self.w)
                print("iter %3d: map loss = %f" % (i + 1, map_loss), " w = ", w.reshape(-1))

            self.train_hist.append(map_loss)
        return self

    def predict(self, X):
        '''
        Predict 0/1 labels by thresholding the predicted probability at 0.5.

        :param X: Design Matrix, N by D
        :return: integer array of N labels
        '''
        test_feed_dict = {self.X: X}

        y_pred = self.sess.run(self.y, feed_dict=test_feed_dict)
        # np.int was only an alias of the builtin int and was removed in NumPy 1.24.
        y_pred = (y_pred > 0.5).astype(int)

        return y_pred


In [None]:
# Build the Newton-Raphson classifier for the training feature width and fit it verbosely.
NR_clf = TF_Logistic_Classifier_NR(D=x_train.shape[1], zero_init=False, reg=1.0)
_ = NR_clf.fit(X=x_train, t=y_train, verbose=True)

In [None]:
# Accuracy of the Newton-Raphson classifier on the train and test splits.
NR_train_pred = NR_clf.predict(x_train)
NR_test_pred = NR_clf.predict(x_test)

train_acc = accuracy_score(y_train, NR_train_pred)
test_acc = accuracy_score(y_test, NR_test_pred)
print("train acc = %g, test acc =%g" % (train_acc, test_acc))

In [None]:
# Build the gradient-based classifier and run 50 verbose training steps.
SGD_clf = TF_Logistic_Classifier_SGD(D=x_train.shape[1], zero_init=False, reg=1.0, lr=0.1)
_ = SGD_clf.fit(X=x_train, t=y_train, num_iter=50, verbose=True)

In [None]:
# Accuracy of the gradient-based classifier on the train and test splits.
SGD_train_pred = SGD_clf.predict(x_train)
SGD_test_pred = SGD_clf.predict(x_test)

SGD_train_acc = accuracy_score(y_train, SGD_train_pred)
SGD_test_acc = accuracy_score(y_test, SGD_test_pred)
print('SGD trian acc = %g, test acc =%g' % (SGD_train_acc, SGD_test_acc))

In [None]:
# Plot the MAP loss recorded at each training iteration.
plt.rcParams['figure.figsize'] = (8, 6)
loss_curve = SGD_clf.train_hist
plt.plot(range(len(loss_curve)), loss_curve)
plt.xlabel('num iter')
plt.ylabel(' map loss')