In [1]:
import sys

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
# Print arrays in full instead of summarizing them with "...".
# Current NumPy rejects threshold=np.nan ("threshold must be non-NAN");
# sys.maxsize is the documented value for untruncated output.
np.set_printoptions(threshold=sys.maxsize)
def loadData():
    """Load notMNIST and build a shuffled two-class train/valid/test split.

    Reads 'notMNIST.npz' from the current working directory, keeps only the
    samples labelled 2 or 9, divides the pixel values by 255 and relabels
    class 2 as 1 and class 9 as 0. The rows are then shuffled with NumPy's
    global random generator, which is re-seeded to 421 as a side effect.

    Returns:
        (trainData, validData, testData, trainTarget, validTarget, testTarget)
        where the train split is the first 3500 shuffled rows, the validation
        split the next 100 and the test split everything after that. Targets
        are column vectors of shape (n, 1).
    """
    positive_label = 2
    negative_label = 9
    train_end = 3500
    valid_end = 3600

    with np.load('notMNIST.npz') as archive:
        images = archive['images']
        labels = archive['labels']

    # Keep only the two classes of interest and scale pixels by 1/255.
    keep = np.logical_or(labels == positive_label, labels == negative_label)
    images = images[keep] / 255.
    labels = labels[keep].reshape(-1, 1)

    # Binary relabelling: positive class -> 1, negative class -> 0.
    is_positive = labels == positive_label
    is_negative = labels == negative_label
    labels[is_positive] = 1
    labels[is_negative] = 0

    # Fixed seed so that the split is identical on every call.
    np.random.seed(421)
    order = np.arange(len(images))
    np.random.shuffle(order)
    images = images[order]
    labels = labels[order]

    trainData, trainTarget = images[:train_end], labels[:train_end]
    validData, validTarget = images[train_end:valid_end], labels[train_end:valid_end]
    testData, testTarget = images[valid_end:], labels[valid_end:]
    return trainData, validData, testData, trainTarget, validTarget, testTarget

In [2]:
def MSE(W, b, x, y, reg):
    """Mean-squared-error loss of a linear model with L2 weight decay.

    Computes ``1/(2N) * sum_i (W . x_i + b - y_i)**2 + reg/2 * W . W`` where
    every sample ``x_i`` is flattened to a feature vector first. The sum is
    evaluated with one matrix product instead of a Python loop over samples.

    Args:
        W: weight vector of shape (d,) or (d, 1).
        b: bias, a scalar (or length-1 array).
        x: samples, indexable along axis 0; only the first ``len(y)`` are
            used and each one is flattened to ``d`` features.
        y: targets of length N, shape (N,) or (N, 1).
        reg: L2 regularization strength.

    Returns:
        The loss, with the same shape the per-sample formulation yields:
        (1,) for W of shape (d,) and y of shape (N, 1); (1, 1) when W is a
        column vector; a scalar when both W and y are 1-D.
    """
    weights = np.asarray(W)
    penalty = reg / 2 * np.dot(np.transpose(weights), weights)

    num_samples = len(y)
    if num_samples == 0:
        # No data term; also avoids dividing by zero below.
        return 0 + penalty

    features = np.asarray(x)[:num_samples].reshape(num_samples, -1)
    predictions = np.dot(features, weights)
    targets = np.asarray(y)

    # Pad trailing singleton axes so (N,) predictions line up with (N, 1)
    # targets (and vice versa) instead of broadcasting to (N, N).
    rank = max(predictions.ndim, targets.ndim)
    predictions = predictions.reshape(predictions.shape + (1,) * (rank - predictions.ndim))
    targets = targets.reshape(targets.shape + (1,) * (rank - targets.ndim))

    residuals = predictions + b - targets
    return np.sum(residuals ** 2, axis=0) / (2 * num_samples) + penalty

In [3]:
def gradMSE(W, b, x, y, reg):
    """Gradient of the regularized mean-squared-error loss of a linear model.

    For the loss ``1/(2N) * sum_i (W . x_i + b - y_i)**2 + reg/2 * W . W``
    this returns

        grad_W = 1/N * sum_i (W . x_i + b - y_i) * x_i + reg * W
        grad_b = 1/N * sum_i (W . x_i + b - y_i)

    evaluated with matrix products instead of a Python loop over samples.

    Args:
        W: weight vector of shape (d,) or (d, 1).
        b: bias, a scalar (or length-1 array).
        x: samples, indexable along axis 0; only the first ``len(y)`` are
            used and each one is flattened to ``d`` features.
        y: targets of length N, shape (N,) or (N, 1) (one target per sample).
        reg: L2 regularization strength.

    Returns:
        (grad_W, grad_b). ``grad_W`` has the shape of ``W``; ``grad_b`` has
        shape (1,) for (N, 1) targets and is a scalar for 1-D W and y.
    """
    weights = np.asarray(W)
    num_samples = len(y)
    if num_samples == 0:
        # Only the regularization term remains; avoids dividing by zero.
        return 0 + reg * weights, 0

    features = np.asarray(x)[:num_samples].reshape(num_samples, -1)
    predictions = np.dot(features, weights)
    targets = np.asarray(y)

    # Pad trailing singleton axes so (N,) predictions line up with (N, 1)
    # targets (and vice versa) instead of broadcasting to (N, N).
    rank = max(predictions.ndim, targets.ndim)
    predictions = predictions.reshape(predictions.shape + (1,) * (rank - predictions.ndim))
    targets = targets.reshape(targets.shape + (1,) * (rank - targets.ndim))
    residuals = predictions + b - targets

    # X^T r gives sum_i r_i * x_i; reshaping to W's shape keeps a column
    # vector W from broadcasting against a flat gradient into a (d, d) matrix.
    data_grad = np.dot(features.T, residuals.reshape(num_samples, -1)).reshape(weights.shape)
    grad_W = data_grad / num_samples + reg * weights
    grad_b = np.sum(residuals, axis=0) / num_samples
    return grad_W, grad_b

In [4]:
def batch_grad_descent(W, b, x, y, alpha, epochs, reg, error_tol):
    """Full-batch gradient descent on the regularized MSE loss.

    Each epoch records the training loss together with the loss on the
    validation and test splits, then takes one gradient step. Training stops
    early once the training loss changes by less than ``error_tol`` between
    two consecutive epochs.

    NOTE: the validation and test losses are computed from the module-level
    names ``validData``, ``validTarget``, ``testData`` and ``testTarget``;
    they must be defined (e.g. via ``loadData()``) before calling this.

    Args:
        W: initial weight vector.
        b: initial bias.
        x: training samples.
        y: training targets.
        alpha: learning rate.
        epochs: maximum number of epochs.
        reg: L2 regularization strength.
        error_tol: convergence threshold on the change in training loss.

    Returns:
        (rate_losses, validate_losses, test_losses): three lists holding the
        training, validation and test loss of every epoch that was run.
    """
    rate_losses = []
    validate_losses = []
    test_losses = []

    # Start at infinity so the very first epoch can never look converged.
    old_loss = np.inf
    for _ in range(epochs):
        new_loss = MSE(W, b, x, y, reg)
        rate_losses.append(new_loss)
        validate_losses.append(MSE(W, b, validData, validTarget, reg))
        test_losses.append(MSE(W, b, testData, testTarget, reg))

        # Converged: the loss for the current parameters is already recorded,
        # so stop here instead of running out the remaining epochs.
        if np.all(np.abs(new_loss - old_loss) < error_tol):
            break
        old_loss = new_loss

        grad_W, grad_b = gradMSE(W, b, x, y, reg)
        W = W - grad_W * alpha
        b = b - grad_b * alpha

    return rate_losses, validate_losses, test_losses

In [5]:
def plot_graph(rates=(0.005, 0.001, 0.0001), x=None, y=None, epochs=5000, reg=0, error_tol=1e-7):
    """Plot the training-loss curve for several learning rates.

    Runs ``batch_grad_descent`` once per learning rate, starting each run from
    zero weights and zero bias, and draws the training loss per epoch on
    figure 0.

    Args:
        rates: learning rates to compare.
        x: training samples; defaults to the module-level ``trainData``.
        y: training targets; defaults to the module-level ``trainTarget``.
        epochs: maximum number of epochs per run.
        reg: L2 regularization strength.
            NOTE(review): 0 is an assumed default -- confirm the intended value.
        error_tol: convergence threshold passed to ``batch_grad_descent``.

    Returns:
        The entry of ``rates`` whose run ended with the lowest training loss.
    """
    if x is None:
        x = trainData
    if y is None:
        y = trainTarget
    num_features = np.asarray(x[0]).size

    final_losses = []
    plt.figure(0)
    plt.title("Q1")
    plt.xlabel("epoch")
    plt.ylabel("loss")
    for rate in rates:
        initial_W = np.zeros(num_features)
        # batch_grad_descent returns (train, validation, test) histories;
        # only the training curve is plotted here.
        train_losses, _, _ = batch_grad_descent(initial_W, 0, x, y, rate, epochs, reg, error_tol)
        # Each recorded loss may be a length-1 array; reduce it to a float.
        curve = [float(np.ravel(loss)[0]) for loss in train_losses]
        plt.plot(range(len(curve)), curve, label="rate=" + str(rate))
        final_losses.append(curve[-1] if curve else float("inf"))
    plt.grid()
    plt.legend()
    return rates[final_losses.index(min(final_losses))]

In [None]:
# Train the linear model with batch gradient descent
# (learning rate 0.005, at most 5000 epochs, reg 0.5, tolerance 1e-7).
trainData, validData, testData, trainTarget, validTarget, testTarget = loadData()
weight = np.zeros(len(trainData[0].flatten()))
rate_losses, validate_losses, test_losses = batch_grad_descent(
    weight, 0, trainData, trainTarget, 0.005, 5000, 0.5, 1e-7)
# Report only the final values: printing the three complete per-epoch
# histories floods the cell output with thousands of numbers.
print("epochs run:", len(rate_losses))
print("final losses (train, validate, test):", rate_losses[-1], validate_losses[-1], test_losses[-1])

In [None]:
# Training / validation / test loss per epoch for the reg = 0.5 run.
curves = (
    (rate_losses, ('g--',), "training"),
    (validate_losses, (), "validate"),
    (test_losses, (), "testing"),
)
plt.figure(0)
plt.title("regularization parameter is 0.5")
plt.xlabel("epoch")
plt.ylabel("loss")
for history, fmt, tag in curves:
    plt.plot(range(len(history)), history, *fmt, label=tag)
plt.grid()
plt.legend()

In [9]:
def batch_grad_descent_2(W, b, x, y, alpha, epochs, reg, error_tol):
    """Full-batch gradient descent on the regularized MSE loss (training loss only).

    Each epoch records the training loss and then takes one gradient step.
    Training stops early once the training loss changes by less than
    ``error_tol`` between two consecutive epochs.

    Args:
        W: initial weight vector.
        b: initial bias.
        x: training samples.
        y: training targets.
        alpha: learning rate.
        epochs: maximum number of epochs.
        reg: L2 regularization strength.
        error_tol: convergence threshold on the change in training loss.

    Returns:
        List with the training loss of every epoch that was run.
    """
    rate_losses = []

    # Start at infinity so the very first epoch can never look converged.
    old_loss = np.inf
    for _ in range(epochs):
        new_loss = MSE(W, b, x, y, reg)
        rate_losses.append(new_loss)

        # Converged: the loss for the current parameters is already recorded,
        # so stop here instead of running out the remaining epochs.
        if np.all(np.abs(new_loss - old_loss) < error_tol):
            break
        old_loss = new_loss

        grad_W, grad_b = gradMSE(W, b, x, y, reg)
        W = W - grad_W * alpha
        b = b - grad_b * alpha
    return rate_losses

In [None]:
# Compare three regularization strengths with otherwise identical settings
# (learning rate 0.005, at most 5000 epochs, tolerance 1e-7).
trainData, validData, testData, trainTarget, validTarget, testTarget = loadData()
weight = np.zeros(len(trainData[0].flatten()))
first_losses = batch_grad_descent_2(weight, 0, trainData, trainTarget, 0.005, 5000, 0.001, 1e-7)
second_losses = batch_grad_descent_2(weight, 0, trainData, trainTarget, 0.005, 5000, 0.1, 1e-7)
third_losses = batch_grad_descent_2(weight, 0, trainData, trainTarget, 0.005, 5000, 0.5, 1e-7)
# Summarize each run instead of printing every per-epoch loss, which floods
# the cell output with thousands of numbers.
for reg_value, history in ((0.001, first_losses), (0.1, second_losses), (0.5, third_losses)):
    print("reg =", reg_value, "| epochs run:", len(history), "| final training loss:", history[-1])

In [None]:
# Training loss per epoch for each regularization strength.
curves = (
    (first_losses, ('g--',), "0.001"),
    (second_losses, (), "0.1"),
    (third_losses, (), "0.5"),
)
plt.figure(0)
plt.title("regularization parameter vs loss")
plt.xlabel("epoch")
plt.ylabel("loss")
for history, fmt, tag in curves:
    plt.plot(range(len(history)), history, *fmt, label=tag)
plt.grid()
plt.legend()

In [20]:
# Final training loss of each regularization run, in the order 0.001, 0.1, 0.5.
print(*(history[-1] for history in (first_losses, second_losses, third_losses)))

[0.01372196] [0.01672046] [0.02039289]


In [76]:
def normal_equation(x, y):
    """Closed-form least-squares weights for a linear model without bias.

    Flattens every sample of ``x`` into a row of the design matrix ``X`` and
    returns the ``W`` that minimizes ``||X W - y||^2``.

    Args:
        x: samples, indexable along axis 0; only the first ``len(y)`` are
            used and each one is flattened to ``d`` features.
        y: targets of shape (N, 1) (or (N,)).

    Returns:
        Weight array of shape (d, 1) for (N, 1) targets, (d,) for (N,) targets.
    """
    num_samples = len(y)
    # The feature count comes from ``x`` itself, not from a global data set.
    design = np.asarray(x)[:num_samples].reshape(num_samples, -1)
    # pinv(X) @ y equals (X^T X)^-1 X^T y whenever X^T X is invertible, and
    # remains well defined (minimum-norm solution) when it is singular,
    # where an explicit inverse fails or is numerically unreliable.
    return np.dot(np.linalg.pinv(design), y)

In [78]:
# Fit the weights in closed form and report the unregularized training MSE
# (the bias is fixed at 0).
trainData, validData, testData, trainTarget, validTarget, testTarget = loadData()
final_weight = normal_equation(x=trainData, y=trainTarget)
result = MSE(W=final_weight, b=0, x=trainData, y=trainTarget, reg=0)
print(result)

[[0.01158202]]


In [None]:
def crossEntropyLoss(W, b, x, y, reg):
    """Binary cross-entropy loss of a logistic model with L2 weight decay.

    With ``z_i = W . x_i + b`` and ``s_i = sigmoid(z_i)`` this computes

        1/N * sum_i [-y_i * log(s_i) - (1 - y_i) * log(1 - s_i)] + reg/2 * W . W

    where every sample ``x_i`` is flattened to a feature vector first.

    Args:
        W: weight vector of shape (d,) or (d, 1).
        b: bias, a scalar (or length-1 array); added to the logit.
        x: samples, indexable along axis 0; only the first ``len(y)`` are
            used and each one is flattened to ``d`` features.
        y: binary targets (0 or 1) of length N, shape (N,) or (N, 1).
        reg: L2 regularization strength.

    Returns:
        The loss: shape (1,) for W of shape (d,) and y of shape (N, 1),
        (1, 1) when W is a column vector, a scalar when both are 1-D.
    """
    weights = np.asarray(W)
    penalty = reg / 2 * np.dot(np.transpose(weights), weights)

    num_samples = len(y)
    if num_samples == 0:
        # No data term; also avoids dividing by zero below.
        return 0 + penalty

    features = np.asarray(x)[:num_samples].reshape(num_samples, -1)
    logits = np.dot(features, weights)
    targets = np.asarray(y)

    # Pad trailing singleton axes so (N,) logits line up with (N, 1) targets
    # (and vice versa) instead of broadcasting to (N, N).
    rank = max(logits.ndim, targets.ndim)
    logits = logits.reshape(logits.shape + (1,) * (rank - logits.ndim))
    targets = targets.reshape(targets.shape + (1,) * (rank - targets.ndim))

    # The bias belongs to the logit, not to the individual features.
    logits = logits + b

    # -y*log(s) - (1-y)*log(1-s) simplifies to log(1 + e^z) - y*z.
    # logaddexp(0, z) evaluates log(1 + e^z) without overflow, so confident
    # predictions never produce log(0) or inf.
    per_sample = np.logaddexp(0, logits) - targets * logits
    return np.sum(per_sample, axis=0) / num_samples + penalty

In [None]:
def gradCE(W, b, x, y, reg):
    """Gradient of the regularized binary cross-entropy loss of a logistic model.

    With ``z_i = W . x_i + b`` and ``s_i = sigmoid(z_i)``, the loss
    ``1/N * sum_i [-y_i*log(s_i) - (1-y_i)*log(1-s_i)] + reg/2 * W . W`` has

        grad_W = 1/N * sum_i (s_i - y_i) * x_i + reg * W
        grad_b = 1/N * sum_i (s_i - y_i)

    The return convention mirrors ``gradMSE``.

    Args:
        W: weight vector of shape (d,) or (d, 1).
        b: bias, a scalar (or length-1 array).
        x: samples, indexable along axis 0; only the first ``len(y)`` are
            used and each one is flattened to ``d`` features.
        y: binary targets (0 or 1) of length N, shape (N,) or (N, 1).
        reg: L2 regularization strength.

    Returns:
        (grad_W, grad_b). ``grad_W`` has the shape of ``W``; ``grad_b`` has
        shape (1,) for (N, 1) targets and is a scalar for 1-D W and y.
    """
    weights = np.asarray(W)
    num_samples = len(y)
    if num_samples == 0:
        # Only the regularization term remains; avoids dividing by zero.
        return 0 + reg * weights, 0

    features = np.asarray(x)[:num_samples].reshape(num_samples, -1)
    logits = np.dot(features, weights)
    targets = np.asarray(y)

    # Pad trailing singleton axes so (N,) logits line up with (N, 1) targets
    # (and vice versa) instead of broadcasting to (N, N).
    rank = max(logits.ndim, targets.ndim)
    logits = logits.reshape(logits.shape + (1,) * (rank - logits.ndim))
    targets = targets.reshape(targets.shape + (1,) * (rank - targets.ndim))
    logits = logits + b

    # sigmoid(z) = exp(-log(1 + e^-z)); this form cannot overflow.
    probabilities = np.exp(-np.logaddexp(0, -logits))
    residuals = probabilities - targets

    data_grad = np.dot(features.T, residuals.reshape(num_samples, -1)).reshape(weights.shape)
    grad_W = data_grad / num_samples + reg * weights
    grad_b = np.sum(residuals, axis=0) / num_samples
    return grad_W, grad_b

In [None]:
def grad_descent(W, b, trainingData, trainingLabels, alpha, iterations, reg, EPS):
    """Placeholder for a gradient-descent training loop; not implemented.

    The body is a bare ``pass``: calling this performs no work and returns
    None regardless of the arguments.
    """
    # TODO: implement. NOTE(review): the parameters presumably mirror
    # batch_grad_descent (alpha = learning rate, EPS = convergence tolerance);
    # confirm which loss is intended before filling this in.
    pass



In [2]:
def buildGraph(beta1=None, beta2=None, epsilon=None, lossType=None, learning_rate=None):
    """Build a TensorFlow 1.x graph for a linear / logistic model trained with Adam.

    Calls ``loadData()`` only to read the image height and width, creates the
    variables "W" and "b" with ``tf.get_variable`` and wires up the
    placeholders, the prediction, the scalar loss and the Adam training op.

    NOTE(review): because the variables are created with ``tf.get_variable``
    under fixed names, calling this twice in the same default graph is
    expected to fail -- call ``tf.reset_default_graph()`` between builds.

    Args:
        beta1, beta2, epsilon, learning_rate: Adam hyperparameters. ``None``
            selects Adam's standard value (0.9, 0.999, 1e-08 and 0.001).
        lossType: "MSE" for mean squared error on the linear output, or "CE"
            for sigmoid cross entropy.

    Returns:
        (W, b, predict, Y, X, loss, train_op, lam). ``lam`` is a placeholder
        that is created but not used by the loss, so it need not be fed.

    Raises:
        ValueError: if ``lossType`` is neither "MSE" nor "CE".
    """
    if lossType not in ("MSE", "CE"):
        raise ValueError('lossType must be "MSE" or "CE", got %r' % (lossType,))

    # The optimizer cannot work with None hyperparameters; substitute the
    # standard Adam values for any that were left unspecified.
    learning_rate = 0.001 if learning_rate is None else learning_rate
    beta1 = 0.9 if beta1 is None else beta1
    beta2 = 0.999 if beta2 is None else beta2
    epsilon = 1e-08 if epsilon is None else epsilon

    trainData, validData, testData, trainTarget, validTarget, testTarget = loadData()
    dim_x = trainData.shape[1]
    dim_y = trainData.shape[2]

    tf.set_random_seed(421)
    W_shape = (dim_x*dim_y, 1)
    W = tf.get_variable("W", initializer=tf.truncated_normal(shape=W_shape, stddev=0.5))
    b = tf.get_variable("b", initializer=tf.truncated_normal(shape=[1], stddev=0.5))

    # Leave the batch dimension open so any batch size (and the validation
    # or test split) can be fed.
    X = tf.placeholder(tf.float32, shape=(None, dim_x*dim_y), name="X")
    Y = tf.placeholder(tf.float32, shape=(None, 1), name="Y")
    lam = tf.placeholder(tf.float32, shape=(1, None), name="lam")

    if lossType == "MSE":
        predict = tf.matmul(X, W) + b
        loss = tf.losses.mean_squared_error(labels=Y, predictions=predict)
    else:
        # NOTE: the logit is the negated linear output, kept as in the
        # original. Prediction and loss both use this same logit, so the
        # model is self-consistent; the learned W and b simply carry the
        # opposite sign of the usual convention.
        logit = -1*(tf.matmul(X, W) + b)
        predict = tf.sigmoid(logit)
        # sigmoid_cross_entropy_with_logits returns one value per example;
        # average them so that the loss is a scalar.
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logit))

    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1, beta2=beta2, epsilon=epsilon).minimize(loss)

    return W, b, predict, Y, X, loss, train_op, lam

In [41]:
def BuildGraph(beta1=None, beta2=None, epsilon=None, lossType=None, learning_rate=None):
    """Build a TensorFlow 1.x graph for a 784-feature linear / logistic model.

    Args:
        beta1, beta2, epsilon, learning_rate: Adam hyperparameters. ``None``
            selects Adam's standard value (0.9, 0.999, 1e-08 and 0.001).
        lossType: "MSE" for mean squared error on the linear output, or "CE"
            for sigmoid cross entropy.

    Returns:
        (W, b, X, predict, Y, loss, train): the weight and bias variables, the
        input placeholder, the prediction tensor, the label placeholder, the
        scalar loss and the Adam training op.

    Raises:
        ValueError: if ``lossType`` is neither "MSE" nor "CE".
    """
    if lossType not in ("MSE", "CE"):
        raise ValueError('lossType must be "MSE" or "CE", got %r' % (lossType,))

    # Substitute the standard Adam values for unspecified hyperparameters.
    learning_rate = 0.001 if learning_rate is None else learning_rate
    beta1 = 0.9 if beta1 is None else beta1
    beta2 = 0.999 if beta2 is None else beta2
    epsilon = 1e-08 if epsilon is None else epsilon

    tf.set_random_seed(421)
    # The parameters must be tf.Variable objects: a bare truncated_normal
    # tensor is re-sampled on every run and gives the optimizer nothing to
    # train.
    W = tf.Variable(
        tf.truncated_normal(shape=(28*28, 1), stddev=0.5, dtype=tf.float32, seed=421),
        name="Weight")
    b = tf.Variable(
        tf.truncated_normal(shape=(1, 1), stddev=0.5, dtype=tf.float32, seed=421),
        name="Bias")
    X = tf.placeholder(tf.float32, shape=(None, 28*28))
    Y = tf.placeholder(tf.float32, shape=(None, 1))

    estimate = tf.matmul(X, W) + b
    if lossType == "MSE":
        predict = estimate
        loss = tf.losses.mean_squared_error(labels=Y, predictions=predict)
    else:
        # NOTE: the logit is the negated linear output, kept as in the
        # original; prediction and loss both use this same logit.
        logit = -1*estimate
        predict = tf.sigmoid(logit)
        # Average the per-example cross entropy into a scalar loss.
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logit))

    train = tf.train.AdamOptimizer(
        learning_rate=learning_rate, beta1=beta1, beta2=beta2, epsilon=epsilon).minimize(loss)
    return W, b, X, predict, Y, loss, train

In [60]:
def buildGraph2(beta1=None, beta2=None, epsilon=None, lossType=None, learning_rate=None):
    """Create the model tensors and hand them to ``SGD2`` for training.

    NOTE(review): ``SGD2``, ``reg``, ``epoch`` and ``batch_size`` are not
    defined in this function or anywhere visible in this notebook; they must
    exist at module level for this call to work. Confirm where they come from.

    Args:
        beta1, beta2, epsilon: Adam hyperparameters, forwarded to ``SGD2``
            only when ``lossType`` is "CE".
        lossType: "MSE" or "CE".
        learning_rate: learning rate forwarded to ``SGD2``.

    Returns:
        Whatever ``SGD2`` returns.

    Raises:
        ValueError: if ``lossType`` is neither "MSE" nor "CE".
    """
    # Initialize weight and bias tensors.
    # NOTE(review): these are plain random tensors, not tf.Variable objects;
    # presumably SGD2 wraps them in variables -- verify.
    tf.set_random_seed(421)
    W = tf.truncated_normal(shape = (28*28, 1), stddev = 0.5, dtype = tf.float32, seed = 421, name = "weight")
    B = tf.truncated_normal(shape = (1,1), stddev = 0.5, dtype = tf.float32, seed = 421, name = "bias")
    # NOTE(review): the batch dimension is fixed at 3200 rows.
    x = tf.placeholder(tf.float32, shape = (3200, 28*28))
    y = tf.placeholder(tf.float32, shape = (3200, 1))
    if lossType == "MSE":
        return SGD2(x, y, W, B, learning_rate, reg, epoch, batch_size)
    elif lossType == "CE":
        # Forward this function's own beta1 / beta2 arguments; the names
        # b1 / b2 used previously are not defined anywhere.
        return SGD2(x, y, W, B, learning_rate, reg, epoch, batch_size, opt = "Adam", beta1_ = beta1, beta2_ = beta2, epsilon_ = epsilon)
    raise ValueError('lossType must be "MSE" or "CE", got %r' % (lossType,))

In [3]:
def SGD(batchSize, epochs):
    """Train the cross-entropy model with mini-batch Adam and return the loss history.

    Every epoch shuffles the whole training set and visits it in consecutive
    mini-batches of ``batchSize`` rows. Samples that do not fill a complete
    final batch are skipped for that epoch, so every batch fed to the graph
    has exactly ``batchSize`` rows.

    Args:
        batchSize: number of training samples per optimizer step.
        epochs: number of passes over the training set.

    Returns:
        List with one entry per epoch: the mean training loss over that
        epoch's mini-batches.

    Raises:
        ValueError: if ``batchSize`` is larger than the training set.
    """
    trainData, validData, testData, trainTarget, validTarget, testTarget = loadData()

    num_samples = trainData.shape[0]
    batches = num_samples // batchSize
    if batches < 1:
        raise ValueError("batchSize (%d) exceeds the %d training samples" % (batchSize, num_samples))
    flat_train = trainData.reshape((num_samples, trainData.shape[1]*trainData.shape[2]))

    # Standard Adam settings. beta1 = beta2 = 1 with epsilon = 0 makes Adam's
    # bias-correction terms (1 - beta**t) zero, which turns every update
    # into 0/0.
    W, b, predict, Y, X, loss, train_op, lam = buildGraph(
        beta1=0.9, beta2=0.999, epsilon=1e-08, lossType="CE", learning_rate=0.001)
    losses = []

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for _ in range(epochs):
            # Draw from the whole training set, not only its first 100 rows.
            order = np.random.permutation(num_samples)
            epoch_loss = 0.0
            for step in range(batches):
                rows = order[step*batchSize:(step + 1)*batchSize]
                x_batch = flat_train[rows]
                y_batch = trainTarget[rows].reshape((batchSize, 1))
                _, c = sess.run([train_op, loss], feed_dict={X: x_batch, Y: y_batch})
                # np.mean makes this work for a scalar loss as well as for
                # a per-example loss tensor.
                epoch_loss += float(np.mean(c))
            losses.append(epoch_loss / batches)
    return losses

In [36]:
def Tensor(epochs,trainData,trainTarget,typeError,batch_size):
    """Train the ``buildGraph`` model with mini-batch Adam.

    Every epoch shuffles the training set and visits it in consecutive
    mini-batches of ``batch_size`` rows; samples that do not fill a complete
    final batch are skipped for that epoch.

    NOTE(review): if ``buildGraph`` creates its placeholders with a fixed
    batch dimension, ``batch_size`` has to match that dimension.

    Args:
        epochs: number of passes over the training set.
        trainData: training samples; each one is flattened to a feature row.
        trainTarget: binary training targets, one per sample.
        typeError: loss type forwarded to ``buildGraph`` ("MSE" or "CE").
        batch_size: number of samples per optimizer step.

    Returns:
        (total_E, total_A): per-epoch mean training error and per-epoch
        training accuracy (predictions thresholded at 0.5).
    """
    # buildGraph returns eight values in this order. The Adam hyperparameters
    # are passed explicitly rather than relying on its None defaults.
    W, b, predict, Y, X, error, train, lam = buildGraph(
        beta1=0.9, beta2=0.999, epsilon=1e-08, lossType=typeError, learning_rate=0.001)

    ins = np.shape(trainData)[0]
    t_batches = int(ins/batch_size)
    features = np.reshape(trainData, (ins, -1))
    labels = np.reshape(trainTarget, (ins, 1))

    total_E = []
    total_A = []
    # A context-managed session is closed even if training raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for _ in range(epochs):
            idx = np.random.permutation(ins)
            X_r, Y_r = features[idx], labels[idx]

            batch_errors = []
            hits = 0
            for step in range(t_batches):
                start = step * batch_size
                X_batch = X_r[start:(start+batch_size),:]
                Y_batch = Y_r[start:(start+batch_size),:]
                # The placeholders must be fed; W and b are not fetched
                # because they are not needed for the statistics.
                _, err, yh = sess.run([train, error, predict],
                                      feed_dict={X: X_batch, Y: Y_batch})
                batch_errors.append(float(np.mean(err)))
                hits += int(np.sum((yh > 0.5) == (Y_batch > 0.5)))

            if t_batches:
                total_E.append(sum(batch_errors) / t_batches)
                total_A.append(hits / float(t_batches * batch_size))
    return total_E, total_A

In [4]:
# Run training with mini-batches of 1750 samples for 700 epochs.
# The trailing semicolon keeps the notebook from echoing the call's result.
SGD(batchSize=1750, epochs=700);
