In [6]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [7]:
def loadData():
    with np.load('notMNIST.npz') as data :
        Data, Target = data ['images'], data['labels']
        posClass = 2
        negClass = 9
        dataIndx = (Target==posClass) + (Target==negClass)
        Data = Data[dataIndx]/255.
        print(Data)
        Target = Target[dataIndx].reshape(-1, 1)
        Target[Target==posClass] = 1
        Target[Target==negClass] = 0
        np.random.seed(421)
        randIndx = np.arange(len(Data))
        np.random.shuffle(randIndx)
        Data, Target = Data[randIndx], Target[randIndx]
        trainData, trainTarget = Data[:3500], Target[:3500]
        validData, validTarget = Data[3500:3600], Target[3500:3600]
        testData, testTarget = Data[3600:], Target[3600:]
    return trainData, validData, testData, trainTarget, validTarget, testTarget

In [8]:
x, validData, testData, y, validTarget, testTarget = loadData()

[[[0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.00784314 0.00392157 0.        ]
  ...
  [0.         0.         0.         ... 0.00392157 0.         0.        ]
  [0.         0.         0.         ... 0.00392157 0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]]

 [[1.         1.         1.         ... 1.         1.         1.        ]
  [1.         1.         1.         ... 1.         1.         1.        ]
  [1.         1.         1.         ... 1.         1.         1.        ]
  ...
  [1.         1.         1.         ... 1.         1.         1.        ]
  [1.         1.         1.         ... 1.         1.         1.        ]
  [1.         1.         1.         ... 1.         1.         1.        ]]

 [[0.         0.         0.         ... 0.00392157 0.00392157 0.        ]
  [0.         0.      

In [4]:
y.shape

(3500, 1)

def MSE(W, b, x, y, reg):
    sum = 0
    for i in range(len(x)):
        sum += (np.linalg.norm(np.dot(W.flatten(),x[i].flatten()) + b[i] - y[i])**2)*(1/(2*len(x))) + (reg/2)*((np.linalg.norm(W))**2)
    return sum

In [9]:
def MSE(W, b, x, y, reg):
    mse = 0
    for i in range(len(y)):
        mse+=(1/(2*len(y)))*(W.flatten().dot(np.transpose(x[i].flatten()))+b[i]-y[i])**2 + reg/2*np.linalg.norm(W)**2
              
    return mse

In [6]:
print(MSE(np.ones(28*28), np.zeros(3500), x, y, 0.1))

AttributeError: module 'numpy' has no attribute 'Transpose'

def gradMSE(W, b, x, y, reg):
    
    GradWRTW = np.zeros(shape = (28,28))
    GradWRTB = 0
    for i in range(len(x)):
        GradWRTW += ((np.linalg.norm(np.dot(W.flatten(),(x[i].flatten()).T) + b[i] - y[i]))*x[i]*(1/(len(x))) + reg*((np.linalg.norm(W)))).reshape(28, 28)
        GradWRTB += ((np.linalg.norm(np.dot(W.flatten(),(x[i].flatten()).T) + b[i] - y[i])))*(1/(len(x)))
    
    return GradWRTW, GradWRTB
    
    

In [10]:
def gradMSE(W, b, x, y, reg):
    #calculate gradient with respect to W
    gradW=0
    gradB=0
    for i in range(len(y)):
        gradW += (1/len(y))*((W.flatten().dot(x[i].flatten().transpose())+b[i]-y[i])*x[i]) + reg*W
        gradB += 1/len(y)*(W.flatten().dot(x[i].flatten().transpose())+b[i]-y[i])
    #calculate gradient with respect to regularization-tor
    return gradW, gradB

In [91]:
print(gradMSE(np.ones(28*28), np.zeros(3500), x, y, 0.1))

ValueError: operands could not be broadcast together with shapes (3500,) (28,28) 

In [None]:
def crossEntropyLoss(W, b, x, y, reg):
    # Your implementation here

In [None]:
def gradCE(W, b, x, y, reg):
    # Your implementation here


In [25]:
def grad_descent(W, b, trainingData, trainingLabels, alpha, iterations, reg, EPS):
        
    counter = 0
    error = np.zeros(iterations)
    error[0] = MSE(W,b,trainingData,trainingLabels,reg)
    print(("Epoch {0}: Train loss: {1}").format(counter, error[counter]))
    while(error[counter] > EPS or counter < 5000):
        gradW, gradB = gradMSE(W,b,trainingData,trainingLabels,reg)
        W = np.subtract(W,alpha*gradW)
        b = b - alpha*gradB
        counter += 1
        error[counter] = MSE(W,b,trainingData,trainingLabels,reg)
        print(("Epoch {0}: Train loss: {1}").format(counter, error[counter]))
    
    model_path = "training"
    np.savetxt("{}_loss.csv".format(model_path), error)
    return W,b
    

In [24]:
grad_descent(np.ones(shape=(28,28)), np.zeros(3500), x, y, 0.01, 5000, 0, 10**(-7))

Epoch 0: Train loss: 44700.07030074798
Epoch 1: Train loss: 6512.134098651701
Epoch 2: Train loss: 1104.7085706201874
Epoch 3: Train loss: 319.08711226533757
Epoch 4: Train loss: 189.1355514505606
Epoch 5: Train loss: 154.8706486245489
Epoch 6: Train loss: 136.5135919007809
Epoch 7: Train loss: 122.27783927586125
Epoch 8: Train loss: 110.14597492238023
Epoch 9: Train loss: 99.57976037166186
Epoch 10: Train loss: 90.30931435996985
Epoch 11: Train loss: 82.14131278792165
Epoch 12: Train loss: 74.9207074874359
Epoch 13: Train loss: 68.51872101114525
Epoch 14: Train loss: 62.82684356339702
Epoch 15: Train loss: 57.75291875411608
Epoch 16: Train loss: 53.218268605734394
Epoch 17: Train loss: 49.15545764522475
Epoch 18: Train loss: 45.50649953105627
Epoch 19: Train loss: 42.22139245390472
Epoch 20: Train loss: 39.25690946112863
Epoch 21: Train loss: 36.57559167949773
Epoch 22: Train loss: 34.14490569598698
Epoch 23: Train loss: 31.936535197737335
Epoch 24: Train loss: 29.925783264071477
Epoc

KeyboardInterrupt: 

In [None]:
plt.close('all')
plt.scatter(x, y)
plt.xlabel('input (x)')
plt.ylabel('target (t)')

In [None]:
def buildGraph(beta1=None, beta2=None, epsilon=None, lossType=None, learning_rate=None):
    # Your implementation here