In [1]:
import activations
import layers
import metrics
import model
from sklearn import datasets
import preprocessing
from utils import global_param_init

In [2]:
# Load the iris dataset shipped with scikit-learn and pull out features/labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target
# Split with the project-local `preprocessing` helper (not sklearn's splitter).
# NOTE(review): presumably test_size=0.20 holds out 20% of the rows, and the
# split is not visibly seeded here, so results may differ between runs - confirm.
X_train, X_test, y_train, y_test = preprocessing.train_test_split(X, y, test_size=0.20)

In [3]:
# Layer sizes handed to the network builders in the cells below.
# Presumably: input width, two hidden widths, output width - TODO confirm.
layer_dims = [4, 32, 64, 3]

In [4]:
# Train with the library entry point, leaving every optional hyperparameter at
# its default. The result is dict-like (its keys are inspected in the next cell).
m = model.feedforward_neuralnet(X_train, y_train, X_test, y_test, layer_dims)

  6%|▌         | 173/3000 [00:00<00:12, 224.72it/s, loss=0.431, train_acc=0.942, val_acc=1]    


Early Stopping at Epoch: 186
Training Finished
Training Accuracy Score: 94.17%
Validation Accuracy Score: 100.00%
Training Logs are saved as a CSV file.
Training Curves are saved as a PNG file.





In [5]:
# Inspect which entries the returned model dictionary exposes.
m.keys()

dict_keys(['params', 'losses', 'training_accuracy', 'validation_accuracy', 'layer_dims', 'activation', 'weight_init', 'dropout_rate', 'learning_rate', 'best_epoch'])

In [9]:
class NN:
    """Hyperparameter holder and method skeleton for a feed-forward network.

    The constructor records the configuration and initialises the parameters
    through ``global_param_init``. ``train``, ``predict``, ``plot`` and
    ``log_csv`` are placeholders that currently do nothing and return None.
    """

    def __init__(self, layer_dims,
                 activation='relu',
                 weight_init='glorot_uniform',
                 dropout_rate=None,
                 learning_rate=0.0495,
                 num_steps=3000,
                 early_stopping=True):
        """Store the configuration and create the initial parameters.

        Args:
            layer_dims: Sequence of layer sizes; forwarded unchanged to
                ``global_param_init``.
            activation: Name of the activation to use.
            weight_init: Name of the weight initialisation scheme; forwarded
                to ``global_param_init``.
            dropout_rate: Dropout setting, or None.
            learning_rate: Step size for parameter updates.
            num_steps: Maximum number of training steps.
            early_stopping: Whether training may stop before ``num_steps``.
        """
        # Keep an independent copy of the architecture: the instance previously
        # dropped it, so later methods could not know the network's shape.
        self.layer_dims = list(layer_dims)
        self.activation = activation
        self.weight_init = weight_init
        self.dropout_rate = dropout_rate
        self.learning_rate = learning_rate
        self.num_steps = num_steps
        self.early_stopping = early_stopping
        self.params = global_param_init(layer_dims, weight_init)

    def train(self):
        """Placeholder: not implemented yet; does nothing and returns None."""

    def predict(self):
        """Placeholder: not implemented yet; does nothing and returns None."""

    def plot(self):
        """Placeholder: not implemented yet; does nothing and returns None."""

    def log_csv(self):
        """Placeholder: not implemented yet; does nothing and returns None."""

In [12]:
class ANN(NN):
    """Thin NN subclass used to check that inherited attributes are reachable."""

    def print(self):
        """Write the configured activation name to standard output."""
        activation_name = self.activation
        print(activation_name)

In [13]:
# Build the subclass with default hyperparameters (constructor inherited from NN).
b = ANN(layer_dims)

In [15]:
# Prints the inherited `activation` attribute.
b.print()

relu


In [None]:
# Scratch, cell-level version of the training loop. It relies on names that must
# already exist in the kernel: the data splits, layer_dims, the hyperparameters
# (weight_init, activation, dropout_rate, learning_rate, num_steps,
# early_stopping), tqdm, and the helper functions called below.
losses = []  # loss recorded at every step
train_accs = []  # training accuracy recorded at every step
val_accs = []  # validation accuracy recorded at every step
params = global_param_init(layer_dims, weight_init)  # initialize weights
t = tqdm.trange(num_steps)  # progress bar that doubles as the step iterator
best_epoch = num_steps  # stays at num_steps unless early stopping triggers
# Defined up front so the summary prints below still work when num_steps == 0.
train_acc = val_acc = float('nan')

# Training loop
for i in t:
    probs, caches = model_forward(X_train, params, activation, dropout_rate)  # forward propagation
    loss = cat_xentropy_loss(probs, y_train)  # calculate loss
    grads = model_backward(probs, y_train, caches, activation, dropout_rate)  # error backpropagation
    params = update_params(params, grads, learning_rate)  # weight updates
    train_acc = predict(X_train, y_train, params, activation)  # training accuracy
    val_acc = predict(X_test, y_test, params, activation)  # accuracy on the held-out split
    t.set_postfix(loss=float(loss), train_acc=train_acc, val_acc=val_acc)  # tqdm printing
    # Record training logs
    losses.append(loss)
    train_accs.append(train_acc)
    val_accs.append(val_acc)
    if early_stopping and val_acc > 0.99:
        best_epoch = i
        print()
        print('Early Stopping at Epoch: {}'.format(i))
        break  # stop training once validation accuracy exceeds 0.99
print('Training Finished')
print('Training Accuracy Score: {:.2f}%'.format(train_acc*100))
print('Validation Accuracy Score: {:.2f}%'.format(val_acc*100))

# Collect the run under a name other than `model`: rebinding `model` would
# replace the imported `model` module that the earlier training cell calls.
trained_model = {
    'params': params,
    'losses': losses,
    'training_accuracy': train_accs,
    'validation_accuracy': val_accs,
    'layer_dims': layer_dims,
    'activation': activation,
    'weight_init': weight_init,
    'dropout_rate': dropout_rate,
    'learning_rate': learning_rate,
    'best_epoch': best_epoch,
}

In [None]:
def train():
    """Run the training loop and return the trained model as a dictionary.

    Everything is read from module-level names: the data splits (X_train,
    y_train, X_test, y_test), layer_dims, the hyperparameters (weight_init,
    activation, dropout_rate, learning_rate, num_steps, early_stopping), tqdm,
    and the helper functions called below.

    Returns:
        dict with the keys 'params', 'losses', 'training_accuracy',
        'validation_accuracy', 'layer_dims', 'activation', 'weight_init',
        'dropout_rate', 'learning_rate' and 'best_epoch'. 'best_epoch' is the
        step at which early stopping fired, or num_steps if it never did.
    """
    losses = []  # loss recorded at every step
    train_accs = []  # training accuracy recorded at every step
    val_accs = []  # validation accuracy recorded at every step
    params = global_param_init(layer_dims, weight_init)  # initialize weights
    t = tqdm.trange(num_steps)  # progress bar that doubles as the step iterator
    best_epoch = num_steps  # stays at num_steps unless early stopping triggers
    # Defined up front so the summary prints still work when num_steps == 0.
    train_acc = val_acc = float('nan')

    # Training loop
    for i in t:
        probs, caches = model_forward(X_train, params, activation, dropout_rate)  # forward propagation
        loss = cat_xentropy_loss(probs, y_train)  # calculate loss
        grads = model_backward(probs, y_train, caches, activation, dropout_rate)  # error backpropagation
        params = update_params(params, grads, learning_rate)  # weight updates
        train_acc = predict(X_train, y_train, params, activation)  # training accuracy
        val_acc = predict(X_test, y_test, params, activation)  # accuracy on the held-out split
        t.set_postfix(loss=float(loss), train_acc=train_acc, val_acc=val_acc)  # tqdm printing
        # Record training logs
        losses.append(loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)
        if early_stopping and val_acc > 0.99:
            best_epoch = i
            print()
            print('Early Stopping at Epoch: {}'.format(i))
            break  # stop training once validation accuracy exceeds 0.99
    print('Training Finished')
    print('Training Accuracy Score: {:.2f}%'.format(train_acc*100))
    print('Validation Accuracy Score: {:.2f}%'.format(val_acc*100))

    # The dictionary was previously created empty and never returned.
    return {
        'params': params,
        'losses': losses,
        'training_accuracy': train_accs,
        'validation_accuracy': val_accs,
        'layer_dims': layer_dims,
        'activation': activation,
        'weight_init': weight_init,
        'dropout_rate': dropout_rate,
        'learning_rate': learning_rate,
        'best_epoch': best_epoch,
    }

In [10]:
# Instantiate the skeleton class with its default hyperparameters.
a = NN(layer_dims)

In [11]:
# Check that the default learning rate was stored on the instance.
a.learning_rate

0.0495

In [8]:
# Show the parameters created by global_param_init in the constructor.
# NOTE(review): this dumps every array in full; consider displaying only the
# keys or array shapes to keep the notebook output short.
a.params

{'b1': array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]),
 'W1': array([[-0.10243756,  0.36800669,  0.18942226,  0.08055432, -0.2808596 ,
         -0.2808793 , -0.36082322,  0.29898157,  0.08256006,  0.16989055,
         -0.39144112,  0.38367979,  0.27143828, -0.23487413, -0.25978883,
         -0.25849914, -0.15983554,  0.02021354, -0.05556666, -0.17046069,
          0.09132751, -0.29435203, -0.16971318, -0.1091151 , -0.03586871,
          0.2328452 , -0.24521533,  0.01162237,  0.07545618, -0.37032169,
          0.08781   , -0.26901593],
        [-0.35513389,  0.36651351,  0.38018696,  0.25180538, -0.15953219,
         -0.32849934,  0.15042564, -0.04886528, -0.30860449, -0.00393804,
         -0.38017018,  0.33420871, -0.19695532,  0.13269889, -0.15373726,
          0.01638547,  0.03813878, -0.25731526,  0.38341424,  0.22464501,
          0.35884938,  0.32237518,  0.079935  ,  0.34445887, -0

In [None]:
# Scratch note kept as a comment: a bare parenthesised list containing keyword
# arguments is a SyntaxError if this cell is executed. Presumably this is the
# parameter list of model.feedforward_neuralnet - TODO confirm.
# (X_train, y_train, X_test, y_test, layer_dims, activation='sigmoid', weight_init='glorot_uniform', dropout_rate=None, learning_rate=0.0495, num_steps=3000, early_stopping=True)

In [None]:
import numpy as np

class NeuralNet:
  """
  Base class for a linear classifier trained with stochastic gradient descent.

  Subclasses supply the objective by overriding loss(); the weight matrix
  self.W (num_classes x D) is created on the first call to train().
  """

  def __init__(self, layer_dims=None):
    """
    Inputs:
    - layer_dims: optional; stored on the instance but not used by this class.
      It may be omitted.
    """
    self.layer_dims = layer_dims
    self.W = None

  def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
            batch_size=200, verbose=False):
    """
    Train this linear classifier using stochastic gradient descent.

    Inputs:
    - X: D x N array of training data. Each training point is a D-dimensional
         column.
    - y: 1-dimensional array of length N with labels 0...K-1, for K classes.
    - learning_rate: (float) learning rate for optimization.
    - reg: (float) regularization strength.
    - num_iters: (integer) number of steps to take when optimizing
    - batch_size: (integer) number of training examples to use at each step.
    - verbose: (boolean) If true, print progress during optimization.

    Outputs:
    A list containing the value of the loss function at each training iteration.
    """
    dim, num_train = X.shape
    num_classes = np.max(y) + 1 # assume y takes values 0...K-1 where K is number of classes
    if self.W is None:
      # lazily initialize W
      self.W = np.random.randn(num_classes, dim) * 0.001

    # Run stochastic gradient descent to optimize W
    loss_history = []
    for it in range(num_iters):  # range, not xrange: the notebook runs on Python 3
      # Draw a minibatch of column indices (np.random.choice samples with
      # replacement by default, so batch_size may exceed num_train).
      batch_mask = np.random.choice(num_train, batch_size)
      X_batch = X[:, batch_mask]
      y_batch = y[batch_mask]

      # evaluate loss and gradient
      loss, grad = self.loss(X_batch, y_batch, reg)
      loss_history.append(loss)

      # perform parameter update
      self.W += -learning_rate * grad

      if verbose and it % 100 == 0:
        print('iteration %d / %d: loss %f' % (it, num_iters, loss))

    return loss_history

  def predict(self, X):
    """
    Use the trained weights of this linear classifier to predict labels for
    data points.

    Inputs:
    - X: D x N array of training data. Each column is a D-dimensional point.

    Returns:
    - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional
      array of length N, and each element is an integer giving the predicted
      class.
    """
    scores = self.W.dot(X)
    return np.argmax(scores, axis=0) # top scoring class

  def loss(self, X_batch, y_batch, reg):
    """
    Compute the loss function and its derivative.
    Subclasses will override this; the base implementation returns None, so
    train() cannot unpack a result unless loss() is overridden.

    Inputs:
    - X_batch: D x N array of data; each column is a data point.
    - y_batch: 1-dimensional array of length N with labels 0...K-1, for K classes.
    - reg: (float) regularization strength.

    Returns: A tuple containing:
    - loss as a single float
    - gradient with respect to self.W; an array of the same shape as W
    """
    pass


class LinearSVM(NeuralNet):
  """
  A subclass that uses the Multiclass SVM loss function.

  Derives from NeuralNet, the base classifier defined in this cell; the name
  LinearClassifier used previously is not defined anywhere in the notebook.
  """

  def loss(self, X_batch, y_batch, reg):
    """ Return (loss, gradient w.r.t. self.W) from svm_loss_vectorized. """
    return svm_loss_vectorized(self.W, X_batch, y_batch, reg)


def svm_loss_vectorized(W, X, y, reg):
  """
  Multiclass hinge (structured SVM) loss and gradient, fully vectorized.

  Inputs:
  - W: C x D array of weights.
  - X: D x N array of data; each column is a data point.
  - y: length-N integer array of labels, used to index rows of the scores.
  - reg: (float) regularization strength.

  Returns: A tuple (loss, dW) where dW has the same shape as W.
  """
  _, num_train = X.shape
  columns = np.arange(num_train)

  # Class scores (C x N) and the hinge margin against each sample's true class.
  scores = W.dot(X)
  margins = np.maximum(0, scores - scores[y, columns] + 1.0)
  margins[y, columns] = 0  # the true class never contributes to its own margin

  data_term = np.sum(margins) / num_train
  reg_term = 0.5 * reg * np.sum(W * W)

  # Gradient w.r.t. the scores: +1 for every violating class, and minus the
  # number of violations in the column for the true class.
  violated = margins > 0
  dscores = np.zeros(scores.shape)
  dscores[violated] = 1
  dscores[y, columns] = -np.sum(violated, axis=0)

  dW = dscores.dot(X.T) / num_train + reg * W
  return data_term + reg_term, dW


class Softmax(NeuralNet):
  """
  A subclass that uses the Softmax + Cross-entropy loss function.

  Derives from NeuralNet, the base classifier defined in this cell; the name
  LinearClassifier used previously is not defined anywhere in the notebook.
  """

  def loss(self, X_batch, y_batch, reg):
    """ Return (loss, gradient w.r.t. self.W) from softmax_loss_vectorized. """
    return softmax_loss_vectorized(self.W, X_batch, y_batch, reg)


def softmax_loss_vectorized(W, X, y, reg):
  """
  Softmax cross-entropy loss and gradient, fully vectorized.

  Inputs:
  - W: C x D array of weights.
  - X: D x N array of data; each column is a data point.
  - y: length-N integer array of labels, used to index rows of the scores.
  - reg: (float) regularization strength.

  Returns: A tuple (loss, dW) where dW has the same shape as W.
  """
  _, num_train = X.shape
  sample_idx = range(y.size)

  logits = W.dot(X) # C x N
  logits -= np.max(logits, axis=0)  # shift each column so its largest entry is 0 before exp

  probs = np.exp(logits)
  probs /= np.sum(probs, axis=0)

  data_term = -np.sum(np.log(probs[y, sample_idx])) / num_train
  reg_term = 0.5 * reg * np.sum(W * W)

  # Gradient w.r.t. the scores is (probabilities - one-hot labels); the
  # probability array is reused in place once the loss has been read from it.
  probs[y, sample_idx] -= 1.0
  dW = probs.dot(X.T) / num_train + reg * W

  return data_term + reg_term, dW