In [None]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.neural_network import MLPClassifier
import numpy as np
import matplotlib.pyplot as plt
import random
import math

In [None]:
class MLPClassifier(BaseEstimator,ClassifierMixin):
    """Multilayer perceptron classifier trained with online backpropagation.

    Every non-input layer uses the logistic sigmoid activation. Weights are
    kept as a list with one matrix per non-input layer; matrix ``k`` has shape
    ``(fan_in + 1, layer_widths[k])`` where the extra *last* row holds the
    bias weights (the bias is modelled as a constant input of 1 appended to
    each layer's input vector).

    Predictions are class indices: the arg-max over the output nodes, or a
    0/1 decision thresholded at 0.5 when there is a single output node.
    """

    # Stopping rule applied only when ``deterministic`` is None: give up after
    # _PATIENCE consecutive epochs whose mean squared training error did not
    # improve by more than _TOLERANCE, or after _MAX_EPOCHS epochs in total.
    _MAX_EPOCHS = 1000
    _PATIENCE = 10
    _TOLERANCE = 1e-6

    def __init__(self, numParameters, outputNodes, lr=.1, momentum=0, deterministic=None, shuffle=True,hidden_layer_widths=None):
        """ Initialize class with chosen hyperparameters.

        Args:
            numParameters (int): Number of input features (bias excluded).
            outputNodes (int): Number of output nodes.
            lr (float): A learning rate / step size.
            momentum (float): The momentum coefficient.
            deterministic (int or None): When given, train for exactly this
                many epochs. When None, train until the training error stops
                improving (see the class-level stopping constants).
            shuffle (boolean): Whether to shuffle the training data each epoch.
                DO NOT SHUFFLE for evaluation / debug datasets.
            hidden_layer_widths (list(int) or None): Width of each hidden
                layer. When None, a single hidden layer with twice as many
                nodes as there are inputs is used.
        Example:
            mlp = MLPClassifier(4, 3, lr=.2, momentum=.5, shuffle=False,
                                hidden_layer_widths=[3, 3])
            creates a model with two hidden layers, both 3 nodes wide.
        """
        # Constructor arguments are stored unmodified under their own names so
        # that BaseEstimator.get_params / clone can read them back.
        self.numParameters = numParameters
        self.outputNodes = outputNodes
        self.lr = lr
        self.momentum = momentum
        self.deterministic = deterministic
        self.shuffle = shuffle
        self.hidden_layer_widths = hidden_layer_widths

        if hidden_layer_widths is None:
            hidden = [2 * numParameters]
        else:
            # Copy so the caller's list is never mutated.
            hidden = list(hidden_layer_widths)
        # Widths of every non-input layer: the hidden layers, then the output.
        self.layer_widths = hidden + [outputNodes]
        self.layers = len(self.layer_widths)

    @staticmethod
    def _sigmoid(x):
        """Logistic sigmoid, element-wise for arrays; ``_sigmoid(0) == 0.5``."""
        # Clipping keeps np.exp from overflowing on extreme net values.
        clipped = np.clip(x, -500.0, 500.0)
        return 1.0 / (1.0 + np.exp(-clipped))

    @staticmethod
    def _sigmoid_prime(x):
        """Sigmoid derivative expressed in terms of the sigmoid *output* ``x``."""
        return x * (1 - x)

    @staticmethod
    def _with_bias(values):
        """Return ``values`` as a float vector with the constant bias input 1 appended."""
        return np.append(np.asarray(values, dtype=float), 1.0)

    def _weight_shapes(self):
        """Return the expected ``(fan_in + 1, width)`` shape of every weight matrix."""
        fan_ins = [self.numParameters] + self.layer_widths[:-1]
        return [(fan_in + 1, width) for fan_in, width in zip(fan_ins, self.layer_widths)]

    def forwardPass(self, io):
        """Propagate one pattern through the network.

        Args:
            io (list): A list whose first element is the input feature vector
                (without bias). The activation vector of every layer is
                appended in order, so afterwards ``io[k + 1]`` is the output
                of layer ``k`` and ``io[self.layers]`` is the network output.
        Returns:
            list: The same ``io`` list, for convenience.
        """
        for layer in range(self.layers):
            net = np.dot(self._with_bias(io[layer]), self.weights[layer])
            io.append(self._sigmoid(net))
        return io

    def outputDeltas(self, outputs, t):
        """Compute the error terms of the output nodes.

        Args:
            outputs (array-like): Activations of the output layer.
            t (array-like or float): Target value(s) for this pattern.
        Returns:
            numpy.ndarray: ``(t - o) * o * (1 - o)`` for each output node.
        """
        outs = np.asarray(outputs, dtype=float)[:self.outputNodes]
        targets = np.asarray(t, dtype=float).reshape(-1)
        return (targets - outs) * self._sigmoid_prime(outs)

    def hiddenDeltas(self, outputs, deltas, layer):
        """Back-propagate error terms into one hidden layer.

        Args:
            outputs (array-like): Activations of hidden layer ``layer``.
            deltas (array-like): Error terms of layer ``layer + 1``.
            layer (int): Index of the hidden layer whose deltas are computed.
        Returns:
            numpy.ndarray: One error term per node of hidden layer ``layer``.
        """
        outs = np.asarray(outputs, dtype=float)[:self.layer_widths[layer]]
        # Weights leaving this layer; the bias row is dropped because no error
        # flows back through the constant bias input.
        outgoing = self.weights[layer + 1][:-1, :]
        downstream = np.dot(outgoing, np.asarray(deltas, dtype=float))
        return self._sigmoid_prime(outs) * downstream

    def addMatrices(self, weight_changes):
        """Add each weight-change matrix to the corresponding weight matrix.

        Args:
            weight_changes (list): One matrix per layer, shaped like the weights.
        Raises:
            ValueError: If the number of matrices differs from the number of layers.
        """
        if len(weight_changes) != len(self.weights):
            raise ValueError("expected one weight-change matrix per layer")
        self.weights = [w + dw for w, dw in zip(self.weights, weight_changes)]

    def backwardPass(self, io, t):
        """Update the weights from one pattern using backpropagation with momentum.

        Args:
            io (list): Activations produced by ``forwardPass`` for the pattern.
            t (array-like or float): Target value(s) for the pattern.
        """
        previous = getattr(self, "_prev_changes", None)
        if previous is None:
            previous = [np.zeros_like(w) for w in self.weights]

        deltas = self.outputDeltas(io[self.layers], t)
        weight_changes = [None] * self.layers
        # Walk from the output layer back to the first hidden layer. All
        # deltas are computed from the current weights; the weights are only
        # modified once every change matrix is known.
        for layer in range(self.layers - 1, -1, -1):
            layer_input = self._with_bias(io[layer])
            change = self.lr * np.outer(layer_input, deltas)
            # Momentum re-applies a fraction of the previous weight change.
            change = change + self.momentum * previous[layer]
            weight_changes[layer] = change
            if layer > 0:
                deltas = self.hiddenDeltas(io[layer], deltas, layer - 1)

        self._prev_changes = weight_changes
        self.addMatrices(weight_changes)

    def _encode_targets(self, y):
        """Return targets as a float matrix with one column per output node.

        A single column of integer class labels is one-hot encoded when the
        network has more than one output node.
        """
        targets = np.asarray(y, dtype=float)
        if targets.ndim == 1:
            targets = targets.reshape(-1, 1)
        if targets.ndim != 2:
            raise ValueError("y must be a 1D or 2D array")
        if targets.shape[1] == self.outputNodes:
            return targets
        if targets.shape[1] == 1:
            labels = targets[:, 0].astype(int)
            if labels.size and (labels.min() < 0 or labels.max() >= self.outputNodes):
                raise ValueError("class labels must lie in [0, outputNodes)")
            one_hot = np.zeros((labels.size, self.outputNodes))
            one_hot[np.arange(labels.size), labels] = 1.0
            return one_hot
        raise ValueError("y has {} columns but the network has {} output nodes".format(
            targets.shape[1], self.outputNodes))

    @staticmethod
    def _decode_targets(y):
        """Return one class label per row of ``y`` (arg-max for one-hot rows)."""
        targets = np.asarray(y)
        if targets.ndim == 2 and targets.shape[1] > 1:
            return targets.argmax(axis=1)
        return targets.reshape(-1)

    def _train_epoch(self, X, targets):
        """Run one online-training pass over the data; return the mean squared error."""
        if self.shuffle:
            X, targets = self._shuffle_data(X, targets)
        total = 0.0
        for row, target in zip(X, targets):
            io = self.forwardPass([row])
            total += float(np.sum((target - io[self.layers]) ** 2))
            self.backwardPass(io, target)
        return total / max(len(X), 1)

    def fit(self, X, y, initial_weights=None):
        """ Fit the data; run the algorithm and adjust the weights to find a good solution

        Args:
            X (array-like): A 2D numpy array with the training data, excluding targets
            y (array-like): A 2D numpy array with the training targets (one
                column per output node, or a single column of class labels)
        Optional Args (Args we think will make your life easier):
            initial_weights (array-like): allows the user to provide initial
                weights: one ``(fan_in + 1, width)`` matrix per layer, bias row last
        Returns:
            self: this allows this to be chained, e.g. model.fit(X,y).predict(X_test)
        Raises:
            ValueError: If the shapes of X, y or initial_weights do not match
                the network, or if ``deterministic`` is negative.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2 or X.shape[1] != self.numParameters:
            raise ValueError("X must be 2D with {} columns".format(self.numParameters))
        targets = self._encode_targets(y)
        if targets.shape[0] != X.shape[0]:
            raise ValueError("X and y must contain the same number of rows")

        if initial_weights is None:
            self.initial_weights = self.initialize_weights()
        else:
            self.initial_weights = [np.array(w, dtype=float) for w in initial_weights]
            if [w.shape for w in self.initial_weights] != self._weight_shapes():
                raise ValueError("initial_weights must have shapes {}".format(self._weight_shapes()))
        # Train on copies so the stored initial weights stay untouched.
        self.weights = [w.copy() for w in self.initial_weights]
        self._prev_changes = [np.zeros_like(w) for w in self.weights]

        if self.deterministic is not None:
            epochs = int(self.deterministic)
            if epochs < 0:
                raise ValueError("deterministic must be a non-negative epoch count")
            print("Starting {} epochs of training...".format(epochs))
            for _ in range(epochs):
                self._train_epoch(X, targets)
            return self

        best_error = float("inf")
        stale_epochs = 0
        for _ in range(self._MAX_EPOCHS):
            error = self._train_epoch(X, targets)
            if error < best_error - self._TOLERANCE:
                best_error = error
                stale_epochs = 0
            else:
                stale_epochs += 1
                if stale_epochs >= self._PATIENCE:
                    break

        return self

    def predict(self, X):
        """ Predict all classes for a dataset X
        Args:
            X (array-like): A 2D numpy array with the data, excluding targets
        Returns:
            array, shape (n_samples,)
                Predicted class index per element in X (0/1 for a single output node).
        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        if getattr(self, "weights", None) is None:
            raise RuntimeError("fit must be called before predict")
        X = np.asarray(X, dtype=float)
        outputs = np.array(
            [self.forwardPass([row])[self.layers] for row in X], dtype=float
        ).reshape(-1, self.outputNodes)
        if self.outputNodes == 1:
            return (outputs[:, 0] >= 0.5).astype(int)
        return outputs.argmax(axis=1)

    def initialize_weights(self):
        """ Initialize weights for perceptron. Don't forget the bias!

        Returns:
            list: One matrix per layer drawn from a standard normal
                distribution, shaped ``(fan_in + 1, width)`` with the bias row last.
        """
        return [np.random.normal(0, 1, size=shape) for shape in self._weight_shapes()]

    def score(self, X, y):
        """ Return accuracy of model on a given dataset. Must implement own score function.

        Args:
            X (array-like): A 2D numpy array with data, excluding targets
            y (array-like): A 2D numpy array with targets (one-hot rows or a
                single column of class labels)

        Returns:
            score : float
                Mean accuracy of self.predict(X) wrt. y.
        Raises:
            ValueError: If y is empty or its length differs from the number of rows in X.
        """
        predictions = self.predict(X)
        labels = self._decode_targets(y)
        if labels.shape[0] == 0:
            raise ValueError("cannot score an empty dataset")
        if labels.shape[0] != predictions.shape[0]:
            raise ValueError("X and y must contain the same number of rows")
        return float(np.mean(predictions == labels))

    def _shuffle_data(self, X, y):
        """ Shuffle the data! This _ prefix suggests that this method should only be called internally.

        A single random permutation is applied to both arrays so every row of
        X stays paired with its target. The inputs are not modified.

        Returns:
            tuple: ``(X_shuffled, y_shuffled)`` as new arrays.
        Raises:
            ValueError: If X and y contain a different number of rows.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of rows")
        order = np.random.permutation(X.shape[0])
        return X[order], y[order]

    ### Not required by sk-learn but required by us for grading. Returns the weights.
    def get_weights(self):
        """Return the current list of weight matrices, or None before ``fit``."""
        return getattr(self, "weights", None)