# CrossEntropyWithSoftmax Solution

In [None]:
class CrossEntropyWithSoftmax(Node):
    """Loss node combining a row-wise softmax with mean cross-entropy.

    The first input node supplies the scores (logits); they are reduced
    along ``axis=1``, so they are treated as a 2-D array with one row per
    sample. The second input node supplies the labels, which are used as
    integer column indices into each row of the probabilities.
    """

    def __init__(self, x, y):
        """Register ``x`` (logits node) and ``y`` (labels node) as inputs."""
        Node.__init__(self, [x, y])

    def _predict(self):
        """Return the index of the most probable class for every row."""
        probs = self._softmax(self.input_nodes[0].value)
        return np.argmax(probs, axis=1)

    def _accuracy(self):
        """Return the fraction of rows whose prediction equals its label."""
        preds = self._predict()
        return np.mean(preds == self.input_nodes[1].value)

    def _softmax(self, x):
        """Return the softmax of ``x`` computed independently for each row.

        The row maximum is subtracted before exponentiating. This leaves the
        result mathematically unchanged but keeps ``np.exp`` from overflowing
        to ``inf`` (and the division from producing ``nan``) on large logits.
        """
        shifted = x - np.max(x, axis=1, keepdims=True)
        exp_x = np.exp(shifted)
        probs = exp_x / np.sum(exp_x, axis=1, keepdims=True)
        return probs

    def forward(self):
        """Compute the mean negative log-likelihood and store it in ``value``.

        The probabilities and labels are copied into ``self.cache`` so that
        ``backward`` can reuse them.
        """
        probs = self._softmax(self.input_nodes[0].value)
        y = self.input_nodes[1].value
        self.cache[0] = np.copy(probs)
        self.cache[1] = np.copy(y)
        n = probs.shape[0]
        # pick, for every row, the probability assigned to its true label
        logprobs = -np.log(probs[np.arange(n), y])
        self.value = np.sum(logprobs) / n

    # we know this is a loss so we can be a bit less generic here
    # should have 0 output nodes
    def backward(self):
        """Compute the gradient of the loss with respect to the logits.

        Uses the combined softmax / cross-entropy derivative
        ``(probs - one_hot(y)) / n``. The gradient stored for the labels
        node is left as zeros.
        """
        assert len(self.output_nodes) == 0
        self.dvalues = {
            node: np.zeros_like(node.value) for node in self.input_nodes
        }
        # combined derivative of softmax and cross entropy
        # Work on a copy: modifying the cached probabilities in place would
        # corrupt the cache and make a repeated backward() call wrong.
        dprobs = np.copy(self.cache[0])
        y = self.cache[1]
        n = dprobs.shape[0]
        dprobs[np.arange(n), y] -= 1
        dprobs /= n
        # leave the gradient for the 2nd node all 0s, we don't care about the gradient
        # for the labels
        self.dvalues[self.input_nodes[0]] = dprobs

## Forward Pass


## Backward Pass