# 1. Feed-Foward Neural Network

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from utils import get_q1_data
%matplotlib inline
plt.rcParams['figure.figsize'] = 8,8

In [2]:
X_train, X_test, y_train, y_test, le = get_q1_data()
print("%d training samples, %d test samples"%(X_train.shape[0], X_test.shape[0]))
print("classes:", le.classes_)
print("The first 10 training samples are (with bias):")
print(X_train[:10])

70 training samples, 30 test samples
classes: [b'Iris-versicolor' b'Iris-virginica']
The first 10 training samples are (with bias):
[[1.  5.6 3.  4.1 1.3]
 [1.  5.5 2.5 4.  1.3]
 [1.  5.5 2.6 4.4 1.2]
 [1.  6.1 3.  4.6 1.4]
 [1.  5.8 2.6 4.  1.2]
 [1.  5.  2.3 3.3 1. ]
 [1.  5.6 2.7 4.2 1.3]
 [1.  5.7 3.  4.2 1.2]
 [1.  5.7 2.9 4.2 1.3]
 [1.  6.2 2.9 4.3 1.3]]


### 1. Implement sigmoid function
\begin{align}
sigmoid(x) & = \frac{1}{1+e^{-x}} \\
\end{align}
<img src="Figures/logistic.png">

In [3]:
def sigmoid(x):
    ### TODO: Fill this function with your implementation of sigmoid function ####
    return 1.0 / (1 + np.exp(-x))

### 2. Implement cross entropy
For binary classification for all samples with the output vector o and target label t $\in \{0, 1\}$:
\begin{align}
L(o, t) & = - \sum_{i=1}^n(t^{(i)}log(o^{(i)}) + (1-t^{i})log(1-o^{i})) \\
\end{align}

In [4]:
from math import log

def crossentropy(o,t):
    ### o is the output, t is the target.
    ### TODO: Fill this function with your implementation of crossentropy function for all samples ####
    return -1.0 * np.sum(t * np.log(o) + (1 - t) * np.log(1 - o))

### 3. Initialize weights
For weight initialization, please refer to http://cs231n.github.io/neural-networks-2/#init.

Here we are building a feed forward neural network with 2 hidden units as shown below. 
<img src="Figures/nn.png">

In [5]:
from math import sqrt

J = 2 # number of hidden units
### TODO: Fill the information for weight initialization ###
w1 = np.random.randn(5, 2) / sqrt(5) # initialize weights with calibration between input and hidden layer.
w2 = np.random.rand(3, 1) / sqrt(3) # initialize weights with calibration between hidden and output layer.
n_iter = 10000 # can be modified
alpha = 0.002 # can be modified
train_err = []
test_err = []
dw1_ = []
train_loss = []

In [34]:
w1.shape, w2.shape

((5, 2), (3, 1))

### 4. Implement gradient descent for n iterations.
Implement the update dw1 and dw2 based on your derivations for \begin{align}
\frac{\delta L}{\delta w_2}, 
\frac{\delta L}{\delta w_1}
\end{align}

In [35]:
### TODO: Fill the blanks below for gradient descent ###
for n in range(n_iter):
    # forward computation
    # compute the predictions
    o1 = sigmoid(w1.T.dot(X_train.T))
    temp = np.vstack((np.ones((1, o1.shape[1])), o1)) 
    o2 = sigmoid(w2.T.dot(temp))
    # backward computation to calculate dw1 and dw2
    # compute the loss
    dw1 = (o2 - y_train) * o1.T.dot(1 - o1) * X_train
    dw2 = (o2 - y_train) * o1
    _______________
    _______________
    _______________
    # weight updating
    w1 = w1 + alpha*dw1
    w2 = w2 + alpha*dw2
    # training error
    y_predict = _________
    train_err.append(____) # calculate the error and append to train_err
    # training loss
    train_loss.append(____) # use your crossentropy to calculate the loss
    # test error
    __________________
    __________________
    __________________
    y_predict = __________
    test_err.append(______)

ValueError: operands could not be broadcast together with shapes (70,70) (70,5) 

### 5. Print training loss vs number of iterations

In [None]:
import matplotlib.pyplot as plt
plt.plot(train_loss)
plt.show()

### 6. Print training error and test error

In [None]:
plt.plot(train_err, label="Training error")
plt.plot(test_err, label="Test error")
plt.show()

# 2. Char RNN

In [7]:
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, Activation
from keras import optimizers
from keras.utils.vis_utils import plot_model
from keras import regularizers

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [16]:
model = Sequential()
model.add(SimpleRNN(100, input_shape = (100,1), return_sequences = False))
model.add(Dense(46, activation="softmax"))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_2 (SimpleRNN)     (None, 50)                2600      
_________________________________________________________________
dense_2 (Dense)              (None, 46)                2346      
Total params: 4,946
Trainable params: 4,946
Non-trainable params: 0
_________________________________________________________________


In [10]:
data = open('tinyshakespeare.txt', 'r').read()
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has %d characters, %d unique.' % (data_size, vocab_size))
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

data has 1115390 characters, 65 unique.


In [11]:
# hyperparameters
hidden_size = 100 # size of hidden layer of neurons
seq_length = 25 # number of steps to unroll the RNN for
learning_rate = 1e-1

# model parameters
Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden
Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden
Why = np.random.randn(vocab_size, hidden_size)*0.01 # hidden to output
bh = np.zeros((hidden_size, 1)) # hidden bias
by = np.zeros((vocab_size, 1)) # output bias

In [17]:
Wxh.shape

(100, 65)

# 3. Object Detection