# Theano ANN basic pattern  

```
import numpy as np
import theano
import theano.tensor as T

# input X:            N x D
# hidden weights W1:  D x M
# output weights W2:  M x K

# Initial values for weights/biases.
# D, M and K are placeholders for the layer sizes and must be defined beforehand.
# Each weight matrix is drawn from a standard normal and divided by the square
# root of its first dimension; biases start at zero.
W1_init = np.random.randn(D, M) / np.sqrt(D)
b1_init = np.zeros(M)
W2_init = np.random.randn(M, K) / np.sqrt(M)
b2_init = np.zeros(K)

# Symbolic placeholders: thX for the input matrix, thT for the target matrix.
thX = T.matrix(name='X')
thT = T.matrix(name='T')

# Trainable parameters live in Theano shared variables, each seeded with the
# corresponding initial value and given a name.
W1 = theano.shared(W1_init, name='W1')
b1 = theano.shared(b1_init, name='b1')
W2 = theano.shared(W2_init, name='W2')
b2 = theano.shared(b2_init, name='b2')

# Network model:
#   hidden activation thZ = relu(X.W1 + b1)
#   output            thY = softmax(Z.W2 + b2)
thZ = T.nnet.relu(T.dot(thX, W1) + b1)
thY = T.nnet.softmax(T.dot(thZ, W2) + b2)

# Define cost model (cross entropy, summed over every element of thT * log(thY)).
# reg is the regularization factor (L2 regularization in this case); it is a
# placeholder that must be defined beforehand.
# Since Theano can compute derivatives symbolically, we simply define the cost
# function including the regularization penalty.
cost = -(thT * T.log(thY)).sum() + reg * (
    (W1 * W1).sum() + (b1 * b1).sum() + (W2 * W2).sum() + (b2 * b2).sum()
)

# Prediction logic: for each row of thY, take the index of the largest value.
prediction = thY.argmax(axis=1)

# Training expressions: one gradient-descent step per parameter.
# lr is the learning rate; it is a placeholder that must be defined beforehand.
# T.grad(cost, p) is the symbolic gradient of cost with respect to p.
update_W1 = W1 - lr * T.grad(cost, W1)
update_b1 = b1 - lr * T.grad(cost, b1)
update_W2 = W2 - lr * T.grad(cost, W2)
update_b2 = b2 - lr * T.grad(cost, b2)

# Define train function.
# Its inputs are the symbolic input/target placeholders (thX, thT); thY is a
# computed expression and cannot be supplied as an input.
# No outputs are requested: each call just applies the listed updates to the
# shared variables.
train = theano.function(
    inputs=[thX, thT],
    updates=[(W1, update_W1), (b1, update_b1), (W2, update_W2), (b2, update_b2)],
)
    
# Define predict function: given inputs and targets, it returns the cost and
# the prediction, in that order.
get_predict = theano.function(inputs=[thX, thT], outputs=[cost, prediction])
    
# Training loop: 1000 iterations.
# Xbatch/Ybatch and Xtest/Ytest are placeholders for numpy arrays prepared elsewhere.
for _ in range(1000):
    # One training step on the current batch.
    train(Xbatch, Ybatch)

    # Evaluate cost and predictions on the validation data.
    cost_val, prediction_val = get_predict(Xtest, Ytest)
    

```
