In [1]:
from character_mapping import Character_Map 
import theano 
import theano.tensor as T
import numpy as np 

text_test = './../texts/melville.txt'
char_map_obj = Character_Map(text_test,'mapping.dat',overwrite=True, break_line=None)
char_map_obj.k_map()
x_, y_, shared_x, shared_y = char_map_obj.gen_x_and_y(filename=None)

# print(shared_y.get_value().shape[1])
nh = 100
nx = len(char_map_obj.unique_char)
ny = nx 

Time creating character mapping and pickling: 1.9495 sec
Time creating k map 3.319 sec
Time creating arrays: 1.948 sec


In [3]:
import theano 
import theano.tensor as T 
import numpy as np 
from character_mapping import Character_Map
import time
try:
	import cPickle as pickle
except:
	import pickle

class RNN(object):

	def __init__(self, nh, nx, ny):
		"""
		This is only set up for a single hidden layer 
		args:
			nh is size of hidden layer vector 
			nx is the size of the input vector 
			ny is the size of the output vector (ny = nx in character example)
		"""
		
		# self.emb = theano.shared(name='embeddings',
		# 						 value=0.2 * numpy.random.uniform(-1.0, 1.0,
		# 						 (ne+1, de))
		# 						 # add one for padding at the end
		# 						 .astype(theano.config.floatX))

		self.wx = theano.shared(name='wx',
								value=0.2 * np.random.uniform(-1.0, 1.0,
								(nx, nh))
								.astype(theano.config.floatX)) #input weights

		self.wh = theano.shared(name='wh',
								value=0.2 * np.random.uniform(-1.0, 1.0,
								(nh, nh))
								.astype(theano.config.floatX)) #hidden layer weights

		self.wy = theano.shared(name='wy',
							   value=0.2 * np.random.uniform(-1.0, 1.0,
							   (nh, ny))
							   .astype(theano.config.floatX)) #output weights
		
		self.bh = theano.shared(name='bh',
								value=np.zeros(nh,
								dtype=theano.config.floatX)) #hidden layer bias
		
		self.by = theano.shared(name='b',
							   value=np.zeros(ny,
							   dtype=theano.config.floatX)) #output layer bias
		
		self.h0 = theano.shared(name='h0',
								value=np.zeros(nh,
								dtype=theano.config.floatX)) #initial h vector 

		self.sequence_length = 15

	def feed_through(self,x,h_tm1):
		"""
		t_step is the current time step. If t_step == 0, then we use self.h0
		to feed through net.
		basically copied from the theano tutorial
		"""
		h = T.tanh(T.dot(x,self.wx) + T.dot(h_tm1, self.wh) + self.bh)

		y_hat = self.by + T.dot(h,self.wy)

		y_guess = T.nnet.softmax(y_hat) 

		return h, y_guess
	
	def loss(self,x,y):
		"""
		args:
			- x is a vector containing the first character of a sequence 
			- y is a vector containing the last character of the sequence 
			
		***assuming constance sequence length****
		"""
		# y = y[-1]

		# y_intermediate, h = self.feed_through_dean(x,self.h0)
		# y_total = y_intermediate
		# for i in xrange(1,self.sequence_length):
		# 	y_intermediate, h = self.feed_through_dean(y_intermediate,h)
		# 	y_total += y_intermediate

		[h, s], _ = theano.scan(fn=self.feed_through,
						sequences=x,
						outputs_info=[self.h0,None])
						# n_steps=self.sequence_length)
		
		# return -T.mean(T.log(p_y_given_x_sentence)[T.arange(y.shape[0]), y])
		return -T.mean(T.log(s)[T.arange(y.shape[0]), y])

	def save_param(self,pickle_file):

		pickle_me = {
					'param':[self.wx, self.wh, self.wy, self.bh, self.by, self.h0]
		}

		pickle.dump( pickle_me, open(pickle_file, 'wb') )

	def load_param(self,pickle_file):

		pickle_me = pickle.load(open(pickle_file,'rb'))

		param = pickle_me['param']

		self.wx, self.wh, self.wy, self.bh, self.by, self.h0 = param

	def train(self,training_data,learning_rate,n_epochs,mini_batch_size):
		"""
		args:
			- training_data: inputs with ideal 
		"""
		# self.char_sequence_length = char_sequence_length
		train_x, train_y = training_data
		# train_y = T.cast(train_y,'int32')
		train_size_total = train_x.get_value(borrow=True).shape[0]

		# n_train = len(train_x)
		# print(train_size_total, n_train)
		n_train_batches = train_size_total/mini_batch_size

		x = T.matrix('x')
		y = T.imatrix('y')
		index = T.iscalar()

		cost = self.loss(x,y)
		params = [self.wx, self.wh, self.wy, self.bh, self.by, self.h0]
		grads = T.grad(cost,params)
		updates = [(param, param-learning_rate*grad) for param, grad in zip(params,grads)]

		train_model1 = theano.function(
			inputs = [x,y],
			outputs = cost,
			updates = updates
		)
		print("function compiled")
# 		train_model1(train_x.get_value()[0:2],train_y.get_value()[0:2])
		# train_model = theano.function(
		# 	inputs = [index],
		# 	outputs = cost,
		# 	updates = updates,
		# 	givens = {
		# 		x: train_x[index*mini_batch_size: (index+1)*mini_batch_size],
		# 		y: train_y[index*mini_batch_size: (index+1)*mini_batch_size] 
		# 	}
		# )
		# print("Function compiled")
		# for i in xrange(n_epochs):
		# 	t1 = time.time()
		# 	for index in xrange(n_train_batches):
		# 		train_model(index)
		# 		# print("Minibatch done")
		# 	print("Epoch number {}, took {:.3f} sec".format(i,time.time()-t1))
		# 	if i % 5 == 0:
		# 		t2 = time.time()
		# 		self.save_param("param_epoch{}.dat".format(i))
		# 		print("Pickling epoch number {} took {:.3f} sec".format(i, time.time()-t2))


In [7]:
trainer = RNN(nh,nx,ny)

x = T.matrix('x')
y = T.imatrix('y')

[h,s], _= theano.scan(fn=trainer.feed_through,
                     sequences=x,
                     outputs_info=[trainer.h0, None])
                     #non_sequences = [trainer.wx,trainer.wh,trainer.wy,trainer.bh,trainer.by])
# foo = T.log(s)
# fn1 = theano.function([x],s)
# print(fn1(shared_x.get_value()[0]))
    
error = T.log(s)[T.arange(y.shape[0]),y]

fn = theano.function([x,y],error)

fn(shared_x.get_value()[0],shared_y.get_value()[0])


[[[ 0.01091557  0.01346954  0.01023326 ...,  0.01117334  0.00918349
    0.01048125]]

 [[ 0.01185487  0.00872138  0.01160469 ...,  0.01344216  0.01319988
    0.00789976]]

 [[ 0.01562081  0.01086014  0.01994807 ...,  0.01426068  0.01843997
    0.00894905]]

 ..., 
 [[ 0.00699087  0.02011978  0.01191004 ...,  0.00674438  0.00590327
    0.00818914]]

 [[ 0.02325227  0.01286179  0.00688772 ...,  0.00643754  0.01449506
    0.00492894]]

 [[ 0.01275721  0.01069187  0.01666327 ...,  0.01197606  0.01197785
    0.00748668]]]
