In [1]:
import torch
import numpy as np
from torch import optim
import torch.nn as nn
import time
import math
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import random

import AttnEncoder
# Global defs

# TODO: should iters_per_epoch also be defined here, with the other global settings?

%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from torch.autograd import Variable
# True when a CUDA device is available. Read by the rest of the notebook to decide
# whether tensors and the model are moved to the GPU.
use_cuda = torch.cuda.is_available()

In [3]:
num_samples = 1  # NOTE(review): not referenced in any of the visible notebook cells
batch_size = 32  # number of reviews per batch, used for both training and validation

# Upper bounds on the per-batch sequence length (longer reviews are truncated).
MASTER_MAX_LEN = 300      # cap applied to training batches in trainIters
MASTER_MAX_VAL_LEN = 400  # cap applied to validation batches in getAccuracy

In [4]:
# Function defs
def showPlot(points):
    """Draw `points` as a line plot with y-axis ticks every 0.2.

    points: sequence of numbers to plot (trainIters passes the averaged
        training losses).
    """
    # plt.subplots() already creates the figure. The previous extra
    # plt.figure() call left a stray empty figure in the notebook output.
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    # Plot on the explicit axes rather than through the pyplot state machine.
    ax.plot(points)

def asMinutes(s):
    """Format a duration of `s` seconds as the string '<minutes>m <seconds>s'."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)

def timeSince(since, percent):
    """Return '<elapsed> (- <estimated remaining>)' for a running job.

    since: start timestamp, as returned by time.time().
    percent: fraction of the work completed so far (trainIters passes
        completed_iterations / total_iterations).
    """
    elapsed = time.time() - since
    # The small epsilon keeps the estimate finite when no progress has been made yet.
    estimated_total = elapsed / (percent + 1e-8)
    remaining = estimated_total - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [5]:
# ** Starting of the Main code 
# ** ** 
# ** ** 
import re

# Convert string to vector of floats
def convert_to_float(string): # string with float values separated by spaces
    """Parse a whitespace-separated string of numbers into a list of floats."""
    return list(map(float, string.split()))

# Unique index for words
index = 0  # next unused word index

def get_index():
    """Hand out consecutive integer ids: return the current counter, then advance it."""
    global index
    index += 1
    return index - 1

# Dictionaries
dict_ind2vec = {}  # word index -> embedding vector (list of floats)
dict_ind2str = {}  # word index -> word string
dict_str2ind = {}  # word string -> word index

def get_list_of_indices(string):
    """Map a whitespace-separated string to the indices of its known words.

    string: text whose words are looked up in `dict_str2ind`.

    Returns a list of indices in the order the words appear. Words that are
    not keys of `dict_str2ind` are skipped, so the result may be shorter
    than the number of words in `string`.
    """
    # An explicit membership test replaces the former bare `except: pass`.
    # Only the out-of-vocabulary case is skipped; unrelated errors
    # (e.g. KeyboardInterrupt) are no longer silently swallowed.
    return [dict_str2ind[word] for word in string.split() if word in dict_str2ind]

## **
## **
# read the word2vec representations

# Load the pretrained word vectors. Each line is parsed below as
# "<word> <v1> <v2> ..." (split on the first space, remainder converted to floats).
with open('../review+wiki.filtered.200.txt') as f:
	wordvecs = f.readlines()

# The embedding dimensionality is taken from the first line of the file.
first_pair = wordvecs[0].split(" ", 1)
first_vec = convert_to_float(first_pair[1])
dim_vecSpace = len(first_vec) # Dimension of the vector space in which we are

# Register the two special tokens before any real word, so they receive the
# first two indices handed out by get_index() (0 and 1, provided get_index()
# has not been called earlier).
# <EOS> gets an all-ones vector, <BLANK> an all-zeros vector.
# NOTE(review): getAccuracy/trainIters pad batches with index 0, i.e. the <EOS>
# index rather than <BLANK> - confirm this is intended.

eos_index = get_index()
dict_str2ind["<EOS>"] = eos_index
dict_ind2str[eos_index] = "<EOS>"
dict_ind2vec[eos_index] = [1.0]*dim_vecSpace

blk_index = get_index()
dict_str2ind["<BLANK>"] = blk_index
dict_ind2str[blk_index] = "<BLANK>"
dict_ind2vec[blk_index] = [0.0]*dim_vecSpace


# Register every word of the embedding file in the three lookup dictionaries.
for elem in wordvecs:
	liss = elem.split(" ", 1) # split on the first space
	word_str = liss[0]
	word_vec = convert_to_float(liss[1])
	
	# Every line gets a fresh index, even if the same word string appeared before.
	here_index = get_index()
	dict_str2ind[word_str] = here_index
	dict_ind2str[here_index] = word_str
	dict_ind2vec[here_index] = word_vec

# CHKING
# print( dict_str2ind['a'] )

## **
## **
# read the data

with open('../reviews.aspect0.train.txt') as f:
    train_data = f.readlines()

# Each line starts with five "d.dd" ratings separated by single spaces and
# terminated by a tab; the review text follows. A raw string is used so the
# backslashes are not treated as (invalid) string escape sequences.
rating_regex = re.compile(r'\d\.\d\d \d\.\d\d \d\.\d\d \d\.\d\d \d\.\d\d\t') # Exactly matches only the ratings

# extract ratings - each rating is a scalar: the first of the five values on the line.
# findall() returns the matched *strings*, so [0] is the whole "r0 r1 r2 r3 r4\t"
# prefix. It is split and its first field parsed. The previous `[0][0]` took only
# the first character of that prefix (the digit before the decimal point), which
# collapsed every rating to 0.0 or 1.0.
ratings = [ float( rating_regex.findall(review)[0].split()[0] ) for review in train_data ]

# extract reviews: strip the rating prefix, then map words to vocabulary indices
reviews_str = [ rating_regex.sub('', review) for review in train_data ]
reviews = [ get_list_of_indices( review_str ) for review_str in reviews_str ]
X = reviews
total_size = len(X)

# Random 80/20 train/validation split.
# NOTE(review): `random` is not seeded in the visible cells, so the split
# differs from run to run.
divide_train = int( (4*total_size)/5 )
train_indices_of_X = sorted( random.sample( range(total_size), divide_train ) )
# Set copy for O(1) membership tests (the list made the loop below quadratic).
train_index_lookup = set(train_indices_of_X)

X_train = []
X_val = []
ratings_train = []
ratings_val = []
for i in range(total_size):
    if i in train_index_lookup:
        X_train.append(X[i])
        ratings_train.append(ratings[i])
    else:
        X_val.append(X[i])
        ratings_val.append(ratings[i])

# From here on X / ratings refer to the training portion only.
X = X_train
ratings = ratings_train

num_train_examples = len(X) # we also assume len(X) = len(ratings)
num_val_examples = len(X_val)
# read validation data

# ** ** 
# ** ** 

In [6]:
def getAccuracy(X, ratings, attn_encoder):
    """Return the mean squared error of `attn_encoder` on (X, ratings).

    Despite the name this is a loss, not an accuracy: lower is better.

    X: list of reviews, each a list of word indices.
    ratings: list of target ratings, aligned with X.
    attn_encoder: model, called as attn_encoder(batch, use_cuda, False) and
        expected to return (attn_weights, ratings_pred). The meaning of the
        third argument is defined in AttnEncoder - presumably a train/eval
        switch, TODO confirm.

    The data is swept once, in order, in full batches of `batch_size`; a
    trailing partial batch is skipped. The result is the summed squared error
    divided by the number of examples actually evaluated (0.0 if there are
    fewer than `batch_size` examples).

    Changes from the previous version:
      * batches are padded into a fresh array instead of `extend`-ing the
        caller's review lists in place (which permanently padded the dataset);
      * the set is swept deterministically instead of drawing random batches;
      * the loss is normalised by the evaluated example count, not len(X);
      * evaluation runs under torch.no_grad(), so no autograd graph is built.
    """
    num_val_examples = len(X)
    num_iters = num_val_examples // batch_size
    # reduce=False gives one loss per example; they are summed explicitly below.
    encoderLoss = nn.MSELoss(reduce=False)

    total_loss = 0.0
    with torch.no_grad():
        for iters in range(num_iters):
            start = iters * batch_size
            X_bch = X[start:start + batch_size]
            ratings_bch = ratings[start:start + batch_size]

            # Pad/truncate every review to the longest one in the batch (capped).
            max_seq_len = min(max(len(review) for review in X_bch), MASTER_MAX_VAL_LEN)

            # Zero-initialised so short reviews are padded with index 0, the same
            # pad value as before, but without touching the lists stored in X.
            X_bach = np.zeros([batch_size, max_seq_len])
            ratings_bach = np.empty([batch_size, 1])
            for row, (review, rating) in enumerate(zip(X_bch, ratings_bch)):
                kept = review[:max_seq_len]
                X_bach[row, :len(kept)] = kept
                ratings_bach[row] = rating
            # X_bach is a 2d numpy array of size :: batch_size X max_seq_len

            X_bach_tensor = torch.from_numpy(X_bach).type(torch.LongTensor)
            ratings_bach_tensor = torch.from_numpy(ratings_bach).type(torch.FloatTensor)
            if use_cuda:
                X_bach_tensor = X_bach_tensor.cuda()
                ratings_bach_tensor = ratings_bach_tensor.cuda()

            X_batch = Variable(X_bach_tensor)
            ratings_batch = Variable(ratings_bach_tensor)

            attn_weights, ratings_pred = attn_encoder(X_batch, use_cuda, False)
            encoder_loss = encoderLoss(ratings_pred, ratings_batch.squeeze(1))

            total_loss += float(torch.sum(encoder_loss))

    num_evaluated = num_iters * batch_size
    if num_evaluated == 0:
        return 0.0
    return total_loss / num_evaluated

In [7]:
# Train function - here's some ingenuity
# one iteration of training
def train(X, ratings, attn_encoder, attn_encoder_optimizer, print_grad_norm):
    """Run one optimisation step on a single batch and return its mean loss.

    X: input batch, forwarded as attn_encoder(X, use_cuda).
    ratings: target tensor; its dimension 1 is squeezed before the loss.
    attn_encoder: model returning (attn_weights, ratings_pred).
    attn_encoder_optimizer: optimizer whose gradients are cleared and stepped here.
    print_grad_norm: when true, print the name, parameter norm and gradient
        norm of every trainable parameter, then wait for input() before the
        optimizer step.

    Returns the batch's mean squared error as a Python float.
    """
    attn_encoder_optimizer.zero_grad()

    attn_weights, ratings_pred = attn_encoder(X, use_cuda)

    # Per-example squared errors, averaged into the scalar that is back-propagated.
    per_example_loss = nn.MSELoss(reduce=False)(ratings_pred, ratings.squeeze(1))
    batch_loss = torch.mean(per_example_loss)
    mean_cost = float(batch_loss)

    batch_loss.backward()

    if print_grad_norm:
        for name, param in attn_encoder.named_parameters():
            if not param.requires_grad:
                continue
            print(name)
            print(param.data.norm())
            print(param.grad.data.norm())

        # Pause so the norms can be inspected before the weights are updated.
        input()

    attn_encoder_optimizer.step()

    return mean_cost

In [11]:
def trainIters(X, ratings, X_val, ratings_val, attn_encoder, learning_rate, learning_rate_decay, num_epochs,
               load_dict=None, print_every=1000, plot_every=100, val_every=1000, print_grad_every=-1,
               save_folder='', weight_decay=0):
    """Train `attn_encoder` on (X, ratings), validating and checkpointing periodically.

    X, ratings: training reviews (lists of word indices) and their targets.
    X_val, ratings_val: validation data, passed to getAccuracy.
    attn_encoder: model to train; replaced by load_dict['attn_encoder_model']
        when `load_dict` is given.
    learning_rate, weight_decay: Adam settings. On resume they overwrite the
        values stored in the loaded optimizer state.
    learning_rate_decay: gamma of the StepLR scheduler.
        NOTE(review): the scheduler is created and checkpointed but never
        stepped here, so this currently has no effect on the learning rate.
    num_epochs: epochs to run; one epoch is len(X) // batch_size iterations.
    load_dict: optional checkpoint dict as written by this function; iterations
        up to load_dict['tot_iter'] are skipped.
    print_every / plot_every / val_every: periods, in total iterations, for
        logging the running loss, recording a plot point, and validating +
        saving a checkpoint.
    print_grad_every: if > 0, every that many iterations train() prints
        gradient norms (and waits for input()).
    save_folder: prefix prepended to checkpoint file names (no separator is added).

    Returns None; shows the loss plot when training ends.

    Changes from the previous version:
      * batches are padded into a fresh array instead of `extend`-ing the
        review lists stored in X (which permanently padded the training data);
      * print/plot averages divide by the number of losses actually
        accumulated. The plot average used to fire on `iter_num % plot_every`
        and divide a single loss by `plot_every` at the start of every epoch.
    """
    num_train_examples = len(X)
    start = time.time()
    plot_losses = []
    print_loss_total = 0.0
    print_loss_count = 0
    plot_loss_total = 0.0
    plot_loss_count = 0
    best_val_acc = float("inf")  # validation metric is an MSE: lower is better

    if load_dict is not None:
        attn_encoder = load_dict['attn_encoder_model']
        cur_tot_iters = load_dict['tot_iter']
    else:
        cur_tot_iters = 0

    # Only parameters that require gradients are handed to the optimizer.
    attn_enc_param_list = [param for param in attn_encoder.parameters() if param.requires_grad]
    attn_encoder_optimizer = optim.Adam(attn_enc_param_list, lr=learning_rate, weight_decay=weight_decay)

    attn_encoder_scheduler = optim.lr_scheduler.StepLR(attn_encoder_optimizer, 1, learning_rate_decay)

    if load_dict is not None:
        attn_encoder_optimizer.load_state_dict(load_dict['attn_encoder_optimizer'])
        # The arguments of this call take precedence over the checkpointed values.
        for param_group in attn_encoder_optimizer.param_groups:
            param_group['weight_decay'] = weight_decay
            param_group['lr'] = learning_rate

        attn_encoder_scheduler.load_state_dict(load_dict['attn_encoder_scheduler'])

    iters_per_epoch = num_train_examples // batch_size
    n_iters = iters_per_epoch * num_epochs

    for epoch in range(num_epochs):
        for iter_num in range(iters_per_epoch):
            iters_done = epoch * iters_per_epoch + iter_num + 1
            if cur_tot_iters >= iters_done:
                # Already covered by the loaded checkpoint.
                continue

            # Randomly choose a batch (sorting preserves the dataset order).
            batch_indices = sorted(random.sample(range(num_train_examples), batch_size))
            X_bch = [X[i] for i in batch_indices]
            ratings_bch = [ratings[i] for i in batch_indices]

            # Pad/truncate every review to the longest one in the batch (capped).
            max_seq_len = min(max(len(review) for review in X_bch), MASTER_MAX_LEN)

            # Zero-initialised so short reviews are padded with index 0, the same
            # pad value as before, but without touching the lists stored in X.
            X_bach = np.zeros([batch_size, max_seq_len])
            ratings_bach = np.empty([batch_size, 1])
            for row, (review, rating) in enumerate(zip(X_bch, ratings_bch)):
                kept = review[:max_seq_len]
                X_bach[row, :len(kept)] = kept
                ratings_bach[row] = rating
            # X_bach is a 2d numpy array of size :: batch_size X max_seq_len

            X_bach_tensor = torch.from_numpy(X_bach).type(torch.LongTensor)
            ratings_bach_tensor = torch.from_numpy(ratings_bach).type(torch.FloatTensor)
            if use_cuda:
                X_bach_tensor = X_bach_tensor.cuda()
                ratings_bach_tensor = ratings_bach_tensor.cuda()
            X_batch = Variable(X_bach_tensor)
            ratings_batch = Variable(ratings_bach_tensor)

            # call train with this batch
            cur_tot_iters = iters_done
            print_grad = print_grad_every > 0 and cur_tot_iters % print_grad_every == 0
            cur_loss = train(X_batch, ratings_batch, attn_encoder,
                             attn_encoder_optimizer, print_grad)

            print_loss_total += cur_loss
            print_loss_count += 1
            plot_loss_total += cur_loss
            plot_loss_count += 1

            if cur_tot_iters % print_every == 0:
                print_loss_avg = print_loss_total / print_loss_count
                print_loss_total = 0.0
                print_loss_count = 0
                print('%s (%d %d%%) %.4f' % (timeSince(start, 1.0 * (cur_tot_iters) / n_iters),
                                             cur_tot_iters, 1.0 * (cur_tot_iters) / n_iters * 100, print_loss_avg),flush=True)

            if cur_tot_iters % val_every == 0:
                val_acc = getAccuracy(X_val, ratings_val, attn_encoder)
                print("Val Acc: ", val_acc)
                best = val_acc < best_val_acc
                if best:
                    best_val_acc = val_acc

                # The checkpoint stores the whole model object plus optimizer and
                # scheduler state; the file name ends with the `best` flag.
                save_dict = {}
                save_dict['attn_encoder_model'] = attn_encoder
                save_dict['attn_encoder_optimizer'] = attn_encoder_optimizer.state_dict()
                save_dict['attn_encoder_scheduler'] = attn_encoder_scheduler.state_dict()

                save_dict['tot_iter'] = cur_tot_iters
                save_dict['val_acc'] = val_acc
                save_dict['best_so_far'] = best
                torch.save(save_dict, save_folder+'chkpt_'+str(cur_tot_iters)+str(best))

            if cur_tot_iters % plot_every == 0:
                plot_loss_avg = plot_loss_total / plot_loss_count
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0.0
                plot_loss_count = 0

    showPlot(plot_losses)

In [12]:
# defining pretrained_embeddings
# One row per vocabulary index; row `key` holds that word's embedding vector.
pretrained_embeddings = np.empty((len(dict_ind2vec), dim_vecSpace))
for key, vec_here in sorted(dict_ind2vec.items()):
    pretrained_embeddings[key] = np.asarray(vec_here)

In [13]:
# Report which device the run will use.
print("Using GPU" if use_cuda else "Using CPU")

Using GPU


In [None]:
# Initialising hyperparameter containers: each list holds the values to sweep over.
learning_rates = [0.00025883]
length_regs = [0.0003]
continuity_regs = [0.0006]
learning_rate_decays = [1]
weight_regs = [0]
num_epochs = 60

# Grid over all hyperparameter combinations (a single combination with the lists above).
# NOTE(review): length_reg and continuity_reg are iterated over but never used
# inside the loop body.
load_dict=None
for lrate_decay in learning_rate_decays:
    for length_reg in length_regs:
        for continuity_reg in continuity_regs:
            for l_rate in learning_rates:
                for wt_reg in weight_regs:
                    # Always resumes from this hard-coded checkpoint, overwriting the
                    # `load_dict=None` set above. trainIters then replaces the model
                    # built below with load_dict['attn_encoder_model'].
                    load_dict = torch.load('test1_' + str(wt_reg) + '/chkpt_60000False')
                    # NOTE(review): the meaning of the positional arguments (200, 2, 50,
                    # 'LSTM') is defined in AttnEncoder, not visible here - confirm.
                    attn_encoder = AttnEncoder.AttnEncoder(pretrained_embeddings, 200, 2, 50, 'LSTM', dropout=0.1)
                    attn_encoder.float()

                    if (use_cuda):
                        attn_encoder.cuda()
                    # Checkpoints are written to the folder the run was resumed from.
                    save_folder = 'test1_' + str(wt_reg) + '/'
                    trainIters(X, ratings, X_val, ratings_val, attn_encoder, 
                                learning_rate=l_rate, learning_rate_decay=lrate_decay, num_epochs=num_epochs, \
                                print_every=100,val_every=1000,load_dict=load_dict, print_grad_every=-1, \
                              save_folder=save_folder,weight_decay=wt_reg)

  hidden, (_, _) = self.lstm_i2h(x_transpose)


0.4.0
