In [1]:
import torch
import numpy as np
from pathlib import Path
import glob, os
from tqdm import tqdm
import pandas as pd
from torch.utils import data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical

In [2]:
PAD = 0
UNK = 1
BOS = 2
EOS = 3

PAD_WORD = '<blank>'
UNK_WORD = '<unk>'
BOS_WORD = '<s>'
EOS_WORD = '</s>'


def np_encode_string(s, char0 = ord(' ')):
    """converts a string into a numpy array of bytes
    (char0 - 1) is subtracted from all bytes values (0 is used for PAD)
    string is pre-pended with BOS and post-pended with EOS"""
    chars = np.array(list(s), dtype='S1').view(np.uint8)
    # normalize to 1 - 96, 0 being PAD
    chars = chars - char0 + 1

    chars = np.insert(chars, 0, BOS)
    chars = np.insert(chars, len(chars), EOS)
    return chars

def np_decode_string(chars, char0 = ord(' ')):
    """converts a numpy array of bytes into a UTF-8 string
    (char0 - 1) is added to all bytes values (0 is used for PAD)
    BOS/EOS are removed before utf-8 decoding"""
    chars = chars.astype(np.uint8)
    chars = chars + char0 - 1
    chars = chars[:-1]
    chars = chars.tobytes()
    s = chars.decode('UTF-8')
    return s

class LazyFileMathDataset(data.Dataset):
    """Stream loads math dataset file in a lazy way (optional)
    pandas is used for naive streaming as Python doesn't provide any better tool for that critical feature"""
    def __init__(self, file, lazy_load=False, max_elements=None, log=False):
        self.file = Path(file)
        self.lazy_load = lazy_load
        self.max_elements = max_elements

        fn = self.file.name.replace(".txt", "")
        self.category, self.module = fn.split("__")

        if not self.lazy_load:
            self.build_dataset()
            if log:
                print(f"Initialized MathDataset with file {self.file} (category:{self.category}, module:{self.module}) containing {self.qas.shape[0]} pairs of questions/answers")
        else:
            self.qas = None
            if log:
                print(f"Initialized MathDataset with file {self.file} (category:{self.category}, module:{self.module}) in lazy mode")

      
    def _read_build_dataset(self):
        self.df = pd.read_csv(self.file, header=None, sep='\n', names=['qa'], engine='c')
        self._build_dataset()
    
    def _build_dataset(self):
        if self.max_elements is not None:
            self.df_max = self.df.iloc[0:self.max_elements*2]
        else:
            self.df_max = self.df
        self.questions = self.df_max[0::2]
        self.questions.reset_index(inplace=True, drop=True)
        self.questions.rename(columns={ "qa" : "questions" }, inplace=True)
        self.answers = self.df_max[1::2]
        self.answers.reset_index(inplace=True, drop=True)
        self.answers.rename(columns={ "qa" : "answers" }, inplace=True)
        self.qas = pd.concat([self.questions, self.answers], axis=1)
        
    def set_max_elements(self, max_elements):
        self.max_elements = max_elements
        if self.qas is None:
            self._read_build_dataset()
        else:
            self._build_dataset()
        
    def __getitem__(self, idx):
        if self.qas is None:
            self._read_build_dataset()            
        question, answer = self.qas.iloc[idx]
        return {
            "q": question, "q_enc": np_encode_string(question),
            "a": answer, "a_enc": np_encode_string(answer),
        }

    def __len__(self):
        if self.qas is None:
           self._read_build_dataset() 
        return self.qas.shape[0]
    

class MathDatasetManager(data.Dataset):
    """A Math Dataset manager starting at root directory (like v1.0) to extract files and build torch datasets
    in a lazy loading and streamed way based on specific types/categories/modules presented in paper.
    
    It indexes difficulty/use-case types:
        - train-easy
        - train-medium
        - train-hard
        - interpolate
        - extrapolate
    
    and all categories:
        - algebra
        - numbers
        - polynomials
        - arithmetic
        - measurement
        - comparison
        - probability
        - calculus
        
    and all modules in those categories:
        - mul
        - add_or_sub_in_base
        - simplify_surd
        - mul_div_multiple
        - mixed
        - nearest_integer_root
        - div
        - add_or_sub
        - add_sub_multiple
        - add_sub_multiple_longer
        - mul_div_multiple_longer
        - div_big
        - mul_big
        - mixed_longer
        - add_or_sub_big
        - etc...
    """
    def __init__(self, root_dir, log=False):
        self.root_dir = Path(root_dir)

        self.dirs = {
            "train-easy" : self.root_dir / "train-easy",
            "train-medium" : self.root_dir / "train-medium",
            "train-hard" : self.root_dir / "train-hard",
            "interpolate" : self.root_dir / "interpolate",
            "extrapolate" : self.root_dir / "extrapolate",
        }
        
        self.dfs = {}
                
        for k, dir in self.dirs.items():
            files = [ff for ff in glob.glob(str(dir) + "/**/*.txt", recursive=True)]
            for f in files:
                ds = LazyFileMathDataset(f, lazy_load = True, log=log)
                if ds.category not in self.dfs:
                    self.dfs[ds.category] = {}
                if ds.module not in self.dfs[ds.category]:
                    self.dfs[ds.category][ds.module] = {
                        "easy" : {}, "medium" : {}, "hard" : {},
                        "interpolate": {}, "extrapolate": {}
                    }

                self.dfs[ds.category][ds.module][k] = ds                    

        print(f"initialized MultiFilesMathDataset with categories {list(self.dfs.keys())} and types {list(self.dirs.keys())}")

    def get_types(self):
        """retrieves all math typesfor this multi-file dataset"""
        return self.dirs.keys()            
        
    def get_categories(self):
        """retrieves all math problem categories in this multi-file dataset"""
        return self.dfs.keys()
    
    def get_modules_for_category(self, c):
        """retrieves all mathematical modules in a math problem category"""
        return self.dfs[c].keys()
    
    def _build_datasets_from_category(self, category, typ, max_elements=None):
        ds = []
        for k, m in self.dfs[category].items():
            if typ in m:
                m[typ].set_max_elements(max_elements)
                ds.append(m[typ])
                print(f"added module {category}/{k}/{typ}")
        return ds
        
    def build_dataset_from_category(self, category, typ, max_elements=None):
        """Build a dataset for all modules in a category"""
        print(f"adding category {category}/../{typ}")
        ds = self._build_datasets_from_category(category, typ, max_elements=max_elements)
        return data.ConcatDataset(ds)
    
    def build_dataset_from_categories(self, categories, typ, max_elements=None):
        """Build a dataset for all modules in several categories"""
        ds = []
        for c in categories:
            print(f"adding category {c}/../{typ}")
            dss = self._build_datasets_from_category(c, typ, max_elements=max_elements)
            ds.extend(dss)
        return data.ConcatDataset(ds)

    def build_dataset_from_module(self, category, module, typ, max_elements=None):
        """Build a dataset from a single module in a category"""
        self.dfs[category][module][typ].set_max_elements(max_elements)
        return self.dfs[category][module][typ]

    def build_dataset_from_modules(self, category, modules, typ, max_elements=None):
        """Build a dataset from several modules in a category"""
        ds = []
        for module in modules:
            self.dfs[category][module][typ].set_max_elements(max_elements)
            ds.append(self.dfs[category][module][typ])
        return data.ConcatDataset(ds)
    
    def build_dataset_from_categories_and_types(self, categories, types, max_elements=None):
        """Build a dataset for all modules in several categories"""
        ds = []
        for c in categories:
            for typ in types:
                try:
                    print(f"adding category {c}/../{typ}")
                    dss = self._build_datasets_from_category(c, typ, max_elements=max_elements)
                    ds.extend(dss)
                except:
                    continue
        return data.ConcatDataset(ds)
    
    
def question_answer_to_batch_collate_fn(qas):
    ''' Gather + Pad the question/answer to the max seq length in batch '''

    max_q_len = max(len(qa["q_enc"]) for qa in qas)
    max_a_len = max(len(qa["a_enc"]) for qa in qas)

    batch_qs = []
    batch_as = []
    batch_pos = []
    for qa in qas:
      batch_qs.append(np.pad(qa["q_enc"], (0, max_q_len - len(qa["q_enc"])), mode='constant', constant_values=PAD))
      batch_as.append(np.pad(qa["a_enc"], (0, max_a_len - len(qa["a_enc"])), mode='constant', constant_values=PAD))
    
    batch_qs = torch.LongTensor(batch_qs)
    batch_as = torch.LongTensor(batch_as)

    return batch_qs, batch_as

In [3]:
mdsmgr = MathDatasetManager("mathematics_dataset-v1.0")

# ds = mdsmgr.build_dataset_from_categories_and_types(list(mdsmgr.get_categories())[:1], list(mdsmgr.get_types())[:1])
ds = mdsmgr.build_dataset_from_module('arithmetic', 'add_or_sub', 'train-easy')

initialized MultiFilesMathDataset with categories ['numbers', 'algebra', 'probability', 'arithmetic', 'comparison', 'polynomials', 'measurement', 'calculus'] and types ['train-easy', 'train-medium', 'train-hard', 'interpolate', 'extrapolate']


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(**kwargs)


In [4]:
train_loader = data.DataLoader(
    ds, batch_size=128, shuffle=True,#, num_workers=1,#num_workers=4,
    collate_fn=question_answer_to_batch_collate_fn)

In [5]:
for i in train_loader:
    batch_qs, batch_as = i
    break

In [8]:
# from transformer.Models import Transformer
import transformer
# ASCII CHARS
VOCAB_SIZE = 95
# questions have less than 160 chars
MAX_QUESTION_SZ = 160
# answers have less than 30 chars
MAX_ANSWER_SZ = 30

num_layers = 1
num_heads = 1
key_dimension = 16
value_dimension = 16
dropout = 0.1
n_position = 160
d_char_vec = 32
model_dimension = 32
inner_dimension = 124
n_trg_position = MAX_ANSWER_SZ
n_src_position = MAX_QUESTION_SZ


model = transformer.Models.Transformer(n_src_vocab=VOCAB_SIZE + 1, n_trg_vocab=VOCAB_SIZE+1, src_pad_idx=0, trg_pad_idx=0,
                   d_char_vec=d_char_vec, d_model=d_char_vec, d_inner=inner_dimension, n_layers=num_layers,
                   n_head=num_heads, d_k=key_dimension, d_v=value_dimension, dropout=dropout,
                   n_trg_position=n_trg_position, n_src_position=n_src_position,
                   trg_emb_prj_weight_sharing=True, emb_src_trg_weight_sharing=True)

In [5]:
class PositionalEncoding(nn.Module):

    def __init__(self, d_hid, n_position=200):
        super(PositionalEncoding, self).__init__()

        # Not a parameter
        self.register_buffer('pos_table', self._get_sinusoid_encoding_table(n_position, d_hid))

    def _get_sinusoid_encoding_table(self, n_position, d_hid):
        ''' Sinusoid position encoding table '''
        # TODO: make it with torch instead of numpy

        def get_position_angle_vec(position):
            return [position / np.power(10000, 2 * (hid_j // 2) / d_hid) for hid_j in range(d_hid)]

        sinusoid_table = np.array([get_position_angle_vec(pos_i) for pos_i in range(n_position)])
        sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2])  # dim 2i
        sinusoid_
        table[:, 1::2] = np.cos(sinusoid_table[:, 1::2])  # dim 2i+1

        return torch.FloatTensor(sinusoid_table).unsqueeze(0)

    def forward(self, x):
        return x + self.pos_table[:, :x.size(1)].clone().detach()

class Critic(nn.Module):
    
    def __init__(self, conv_layers=4, d_char_vec=512, n_vocab=96, kernel_size=4, dropout=0.1, padding=1, 
                 src_embedding=None, trg_embedding=None, src_position_enc=None, trg_position_enc=None, pad_idx=0):
        
        super(Critic, self).__init__()
        
        self.src_word_emb = src_embedding if src_embedding != None else nn.Embedding(n_vocab, d_char_vec, padding_idx=pad_idx)
        self.trg_word_emb = trg_embedding if trg_embedding != None else nn.Embedding(n_vocab, d_char_vec, padding_idx=pad_idx)
        
        self.src_position_enc = src_position_enc if src_position_enc != None else PositionalEncoding(d_char_vec, n_position=MAX_QUESTION_SIZE)
        self.trg_position_enc = trg_position_enc if trg_position_enc != None else PositionalEncoding(d_char_vec, n_position=MAX_ANSWER_SIZE)
        self.dropout = nn.Dropout(p=dropout)
        
        self.conv_layers = [nn.Conv1d(in_channels=d_char_vec, out_channels=d_char_vec, kernel_size=kernel_size, padding=padding) for _ in range(conv_layers)]
        self.value_layer = nn.Linear(d_char_vec, 1)
        
    def forward(self, src_seq, trg_seq):
        
        batch_sz, max_src_pos, max_trg_pos = src_seq.shape[0], src_seq.shape[1], trg_seq.shape[1]
        src_seq_pad = torch.cat((src_seq, torch.zeros((batch_sz, MAX_QUESTION_SIZE-max_src_pos), dtype=torch.int64)), dim=1)
        trg_seq_pad = torch.cat((trg_seq, torch.zeros((batch_sz, MAX_ANSWER_SIZE-max_trg_pos), dtype=torch.int64)), dim=1)
        src_emb = self.dropout(self.src_position_enc(self.src_word_emb(src_seq_pad)))
        trg_emb = self.dropout(self.trg_position_enc(self.trg_word_emb(trg_seq_pad)))
        
        x = torch.cat((src_emb, trg_emb), dim=1)
        conv_out = torch.transpose(x, 1, 2).contiguous()
        for conv in self.conv_layers:
            conv_out = conv(conv_out)
        conv_out = torch.mean(F.relu(conv_out), dim=2)
        output = self.value_layer(conv_out)
        
        return output

In [84]:
from transformer.Models import Transformer

# ASCII CHARACTERS
VOCAB_SIZE = 95
# questions have less than 160 chars
MAX_QUESTION_SIZE = 160
# answers have less than 30 chars
MAX_ANSWER_SIZE = 30

class Policy_Network(nn.Module):
#     def __init__(self, num_layers = 6, num_heads = 8, key_dimension = 64, 
    def __init__(self, num_layers = 1, num_heads = 2, key_dimension = 64, 
                 value_dimension = 64, dropout = 0.1, n_position = 160, 
                 d_char_vec = 512, inner_dimension = 2048, 
                 n_trg_position = MAX_ANSWER_SIZE, n_src_position = MAX_QUESTION_SIZE, padding = 1,
                critic_num_layers=4, critic_kernel_size=4, critic_padding=1, model=None):
        
        super(Policy_Network, self).__init__()
        
        self.action_transformer = Transformer(n_src_vocab=VOCAB_SIZE + 1, n_trg_vocab=VOCAB_SIZE+1, src_pad_idx=0, trg_pad_idx=0,
                               d_char_vec=d_char_vec, d_model=d_char_vec, d_inner=inner_dimension, n_layers=num_layers,
                               n_head=num_heads, d_k=key_dimension, d_v=value_dimension, dropout=dropout,
                               n_trg_position=n_trg_position, n_src_position=n_src_position,
                               trg_emb_prj_weight_sharing=True, emb_src_trg_weight_sharing=True) if model == None else model
        
        
#         output of action transformer for state is shape (B X MAX_QUESTION_SZ*2 + MAX_ANSWQ_SZ X WORD_EMB)
#         input to convolution layer is B X WORD_EMB X MAX_QUESTION_SIZE*2 + MAX_ANSWER_SIZE
        self.value_head = Critic(conv_layers=critic_num_layers, d_char_vec=d_char_vec, kernel_size=critic_kernel_size,
                                n_vocab=VOCAB_SIZE+1, dropout=dropout, padding=critic_padding, 
                                src_embedding=self.action_transformer.encoder.src_word_emb, 
                                trg_embedding=self.action_transformer.decoder.trg_word_emb, 
                                 src_position_enc=self.action_transformer.encoder.position_enc, 
                                 trg_position_enc=self.action_transformer.decoder.position_enc)
        
    def forward(self, src_seq, trg_seq):
        
        action_prob = self.action_transformer(src_seq, trg_seq)
        action_prob = action_prob[:, -1, :]
        state_values = self.value_head(src_seq, trg_seq)
        
        return action_prob, state_values

class Trainer:
    def __init__(self, device='cpu'):
        self.device=device
        self.eps = np.finfo(np.float32).eps.item()
    
    def calc_reward(self, actions_pred, actions, ignore_index=0):
        # 1 if character is correct
        return (actions_pred==actions).float()
    
    
    def train_mle_epoch(self, training_data, model, optimizer):
        
    
    
    
    
    
    

    def train_policy_epoch(self, training_data, model, gamma, optimizer):
        
        model.train()
        ignore_index = model.action_transformer.trg_pad_idx
        
    #     sample batch of questions and answers
        for batch_idx, batch in enumerate(tqdm(training_data, mininterval=2, leave=False)):
            batch_qs, batch_as = map(lambda x: x.to(self.device), batch)
            batch_size = batch_qs.shape[0]
            to_store = [[]*batch_size]
            current_as = batch_as[:, :1]
            complete = torch.ones((batch_size, 1))
            rewards = torch.zeros((batch_size, 0))
            values = torch.zeros((batch_size, 0))
            log_probs = torch.zeros((batch_size, 0))
            advantages_mask = torch.ones((batch_size, 0))
            for t in range(1, MAX_ANSWER_SIZE):
                advantages_mask = torch.cat((advantages_mask, complete), dim=1)
                action_probs, curr_values = model(batch_qs, current_as)
                m = Categorical(F.softmax(action_probs, dim=-1))
                actions = m.sample().contiguous().view(-1, 1)
                
                trg_t = batch_as[:, t].contiguous().view(-1, 1)
                
                # update decoder output
                current_as = torch.cat((current_as, actions), dim=1)
                
                curr_log_probs = -F.cross_entropy(action_probs, trg_t.view(-1), ignore_index=0, reduction='none').contiguous().view(-1, 1)
                
                # calculate reward based on character cross entropy
                curr_rewards = self.calc_reward(actions, trg_t)
                
                # update terms
                rewards = torch.cat((rewards, curr_rewards), dim=1)
                values = torch.cat((values, curr_values), dim=1)
                log_probs = torch.cat((log_probs, curr_log_probs), dim=1)
                
                # if the action taken is EOS or if end of sequence trajectory ends
                complete *= (1 - ((actions==EOS) | (trg_t==EOS)).float())
            

            returns = self.get_returns(rewards, batch_size, gamma)
            
            advantages = returns - values
            advantages *= advantages_mask

            policy_losses = (-log_probs * advantages).sum(dim=-1).mean()

            value_losses = F.mse_loss(values, rewards, reduction='mean')

            optimizer.zero_grad()
            loss = policy_losses + value_losses

            loss.backward()
            optimizer.step()

    def get_returns(self, rewards, batch_size, gamma):
        T = rewards.shape[1]
        discounts = torch.tensor(np.logspace(0, T, T, base=gamma, endpoint=False)).view(1, -1)
        all_returns = torch.zeros((batch_size, T))
        
        for t in range(T):
            temp = (discounts[:, :T-t]*rewards[:, t:]).sum(dim=-1)
            all_returns[:, t] = temp
            (all_returns - all_returns.mean(dim=-1).view(-1, 1)) / (all_returns.std(dim=-1).view(-1, 1) + self.eps)
    
        return all_returns

In [85]:
# def main():

model = Policy_Network()
trainer = Trainer()
optimizer = optim.Adam(model.parameters(), lr=1e-2)

In [86]:
# train_loader
trainer.train_policy_epoch(train_loader, model, .99, optimizer)
















  0%|          | 0/5209 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

here
torch.Size([128, 1])
here
torch.Size([128, 1])
here
torch.Size([128, 1])
here
torch.Size([128, 1])
torch.Size([128, 4])
torch.Size([128, 4])
torch.Size([128, 4])
torch.Size([128, 4])
torch.Size([128, 4])

















  0%|          | 1/5209 [00:37<54:53:04, 37.94s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

here
torch.Size([128, 1])
here
torch.Size([128, 1])
here


KeyboardInterrupt: 

In [None]:
# print(model.value_head.src_word_emb(torch.tensor([0]))[0][:4])

# print(model.action_transformer.encoder.src_word_emb(torch.tensor([0]))[0][:4])

In [13]:
train_ds, val_ds = data.random_split(x, [int(.9*len(x)), len(x)-int(.9*len(x))])

In [None]:
def train_epoch(model, training_data, optimizer, device, epoch, tb=None, log_interval=100):
    model.train()
    
    total_rewards = 0
    n_char_total = 0
    n_char_correct = 0
    for batch_idx, batch in enumerate(tqdm(training_data, mininterval=2, leave=False)):
        
        # all in shape [B, MAX_LENGTH] where representation is in integers used in embedding lookup in transformer
        batch_qs, batch_qs_pos, batch_as, batch_as_pos = map(lambda x: x.to(device), batch)
        
        target_as = batch_as[:, 1:]
        
        optimizer.zero_grad()
        pred_as = model(batch_qs, batch_qs_pos, batch_as, batch_as_pos)

        loss, n_correct = compute_performance(pred_as, gold_as, smoothing=True)    
        loss.backward()
        # update parameters
        optimizer.step()
    
        # note keeping
        total_loss += loss.item()

        non_pad_mask = gold_as.ne(Constants.PAD)
        n_char = non_pad_mask.sum().item()
        n_char_total += n_char
        n_char_correct += n_correct
        

In [1]:
# input chars are selected from basic ASCII chars
VOCAB_SZ = 95
# questions have less than 160 chars
MAX_QUESTION_SZ = 160
# answers have less than 30 chars, need to check this and the above can also just automate with the dataset
MAX_ANSWER_SZ = 30