In [10]:
import torch as T
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from pyemd import emd
import gensim
from nltk.corpus import stopwords
from nltk import download
download('stopwords')
from transformers import AutoTokenizer
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    DataCollatorForSeq2Seq,
)
import torch 
from collections import namedtuple, deque
from collections import defaultdict
import random

import datasets
from transformers import pipeline
from transformers.pipelines.pt_utils import KeyDataset
from transformers import logging

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/felixmeng/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
# Checkpoint name shared by the seq2seq model and its tokenizer below.
model_name = "facebook/bart-base"

# Word vectors read from a local word2vec-format binary file (relative path).
embedding_model = gensim.models.KeyedVectors.load_word2vec_format(
    "GoogleNews-vectors-negative300.bin.gz",
    binary=True,
)

# Notebook-level copies of the pretrained seq2seq model and tokenizer.
pretrained_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
pretrained_tokenizer = AutoTokenizer.from_pretrained(model_name)

In [3]:
class Env():
    """Token-by-token decoding environment around a pretrained seq2seq model.

    The input sentence is encoded once in ``__init__``; every call to
    ``action`` appends one token id to the decoder prefix and re-runs the
    decoder on the cached encoder output to refresh the next-token logits.
    """
    def __init__(self,input_sentence,model_name,sentence,reward):
        """
        input_sentence: the input sentence x
        model_name: the transformer model that we are using
        sentence: the sentence class (or zero-argument factory) that contains helper
            functions for the currently decoded word; an already constructed
            instance is accepted as well and used as-is
        reward: the reward class (or zero-argument factory) that will return the reward
            of a new action; an already constructed instance is accepted as well
        """
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        input_ids = self.tokenizer(input_sentence,return_token_type_ids=False,return_tensors='pt').input_ids
        self.input_ids = input_ids
        # The decoder prefix starts with the model's configured start token.
        self.decoder_input_ids = torch.tensor([[self.model.config.decoder_start_token_id]])
        # Inference only: without no_grad every forward pass keeps an autograd graph alive.
        with torch.no_grad():
            outputs = self.model(input_ids, decoder_input_ids=self.decoder_input_ids, return_dict=True)
        # Kept as a 1-tuple because it is passed back as `encoder_outputs` in `action`.
        self.encoded_sequence = (outputs.encoder_last_hidden_state,)
        self.lm_logits = outputs.logits
        self.word_embed = self.model.state_dict()['model.encoder.embed_tokens.weight']
        self.input_sentence=input_sentence
        ## additional stuff to call from
        # Classes/factories are called with no arguments (original behaviour);
        # anything that is not callable is assumed to be a ready-made instance.
        self.reward = reward() if callable(reward) else reward
        self.sentence_class = sentence() if callable(sentence) else sentence

    def action(self,action_word_id):
        """
        Append `action_word_id` to the decoder prefix and return the next state.

        Returns a dict with
          'word_embeddings': one embedding row per token id in the decoder prefix
          'logits': the model logits for the updated prefix
        """
        next_decoder_input_ids = torch.tensor([[int(action_word_id)]])
        self.decoder_input_ids = torch.cat([self.decoder_input_ids, next_decoder_input_ids], dim=-1)
        # Reuse the cached encoder output; no gradients are needed while stepping.
        with torch.no_grad():
            self.lm_logits = self.model(
                None,
                encoder_outputs=self.encoded_sequence,
                decoder_input_ids=self.decoder_input_ids,
                return_dict=True,
            ).logits
        return {
            'word_embeddings': [self.word_embed[i,:] for i in self.decoder_input_ids[0]],
            'logits': self.lm_logits
          }

    def generated_sentence_so_far(self):
        """Decode the current decoder prefix to text, dropping special tokens."""
        return self.tokenizer.decode(self.decoder_input_ids[0], skip_special_tokens = True)

    def encoded_input(self):
        """Return the cached encoder last hidden state of the input sentence."""
        return self.encoded_sequence[0]

    def reference_decode(self, num_beams=5):
        """
        Generate reference targets with greedy, beam and top-k decoding.

        num_beams: beam width for the beam-search reference. The previous
            hard-coded value of 1 meant no beam search was actually performed.

        Returns a list of three decoded strings: [greedy, beam, top-k sample].
        The top-k entry is sampled, so it varies between calls unless the torch
        RNG is seeded.
        """
        #maybe add more
        greedy_output = self.model.generate(self.input_ids)

        beam_output = self.model.generate(
            self.input_ids,
            num_beams=num_beams,
            no_repeat_ngram_size=2,
            early_stopping=True
        )
        topk_output = self.model.generate(
            self.input_ids,
            do_sample=True,
            top_k=50,
            temperature=0.7
        )
        return [
            self.tokenizer.decode(greedy_output[0], skip_special_tokens=True),
            self.tokenizer.decode(beam_output[0], skip_special_tokens=True),
            self.tokenizer.decode(topk_output[0], skip_special_tokens=True)
        ]

    ## a bunch of classifiers for a given sentence
    def fluency_score(self,action):
        """Placeholder: not implemented yet, returns None."""
        pass

    def sentiment_class(self,action):
        """Placeholder: not implemented yet, returns None."""
        pass

    def step(self,action):
        """
        returns next_state, reward, termination

        The sentence helper must provide `is_termination()` and the reward
        helper must provide `evaluate_reward()`.
        """
        next_state= self.action(action)
        termination= self.sentence_class.is_termination()
        reward= self.reward.evaluate_reward()
        return next_state,reward,termination

In [12]:
def sentiment_pred(list_of_text):
    """Run the sentiment pipeline on `list_of_text` and return its predictions.

    list_of_text: the text(s) to classify; passed straight to the pipeline.

    Returns whatever the pipeline returns for the input. An empty list/tuple
    yields [] without loading the model.

    The pipeline object is created on the first call and cached on the
    function, so the model is not reloaded on every invocation.
    """
    # Nothing to classify: skip the (expensive) model load entirely.
    if isinstance(list_of_text, (list, tuple)) and len(list_of_text) == 0:
        return []
    logging.set_verbosity_error()
    pipe = getattr(sentiment_pred, "_pipe", None)
    if pipe is None:
        pipe = pipeline(model="siebert/sentiment-roberta-large-english")
        # pipe = pipeline(model="roberta-large-mnli")
        sentiment_pred._pipe = pipe
    return pipe(list_of_text)

# Smoke test: classify one sentence and print the raw pipeline output.
print(sentiment_pred(["I really like this one"]))


Downloading:   0%|          | 0.00/1.51k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/687 [00:00<?, ?B/s]

Metal device set to: Apple M1 Max


Downloading:   0%|          | 0.00/1.32G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/256 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/780k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/150 [00:00<?, ?B/s]

[{'label': 'POSITIVE', 'score': 0.99888676404953}]


In [9]:
class Reward():
    """Reward helper: sentence roll-outs plus an embedding-based distance."""
    def __init__(self,embedding_model,action,target_sentence,end_token,stopwords,discount=1,done=False):
        """
        embedding_model: object exposing `wmdistance(tokens_a, tokens_b)`
        action: the action this reward refers to (stored, not used here)
        target_sentence: the reference sentence (stored)
        end_token: end-of-sentence token (stored)
        stopwords: collection of words removed before computing distances
        discount: discount factor (stored)
        done: whether the episode is finished (stored)
        """
        self.embedding_model=embedding_model
        self.action=action
        self.target_sentence=target_sentence
        self.end_token=end_token
        self.stopwords=stopwords
        self.discount=discount
        # Previously hard-coded to False, silently ignoring the argument.
        self.done=done
        self.simulated_sentence=None

    def simulate_sentence(self,sentence,action,idx,prob,max_iteration=10):
        """
        Apply `action` once, then roll the sentence forward `max_iteration` times.

        sentence: sentence class (or zero-argument factory), or a ready instance;
            it must provide `update_sentence(action, idx, prob)` and
            `simulate(idx, prob)`
        idx, prob: candidate words and their probabilities, forwarded unchanged

        The last roll-out is stored in `self.simulated_sentence` and returned.
        """
        rollout = sentence() if callable(sentence) else sentence
        rollout.update_sentence(action,idx,prob)
        for _ in range(max_iteration):
            ## todo: env gives idx,prob
            self.simulated_sentence=rollout.simulate(idx,prob)
        return self.simulated_sentence

    def evaluate_reward(self):
        """Placeholder: not implemented yet, returns None."""
        ## todo: waiting for the classifiers
        pass

    def sentence_mover_distance(self,generated_sentence,base_line_sentence,target_sentence):
        """
        Compare a generated sentence and a baseline against a target sentence.

        generated_sentence: a string (lower-cased and whitespace-split like the
            other two) or an already tokenised sequence (used as given)
        base_line_sentence, target_sentence: strings

        Stopwords are removed from all three before calling
        `embedding_model.wmdistance`.

        Returns (baseline_distance, generated_distance).
        """
        target_sentence = target_sentence.lower().split()
        base_line_sentence= base_line_sentence.lower().split()
        print(generated_sentence,base_line_sentence, target_sentence)

        # A raw string would otherwise be filtered character by character.
        if isinstance(generated_sentence, str):
            generated_sentence = generated_sentence.lower().split()

        generated_sentence = [w for w in generated_sentence if w not in self.stopwords]
        target_sentence = [w for w in target_sentence if w not in self.stopwords]
        base_line_sentence=[w for w in base_line_sentence if w not in self.stopwords]
        # The embedding model is stored as `self.embedding_model`; `self.model` never existed.
        generated_distance= self.embedding_model.wmdistance(generated_sentence, target_sentence)
        baseline_distance= self.embedding_model.wmdistance(base_line_sentence, target_sentence)
        print("baseline_distance",baseline_distance,"generated distance",generated_distance)
        return baseline_distance,generated_distance

In [7]:
class Sentence():
    """A partially built sentence that can be edited one word at a time."""
    def __init__(self,start_token="<s>",end_token="</s>"):
        """
        Initialize the sentence with a start token
        """
        self.sentence=[start_token]
        self.possible_actions={"add_word":0,"replace_word":1,"remove_word":2}
        self.end_token=end_token
        self.start_token=start_token

    def get_sentence(self,sentence=None):
        """sentence getter (the `sentence` argument is unused and optional)"""
        return self.sentence

    def sample_string(self,idx,value):
        """sampling the next possible word given the likelihood

        idx: candidate words
        value: probability of each candidate (same length as idx)
        """
        assert(len(idx)==len(value))
        self.next_word=np.random.choice(idx, p=value)
        return self.next_word

    def update_sentence(self,action,idx,prob):
        """
        update the sentence according to the given list of possible actions {add_word,replace_word,remove_word}

        "delete_word" is accepted as an alias of "remove_word". The start token
        at position 0 is never replaced or removed; unknown actions are ignored.
        """
        if action=="add_word":
            self.sentence.append(self.sample_string(idx,prob))
        elif action=="replace_word":
            # Draw a fresh word instead of re-using a possibly unset `self.next_word`.
            if len(self.sentence)>1:
                self.sentence[-1]=self.sample_string(idx,prob)
        elif action in ("remove_word","delete_word"):
            if len(self.sentence)>1:
                self.sentence.pop()

    def isTermination(self):
        """
        True when the last word of the sentence is the end token
        """
        return bool(self.sentence) and bool(self.sentence[-1]==self.end_token)

    # snake_case alias: Env.step() calls `is_termination()`.
    is_termination = isTermination

    def simulate(self,idx=None,prob=None):
        """
        Apply one uniformly random action and return the sentence.

        idx= words with different probabilities
        prob= probablity of those different words

        When either argument is omitted, a small built-in demo distribution is
        used instead.
        """
        if idx is None or prob is None:
            idx=["i","like","to","eat"]
            prob=[0.1,0.2,0.3,0.4]
        action=random.choice(list(self.possible_actions.keys()))
        self.update_sentence(action,idx,prob)
        return self.sentence
    

In [12]:
# One stored experience. Field order is part of the positional interface.
# NOTE(review): train() in this notebook pushes
# (state, action, reward, next_state, done) positionally, so the values end up
# under the 'next_state' and 'reward' names swapped — confirm the intended order.
Transition = namedtuple(
    'Transition',
    ['state', 'action', 'next_state', 'reward', 'termination'],
)


class ReplayMemory:
    """Fixed-capacity buffer of `Transition` tuples."""

    def __init__(self, capacity):
        """
        Back the buffer with a bounded deque: once `capacity` items are stored,
        appending a new one drops the oldest.
        """
        self.memory = deque(maxlen=capacity)

    def push(self, *args):
        """Build a Transition from the positional arguments and store it."""
        transition = Transition(*args)
        self.memory.append(transition)

    def sample(self, batch_size):
        """
        Draw `batch_size` distinct stored transitions at random.
        Returns a list of Transition tuples.
        """
        return random.sample(self.memory, k=batch_size)

    def __len__(self):
        """
        Number of transitions currently stored.
        """
        return len(self.memory)

In [14]:
class DQN(nn.Module):
    """
    Three-layer fully connected Q-network.

    Must subclass nn.Module: the constructor relies on `self.parameters()` and
    `self.to(...)`, which only exist on modules.
    """
    def __init__(self,n_actions,input_dim,fc1_dims=3000,fc2_dims=4000,lr=5e-5):
        """
        n_actions: size of the output layer (one Q-value per action)
        input_dim: number of features per sample after flattening
        fc1_dims, fc2_dims: hidden layer widths
        lr: learning rate of the Adam optimizer stored on the module
        """
        super(DQN, self).__init__()
        self.n_actions=n_actions
        self.input_dim=input_dim
        self.lr=lr
        self.fc1=nn.Linear(input_dim,fc1_dims)
        self.fc2=nn.Linear(fc1_dims,fc2_dims)
        self.fc3=nn.Linear(fc2_dims,n_actions)
        self.flatten=nn.Flatten()
        self.optimizer= optim.Adam(self.parameters(),lr=lr)
        self.loss= nn.HuberLoss()
        self.device= T.device('cuda:0' if T.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self,state):
        """
        Return the Q-values for a batch of states.

        state: tensor whose leading dimension is the batch; all remaining
            dimensions are flattened and must total `input_dim` features.
        """
        x=self.flatten(state)
        # Feed the flattened tensor (not the raw state) into the first layer.
        x= F.relu(self.fc1(x))
        x= F.relu(self.fc2(x))
        actions = self.fc3(x)
        return actions

In [50]:
class DQNAgent:
    """
    Using Clipped double Q learning, so we have two deep Q networks and we are always updating with the min of the two Q values

    Transitions are read from the replay buffer positionally as
    (state, action, reward, next_state, termination), which is the order in
    which train() pushes them.
    """
    def __init__(self,env,DQN,replay_memory,learning_rate=3e-4,gamma=0.99, buffer_size=1000,n_actions=3,input_dim=10):
        """
        env: environment class / zero-argument factory, or a ready instance
        DQN: network class, constructed as DQN(n_actions=..., input_dim=...)
        replay_memory: buffer class (constructed with `buffer_size`) or a ready
            instance; None falls back to the notebook's ReplayMemory
        learning_rate: learning rate of both Adam optimizers
        gamma: discount factor
        buffer_size: replay buffer capacity
        n_actions, input_dim: network output / input sizes
        """
        self.env = env() if callable(env) else env
        self.learning_rate= learning_rate
        self.gamma= gamma
        self.buffer_size=buffer_size
        self.n_actions=n_actions
        self.input_dim=input_dim
        if callable(replay_memory):
            self.replay_buffer= replay_memory(buffer_size)
        elif replay_memory is not None:
            self.replay_buffer= replay_memory
        else:
            self.replay_buffer= ReplayMemory(buffer_size)
        self.device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
        ##
        self.model1=DQN(n_actions=n_actions,input_dim=input_dim).to(self.device)
        self.model2=DQN(n_actions=n_actions,input_dim=input_dim).to(self.device)

        # `learning_rate` used to be stored but never passed to the optimizers.
        self.optimizer1 = torch.optim.Adam(self.model1.parameters(), lr=learning_rate)
        self.optimizer2 = torch.optim.Adam(self.model2.parameters(), lr=learning_rate)

    def get_action(self,state,eps=0.2):
        """
        Epsilon-greedy action selection.

        With probability `eps` a random action is returned: from
        `env.action_space.sample()` when the environment has an action space,
        otherwise a uniform draw from range(n_actions). Otherwise the argmax of
        model1's Q-values is returned as an int.
        """
        # Uniform draw in [0, 1): a standard-normal draw would explore far more often than `eps`.
        if np.random.rand() < eps:
            action_space = getattr(self.env, "action_space", None)
            if action_space is not None:
                return action_space.sample()
            return random.randrange(self.n_actions)

        state = torch.as_tensor(state, dtype=torch.float32, device=self.device)
        if state.dim() == 1:
            # Single un-batched state: add the batch dimension the network expects.
            state = state.unsqueeze(0)
        with torch.no_grad():
            qvals = self.model1(state)
        return int(torch.argmax(qvals).item())

    def _stack_states(self, items):
        """Stack a sequence of per-transition states into one float batch on the agent device."""
        return torch.stack(
            [torch.as_tensor(item, dtype=torch.float32) for item in items]
        ).to(self.device)

    def compute_loss(self,batch_size):
        """
        Sample `batch_size` transitions and return the Huber losses (loss1, loss2)
        of the two networks against the shared clipped double-Q target.
        """
        ## reading from batch: transpose the list of transitions into per-field tuples
        batch = self.replay_buffer.sample(batch_size)
        states, actions, rewards, next_states, terminations = zip(*batch)
        states = self._stack_states(states)
        next_states = self._stack_states(next_states)
        # Column vectors so they line up with the (batch, 1) Q-value tensors below.
        actions = torch.as_tensor([int(a) for a in actions], dtype=torch.long, device=self.device).view(-1, 1)
        rewards = torch.as_tensor([float(r) for r in rewards], dtype=torch.float32, device=self.device).view(-1, 1)
        terminations = torch.as_tensor([float(t) for t in terminations], dtype=torch.float32, device=self.device).view(-1, 1)

        """
        compute the current-state-Q-values and the next-state-Q-values of both models,
        but use the minimum of the next-state-Q-values to compute the expected Q value
        """
        # Q(s, a) of the action that was actually taken.
        curr_Q1= self.model1(states).gather(1, actions)
        curr_Q2= self.model2(states).gather(1, actions)

        # The target is a constant with respect to both networks.
        with torch.no_grad():
            next_Q= torch.min(
                torch.max(self.model1(next_states),1)[0],
                torch.max(self.model2(next_states),1)[0]
            )
            next_Q= next_Q.view(next_Q.size(0),1)
            expected_Q= rewards+ (1-terminations)*self.gamma*next_Q

        loss1= F.huber_loss(curr_Q1, expected_Q)
        loss2= F.huber_loss(curr_Q2, expected_Q)
        return loss1,loss2

    def update(self,batch_size):
        """Run one optimisation step on each network from one sampled batch."""
        loss1,loss2=self.compute_loss(batch_size)
        self.optimizer1.zero_grad()
        loss1.backward()
        self.optimizer1.step()
        self.optimizer2.zero_grad()
        loss2.backward()
        self.optimizer2.step()
        

In [51]:
def train(env,agent,max_epochs,max_steps,batch_size):
    """
    Run the agent in the environment and return the total reward of every epoch.

    env: must implement reset() -> state and step(action) -> (next_state, reward, done)
    agent: must implement get_action(state), update(batch_size) and expose a
        `replay_buffer` with push(...) and __len__
    max_epochs: max training length
    max_steps: max sequence length of our sentence
    batch_size: the batch that we are taking for each training epoch
    """
    epoch_rewards=[]
    for epoch in range(max_epochs):
        # Every epoch starts from a fresh initial state; without this `state` was undefined.
        state= env.reset()
        epoch_reward=0
        for step in range(max_steps):
            action= agent.get_action(state)
            next_state,reward,done=env.step(action)
            agent.replay_buffer.push(state, action, reward, next_state, done)
            epoch_reward+=reward

            # Only learn once the buffer holds more than one batch worth of data.
            if len(agent.replay_buffer) > batch_size:
                agent.update(batch_size)
            if done:
                break
            state= next_state
        # Reached on termination or when the step budget is exhausted.
        epoch_rewards.append(epoch_reward)
        print("epoch" + str(epoch) + ": " + str(epoch_reward))
    return epoch_rewards

In [5]:
class Dataset_Reader:
    """Loads (document, summary) pairs from a Hugging Face `datasets` dataset."""
    def __init__(self, data_name="gigaword", test_size=0.1, data_set_size=20000,mode="training",seed=None):
        """
        data set reader

        data_name: dataset name passed to `datasets.load_dataset`
        test_size: fraction held out by `train_test_split` in training mode
        data_set_size: maximum number of examples kept
        mode: "training" reads from the train portion of a train/test split of
            the 'train' split; "testing" reads from the dataset's 'test' split.
            Any other value leaves both sentence lists empty.
        seed: optional seed for the train/test split, for reproducible subsets

        The dataset is expected to have 'document' and 'summary' columns.
        """
        # Defined up front so the getters never fail on a missing attribute.
        self.input_sentences=[]
        self.output_sentences=[]
        if mode=="training":
            # Only the `datasets` module is imported in this notebook, so call it qualified.
            dataset=datasets.load_dataset(data_name,split = 'train')
            train_data=dataset.train_test_split(test_size=test_size, seed=seed)['train']
            # Slice rows first, then pick columns, so only the kept rows are materialised.
            head=train_data[:data_set_size]
            self.input_sentences=head['document']
            self.output_sentences=head['summary']
        elif mode== "testing":
            test_ds = datasets.load_dataset(data_name, split='test')
            head=test_ds[:data_set_size]
            self.input_sentences=head['document']
            self.output_sentences=head['summary']

    def get_input(self):
        """Return the list of input documents."""
        return self.input_sentences

    def get_output(self):
        """Return the list of reference summaries."""
        return self.output_sentences