In [2]:
import os
import re
import random
import torch
import traceback
import numpy as np
from transformers import AutoTokenizer, AutoConfig, AutoModelForPreTraining

# Configurations
SEED = 2020  # seed handed to the random number generators
MODEL = "sagorsarker/emailgenerator"  # Hugging Face model identifier
MAXLEN = 768  # maximum total sequence length given to model.generate
# CUSTOM_MODEL_PATH = "./model/pytorch_model.bin"

# Special tokens registered on the tokenizer. Keep the order: it is the order
# in which the tokens are handed to tokenizer.add_special_tokens.
SPECIAL_TOKENS = dict(
    bos_token="<|BOS|>",
    eos_token="<|EOS|>",
    unk_token="<|UNK|>",
    pad_token="<|PAD|>",
    sep_token="<|SEP|>",
)

# Greeting words recognised at the start of a generated email
GREET_TOKENS = [
    "hi",
    "hey",
    "hello",
    "dear",
]
# Default opening and closing lines wrapped around a generated email
START_GREET = "hi [name]\n"
CONCLUSION_GREET = "\nsincerely\n[name]"

# Utils - seed initialization
def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also switches cuDNN into deterministic mode so that repeated runs with
    the same seed select the same kernels.

    Args:
        seed (int): random seed value
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes and may pick different algorithms from run to
    # run, which defeats the deterministic flag above, so it must be off.
    torch.backends.cudnn.benchmark = False

def get_tokenizer(special_tokens=None):
    """Load the tokenizer that belongs to MODEL.

    Args:
        special_tokens (dict, optional): special tokens to register on the
            tokenizer. Nothing is added when this is None or empty.

    Returns:
        obj: tokenizer object
    """
    tok = AutoTokenizer.from_pretrained(MODEL)
    if not special_tokens:
        return tok

    tok.add_special_tokens(special_tokens)
    return tok

def get_model(tokenizer, special_tokens=None, load_model_path=None):
    """Load the pretrained MODEL and place it on the best available device.

    Args:
        tokenizer (obj): tokenizer object; its length sets the embedding size
            when special tokens were added
        special_tokens (dict, optional): special token dictionary. When given,
            the token embeddings are resized to len(tokenizer). Defaults to None.
        load_model_path (str, optional): path of a state dict to load on top of
            the pretrained weights. Defaults to None.

    Returns:
        obj: model object, on CUDA when available and on CPU otherwise
    """
    config = AutoConfig.from_pretrained(
            MODEL,
            output_hidden_states=False
        )
    model = AutoModelForPreTraining.from_pretrained(MODEL, config=config)
    if special_tokens:
        # The embedding matrix must cover the tokens added to the tokenizer.
        model.resize_token_embeddings(len(tokenizer))

    if load_model_path:
        # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
        # map_location="cpu" lets a checkpoint saved on a GPU load on any host;
        # the model is moved to its final device below.
        state_dict = torch.load(load_model_path, map_location="cpu")
        model.load_state_dict(state_dict)

    # Fall back to CPU instead of crashing on hosts without a GPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    return model

def remove_special_token(text):
    """Strip every SPECIAL_TOKENS value out of a string.

    Args:
        text (str): text to be processed

    Returns:
        str: the text with all special token strings deleted
    """
    cleaned = text
    for marker in SPECIAL_TOKENS.values():
        cleaned = cleaned.replace(marker, '')
    return cleaned

def start_with_greet(text, greet):
    """Check if text starts with one of the greeting words.

    A greeting only counts when it is a whole word: it must be followed by a
    non-word character or by the end of the text. This keeps words that merely
    share a prefix with a greeting (e.g. "history" vs "hi") from matching.

    Args:
        text (str): text to be checked
        greet (list): list of greet tokens, tried in order

    Returns:
        (bool, str): (True, greet token) if text starts with greet,
                    (False, None) otherwise
    """
    for token in greet:
        # (?!\w) rejects a greeting that runs straight into more word characters.
        if re.match(re.escape(token) + r"(?!\w)", text):
            return True, token
    return False, None

def post_processing(text, prompt):
    """Post processing generated email

    Removes the prompt from the generated text, normalises the opening
    greeting to "<greet> [name]" and appends CONCLUSION_GREET. When the text
    does not start with a known greeting, START_GREET is prepended instead.

    Args:
        text (str): text to be processed
        prompt (str): prompt to be replaced

    Returns:
        str: processed text
    """
    # remove prompt first (every occurrence of it, not only a leading one)
    text = text.replace(prompt, '')
    text = text.strip()
    gexist, greet = start_with_greet(text, GREET_TOKENS)
    if gexist:
        # Raw f-string so \s, \w and \. reach the regex engine without relying
        # on Python's deprecated handling of invalid string escapes; the
        # greeting is escaped so it is always matched literally.
        # Pattern: greeting, optional whitespace, optional single word (taken
        # to be the addressee), optional trailing '.', ',' or whitespace.
        regex = rf"^{re.escape(greet)}\s?(\w+)?[\.,\s]?"
        start_greet = greet + " [name]\n"
        text = re.sub(regex, start_greet, text)
        text = text + CONCLUSION_GREET
    else:
        text = START_GREET + text + CONCLUSION_GREET
    return text


def join_keywords(keywords, randomize=True):
    """Build a comma separated string out of the keywords.

    Args:
        keywords (list): list of bullets keywords
        randomize (bool, optional): when True, keep only a random-length
            prefix of the list (possibly empty) and shuffle it before joining.
            Defaults to True.

    Returns:
        str: joined keywords
    """
    total = len(keywords)
    if not randomize:
        return ','.join(keywords)

    # Slicing yields a new list, so the shuffle leaves the caller's list alone.
    cutoff = random.choice(range(total + 1))
    picked = keywords[:cutoff]
    random.shuffle(picked)
    return ','.join(picked)


def generate_email(input_text: str, token_count: int,
                   temperature: float, n_gen: int,
                   keywords=None) -> dict:
    """Generate email from input text, keywords and trained model

    The tokenizer and the model are loaded on every call. Any exception is
    printed with its traceback and reported through the returned status
    instead of being raised.

    Args:
        input_text (str): input text
        token_count (int): number of tokens to be generated on top of the
            prompt; the total length is capped at MAXLEN. A non-positive or
            non-integer value falls back to MAXLEN.
        temperature (float): temperature for sampling
        n_gen (int): number of generated emails
        keywords (list, optional): list of keywords. Defaults to None.

    Returns:
        dict: generated emails
        {
            "status": "success",
            "ai_results": []
        }
        On failure "status" is "error" and "ai_results" is an empty list.
    """
    # Both keys are always present so callers can iterate 'ai_results'
    # even when generation failed.
    output = {'status': 'error', 'ai_results': []}
    try:
        tokenizer = get_tokenizer(special_tokens=SPECIAL_TOKENS)
        model = get_model(tokenizer, special_tokens=SPECIAL_TOKENS)

        # Prompt layout: <BOS> subject <SEP> keywords <SEP>; the keyword slot
        # is simply empty when no keywords are given.
        kw = join_keywords(keywords, randomize=False) if keywords else ''
        prompt = SPECIAL_TOKENS['bos_token'] + input_text + \
                 SPECIAL_TOKENS['sep_token'] + kw + SPECIAL_TOKENS['sep_token']

        generated = tokenizer.encode(prompt, return_tensors="pt")
        # Follow whatever device the model was placed on instead of assuming CUDA.
        device = next(model.parameters()).device
        generated = generated.to(device)

        # Honour token_count: max_length counts prompt tokens as well.
        prompt_len = generated.shape[-1]
        if isinstance(token_count, int) and token_count > 0:
            max_length = min(MAXLEN, prompt_len + token_count)
        else:
            max_length = MAXLEN
        # Keep min_length satisfiable when a small token_count is requested.
        min_length = min(50, max_length)

        model.eval()
        sample_outputs = model.generate(generated,
                                        do_sample=True,
                                        min_length=min_length,
                                        max_length=max_length,
                                        top_k=30,
                                        top_p=0.7,
                                        temperature=temperature,
                                        repetition_penalty=2.0,
                                        num_return_sequences=n_gen
                                        )

        # The decoded text has special tokens skipped, so strip them from the
        # prompt too before it is removed from each sample.
        plain_prompt = remove_special_token(prompt)
        results = []
        for sample_output in sample_outputs:
            text = tokenizer.decode(sample_output, skip_special_tokens=True)
            text = post_processing(text, plain_prompt)
            results.append({'generated_text': text, "text_length": len(text)})
        results.reverse()

        # Publish only complete results, so an error never leaves a partial list.
        output['ai_results'] = results
        output['status'] = 'success'
    except Exception:
        traceback.print_exc()
        output['status'] = 'error'

    return output


def test():
    """Smoke test: generate emails for a subject plus keywords and print them."""
    data = {
        "prompt": "love to visit again",  # subject of the email
        "token_count": 128,
        "temperature": 0.6,
        "n_gen": 4,
        "keywords": ['experience', 'joyfull', 'sea']
    }

    response = generate_email(data['prompt'], data['token_count'], data['temperature'], data['n_gen'], data['keywords'])
    # generate_email reports failures through 'status' and may omit
    # 'ai_results', so do not index the key blindly.
    if response.get('status') != 'success':
        print("Email generation failed.")
        return
    for result_item in response.get('ai_results', []):
        print("Subject : GENERATED EMAIL : \n")
        print(result_item['generated_text'])
        print("\n---\n")


if __name__ == '__main__':
    test()


Subject : GENERATED EMAIL : 

hi [name]
i am so happy that you asked me for a gift. i received the package from my secret ssn last week and it is wonderful! thankyou very much!! there will be no more gifts this year without your lovely note!!! as we get closer together in life please write back with lots of kisses...it means alot when two people are truly into each other.. love mark
sincerely
[name]

---

Subject : GENERATED EMAIL : 

hi [name]
i am so glad that you asked me back. i enjoyed my time with yao last summer and it was fun spending the entire weekend at your house - we went out on saturday night after work (you were there from houston) for a little drink in honor of our engagement day! hope all is well now as far an experience going forward...and if things are up this week let's try not get discouraged!! love mark
sincerely
[name]

---

Subject : GENERATED EMAIL : 

hi [name]
: i'm back in the office. my parents are coming for a few days next week and wanted me (and you) as 

In [3]:
def test():
    """Smoke test: generate emails for a subject without keywords and print them.

    Note: this redefines the `test` function declared in the previous cell.
    """
    data = {
        "prompt": "Requesting for sick leave",  # subject of the email
        "token_count": 128,
        "temperature": 0.6,
        "n_gen": 4,
    }

    response = generate_email(data['prompt'], data['token_count'], data['temperature'], data['n_gen'])
    # generate_email reports failures through 'status' and may omit
    # 'ai_results', so do not index the key blindly.
    if response.get('status') != 'success':
        print("Email generation failed.")
        return
    for result_item in response.get('ai_results', []):
        print("Subject : GENERATED EMAIL : \n")
        print(result_item['generated_text'])
        print("\n---\n")


if __name__ == '__main__':
    test()

Subject : GENERATED EMAIL : 

hi [name]
i have been asked to request that i take 12 weeks of vacation from the 1st - 31 december 2001. if you need any further information please call me at 713-853 3488 or e:mail, jason wolfe thankyou
sincerely
[name]

---

Subject : GENERATED EMAIL : 

hi [name]
i have a request to take off from work on friday, december 18th. i will be out of the office that day and need your assistance in getting me covered by my insurance while you are gone! please call or email with any questions at x30596 if this is something we can help answer together so it may get processed sooner!! thankyou very much!!!
sincerely
[name]

---

Subject : GENERATED EMAIL : 

hi [name]
i am requesting a 10 day of vacation on friday, october 22nd. i have been ill since last wednesday and would like to take this time off as well so that my family can enjoy the holidays in peace (and hopefully without all those nasty doctors). please let me know if you are interested or not! thanks - 

In [None]:
from google.colab import drive
drive.mount('/content/drive')

save_dir = '/content/drive/My Drive/Email_Generation'

# No `model` exists at notebook scope (generate_email keeps its model local),
# so build the tokenizer and model here before saving.
tokenizer = get_tokenizer(SPECIAL_TOKENS)
model = get_model(tokenizer, special_tokens=SPECIAL_TOKENS)

# Save the model to the specified directory. Hugging Face models are written
# with save_pretrained; the tokenizer is stored alongside because the model's
# embeddings were resized to match its added special tokens.
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)