In [1]:
%pip install transformers datasets torch scikit-learn accelerate spacy evaluate nltk rouge_score

Note: you may need to restart the kernel to use updated packages.


In [2]:
from datasets import load_dataset

# Load the dataset
dataset = load_dataset('LightTai/personalized-email')

In [3]:
print(dataset)
print(dataset['train'][:5])
df = dataset['train'].to_pandas()
print(df.head())
print(df.shape)

DatasetDict({
    train: Dataset({
        features: ['product', 'gender', 'profession', 'hobby', 'email'],
        num_rows: 30
    })
})
{'product': ['piano lessons', 'guitar lessons', 'vacation plans', 'vacation plans', 'vacation plans'], 'gender': ['male', 'male', 'male', 'female', 'female'], 'profession': ['college students', 'college students', 'college students', 'college students', 'company employees'], 'hobby': ['like to play piano', 'like to play piano', 'like swimming', 'like to look at the scenery', 'like to look at the scenery'], 'email': ["Subject: Elevate Your Piano Skills - Exclusive Offer Inside!\n\nHey [Name],\n\nLooking to unlock your piano potential? As a fellow male college student and a passionate piano player, I understand your love for music. That's why I'm thrilled to offer you exclusive piano lessons designed to fit your busy student schedule.\n\nMaster your favorite melodies, refine techniques, and gain a deeper understanding of music theory-all while enjoyin

In [4]:
from transformers import AutoTokenizer
from sklearn.model_selection import train_test_split
from datasets import Dataset

model_checkpoint = "postbot/distilgpt2-emailgen-V2"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)

def tokenize_function(examples):
    features = [f"{prod} {gen} {prof} {hob}" 
                for prod, gen, prof, hob in zip(examples["product"], 
                                                examples["gender"], 
                                                examples["profession"], 
                                                examples["hobby"])]
    tokenized_inputs = tokenizer(features, truncation=True, padding="max_length", max_length=512)

    # Tokenize the email column which is our target
    tokenized_targets = tokenizer(examples["email"], truncation=True, padding="max_length", max_length=512)

    tokenized_inputs['labels'] = tokenized_targets['input_ids']  # Assign target token ids as labels for training
    return tokenized_inputs


# Assuming 'df' is your DataFrame and it's already loaded
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Convert the DataFrames back to Hugging Face dataset format if necessary
train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

# Tokenize the datasets
tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True, num_proc=4, remove_columns=["product", "gender", "profession", "hobby", "email"])
tokenized_test_dataset = test_dataset.map(tokenize_function, batched=True, num_proc=4, remove_columns=["product", "gender", "profession", "hobby", "email"])
# Assuming 'dataset' is a Hugging Face 'datasets' object
# tokenized_datasets = dataset.map(tokenize_function, batched=True, num_proc=4, remove_columns=["product", "gender", "profession", "hobby", "email"])


Map (num_proc=4):   0%|          | 0/24 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/6 [00:00<?, ? examples/s]

In [5]:
print(tokenized_train_dataset)

Dataset({
    features: ['__index_level_0__', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 24
})


In [6]:
tokenized_train_dataset[1]

{'__index_level_0__': 24,
 'input_ids': [6966,
  27757,
  4257,
  4409,
  3504,
  12,
  1886,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  50256,
  5025

In [7]:
import evaluate
import torch
import numpy as np
rouge = evaluate.load('rouge')
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    # Convert logits from numpy arrays to PyTorch tensors
    logits_tensor = torch.tensor(logits)
    
    # Convert logits to predicted token IDs using argmax
    predictions_ids = torch.argmax(logits_tensor, dim=-1)

    # Decode predictions and labels
    predictions = tokenizer.batch_decode(predictions_ids, skip_special_tokens=True)
    # Assuming labels are already decoded; if not, decode them similarly
    references = tokenizer.batch_decode(labels, skip_special_tokens=True)
    # Compute ROUGE scores
    results = rouge.compute(predictions=predictions, references=references)

    return results

In [8]:

# test_predictions = tokenized_train_dataset[1]["input_ids"]
# test_references = tokenized_train_dataset[1]["labels"]
# test_eval_pred = (test_predictions, test_references)
# rouge_results = compute_metrics(test_eval_pred)
# print(rouge_results)

In [9]:
from transformers import AutoModelForCausalLM, TrainingArguments, Trainer

model = AutoModelForCausalLM.from_pretrained(model_checkpoint)

training_args = TrainingArguments(
    output_dir="./model_output",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=10,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_test_dataset,
    tokenizer=tokenizer,
		compute_metrics=compute_metrics
)

trainer.train()

# Save the model and tokenizer after training is complete
model.save_pretrained('./saved_model')
tokenizer.save_pretrained('./saved_model')


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


  0%|          | 0/18 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 6.660834789276123, 'eval_rouge1': 0.021925645041699898, 'eval_rouge2': 0.0, 'eval_rougeL': 0.021110100532562275, 'eval_rougeLsum': 0.02119088841883852, 'eval_runtime': 57.5775, 'eval_samples_per_second': 0.104, 'eval_steps_per_second': 0.035, 'epoch': 1.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 4.186565399169922, 'eval_rouge1': 0.028891390452366064, 'eval_rouge2': 0.0027548209366391185, 'eval_rougeL': 0.028953425191820155, 'eval_rougeLsum': 0.02588916459884202, 'eval_runtime': 48.7059, 'eval_samples_per_second': 0.123, 'eval_steps_per_second': 0.041, 'epoch': 2.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 4.7408928871154785, 'eval_rouge1': 0.01988552732612043, 'eval_rouge2': 0.0, 'eval_rougeL': 0.01814900511746635, 'eval_rougeLsum': 0.019059697109619012, 'eval_runtime': 74.3375, 'eval_samples_per_second': 0.081, 'eval_steps_per_second': 0.027, 'epoch': 3.0}
{'train_runtime': 2164.9761, 'train_samples_per_second': 0.033, 'train_steps_per_second': 0.008, 'train_loss': 5.988736046685113, 'epoch': 3.0}


('./saved_model/tokenizer_config.json',
 './saved_model/special_tokens_map.json',
 './saved_model/vocab.json',
 './saved_model/merges.txt',
 './saved_model/added_tokens.json',
 './saved_model/tokenizer.json')

In [10]:
import math
eval_results = trainer.evaluate()
print(f"Perplexity: {math.exp(eval_results['eval_loss']):.2f}")
print(eval_results)

  0%|          | 0/2 [00:00<?, ?it/s]

Perplexity: 85.27
{'eval_loss': 4.4457783699035645, 'eval_rouge1': 0.0393718765311122, 'eval_rouge2': 0.0005324813631522896, 'eval_rougeL': 0.03623211660950514, 'eval_rougeLsum': 0.03342282173771027, 'eval_runtime': 51.5, 'eval_samples_per_second': 0.117, 'eval_steps_per_second': 0.039, 'epoch': 3.0}


In [11]:
# model.push_to_hub("24NLPGroupO/EmailGeneration")

In [4]:
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import re

# Load saved model and tokenizer
model_checkpoint = "24NLPGroupO/EmailGeneration"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, truncation=True)
model = AutoModelForCausalLM.from_pretrained(model_checkpoint)

# Set up the generation pipeline
generator = pipeline('text-generation', model=model, tokenizer=tokenizer)

def clean_generated_text(text):
    # Basic cleaning
    text = re.sub(r'^(Re:|Fwd:)', '', text)  # Remove reply and forward marks
    text = re.sub(r'Best regards,.*$', '', text, flags=re.DOTALL)  # Remove everything after signature
    text = re.sub(r'PHONE.*$', '', text, flags=re.DOTALL)  # Remove everything after phone numbers
    text = re.sub(r'Email:.*$', '', text, flags=re.DOTALL)  # Remove everything after email addresses
    text = re.sub(r'Cc:.*$', '', text, flags=re.DOTALL)  # Remove CC list
    text = re.sub(r'\* Attachments:.*', '', text, flags=re.S)  # Remove 'Attachments:' and everything following it
    text = re.sub(r'©️ .*$', '', text, flags=re.DOTALL)  # Remove copyright and ownership statements
    text = re.sub(r'URL If this message is not displaying properly, click here.*$', '', text, flags=re.DOTALL)  # Remove error display message and links
    text = re.sub(r'\d{5,}', 'NUMBER', text)  # Replace long sequences of numbers, likely phone numbers or ZIP codes
    return text.strip()

def generate_email(product, gender, profession, hobby):
    input_text = f"{product} {gender} {profession} {hobby}"
    result = generator(
        input_text,                 # Initial text to prompt the model. Sets the context or topic for text generation.
        max_length=256,             # Maximum length of the generated text in tokens, limiting the output size.
        do_sample=True,             # Enables stochastic sampling; the model can generate diverse outputs at each step.
        top_k=20,                   # Limits the vocabulary considered at each step to the top-k most likely next words.
        top_p=0.6,                  # Uses nucleus sampling: Narrows down to the smallest set of words totaling 60% of the likelihood.
        temperature=0.4,            # Scales logits before sampling to reduce randomness and produce more deterministic output.
        repetition_penalty=1.5,     # Penalizes words that were already mentioned, reducing repetition in the text.
        # truncation=True,            # Truncates the output to the maximum length if it exceeds it.
        num_return_sequences=3      # Generates three different sequences to choose from, enhancing output variety.
        ) 
    # Select the best output from the generated sequences
    best_text = sorted([clean_generated_text(r['generated_text']) for r in result], key=len)[-1]
    return best_text

# Example parameters and generation
product = "Laptop"
gender = "Male"
profession = "Software Engineer"
hobby = "Gaming"

email_text = generate_email(product, gender, profession, hobby)
print("Generated Email:")
print(email_text)
print("--------------------")


TypeError: __init__() got an unexpected keyword argument 'use_fast'

Generated Email:
Laptop Male Software Engineer Gaming Technology - COMPANY
Experience the power of mobile devices and make sure you are comfortable with your smartphone.
Mobile device Management is a key component to achieving success in our industry, helping us achieve breakthroughs for business leaders by leveraging the best-in-class experience available. We have been working closely together on numerous projects that will benefit from this evolution into an integrated marketing tool used throughout all aspects including media creation & distribution; digital content development/retail fulfillment (FOD), social networking forums such as Twitter, Facebook etc.; strategic communications solutions like Magento CMS or even SAPPRFT products.
We would be honored if you could join us at one another's pace! Please let me know what time works best for everyone else who needs it most especially during their busy schedules. Thank you so much for considering this request. I look forward to hearing from you soon. Best regards, Karen Kondo | Senior Vice President Corporate Social Responsibility | COMPANY Corporation of America NUMBER Madison Avenue, Room NUMBERSNUMBERCulver City, CA NUMBER( PHONE| EMAIL)
Attachments:
x (PHONE Bytes)
ATTNUMBER.htm ("");<EMAIL
X (PHONE By
--------------------

Generated Email:
Laptop Male Software Engineer Gaming Technology - COMPANY Corporation of America (NYSE: SNE)
Experience the power and excitement that drives gaming, social games & entertainment. Experience your personalized experiences with a variety of mobile devices from anywhere in our network including tablets to smartphones, game consoles, plasma TVs, Blu-ray players etc.. The experience is immersive so you can experience all aspects at one time while exploring other technologies. You will also be able to access exclusive content such as Pandora's World which allows users to listen directly on their phones or watch videos instantly on any device. We are confident this will help us achieve more than NUMBER million people worldwide who have already purchased product via apps like Facebook Messenger, Twitter API for Android and Windows Phone/iPad Air Connectivity. For more information please visit URL
URL logo
Questions? Call Igor Gruppin PHONE Fax +PHONE EMAIL
To contact Member Services | View Our Privacy Statement | Unsubscribe
©️NUMBER Microsoft Corporation. All rights reserved. | Acceptable Use Policy | Contact Customer Service | NUMBER-HOUR CONTROL CENTER| USA
This email was sent by: Microsoft Corporation
NUMBER Marta St., Suite NUMBERSan Diego del Norte, CA NUM

from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, set_seed
import re
import spacy

# Load Spacy's English tokenizer
nlp = spacy.load('en_core_web_sm')

# Set a random seed for reproducible results
set_seed(42)

# Load the model and tokenizer
model_checkpoint = "./saved_model"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True, padding_side='left')
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
model = AutoModelForCausalLM.from_pretrained(model_checkpoint)

# Set up the pipeline using the freshly trained model and tokenizer
generator = pipeline('text-generation', model=model, tokenizer=tokenizer)

def advanced_text_cleaning(text):
    # Use Spacy to parse sentences and filter out overly short fragments and placeholders
    doc = nlp(text)
    cleaned_text = []
    for sent in doc.sents:
        # Filtering out too short sentences and placeholders
        if len(sent.text) > 20 and not re.search(r'NUMB[RL]|NUMBE[RSZ]', sent.text):
            cleaned_text.append(sent.text.replace('\n', ' ').strip())
    return ' '.join(cleaned_text)

def clean_generated_text(text):
    return advanced_text_cleaning(text.strip())

def generate_email(product, gender, profession, hobby, use_pipeline=False):
    # 构建输入文本
    input_text = f"As a {gender.lower()} {profession} interested in {hobby}, I am looking for a {product} that suits my needs."

    if use_pipeline:
        result = generator(input_text, max_length=1024, do_sample=True, top_k=50, temperature=0.9, repetition_penalty=1.1, truncation=True)
        generated_text = result[0]['generated_text']
    else:
        inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True, padding="max_length")
        outputs = model.generate(
            **inputs,
            max_length=1024,  # Increase maximum length
            temperature=0.9,  # Higher diversity
            top_k=50,         # Broader vocabulary choice
            top_p=0.95,       # Nucleus sampling
            no_repeat_ngram_size=2,  # Allow minimal repetition
            repetition_penalty=1.1   # Slightly less repetition penalty
        )
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    return clean_generated_text(generated_text)


# 示例参数
product = "Laptop"
gender = "Male"
profession = "Software Engineer"
hobby = "Gaming"

# 生成电子邮件
email_text = generate_email(product, gender, profession, hobby, use_pipeline=True)
print("Generated Email:")
print(email_text)
print("--------------------")



In [None]:

# Basic cleanup: remove redundant phrases and unwanted tokens
    # text = re.sub(r'\b(URL)\b\s*', '', text)  # Remove 'URL' placeholders
    # text = re.sub(r'\b(\d{1,2}[:]\d{2}\s*(AM|PM))\b', '', text)  # Remove standalone time
    # text = re.sub(r'\[\d+\]', '', text)  # Remove citation-like numbers
    # text = re.sub(r"NUMBER|EMAIL|PHONE|FAX", "", text)
    # text = re.sub(r"\[.*?\]|-\s*-\s*|<.*?>", "", text)  # Clean up brackets and dashed lines
    # text = re.sub(r'\s{2,}', ' ', text)  # Replace multiple spaces with a single space
    # text = re.sub(r'\* Attachments:.*', '', text, flags=re.S)  # 删除'Attachments:'及其后的所有内容
    # text = re.sub(r'\bAttachments:\s*(\( Bytes\)\s*)+(x\s*\( Bytes\)\s*)*', '', text, flags=re.S)
    # text = re.sub(r'\|\s.*', '', text)  # Remove suffixes after names # This regex removes everything after a '|' character

# 使用pipeline生成文本
custom_text1_with_pipeline = generate_email("Brief project update email for a software development team", use_pipeline=True)
custom_text2_with_pipeline = generate_email("Detailed financial report request by a senior analyst", use_pipeline=True)
custom_text3_with_pipeline = generate_email("Creative brief for a new marketing campaign targeting young adults", use_pipeline=True)

# 不使用pipeline生成文本
custom_text1_without_pipeline = generate_email("Brief project update email for a software development team", use_pipeline=False)
custom_text2_without_pipeline = generate_email("Detailed financial report request by a senior analyst", use_pipeline=False)
custom_text3_without_pipeline = generate_email("Creative brief for a new marketing campaign targeting young adults", use_pipeline=False)

# 打印使用pipeline生成的文本
print("Software Development Team Email (with pipeline):")
print(custom_text1_with_pipeline)
print("--------------------")
print("Financial Report Request Email (with pipeline):")
print(custom_text2_with_pipeline)
print("--------------------")
print("Marketing Campaign Creative Brief (with pipeline):")
print(custom_text3_with_pipeline)
print("--------------------")

# 打印不使用pipeline生成的文本
print("Software Development Team Email (without pipeline):")
print(custom_text1_without_pipeline)
print("--------------------")
print("Financial Report Request Email (without pipeline):")
print(custom_text2_without_pipeline)
print("--------------------")
print("Marketing Campaign Creative Brief (without pipeline):")
print(custom_text3_without_pipeline)
print("--------------------")

# Result Verison 3

Software Development Team Email 
Brief project update email for a software development team meeting this weekend was provided below in advance of Tuesday's meeting. We understand that the schedule for this meeting has changed. I wanted to get everyone an update on our progress. Please provide your feedback by this Thursday, May th after we have been completed and finalized. I wish you all the best in the best possible future. Best regards, Maxene A. Verma
--------------------

# Result Verison 2

Software Development Team Email 
Brief project update email for a software development team meeting this weekend was provided below in advance of Tuesday's meeting. We understand that the schedule for this meeting has changed. I wanted to get everyone an update on our progress. Please provide your feedback by this Thursday, May th after we have been completed and finalized. I wish you all the best in the best possible future. Best regards, Maxene A. Verma | Marketing & Research Paralegal | COMPANY Pictures Releasing International, West Washington Blvd., Thalberg Bldg. | Culver City, CA ( | |
--------------------

# Result Verison 1

Software Development Team Email 
Brief project update email for a software development team meeting this weekend was provided below in advance of Tuesday's meeting. We understand that the schedule for this meeting has changed. I wanted to get everyone an update on our progress. Please provide your feedback by this Thursday, May th after we have been completed and finalized. I wish you all the best in the best possible future. Best regards, Maxene A. Verma | Marketing & Research Paralegal | COMPANY Pictures Releasing International, West Washington Blvd., Thalberg Bldg. | Culver City, CA ( | | * Attachments: ( Bytes) x ( Bytes) x ( Bytes) x ( Bytes) ( Bytes) x ( Bytes) ( Bytes) x ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) x ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) m ( Bytes) x ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) Each individual must participate in the "Join Together" component of the Round Up Process. The Rules will be reviewed with the Rules by the Committee in partnership with the Chamber. Please contact me if you have any questions about these materials. Regards, Maxene A. Verma | Marketing & Research Paralegal | COMPANY Pictures Releasing International, West Washington Blvd., Thalberg Bldg. | Culver City, CA ( | | * Attachments: x ( Bytes) x ( Bytes) x ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) m ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) ( Bytes) (
--------------------
Financial Report Request Email (with pipeline):
Detailed financial report request by a senior analyst for a Northwest bank, the company is currently conducting an online survey of its employees and business operations. Our goal is to identify, among other things, companies that have "met Wall Street's expectations" or "the market needs that are needed," the information being gathered from the survey should be deemed complete. An objective of the survey was that we would send an electronic resume, either verbal or by email in the next few days. To this end, the deadline for the survey has been extended, starting in March, to December February and ending in April th February. This will take place as follows: EES will conduct the survey using the online questionnaire to date. It will focus on identifying the most market-sceptical "attributable" employees. The results will be disclosed on Monday, with the second survey being conducted early next week, and our input will be considered during the second survey in the coming weeks. Regards, COMPANY Williams Associate Director, Public Relations If you are contacted regarding the above survey, please e-mail If you would like to be removed from future survey distribution, please e-mail If you would like to be removed from future surveys, please Please be advised that your direct reports will not be given again until completed. Thank you, Ingrid O'Donnell, Senior Manager, Commercial Support EES Business Development Contact Information You can also send an electronic resume, if you would like to have them taken a look on their qualifications for positions. Sincerely, The Survey Company DataSite Team - Customer Relationship Studies Team - POLITICAL PARTYic National Committee Montgomery Street, Suite LOCATION, Texas USA Telephone: (U) - Fax: (M) - Site Visit Map:<(toll-free):<(filing location: Hyatt Regency, Orange County, CA) Thanks for your participation and participation on the survey! Sincerely, The Survey Company DataSite Team - Customer Relationship Studies Team - POLITICAL PARTYic National Committee Montgomery Street, Suite LOCATION, Texas USA Telephone: (U) - Fax: (M) - Site Visit Map:<(toll-free):<(filing location: Hyatt Regency, Orange County, CA) All e-mails come directly from The Survey Company. In addition to being members of the Company's Executive Team who are not on the Company's Executive Team please consider the environment before printing this e-mail. This message was sent to: If you no longer wish to receive these emails, click on the following link: Unsubscribe. <a style="text-align:center!important; width="px!important; }.ExternalClass p:link, span.intent, div.us_SignInRow {margin: px ;} .ExternalClass font { font-family:"Calibri", "Verdana", "Helvetica", line-height:px; padding: px!important;} @media only screen and (max-width: px) { *.layout-, *.footer-layout { width:px!important; } *.layout-, *.center-align, *.show-icons,*.one-column, *.mediumgrid-header, *.outer-layout, *.media-layout, *.grid-layout, *.header-layout, *.featureagentads-layout, *.ads-layout, *.housead-layout, *.app, *.subscribe, *.unlimited-access, *.footer, *.access, *.app-class, *.app-span, *.copyright-block, *.view-layout { width:px!important; } *.media-layout { padding: px!important; margin:!important; } *.view-layout { margin: px !important; } *.grid-layout { margin:px px px!important; } *.header-layout
--------------------
Marketing Campaign Creative Brief (with pipeline):
Creative brief for a new marketing campaign targeting young adults and their fans: It's not clear what the plan is for this animated movie from the beginning. ) How to Give an EXTRA EXPENSION of the trailer with the promise of a new brand, it's going to work very well for us - especially as in the new trailer for "Grown Ups ," but we need to have a feeling about how much fun and powerful "Grown Ups " has been. We will be discussing this with everyone this weekend. ) The new trailer for Interview # (which is still in production) promises a lot more emotion and action for the film. Cc: Vollack, Lia; Marshall, Bob; Guerin, Jean This wasn't made in-the-works by Mike. They'd also like to give the current joke - an old joke, without any real conflict, to the movie, without the promise that it's a sequel or something. The script (and script) incorporates language I understand from the conversations with James and Phil last week: Regarding Interview # we just wanted you to know that we are committed to working on all the points on the horizontal and vertical artwork to show off COMPANY's upcoming campaign trailer. We want to address a number of concerns namely, if the'story' is used incorrectly, it's no good to use references from the previous trailers. If this isn't clear, then we may want to adjust and clarify what we mean by using some additional language to the story/concept. As we talked on Friday, I'm sending you the following email (from JLL) regarding several instances where we were able to bypass out our own campaign-include one that's more generic. But again, we will need to communicate with JLL about these issues/ concerns so that they don't pop into places where we are literally communicating. If this isn't clear then we may want to go ahead with the other campaign to get us next steps. Hereto be clear: when we talk about the possibility of splitting up the various businesses (Digital, Media Networks, Home Entertainment etc.), we understand the scope of the partnership, but are concerned about different businesses that can serve multiple brands in one fashion or another. We don't want to appear too stupid or wrong, however when we talk about a partnership that could deliver big gains in the long run. Again, this is confidential, and we will need to ask JLL to help provide the answer asap. On May , , at : PM, "Belgrad, Doug" < wrote: Hey guys, Just got off the phone with JLL about the below-referenced positioning for the Playstation in the wake of today's announcement. We're excited to be collaborating with the Alliance and Disney on Uncharted and Bloodshot, both coming out before The Avengers. JLL and the Alliance have made a tremendous impact in terms of funding for the three games (the "Smurfs" series, Venom and Sinister Six). There are some exciting new features that these are potential tentpoles in the future: Battle of the Five - A look at our two game plans by Shane Black's team as well as looking at how Activision can benefit from this in developing four separate game based games and whether he is leveraging the technology here. Booming In Productions - We want to do an ambitious and successful pitch to support the vision of our motion picture assets. We feel that this approach will deliver big returns for us. The biggest difference from our pre-COMPANY Pictures strategy was with the release of Rise of Electro, which cost more than CURRENCY million at D (a jump from Budget). We are currently working on many more franchises, including Bond franchise, Dr. Seuss, Batman and more. We're also expanding the Spider-Man universe and expanding the world of DC with a darker tone and feel for Spider-Man. Additionally, we would like to get a sense of what COMPANY can and can't hide and what we're working on to achieve in those three games. Hoping you all have a good weekend. Doug On May , , at : PM, "Gumpert, Andrew" < wrote: Believe me, I am looking forward to speaking with you soon. Look forward to talking. mike....... Andrew Gumpert President, Worldwide Business Affairs & Operations COMPANY Telephone: Facsimile: E-Mail: PRIVACY NOTICE: ument by mistake, please e-mail the sender securely dispose of it.
--------------------
Software Development Team Email (without pipeline):
Brief project update email for a software development team, please contact me. I look forward to hearing from you. If you have any questions, you can reach me at . I will be in meetings this afternoon. Thanks so much! Best regards, Shelly SHALINIE BERMAN | CONTACT | SOLUTIONS INC | P: | F: () Email: Please see attached draft correspondence from the SEC on file sharing. We have also discussed the possibility of sending around an email that shows marked "Full Schedule". We are scheduled to meet as soon as possible, but would love to have you guys review. My team has also gotten some comments from Kaz Hirai and want to make sure this is covered as well. Thank you all very much for your cooperation. Best, - Sharon > > > _____ This e-mail and any files transmitted with it are intended solely for the use of the individual or entity to whom they are addressed. This e mai has been sent to the attention of only the named addressee(s). If the reader of this e mail is not the intended recipient or the employee or agent responsible for delivering the message to thi you are hereby notified that any use dissemination, forwarding, printing or copying of th e com e of any sort is strictly prohibited. Any views or opinions expressed are solely those of sakethe only author and do not necessarily represent those or should necessarily be taken as an excuse. e ___________________________________________________________ SUBSCRIBE TO OUR AWARD WINNING NEWSLETTERS TRAVEL TIPS, THE WINE LIST AND THE DAILY FOOD & Wine THE DISH NUM
--------------------
Financial Report Request Email (without pipeline):
Detailed financial report request by a senior analyst that includes related companies. The analyst estimates that the analyst's annual risk forecast will be CURRENCY billion by July S. The analyst believes this year will reach C- million. As such, it was profitable to report that number from NUMABS and other non-financial companies while also operating as margin and basis. He estimated that COMPANY may see a net loss of CMM, which would hit COMPAE, or C/b. If all alternatives are not feasible, then he expects his team to continue through NUMP and report to each of them this November. After that, management thinks the stock should close to its October start date. And COMPENSENSON is preparing additional capital expenditure in the coming months, according to Tomn Bersch. Berszch was also informed last year that at the end of FYNUMBIRING THE SECURITY RANGE, NUMRBIO would require more capital, because of this, had COMPensenRON's business practices reported on their website last September. That was more accurate than the sum sheet in October that came out next week. His team made clear they did not want a "crashed" release as a result. At the same time, Berszlch also admitted that some people have assumed the public can view COMPenron's assets in a certain light. Below is his thoughts. According to an internal memo sent to me by the SEC this afternoon, no one from COMP's group has responded via email to us with the numbers that were not included in our analysis. I think what I will assume, however, is that we will put on solid media tomorrow. Hopefully that nothing leaks before Friday. To my credit, you probably already know that there are several possible investors this fiscal year - including several senior COMPanyer named as potential investment bankers. All of the above is considered a necessary investment. However, in order to gain a timely and accurate portrayal of our overall portfolio, we are going to rely heavily on media coverage throughout the process. When we do look at what media is covering, I believe that this proceeds will likely flow as we work closely with both sides to understand what the opportunities are and what types of questions we're looking for;
--------------------
Marketing Campaign Creative Brief (without pipeline):
Creative brief for a new marketing campaign targeting young adults + -Minute TV Spot for First Time ABC-Disney's 'The Walking Dead' (Video) o It looks like ABC has the right to promote the show on a big screen. Here are a few highlights: The Young Adult Swim, Disney Channel and ABC Family were among the top rated networks in the last year of its second season Digital TV Spots for st Century Fox, NBCU Cable, FOX Sports Network and Yahoo! Television announced last week that they will be joining the network along with their networks' digital ad network, Nickelodeon and VHNUMB. "The idea of creating video content in order to drive revenue through the process," said Doug Belgrad, president of worldwide commercial for COMPANY. "This is the first time in our history that a network could have an effect on ad viewing. " In addition, AOL launched video streaming service VuduStream in February. The service allows subscribers to watch live sports videos on their mobile devices. Vubu is a major player in that industry, bringing in more than NUMB of pay television subscribers from around the world to view and comment on content across their homes. Ad-supported services also allow viewers to stream live sporting events across multiple platforms. About VUDuSource presents a powerful advertising strategy. For more information please click here. Contact Lisa Woo at to schedule a phone interview or email. This e-mail and any files transmitted with it are intended solely for the use of the individual or entity to whom they are addressed. If the reader of this e E-Mail is not the intended recipient or the employee or agent responsible for delivering the message to the original recipient, you are hereby notified that any use dissemination, forwarding, printing or copying of thi Attachments: [imageNUMb. A Letter from the Publisher and CEO of Vodu Digital Distribution, Inc. vid. This electronic file (including any attachments) contains information concerning VOD and/or electronic sell-through (EST), content protection and other technology related to VAN. IF YOU HAVE RECEIVED THIS COMMUNICATION IN ERROR, PLEASE NOTIFY US IMMEDIATELY BY TELEPHONING THE ORIGINAL COMMUNITY AND DESTROY ALL COPIES, BOTH ELECTRONIC AND OTHER, OF
--------------------

In [None]:
# # Generate text using the pipeline
# email_text1 = generate_email("Hello, Following up on the bubblegum shipment.", use_pipeline=True)
# email_text2 = generate_email("Please confirm the delivery date for our next order.", use_pipeline=True)
# email_text3 = generate_email("Can you update me on the status of the invoice?", use_pipeline=True)

# # Print the generated emails
# print(email_text1)
# print("--------------------")
# print("--------------------")
# print("--------------------")
# print(email_text2)
# print("--------------------")
# print("--------------------")
# print("--------------------")
# print(email_text3)
# print("--------------------")
# print("--------------------")
# print("--------------------")

# V2 Pipeline Output

Hello, Following up on the bubblegum shipment.
I'll send you the info.
Hope you are well.
Best,
Janeh.
Janeh.
On Tue, Feb ,  at : AM, Cavanaugh, Kristin < wrote:Hi Janeh,I hope you are well.I spoke to Tom Rothman and he said he'd give me a call regarding your order, I told him I wanted to talk to you about your order and the order and I said I would send you a separate email that was so that I could communicate the order.I don't know if you had a chance to look at it and he suggested I send it to him directly. I've enclosed the order and my contact info for your reference.Thank you.Best,KristinErnest,Thank you for purchasing your itemPlease note that it has arrived in your carrier.
The items listed in this shipment are:x x x inch-wide x inch-wide x inch-wide x inch-wide x inch-wide x inch-wide
--------------------
--------------------
--------------------
Please confirm the delivery date for our next order.
If you have any questions, please contact the Global Service Desk or your Local IT Representative.
Global Service Desk Contact Information:
North America: 
US Toll Free: -SPE-SONY
Europe: ()-International Toll Fee Numbers
OnNet: - or -

GSD Live Chat
Regards,
SPE Identity Management
MP/WPF
ASAP
CRB(Competitive Releases)
DICER
FCM
GPMS - CopyRight
GPMS - MAGIC
GPMS - SCRY
GPMS - Titles and Registration
IntSales
Motion Pictures Portal
Script Tracker
SpiritWorld
Superbad
Worldwide Print Tracking System (WPTS)
Worldwide Publicity Website
Productions
C
Calypso
Dropzone
GPAS
Motion Pictures Production Database(MPPDB)
Tview
TV
BB
CC
Carmen
DealTracker
Dr. Oz
DTSM
ITSM/SARA
--------------------
--------------------
--------------------
Can you update me on the status of the invoice?
If not, please let me know.
Thanks,
Larry
Larry Marino

On Apr , , at : AM, Larry Marino < wrote:
Hi Larry,
Jane was hoping to see if we could get an invoice from ECS for this month's sales tax. We are working on a deal to make this work; the invoice is supposed to be CURRENCY,. I'm sorry but we won't be able to get a hard copy.
Thanks again!
Larry Marino

On Apr , , at : AM, Larry Marino < wrote:
Thanks Larry,
I hope that you are well.
I wanted to follow up with you briefly regarding the payment for the June invoice. I have received an invoice from ECS to make certain that we were billed by ECS for this year.
The invoice is supposed to be CURRENCY,.
The invoice is for May  through  and the invoice will be CURRENCY,.
Thanks again,
--------------------
--------------------
--------------------


# V2 Detailed Output

Laptop Male Software Engineer Cycling Club
URL URL URL URL
_____
This e-mail message is intended only for the individual or entity to which it is addressed and may contain information that is privileged, confidential, or exempt from disclosure under applicable Federal or State law. If the reader of this e-mai-mail is not the intended recipient, or the employee or agent responsible for delivering the message to the intended recipients, you are hereby notified that any dissemination, distribution or copying of this communication is strictly prohibited. If you
--------------------
--------------------
--------------------
Mobile Female Data Scientist Hiking in the US
URL
--URL This message was sent by: BBC Worldwide Americas
 Avenue of the Americas New York, NY, 
This email is to provide you with a personal digital file of all emails from BBC Worldwide into your inbox. If you prefer not to continue receiving email communications, please unsubscribe here instead of replying to this email.
To update your profile and customize what email alerts and newsletters you receive, please click here.
Having
--------------------
--------------------
--------------------
Desktop Male Graphic Designer Painting -.
URL URL URL URL #
>
> -Houzz Logo
> //RESERVATION STAMPED HANDS AND LINE BLUES CURRENCY.
Image Credit: Kiki Bentonka
This is a photo message from my Houzz ideabook.
If you are having trouble viewing this, click here.
Share This: URL URL
URL
Thank You,
--------------------
--------------------
--------------------


# V1 Mode Output

<!-- # Laptop Male Software Engineer Cycling Team
# <NUMBER>-<PHONE>
# Email: <EMAIL> | Twitter: @Cycling_Team
# Cyclists are invited to participate in the Cycling Cycling World Cup in Brazil.
# The Cycling Federation of Brazil is a global organization dedicated to promoting cycling 
# and the development of sustainable living. Cycling is an international organization that promotes the health, 
# safety and well-being of all people. The Cycling Foundation is dedicated solely to the pursuit of the highest quality 
# and quality of life -->