This notebook follows the tutorial found at:
https://www.analyticsvidhya.com/blog/2018/11/introduction-text-summarization-textrank-python/


In [1]:
import mailparser
import sys
from os import listdir
from os.path import isfile, join
import pandas as pd
import email
import numpy as np
from talon.signature.bruteforce import extract_signature
import nltk
from nltk.tokenize import sent_tokenize
import re
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import cosine_similarity
import networkx as nx
import rouge
import pickle
import time
from joblib import Parallel, delayed
import multiprocessing

#one time executions
#nltk.download('punkt')
#nltk.download('stopwords')
#! wget http://nlp.stanford.edu/data/glove.6B.zip
#! unzip glove*.zip
#!pip install py-rouge




Load dataframe produced by the Data_Wranlging notebook. 

In [2]:
#Load Data
#PICKLE_LOC = "../data/dataframes/wrangled_enron_df.pkl" #Single Mailbox
ENRON_PICKLE_LOC = "../data/dataframes/wrangled_enron_full_df.pkl"
BC3_EMAIL_PICKLE_LOC = "../data/dataframes/wrangled_BC3_email_df.pkl"
BC3_SUMMARY_PICKLE_LOC = "../data/dataframes/wrangled_BC3_summary_df.pkl"

enron_df = pd.read_pickle(ENRON_PICKLE_LOC)
BC3_emails_df = pd.read_pickle(BC3_EMAIL_PICKLE_LOC)
BC3_summary_df = pd.read_pickle(BC3_SUMMARY_PICKLE_LOC)

In [3]:
#Outputs a subset of the enron dataset masked by the person and a timeframe. 
def subset_emails(df, start_date, end_date, person):   
    summarization_mask = (enron_df['Date'] >= start_date) & (enron_df['Date'] <= end_date) & (enron_df['Employee'] == person)
    enron_masked_df = df.loc[summarization_mask]
    return enron_masked_df   

In [4]:
#Retrieve original sentences and index them. This will be used to generate the extracted summaries. 
def get_extractive_sentences(df):
    sentences = df.Extractive_Sentences.tolist()
    #flatten list as tuples containting (sentence, dataframe index) to be used to reassociate summary with original email. 
    sentences = []
    sentences_list = df.Extractive_Sentences.tolist()
    for counter, sublist in enumerate(sentences_list):
        for item in sublist:
            sentences.append([counter, item]) 
    return sentences

In [5]:
#Pull out clean tokenized sentences. 
def get_tokenized_sentences(df):
    clean_sentences = df.Tokenized_Body.tolist()
    #flatten list
    clean_sentences = [y for x in clean_sentences for y in x]
    return clean_sentences

In [6]:
#get glove word vectors
def extract_word_vectors():
    word_embeddings = {}
    f = open('glove.6B.300d.txt', encoding='utf-8')
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        word_embeddings[word] = coefs
    f.close()
    return word_embeddings

In [7]:
#Create sentence_vectors
def create_sentence_vectors(clean_sentences, word_embeddings):
    sentence_vectors = []
    for i in clean_sentences:
        if len(i) != 0:
            v = sum([word_embeddings.get(w, np.zeros((300,))) for w in i.split()])/(len(i.split())+0.001)
        else:
            v = np.zeros((300,))
        sentence_vectors.append(v)
    return sentence_vectors

In [8]:
#def rank_sentences(sentences, sentence_vectors):
#    sen_len = range(len(sentences))
    #Create sentence vectors using list comphrehsion. 
#    sim_mat = [
#       [cosine_similarity(sentence_vectors[i].reshape(1,300), sentence_vectors[j].reshape(1,300))[0,0] for j in sen_len if j != i]
#        for i in sen_len
#    ]

#    nx_graph = nx.from_numpy_array(sim_mat)
#    scores = nx.pagerank(nx_graph)
    #Pair sentence with it's similarity score then sort. 
#    ranked_sentences = sorted(((scores[i],s[0],s[1]) for i,s in enumerate(sentences)), reverse=True)
#    return ranked_sentences

#Returns a list of sorted scores with the index of the email the extracted sentence came from. 
def rank_sentences(sentences, sentence_vectors):
    sim_mat = np.zeros([len(sentences), len(sentences)])
    #Initialize matrix with cosine similarity scores. 
    for i in range(len(sentences)):
        for j in range(len(sentences)):
            if i != j:
              sim_mat[i][j] = cosine_similarity(sentence_vectors[i].reshape(1,300), sentence_vectors[j].reshape(1,300))[0,0]
    nx_graph = nx.from_numpy_array(sim_mat)
    scores = nx.pagerank(nx_graph)
    #Pair sentence with it's similarity score then sort. 
    #ranked_sentences = sorted(((scores[i],s[0],s[1]) for i,s in enumerate(sentences)), reverse=True)
    ranked_sentences = ((scores[i],s[0],s[1]) for i,s in enumerate(sentences))
    return ranked_sentences

In [9]:
#color scheme to help distinguish summarizaiton text. 
class bcolors:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

def display_summary(enron_masked_df, ranked_sentences):
  # Specify number of sentences as a fraction of total emails. 
  sn = len(enron_masked_df) // 10

  # Generate summary
  for i in range(sn):
    #pull date and subject from original email
    email_date = str(enron_masked_df['Date'].iloc[ranked_sentences[i][1]])
    email_subject = str(enron_masked_df['Subject'].iloc[ranked_sentences[i][1]])
    email_from = str(enron_masked_df['From'].iloc[ranked_sentences[i][1]])
    print( bcolors.BOLD + "Date: "+ email_date  + 
          " Subject: " + email_subject +
          " From: " + email_from + bcolors.ENDC +
          "\nSummary: " + str(ranked_sentences[i][2]))

In [10]:
#Function to wrap up summarization process
def summarize_emails(word_embeddings, masked_df):
    print("Total number of emails to summarize: " + str(len(masked_df)))
    sentences = get_extractive_sentences(masked_df)
    clean_sentences = get_tokenized_sentences(masked_df)
    #Generate sentence vectors
    sentence_vectors = create_sentence_vectors(clean_sentences, word_embeddings)
    #Create a list of ranked sentences. 
    ranked_sentences = rank_sentences(sentences, sentence_vectors)
    #return enron_masked_df, ranked_sentences
    #display_summary(masked_df, ranked_sentences)
    return ranked_sentences

In [11]:
#Extract word vectors. Only need to be done once. 
word_embeddings = extract_word_vectors()

# Summarizing BC3 Dataset and Evaluate with Rouge
Using: https://pypi.org/project/py-rouge/

The rouge metric is an evaluation metric used to test machine generated summaries against a human "Gold standard". Using the same Text rank summarization methods used on the Enron dataset, the following evaluates the algorithim against the BC3 Corpus. This is one of the few email datasets that contain human summarizations. 

In [None]:
#Look into summarizing single email
masked_df = BC3_emails_df[:1]
masked_summaries = BC3_summary_df['Summary'].loc[(BC3_summary_df['Listno'] == masked_df.iloc[0]['Listno']) & (BC3_summary_df['Email_num'] == str(masked_df['Email_num'].iloc[0]))]
ranked_sentences = summarize_emails(word_embeddings, masked_df)

In [None]:
#There are three different human summaries for the same email. 
masked_summaries

In [None]:
evaluator = rouge.Rouge(metrics=['rouge-n'],
                           max_n=1,
                           limit_length=True,
                           length_limit=100,
                           length_limit_type='words',
                           alpha=0.5, # Default F1_score
                           weight_factor=1.2,
                           stemming=True)
full_body = masked_df['Body'].iloc[0]
hypothesis = ranked_sentences[0][2]
reference = masked_summaries.iloc[0]

#scores = evaluator.get_scores(hypothesis, reference)
print("Full Email: " + full_body + '\n')
print("ML Summary: " + hypothesis + "\n") 
print("Human Summary: " + reference + "\n") 

The following shows the R-1 scores. Current benchmarks for text summarization can be found at: https://summari.es/

In [None]:
perfect_score = evaluator.get_scores(reference, reference)
scores = evaluator.get_scores(hypothesis, reference)
print(scores)

# Summarizing Enron Dataset

In [15]:
#Define emails to be summarized. 
start_date = '2001-10-01 00:00:00'
end_date = '2001-10-14 23:59:59'
person = 'skilling-j'
masked_df = subset_emails(enron_df, start_date, end_date, person)
ranked_sentences = summarize_emails(word_embeddings, masked_df)

Total number of emails to summarize: 72
[1mDate: 2001-10-08 08:36:38 Subject: Update - Basel Conference New York From: news@ibcuk.co.uk[0m
Summary: However, as the business community appears to be trying to get back to normal as far as possible, we have decided that the conference should proceed as planned.
[1mDate: 2001-10-02 19:04:24 Subject: Jeffrey Skilling, your October E-lert is now available From: mccann@nc.rr.com[0m
Summary: A complete story on the origin of Halloween will be on the Business Cafe Web site October 2531 at http://www.BusinessCafeOnline.com Your October 2001 issue of Elert for personal development in business is now available on http://www.BusinessCafeOnline.com Included in the October 2001 issue are these three articles: Flying Off Over Office Politics What Men Can Learn
[1mDate: 2001-10-04 03:13:05 Subject: h: Eyeforenergy Briefing From: bruno@eyeforenergy.com[0m
Summary: EDITORIAL A Focus on the latest developments in Europe ARTICLES European Utility Sect

In [24]:
ranked_sentences = summarize_emails(word_embeddings, masked_df)

Total number of emails to summarize: 72


In [25]:
ranked_sentences

<generator object rank_sentences.<locals>.<genexpr> at 0x7f755e19a8b8>

In [26]:
ranked_list = list(ranked_sentences)

In [27]:
ranked_list

[(0.0007875445566595563, 0, 'Please ignore.'),
 (0.0010081311464166171,
  0,
  'Get your FREE download of MSN Explorer at http://explorer.msn.com/intl.asp'),
 (0.0012244132397189018, 1, 'Good Monday Morning Comments'),
 (0.0013877848702168068,
  2,
  'During the weekend of October 6 and 7, 2001 the Enterprise Storage Team will be migrating all production users off the current hardware (Solar) that houses their home and application directories (no production databases are affected, but client software will be) to new hardware.'),
 (0.0011576050576941831,
  2,
  'This migration requires a total system outage of approximately 6 hours.'),
 (0.0011340903464222992,
  2,
  'The outage will occur Saturday night beginning at 7:00 PM and will last until Sunday morning at 1:00 AM.'),
 (0.0013324856596917604,
  2,
  'All users will need to be logged off during this time period.'),
 (0.0014484539613239349,
  2,
  'In order to validate the migration, production users need to test access to their hom

In [37]:
#Examples of a full email. 
masked_df['Body'].iloc[ranked_list[0][1]]

'Please ignore. Get your FREE download of MSN Explorer at http://explorer.msn.com/intl.asp'

In [53]:
masked_df

Unnamed: 0,Body,Chain,Date,Email Folder,Employee,From,Full_Email_Path,Message-ID,Signature,Subject,Extractive_Sentences,Tokenized_Body,TextRanks
15419,Please ignore. Get your FREE download of MSN E...,,2001-10-02 21:51:28,inbox,skilling-j,dalak@hotmail.com,../data/maildir/skilling-j/inbox/1035.,<18520966.1075852669231.JavaMail.evans@thyme>,,Test,"[Please ignore., Get your FREE download of MSN...","[please ignore, get free download msn explorer...","[0.0007875445566595563, 0.0010081311464166171]"
15440,Good Monday Morning Comments,The Local Guys!\n\nThe Federal Reserve meets ...,2001-10-01 19:45:15,inbox,skilling-j,david.morris@lehman.com,../data/maildir/skilling-j/inbox/1056.,<15375789.1075852669906.JavaMail.evans@thyme>,,"The Morning Market Call - Monday October 1st, ...",[Good Monday Morning Comments],[good monday morning comments],[0.0012244132397189018]
15449,"During the weekend of October 6 and 7, 2001 th...",,2001-10-04 22:45:18,inbox,skilling-j,bob.ambrocik@enron.com,../data/maildir/skilling-j/inbox/1020.,<21007375.1075852668632.JavaMail.evans@thyme>,,Solar Migration - Third Notice - Time Change!!!!!,"[During the weekend of October 6 and 7, 2001 t...",[weekend october enterprise storage team migra...,"[0.0013877848702168068, 0.0011576050576941831,..."
15453,Schwab StreetSmart Pro Schwab StreetSmart Pro...,there you will be able to update your email i...,2001-10-08 21:07:40,inbox,skilling-j,activetrader@schwab.com,../data/maildir/skilling-j/inbox/999.,<33230100.1075852667852.JavaMail.evans@thyme>,,StreetSmart Pro is here.,[ Schwab StreetSmart Pro Schwab StreetSmart Pr...,[schwab streetsmart pro schwab streetsmart pro...,"[0.0013277194099462052, 0.0015131937957199868,..."
15476,kudlow100901 (2).doc,,2001-10-09 21:43:05,inbox,skilling-j,svarga@kudlow.com,../data/maildir/skilling-j/inbox/987.,<12590153.1075852667374.JavaMail.evans@thyme>,,L. Kudlow & J. Park Commentary (10/9/2001),[ kudlow100901 (2).doc],[kudlow doc],[-1.7511599161875722e-05]
15480,"Attached is a the agenda, minutes of previous ...",,2001-10-01 14:41:34,inbox,skilling-j,bpaddock@ghcf.org,../data/maildir/skilling-j/inbox/1061.,<11478331.1075852670125.JavaMail.evans@thyme>,,Oct. 3rd meeting,"[Attached is a the agenda, minutes of previous...",[attached agenda minutes previous meeting new ...,"[0.0014190575677898738, 0.0012705486947875901,..."
15485,The migration of UNIX home directories and app...,,2001-10-05 17:55:41,inbox,skilling-j,bob.ambrocik@enron.com,../data/maildir/skilling-j/inbox/1011.,<12254508.1075852668316.JavaMail.evans@thyme>,,Solar Migration - DATE CHANGE - VERY IMPORTANT,[The migration of UNIX home directories and ap...,[migration unix home directories applications ...,"[0.0012655032725738906, 0.0014778958205377965,..."
15523,"Brother Betas, I am George Pereira from the gr...",,2001-10-07 16:53:03,inbox,skilling-j,gpereira7@hotmail.com,../data/maildir/skilling-j/inbox/1007.,<7577685.1075852668102.JavaMail.evans@thyme>,,[smu-betas] New member,"[Brother Betas, I am George Pereira from the g...",[brother betas george pereira graduating class...,"[0.0006472868722849441, 0.001303472120074173, ..."
15543,"Woody, I like this one!! Will be in all week s...",\nFrom: woody berry [mailto:boethia@hotmail.co...,2001-10-09 18:12:54,inbox,skilling-j,chuck.paul@wiseshops.com,../data/maildir/skilling-j/inbox/993.,<20878155.1075852667561.JavaMail.evans@thyme>,,RE: [smu-betas] Fwd: This is one of the funnie...,"[Woody, I like this one!!, Will be in all week...","[woody like one, week news breaks dicks deal g...","[0.0010967417801525448, 0.0012912139805925547,..."
15544,Jeff. FYI,"\nFrom: Albert Ibanez \nSent: Thursday, O...",2001-10-04 20:07:05,inbox,skilling-j,fparra@swbanktx.com,../data/maildir/skilling-j/inbox/1022.,<16610529.1075852668684.JavaMail.evans@thyme>,,FW: Skilling #10386513,"[Jeff., FYI]","[jeff, fyi]","[0.0005049507192780072, -0.00018546044954957465]"


In [54]:
ranked_list

[(0.0007875445566595563, 0, 'Please ignore.'),
 (0.0010081311464166171,
  0,
  'Get your FREE download of MSN Explorer at http://explorer.msn.com/intl.asp'),
 (0.0012244132397189018, 1, 'Good Monday Morning Comments'),
 (0.0013877848702168068,
  2,
  'During the weekend of October 6 and 7, 2001 the Enterprise Storage Team will be migrating all production users off the current hardware (Solar) that houses their home and application directories (no production databases are affected, but client software will be) to new hardware.'),
 (0.0011576050576941831,
  2,
  'This migration requires a total system outage of approximately 6 hours.'),
 (0.0011340903464222992,
  2,
  'The outage will occur Saturday night beginning at 7:00 PM and will last until Sunday morning at 1:00 AM.'),
 (0.0013324856596917604,
  2,
  'All users will need to be logged off during this time period.'),
 (0.0014484539613239349,
  2,
  'In order to validate the migration, production users need to test access to their hom

In [51]:
#masked_df['TextRanks'] = np.empty((len(masked_df), 0)).tolist()

for rank in ranked_list:
    masked_df.TextRanks.iloc[rank[1]] += [rank[0]]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


KeyboardInterrupt: 

In [50]:
ranked_list[0]

(0.0007875445566595563, 0, 'Please ignore.')

In [20]:
masked_df['Body'].iloc[ranked_sentences[0][1]]

"Dear Mr Skilling, http://www.ibcfinancial.com/bm1272/?source=bm1272em2 Update on IBC's major international conference: BASEL MEETING THE PRACTICAL CHALLENGES 31st October and 1st November 2001 New York We have obviously considered very carefully whether to continue with this event in the light of the recent terrible events in New York. However, as the business community appears to be trying to get back to normal as far as possible, we have decided that the conference should proceed as planned. We therefore hope you will take this excellent opportunity to hear the industry response to the latest Basel proposals on Capital Adequacy and Risk Management, and to gain practical advice on meeting the significant business challenges that these proposals pose to the management of risk within the financial services industry. Conference highlights include: A keynote address from William Rutledge, Executive Vice President, FEDERAL RESERVE BANK OF NEW YORK An impressive panel of leading industry s

In [None]:
#Summarization from another inbox
start_date = '2001-10-01 00:00:00'
end_date = '2001-10-14 23:59:59'
person = 'arnold-j'
masked_df = subset_emails(enron_df, start_date, end_date, person)
ranked_sentences = summarize_emails(word_embeddings, masked_df)

In [None]:
#Examples of a full email. 
masked_df['Body'].iloc[ranked_sentences[0][1]]

In [None]:
#One more example
start_date = '2001-10-01 00:00:00'
end_date = '2001-10-14 23:59:59'
person = 'lenhart-m'
masked_df = subset_emails(enron_df, start_date, end_date, person)
ranked_sentences = summarize_emails(word_embeddings, masked_df)

In [None]:
#Examples of a full email. 
masked_df['Body'].iloc[ranked_sentences[0][1]]

In [None]:
def rank_sentences(sentences, sentence_vectors):
    sen_len = range(len(sentences))
    #Create sentence vectors using list comphrehsion. 
    sim_mat = [
       [cosine_similarity(sentence_vectors[i].reshape(1,300), sentence_vectors[j].reshape(1,300))[0,0] for j in sen_len if j != i]
        for i in sen_len
    ]

    nx_graph = nx.from_numpy_array(sim_mat)
    scores = nx.pagerank(nx_graph)
    #Pair sentence with it's similarity score then sort. 
    ranked_sentences = sorted(((scores[i],s[0],s[1]) for i,s in enumerate(sentences)), reverse=True)
    return ranked_sentences

In [None]:
#testing
start_date = '2001-10-01 00:00:00'
end_date = '2001-10-01 23:59:59'
person = 'skilling-j'

enron_masked_df = subset_emails(enron_df, start_date, end_date, person)
print("Total number of emails to summarize: " + str(len(enron_masked_df)))
sentences = get_extractive_sentences(enron_masked_df)
clean_sentences = get_tokenized_sentences(enron_masked_df)
print(str(len(clean_sentences)) + " " + str(len(sentences)))
len(word_embeddings)
#Generate sentence vectors
sentence_vectors = create_sentence_vectors(clean_sentences, word_embeddings)
len(sentence_vectors)
#Create a list of ranked sentences. 
#ranked_sentences = rank_sentences(sentences, sentence_vectors)
#display_summary(enron_masked_df, ranked_sentences)

In [None]:
ranked_sentences = rank_sentences(sentences, sentence_vectors)

In [None]:
sen_len = range(len(sentences))
#Create sentence vectors using list comphrehsion. 
sim_mat = [
    [cosine_similarity(sentence_vectors[i].reshape(1,300), sentence_vectors[j].reshape(1,300))[0,0] for j in sen_len if j != i]
    for i in sen_len
]

In [None]:
df = pd.DataFrame(np.random.rand(500000,100)) 
df['distances'] = cosine_similarity(df, df.iloc[0:1]) # Here I assume that the parent vector is stored as the first row in the dataframe, but you could also store it separately

n = 10 # or however many you want
n_largest = df['distances'].nlargest(n + 1) # this contains the parent itself as the most similar entry, hence n+1 to get n children

In [None]:
df.iloc[0:1]
#df

In [None]:
#sentence_vectors[0].reshape(1,300)

In [None]:
# make some small data
from scipy.stats import pearsonr
df = pd.DataFrame(np.random.rand(100, 40))

C1 = np.array([[pearsonr(df[i], df[j])[0] for i in df] for j in df])
C2 = np.corrcoef(df.values.T)
np.allclose(C1, C2)

In [None]:
#sim_mat
C1[0][1]
#df

In [None]:
#def rank_sentences(sentences, sentence_vectors):
#    sim_mat = np.zeros([len(sentences), len(sentences)])
#    #Initialize matrix with cosine similarity scores. 
#    for i in range(len(sentences)):
#        for j in range(len(sentences)):
#            if i != j:
#              sim_mat[i][j] = cosine_similarity(sentence_vectors[i].reshape(1,300), sentence_vectors[j].reshape(1,300))[0,0]
#    nx_graph = nx.from_numpy_array(sim_mat)
#    scores = nx.pagerank(nx_graph)
    #Pair sentence with it's similarity score then sort. 
#    ranked_sentences = sorted(((scores[i],s[0],s[1]) for i,s in enumerate(sentences)), reverse=True)
#    return ranked_sentences
cosine_similarity(sentence_vectors[0].reshape(1,300), sentence_vectors[1].reshape(1,300))[0,0]

In [None]:
sim_mat_test = pd.DataFrame(index=data.columns, columns=)

for i in range(len(sentences)):
    for j in range(len(sentences)):
        sim_mat[i][j] = cosine_similarity(sentence_vectors[i].reshape(1,300), sentence_vectors[j].reshape(1,300))[0,0]

In [None]:
cosine_similarity(sentence_vectors[0].reshape(1,300), sentence_vectors[1].reshape(1,300))

In [None]:
pearsonr(sentence_vectors[0], sentence_vectors[1])

In [None]:
from sklearn.metrics import pairwise_distances
pairwise_distances(sentence_vectors[1].reshape(1,300), sentence_vectors[0].reshape(1,300), metric='cosine')

In [None]:
test_vec1 = [sentence_vectors[0].reshape(1,300), sentence_vectors[1].reshape(1,300)]


In [None]:
type(sentence_vectors[0].reshape(1,300))

In [14]:
#Testing way to summarize full dataset. 
summarization_mask = ((enron_df['Employee'] == 'allen-p'))
enron_masked_df = enron_df.loc[summarization_mask]

In [15]:
enron_masked_df

Unnamed: 0,Body,Chain,Date,Email Folder,Employee,From,Full_Email_Path,Message-ID,Signature,Subject,Extractive_Sentences,Tokenized_Body
346947,"Jeff, I got your voice mail about the school b...",,2001-09-04 14:13:46,sent_items,allen-p,k..allen@enron.com,../data/maildir/allen-p/sent_items/231.,<29845868.1075858642856.JavaMail.evans@thyme>,,,"[Jeff, I got your voice mail about the school ...","[jeff got voice mail school board approval, th..."
346948,"Adrianne, I spoke to Brent D. I would recommen...","\nFrom: Engler, Adrianne \nSent: Monday, Octob...",2001-10-29 17:05:08,sent_items,allen-p,k..allen@enron.com,../data/maildir/allen-p/sent_items/305.,<5299392.1075858644525.JavaMail.evans@thyme>,,RE: the candidate we spoke about this morning...,"[Adrianne, I spoke to Brent D. I would recomme...",[adrianne spoke brent would recommend given ch...
346949,"Jeff, Can you resend the info on the three pro...","\nFrom: \t""JEFF SMITH"" <jsmith@austintx.com>@E...",2001-11-26 15:25:16,sent_items,allen-p,k..allen@enron.com,../data/maildir/allen-p/sent_items/1.,<25828831.1075855376669.JavaMail.evans@thyme>,,RE: Additional properties in San Antonio,"[Jeff, Can you resend the info on the three pr...",[jeff resend info three properties mailed one ...
346950,"Don, Jeff Gosset runs the risk management grou...",\nFrom: \tdonald.l.barnhart@accenture.com@ENRO...,2001-09-14 19:41:59,sent_items,allen-p,k..allen@enron.com,../data/maildir/allen-p/sent_items/259.,<25041277.1075858643477.JavaMail.evans@thyme>,,RE:,"[Don, Jeff Gosset runs the risk management gro...","[jeff gosset runs risk management group, spoke..."
346951,"Erik, All of the desk heads are leaving for an...","\nFrom: \tSimpson, Erik \nSent:\tThursday, Au...",2001-08-16 15:22:39,sent_items,allen-p,k..allen@enron.com,../data/maildir/allen-p/sent_items/212.,<20032513.1075858642442.JavaMail.evans@thyme>,,RE: jobs on the gas desk,"[Erik, All of the desk heads are leaving for a...",[erik desk heads leaving offsite afternoon wee...
346952,Is this the weekly fundamentals report produce...,"\nFrom: \tBruce, James \nSent:\tMonday, Octob...",2001-10-30 13:17:09,sent_items,allen-p,k..allen@enron.com,../data/maildir/allen-p/sent_items/368.,<17229495.1075862164644.JavaMail.evans@thyme>,,RE: Distribution of report,[Is this the weekly fundamentals report produc...,[weekly fundamentals report produced tim heize...
346954,"Jerry, I had a couple of questions about the e...",\nFrom: \tJerry Caskey <jerry@texas1031.com>@E...,2001-11-02 17:19:48,sent_items,allen-p,k..allen@enron.com,../data/maildir/allen-p/sent_items/377.,<28859857.1075862164873.JavaMail.evans@thyme>,,RE: Exchange Documents,"[Jerry, I had a couple of questions about the ...","[jerry couple questions exchange, , place repl..."
346955,"Michael, I finally got in touch with the depar...",,2001-07-12 19:55:20,sent_items,allen-p,k..allen@enron.com,../data/maildir/allen-p/sent_items/143.,<13141541.1075858640571.JavaMail.evans@thyme>,,,"[Michael, I finally got in touch with the depa...",[michael finally got touch department handles ...
346957,"Jacques, I need to pull together the Bishops C...",,2001-10-29 21:27:12,sent_items,allen-p,k..allen@enron.com,../data/maildir/allen-p/sent_items/308.,<9608796.1075858644591.JavaMail.evans@thyme>,,,"[Jacques, I need to pull together the Bishops ...",[jacques need pull together bishops corner org...
346958,"Jeff, I understand you spoke to Jacques Craig ...",,2001-06-18 18:18:57,sent_items,allen-p,k..allen@enron.com,../data/maildir/allen-p/sent_items/110.,<28397305.1075858639786.JavaMail.evans@thyme>,,,"[Jeff, I understand you spoke to Jacques Craig...",[jeff understand spoke jacques craig consent a...
