In [5]:
import requests
import pandas
from dateutil import parser
host = 'http://18.188.56.207:9200/'
requests.get(host + '_cat/indices/enron').content

b'yellow open enron RK6efVAlS9q_hKydGnRNdQ 1 1 251734 0 539.8mb 539.8mb\n'

In [6]:
doc = {
    "query" : {
        "match_all" : {}
    }
}
import json
r=requests.get(host + 'enron/_search', data=json.dumps(doc), headers={'Content-Type':'application/json'})
print(len(r.json()['hits']['hits']))

10


In [7]:
def elasticsearch_results_to_df(results):
    '''
    A function that will take the results of a requests.get 
    call to Elasticsearch and return a pandas.DataFrame object 
    with the results 
    '''
    hits = results.json()['hits']['hits']
    data = pandas.DataFrame([i['_source'] for i in hits], index = [i['_id'] for i in hits])
    data['date'] = data['date'].apply(parser.parse)
    return(data)

def print_df_row(row):
    '''
    A function that will take a row of the data frame and print it out
    '''
    print('____________________')
    print('RE: %s' % row.get('subject',''))
    print('At: %s' % row.get('date',''))
    print('From: %s' % row.get('sender',''))
    print('To: %s' % row.get('recipients',''))
    print('CC: %s' % row.get('cc',''))
    print('BCC: %s' % row.get('bcc',''))
    print('Body:\n%s' % row.get('text',''))
    print('____________________')


In [8]:
# Put elasticsearch results into a pandas.DataFrame object
df = elasticsearch_results_to_df(r)
print(df)
print_df_row(df.iloc[0])

                         date  \
269 2001-05-08 11:47:00+00:00   
270 2000-06-28 18:39:00+00:00   
271 2002-02-02 04:04:41+00:00   
272 2000-07-24 12:25:00+00:00   
273 2002-02-21 21:21:56+00:00   
274 2000-02-11 08:45:00+00:00   
275 2001-10-16 01:01:21+00:00   
276 2001-05-17 16:54:00+00:00   
277 2001-03-15 09:18:00+00:00   
278 2002-01-15 15:49:14+00:00   

                                                  text  \
269  Attached is a letter Marathon drafted to send ...   
270  It is amazing and yet not surprising how much ...   
271  Aloha ,\n\nYou're receiving this newsletter as...   
272  ---------------------- Forwarded by Daren J Fa...   
273         Attachment\n\n\nThanks\nRandy Peschka\n\n    
274  ---------------------- Forwarded by Tana Jones...   
275  800-437-9209\n3787-465-808-01001\n\n__________...   
276  I think I will be out of town---can you attend...   
277  Ditto Credit.\n\nThanks,\n\nShari \n\n\n\n\tTa...   
278  Lucy, Scott Neal and I ( we run the East Gas b...  

In [9]:
# Query For a full text match in the "text" field
# Uses the "match" query: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html
doc = {
    "query": {
        "match" : {
            "text" : "important reporting"
        } 
    },
    "from" : 0, # Starting message to return. 
    "size" : 10000, # Return this many messages. Can't be more than 10,000
}
r=requests.get(host + 'enron/_search',
               data=json.dumps(doc), headers={'Content-Type':'application/json'})
r.raise_for_status()
print("Found %s messages matching the query, of " % r.json()['hits']['total'])
df = elasticsearch_results_to_df(r)
print("Returned %s messages" % df.shape[0])
print_df_row(df.iloc[0])

Found {'value': 10000, 'relation': 'gte'} messages matching the query, of 
Returned 10000 messages
____________________
RE: RE: Reporting replication issue is now fixed
At: 2001-10-09 17:11:18+00:00
From: lynn.blair@enron.com
To: jennifer.lowry@enron.com  group.dl-ets@enron.com
CC: nan
BCC: nan
Body:
	Jennifer, how long has this been a problem?  Is there a concern we have caused
	customers problems in nominating due to bad information?  Thanks. Lynn

 -----Original Message-----
From: 	Lowry, Jennifer   
Sent:	Tuesday, October 09, 2001 10:11 AM
To:	DL-ETS TMS Modification Group
Subject:	Reporting replication issue is now fixed


Yesterday we noticed a problem where reports were not reporting on the correct cycle, or were not picking up information between cycles.  As it turns out, an important table was not being replicated from the application database to the reporting database.  

I was told that this problem has been fixed, and on first inspection of the tables, everything looks corr

In [12]:
df.head().text

21857     Adjusted purchase price s/b $209,159,347.\nBui...
87085     Ike, it is unfortunate we could not make this ...
150367    Richard and Bill:\n\nDamage figures with preju...
205273    ----- Forwarded by Richard B Sanders/HOU/ECT o...
213212    \n----- Forwarded by Richard B Sanders/HOU/ECT...
Name: text, dtype: object

In [10]:
# Query For a text match in the "text" or "subject" fields. Uses the multi-match query:
# https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html
doc = {
  "query": {
    "multi_match" : {
      "query":    "settlement", 
      "fields": [ "subject", "text" ] 
    }
  }
}
r=requests.get(host + 'enron/_search',
               data=json.dumps(doc), headers={'Content-Type':'application/json'})
r.raise_for_status()
print("Found %s messages matching the query, of " % r.json()['hits']['total'])
df = elasticsearch_results_to_df(r)
print("Returned %s messages" % df.shape[0])
print_df_row(df.iloc[9])

Found {'value': 4102, 'relation': 'eq'} messages matching the query, of 
Returned 10 messages
____________________
RE: Status of final statements and replacement invoices
At: 2001-10-24 01:34:50+00:00
From: thailu@ercot.com
To: jackson.amie@enron.com  pratka.amy@enron.com  mitrey.andy@enron.com  williams.angela@enron.com  garza.beth@enron.com  palmer.bill@enron.com  cooper.bob@enron.com  edwards.brady@enron.com  green.brenda@enron.com  smith.carl@enron.com  smith.carl@enron.com  carey.dan@enron.com  sarti.dan@enron.com  leger.dana@enron.com  wessels.david@enron.com  pawlik.debbie@enron.com  bailey.debra@enron.com  dyc.dennis@enron.com  slover.eric@enron.com  nitschmann.frances@enron.com  herndon.gary@enron.com  geissler.ginger@enron.com  striedel.james@enron.com  holland.janet@enron.com  jeffrey.miller@enron.com  doyas.jenny@enron.com  burt.jerry@enron.com  barker.joe@enron.com  favalora.joe@enron.com  forney.john@enron.com  fitzmaurice.kathy@enron.com  minear.kelly@enron.com  koliba.k

In [None]:
# "OR" query for two phrase matches. Generally you get fancy query parsing with this:
# https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html
doc = {
    "query": {
        "query_string" : {
            "default_field" : "text",
            "query" : "(reach a settlement) OR (continue to pursue)" 
        }
    }
}
r=requests.get(host + 'enron/_search',
               data=json.dumps(doc), headers={'Content-Type':'application/json'})
r.raise_for_status()
print("Found %s messages matching the query" % r.json()['hits']['total']['value'])
df = elasticsearch_results_to_df(r)
print("Returned %s messages" % df.shape[0])
print_df_row(df.iloc[0])

Found 10000 messages matching the query, of 
Returned 10 messages
____________________
RE: Settlement Conversation Recap
At: 2002-05-02 18:26:52+00:00
From: michael.bridges@enron.com
To: koikosp@talgov.com
CC: chris.germany@enron.com
BCC: chris.germany@enron.com
Body:
Hi Pete,

Wanted to recap our conversation yesterday, update you on our timing and get you my contact information.

Regarding a possible settlement, it appears to me that this is a concept that you have considered and are willing to pursue.  We will have a valuation of the contract for you Monday.  It is my suggestion that you review the proposal, make sure that you agree with the outstanding transactions and complete your own valuation.  Once we have agreed on the universe of transactions, you and I will discuss and finalize a settlement amount that we are comfortable will be approved by the creditor committee and bankruptcy judge.

From this point, Enron will begin the process of filing a motion for settlement and sched

In [None]:
df.iloc[9]

date                                  2001-05-31 11:21:00+00:00
recipients    lisa.mellencamp@enron.com  mary.heinitz@enron....
sender                                      kate.cole@enron.com
subject                     FW: Sale of East Coast Power L.L.C.
text          Please note that an additional subsidiary was ...
Name: 21281, dtype: object

In [None]:
# Do a count of all documents in the database by month
doc = {
    "aggs" : {
        "aggregation_var_name" : {
            "date_histogram" : {
                "field" : "date",
                "interval" : "month"
            }
        }
    }
}
r=requests.get(host + 'enron/_search',
               data=json.dumps(doc), headers={'Content-Type':'application/json'})
r.raise_for_status()
def aggregation_to_df(response,var_name='aggregation_var_name'):
    r = response.json()
    r['aggregations'][var_name]
    df = pandas.DataFrame(r['aggregations'][var_name]['buckets'])
    df['date'] = df['key_as_string'].apply(parser.parse)
    df = df[(df['date'] >= '1999-01-1') & (df['date'] < '2002-07-01')]
    df = df[['date','doc_count']]
    return df
df = aggregation_to_df(r)
print(df)

                         date  doc_count
228 1999-01-01 00:00:00+00:00          8
229 1999-02-01 00:00:00+00:00          1
230 1999-03-01 00:00:00+00:00          7
231 1999-04-01 00:00:00+00:00          2
232 1999-05-01 00:00:00+00:00         35
233 1999-06-01 00:00:00+00:00         24
234 1999-07-01 00:00:00+00:00         45
235 1999-08-01 00:00:00+00:00         41
236 1999-09-01 00:00:00+00:00         74
237 1999-10-01 00:00:00+00:00         67
238 1999-11-01 00:00:00+00:00         64
239 1999-12-01 00:00:00+00:00        131
240 2000-01-01 00:00:00+00:00        200
241 2000-02-01 00:00:00+00:00        245
242 2000-03-01 00:00:00+00:00        261
243 2000-04-01 00:00:00+00:00        289
244 2000-05-01 00:00:00+00:00        328
245 2000-06-01 00:00:00+00:00        512
246 2000-07-01 00:00:00+00:00        449
247 2000-08-01 00:00:00+00:00        641
248 2000-09-01 00:00:00+00:00        711
249 2000-10-01 00:00:00+00:00        796
250 2000-11-01 00:00:00+00:00       1089
251 2000-12-01 0

In [None]:
# Do a count of all documents matching a query by month
doc = {
    "query": {
        "match" : {
            "text" : "important reporting"
        }
    },
    "aggs" : {
        "aggregation_var_name" : {
            "date_histogram" : {
                "field" : "date",
                "interval" : "month"
            }
        }
    }
}
r=requests.get(host + 'enron/_search',
               data=json.dumps(doc), headers={'Content-Type':'application/json'})
r.raise_for_status()
df = aggregation_to_df(r)
print(df)

                         date  doc_count
228 1999-01-01 00:00:00+00:00          2
229 1999-02-01 00:00:00+00:00          0
230 1999-03-01 00:00:00+00:00          0
231 1999-04-01 00:00:00+00:00          0
232 1999-05-01 00:00:00+00:00          4
233 1999-06-01 00:00:00+00:00          0
234 1999-07-01 00:00:00+00:00          3
235 1999-08-01 00:00:00+00:00          1
236 1999-09-01 00:00:00+00:00          2
237 1999-10-01 00:00:00+00:00          0
238 1999-11-01 00:00:00+00:00          5
239 1999-12-01 00:00:00+00:00          6
240 2000-01-01 00:00:00+00:00          6
241 2000-02-01 00:00:00+00:00         14
242 2000-03-01 00:00:00+00:00         16
243 2000-04-01 00:00:00+00:00          4
244 2000-05-01 00:00:00+00:00         14
245 2000-06-01 00:00:00+00:00         19
246 2000-07-01 00:00:00+00:00         21
247 2000-08-01 00:00:00+00:00         35
248 2000-09-01 00:00:00+00:00         31
249 2000-10-01 00:00:00+00:00         39
250 2000-11-01 00:00:00+00:00         52
251 2000-12-01 0

In [None]:
# Search an exact match in a specific feild
doc = {
    "query": {
        "match" : {
            "recipients" : "stephen.schwarzbach@enron.com"
        } 
    },
}
r=requests.get(host + 'enron/_search',
               data=json.dumps(doc), headers={'Content-Type':'application/json'})
r.raise_for_status()
print("Found %s messages matching the query, of " % r.json()['hits']['total']['value'])
df = elasticsearch_results_to_df(r)
print("Returned %s messages" % df.shape[0])
print_df_row(df.iloc[0])

Found 3 messages matching the query, of 
Returned 3 messages
____________________
RE: Data Request
At: 2001-03-23 22:13:53+00:00
From: thomas.meers@enron.com
To: stephen.schwarzbach@enron.com
CC: tracy.geaccone@enron.com
BCC: tracy.geaccone@enron.com
Body:

Steve,

The attached files contain the headcount and G&A costs information you requested based on the 2001 Plan.  My apologies for not getting this to you sooner.  Please note that this information is representative of the three Clean Fuels companies but is based on my interpretation of what you wanted included within a particular line item.  Feel free to call me with any questions or comments you may have regarding this data.

Thanks,
	Tom Meers   X54899



  
____________________
