In [1]:
import numpy as np
import pandas as pd

# next command ensures that plots appear inside the notebook
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns  # also improves the look of plots
# Apply Seaborn's default styling to subsequent plots.
# NOTE(review): recent Seaborn releases document `set` as an alias of `set_theme` — confirm against the installed version before switching.
sns.set()
# Default figure size as [horizontal, vertical], in inches.
plt.rcParams['figure.figsize'] = [10, 5]
# Fixes an issue with Seaborn box plots; must be set after Seaborn has been imported.
plt.rcParams['lines.markeredgewidth'] = 1

# Inject CSS so that a hint (class "h") is hidden by default and revealed only
# while the mouse button is held down on the trigger element (class "t") that
# directly precedes it.
# The HTML object is passed to display() explicitly: Jupyter renders only the
# value of a cell's last expression, and this statement is followed by more
# code, so a bare HTML(...) expression here would be discarded without effect.
from IPython.display import HTML, display
display(HTML("<style>.h,.c{display:none}.t{color:#296eaa}.t:active+.h{display:block;}</style>"))

# Hide FutureWarnings, which may show for Seaborn calls in the most recent Anaconda.
# No module is specified, so the filter applies to FutureWarning raised from any
# library for the rest of the session, not only from Seaborn.
import warnings
warnings.filterwarnings("ignore", category=FutureWarning) 

In [2]:
# Load the Enron e-mail table; the path is resolved relative to the current working directory.
df_enron = pd.read_csv(filepath_or_buffer='enron-v1.csv')

In [3]:
# Display the loaded frame; the notebook renders a truncated view (first and last rows).
df_enron

Unnamed: 0,date,fromId,fromEmail,fromJobtitle,toId,toEmail,toJobtitle,messageType,sentiment
0,2000-08-13,96,matthew.lenhart@enron.com,Employee,77,eric.bass@enron.com,Trader,CC,0.013699
1,2000-08-13,96,matthew.lenhart@enron.com,Employee,112,phillip.m.love@enron.com,Unknown,TO,0.013699
2,2001-11-22,64,danny.mccarty@enron.com,Vice President,121,rod.hayslett@enron.com,Vice President,TO,0.000000
3,2000-10-29,139,susan.scott@enron.com,Unknown,104,monique.sanchez@enron.com,Unknown,TO,0.000000
4,2000-10-11,48,andy.zipper@enron.com,Vice President,2,mark.e.taylor@enron.com,Employee,TO,0.027397
...,...,...,...,...,...,...,...,...,...
31036,2001-09-01,42,jeff.dasovich@enron.com,Employee,36,james.d.steffes@enron.com,Vice President,TO,-0.052632
31037,2001-03-07,125,sara.shackleton@enron.com,Unknown,7,kim.ward@enron.com,Unknown,TO,0.000000
31038,2000-01-03,73,drew.fossum@enron.com,Vice President,139,susan.scott@enron.com,Unknown,CC,0.054795
31039,2000-11-18,57,chris.germany@enron.com,Employee,32,judy.townsend@enron.com,Employee,TO,0.027397


In [4]:
# Copy of the table ordered by the 'date' column, earliest first.
# The original row labels are kept, so each row can still be traced back to df_enron.
df_enron_dated = df_enron.sort_values('date', ascending=True)
df_enron_dated

Unnamed: 0,date,fromId,fromEmail,fromJobtitle,toId,toEmail,toJobtitle,messageType,sentiment
16734,1998-11-12,2,mark.e.taylor@enron.com,Employee,141,tana.jones@enron.com,Unknown,TO,0.013699
17383,1998-11-18,2,mark.e.taylor@enron.com,Employee,97,michelle.cash@enron.com,Employee,CC,0.000000
26725,1998-11-18,2,mark.e.taylor@enron.com,Employee,97,michelle.cash@enron.com,Employee,TO,-0.052632
23347,1998-11-23,2,mark.e.taylor@enron.com,Employee,1,marie.heard@enron.com,Unknown,TO,0.041096
23337,1998-11-23,2,mark.e.taylor@enron.com,Employee,141,tana.jones@enron.com,Unknown,CC,0.041096
...,...,...,...,...,...,...,...,...,...
22290,2002-06-18,57,chris.germany@enron.com,Employee,57,chris.germany@enron.com,Employee,TO,0.000000
19231,2002-06-20,133,stephanie.panus@enron.com,Employee,57,chris.germany@enron.com,Employee,TO,0.013699
19232,2002-06-20,133,stephanie.panus@enron.com,Employee,140,susan.bailey@enron.com,Unknown,TO,0.013699
19233,2002-06-20,133,stephanie.panus@enron.com,Employee,125,sara.shackleton@enron.com,Unknown,TO,0.013699


In [5]:
# ID of the person whose mail traffic is summarised below.
Person_ID_1 = 68

# Boolean row masks: rows sent by / addressed to Person_ID_1.
# They are kept as separate names because later cells reuse them.
person_send = df_enron['fromId'] == Person_ID_1
person_received = df_enron['toId'] == Person_ID_1

# Filter once per direction instead of re-applying the mask to the full frame
# for every single statistic.
df_sent = df_enron[person_send]
df_received = df_enron[person_received]

# describe() on a text column reports its most frequent value as 'top';
# that value is used as the person's e-mail address and job title.
ID_mail = df_sent['fromEmail'].describe()['top']
job_title = df_sent['fromJobtitle'].describe()['top']

# count() tallies rows with a non-null sentiment.
# NOTE(review): one message to several recipients presumably appears as several
# rows, so these are row counts rather than distinct messages — confirm.
sentiment_sent = df_sent['sentiment']
mails_send = sentiment_sent.count()
mean_sentiment_send = sentiment_sent.mean()
min_sentiment_send = sentiment_sent.min()
max_sentiment_send = sentiment_sent.max()

sentiment_received = df_received['sentiment']
mails_received = sentiment_received.count()
mean_sentiment_received = sentiment_received.mean()
min_sentiment_received = sentiment_received.min()
max_sentiment_received = sentiment_received.max()

# Show all summary values at once as a tuple.
(
    Person_ID_1,
    ID_mail,
    job_title,
    mails_send,
    mean_sentiment_send,
    min_sentiment_send,
    max_sentiment_send,
    mails_received,
    mean_sentiment_received,
    min_sentiment_received,
    max_sentiment_received,
)


(68,
 'david.w.delainey@enron.com',
 'CEO',
 320,
 0.01407714491708724,
 -0.1578947368421053,
 0.2465753424657534,
 297,
 0.003976316881868425,
 -0.2631578947368421,
 0.3424657534246575)

In [6]:
# All IDs that Person_ID_1 sent mail to, with the number of rows sent to each ID.
# size() counts the rows of each group directly and yields integer counts,
# avoiding a full describe() over every numeric column just to read one count.
df_person_send = (
    df_enron[person_send]
    .groupby('toId')
    .size()
    .to_frame('count')
)
df_person_send

Unnamed: 0_level_0,count
toId,Unnamed: 1_level_1
5,14.0
11,6.0
14,9.0
15,40.0
25,6.0
27,2.0
30,3.0
33,1.0
35,10.0
36,22.0


In [7]:
# Recipient ID that Person_ID_1 sent the most rows to (the first such ID when counts tie).
send_counts = df_person_send['count']
most_send = send_counts.max()
# idxmax() raises on an empty Series, so fall back to NaN when the person sent nothing.
ID_most_send = send_counts.idxmax() if not send_counts.empty else np.nan
ID_most_send

15

In [8]:
# All IDs that Person_ID_1 received mail from, with the number of rows received from each ID.
# size() counts the rows of each group directly and yields integer counts,
# avoiding a full describe() over every numeric column just to read one count.
df_person_received = (
    df_enron[person_received]
    .groupby('fromId')
    .size()
    .to_frame('count')
)
df_person_received

Unnamed: 0_level_0,count
fromId,Unnamed: 1_level_1
5,5.0
9,1.0
11,2.0
14,7.0
15,1.0
16,20.0
26,24.0
36,40.0
42,50.0
44,5.0


In [9]:
# Sender ID that Person_ID_1 received the most rows from (the first such ID when counts tie).
received_counts = df_person_received['count']
most_received = received_counts.max()
# idxmax() raises on an empty Series, so fall back to NaN when the person received nothing.
ID_most_received = received_counts.idxmax() if not received_counts.empty else np.nan
ID_most_received

42

In [10]:
# One-row overview of Person_ID_1: every key below becomes a column.
person_overview = {
    'ID': [Person_ID_1],
    'Mail adress': [ID_mail],
    'Send': [mails_send],
    'Mean sentiment send': [mean_sentiment_send],
    'Min sentiment send': [min_sentiment_send],
    'Max sentiment send': [max_sentiment_send],
    'Received': [mails_received],
    'Mean sentiment received': [mean_sentiment_received],
    'Min sentiment received': [min_sentiment_received],
    'Max sentiment received': [max_sentiment_received],
    'Most mails send to ID': [ID_most_send],
    'Most mails received from ID': [ID_most_received],
}
df_person_ID = pd.DataFrame(person_overview)
df_person_ID

Unnamed: 0,ID,Mail adress,Send,Mean sentiment send,Min sentiment send,Max sentiment send,Received,Mean sentiment received,Min sentiment received,Max sentiment received,Most mails send to ID,Most mails received from ID
0,68,david.w.delainey@enron.com,320,0.014077,-0.157895,0.246575,297,0.003976,-0.263158,0.342466,15,42


In [11]:
# ID of the second person in the pair being compared with Person_ID_1.
Person_ID_2 = 9

# Boolean row masks for mail sent by / addressed to each of the two people.
ID_1_send = df_enron['fromId'].eq(Person_ID_1)
ID_1_received = df_enron['toId'].eq(Person_ID_1)
ID_2_send = df_enron['fromId'].eq(Person_ID_2)
ID_2_received = df_enron['toId'].eq(Person_ID_2)

# Rows sent by Person_ID_1 to Person_ID_2.
df_correspondance_send = df_enron.loc[ID_1_send & ID_2_received]
df_correspondance_send

Unnamed: 0,date,fromId,fromEmail,fromJobtitle,toId,toEmail,toJobtitle,messageType,sentiment


In [12]:
# Rows sent by Person_ID_2 to Person_ID_1, i.e. the reverse direction.
df_correspondance_received = df_enron.loc[ID_1_received & ID_2_send]
df_correspondance_received

Unnamed: 0,date,fromId,fromEmail,fromJobtitle,toId,toEmail,toJobtitle,messageType,sentiment
763,2000-09-26,9,kay.mann@enron.com,Employee,68,david.w.delainey@enron.com,CEO,CC,0.0


In [13]:
# Average sentiment in each direction; the mean of an empty selection is NaN.
mean_sentiment_1_to_2 = df_correspondance_send.loc[:, 'sentiment'].mean()
mean_sentiment_2_to_1 = df_correspondance_received.loc[:, 'sentiment'].mean()

# One-row table comparing the two directions.
correspondance_row = {
    'ID_1': [Person_ID_1],
    'ID_2': [Person_ID_2],
    'Mean sentiment ID_1 to ID_2': [mean_sentiment_1_to_2],
    'Mean sentiment ID_2 to ID_1': [mean_sentiment_2_to_1],
}
df_correspondance = pd.DataFrame(correspondance_row)
# Show 'x' where a direction has no mail; the result is only displayed,
# so df_correspondance itself keeps the NaN.
df_correspondance.fillna('x')

Unnamed: 0,ID_1,ID_2,Mean sentiment ID_1 to ID_2,Mean sentiment ID_2 to ID_1
0,68,9,x,0.0
