Starter code for exploring the Enron dataset (emails + finances); loads up the dataset (pickled dict of dicts).

The dataset has the form:
```
enron_data["LASTNAME FIRSTNAME MIDDLEINITIAL"] = { features_dict }
```

`{features_dict}` is a dictionary of features associated with that person. You should explore features_dict as part of the mini-project, but here's an example to get you started:
```
enron_data["SKILLING JEFFREY K"]["bonus"] = 5600000
```

In [1]:
import pickle

# Load the pickled dict-of-dicts. The `with` block closes the file handle as
# soon as unpickling finishes instead of leaving it open for the session.
# NOTE(review): mode 'r' is kept from the original (Python 2); under Python 3
# pickle.load needs the file opened in binary mode ('rb').
# NOTE: unpickling can execute arbitrary code -- only load a trusted file.
with open('final_project_dataset.pkl', 'r') as dataset_file:
    enron_data = pickle.load(dataset_file)

# Quiz: Size of the Enron dataset

In [4]:
print len(enron_data.keys())

146


# Features in the Enron Dataset

In [25]:
first = enron_data[enron_data.keys()[0]]
print first.keys()
print len(first.keys())

['salary', 'to_messages', 'deferral_payments', 'total_payments', 'exercised_stock_options', 'bonus', 'restricted_stock', 'shared_receipt_with_poi', 'restricted_stock_deferred', 'total_stock_value', 'expenses', 'loan_advances', 'from_messages', 'other', 'from_this_person_to_poi', 'poi', 'director_fees', 'deferred_income', 'long_term_incentive', 'email_address', 'from_poi_to_this_person']
21


# Finding POIs in the Enron Data

In [19]:
pois = [key for key,value in enron_data.items() if value['poi'] == 1]
print len(pois)

18


# How Many POIs Exist?

http://usatoday30.usatoday.com/money/industries/energy/2005-12-28-enron-participants_x.htm

1. (y) Lay, Kenneth
1. (y) Skilling, Jeffrey
1. (n) Howard, Kevin
1. (n) Krautz, Michael
1. (n) Yeager, Scott
1. (n) Hirko, Joseph
1. (n) Shelby, Rex
1. (n) Bermingham, David
1. (n) Darby, Giles
1. (n) Mulgrew, Gary
1. (n) Bayley, Daniel
1. (n) Brown, James
1. (n) Furst, Robert
1. (n) Fuhs, William
1. (n) Causey, Richard
1. (n) Calger, Christopher
1. (n) DeSpain, Timothy
1. (n) Hannon, Kevin
1. (n) Koenig, Mark
1. (y) Forney, John
1. (n) Rice, Kenneth
1. (n) Rieker, Paula
1. (n) Fastow, Lea
1. (n) Fastow, Andrew
1. (y) Delainey, David
1. (n) Glisan, Ben
1. (n) Richter, Jeffrey
1. (n) Lawyer, Larry
1. (n) Belden, Timothy
1. (n) Kopper, Michael
1. (n) Duncan, David
1. (n) Bowen, Raymond
1. (n) Colwell, Wesley
1. (n) Boyle, Dan
1. (n) Loehr, Christopher

# Query the Dataset 1

In [27]:
# List the dataset keys (names) that begin with 'P'.
[name for name in enron_data if name.startswith('P')]

['PIPER GREGORY F',
 'POWERS WILLIAM',
 'PIRO JIM',
 'PEREIRA PAULO V. FERRAZ',
 'PRENTICE JAMES',
 'PICKERING MARK R',
 'PAI LOU L']

In [28]:
# Look up the 'total_stock_value' feature recorded for PRENTICE JAMES.
prentice_features = enron_data['PRENTICE JAMES']
prentice_features['total_stock_value']

1095040

# Query the Dataset 2

In [29]:
# List the dataset keys (names) that begin with 'C'.
[name for name in enron_data if name.startswith('C')]

['CORDES WILLIAM R',
 'COLWELL WESLEY',
 'CHAN RONNIE',
 'CLINE KENNETH W',
 'CUMBERLAND MICHAEL S',
 'CALGER CHRISTOPHER F',
 'COX DAVID',
 'CHRISTODOULOU DIOMEDES',
 'CARTER REBECCA C',
 'CAUSEY RICHARD A']

In [30]:
# Show the feature names stored for COLWELL WESLEY.
colwell_features = enron_data['COLWELL WESLEY']
colwell_features.keys()

['salary',
 'to_messages',
 'deferral_payments',
 'total_payments',
 'exercised_stock_options',
 'bonus',
 'restricted_stock',
 'shared_receipt_with_poi',
 'restricted_stock_deferred',
 'total_stock_value',
 'expenses',
 'loan_advances',
 'from_messages',
 'other',
 'from_this_person_to_poi',
 'poi',
 'director_fees',
 'deferred_income',
 'long_term_incentive',
 'email_address',
 'from_poi_to_this_person']

In [31]:
# Look up the 'from_this_person_to_poi' feature recorded for COLWELL WESLEY.
colwell_features = enron_data['COLWELL WESLEY']
colwell_features['from_this_person_to_poi']

11

# Query the Dataset 3

In [33]:
# List the dataset keys (names) that begin with 'SK'.
[name for name in enron_data if name.startswith('SK')]

['SKILLING JEFFREY K']

In [34]:
# Look up the 'exercised_stock_options' feature recorded for SKILLING JEFFREY K.
skilling_features = enron_data['SKILLING JEFFREY K']
skilling_features['exercised_stock_options']

19250000

# Follow the Money

In [43]:
# Select the entries whose key starts with LAY, FASTOW or SKILLING;
# str.startswith accepts a tuple of prefixes.
keys = [k for k in enron_data if k.startswith(('LAY', 'FASTOW', 'SKILLING'))]

# Pair each selected name with its 'total_payments' value.
name_payments = [(key, enron_data[key]['total_payments']) for key in keys]

# The (name, total_payments) pair with the largest payment.
max(name_payments, key=lambda item: item[1])

('LAY KENNETH L', 103559793)

# Unfilled Features

In [46]:
for key, value in enron_data['LAY KENNETH L'].items():
    print key, value

salary 1072321
to_messages 4273
deferral_payments 202911
total_payments 103559793
exercised_stock_options 34348384
bonus 7000000
restricted_stock 14761694
shared_receipt_with_poi 2411
restricted_stock_deferred NaN
total_stock_value 49110078
expenses 99832
loan_advances 81525000
from_messages 36
other 10359729
from_this_person_to_poi 16
poi True
director_fees NaN
deferred_income -300000
long_term_incentive 3600000
email_address kenneth.lay@enron.com
from_poi_to_this_person 123


# Dealing with Unfilled Features

In [52]:
print 'Salary: ', len([k for k,v in enron_data.items() if v['salary'] != 'NaN'])
print 'Email Addresses: ', len([k for k,v in enron_data.items() if v['email_address'] != 'NaN'])

Salary:  95
Email Addresses:  111


# Missing POIs 1 (optional)

In [60]:
# Percentage of all people in the dataset whose total_payments is 'NaN'.
no_payments_count = sum(1 for v in enron_data.values() if v['total_payments'] == 'NaN')
float(no_payments_count) / len(enron_data) * 100

14.383561643835616

# Missing POIs 2 (optional)

In [68]:
# Percentage of POIs whose total_payments is 'NaN'.
# The original cell built the POI-filtered list but discarded it, then
# recomputed the all-people percentage from `no_payments_count`; here the
# filtered records are the ones actually counted and divided.
poi_records = [v for v in enron_data.values() if v['poi'] == 1]
poi_no_payments_count = len([v for v in poi_records if v['total_payments'] == 'NaN'])
float(poi_no_payments_count) / len(poi_records) * 100

0.0

# Missing POIs 4 (optional)

In [70]:
print len(enron_data.keys()) + 10

156


In [72]:
print len([k for k,v in enron_data.items() if v['total_payments'] == 'NaN']) + 10

31


# Missing POIs 5 (optional)

In [73]:
print len([k for k,v in enron_data.items() if v['poi'] == 1]) + 10

28


In [74]:
# Count of POIs with 'NaN' total_payments, plus 10 hypothetical additions.
sum(1 for v in enron_data.values() if v['poi'] == 1 and v['total_payments'] == 'NaN') + 10

10