In [1]:
#  For Python 2.7
from __future__ import print_function
from sklearn.cross_validation import train_test_split

In [None]:
#  For Python 3
import sklearn.model_selection
from sklearn.model_selection import train_test_split

In [2]:
import pickle
import sys
sys.path.append("../tools/")
from feature_format import featureFormat, targetFeatureSplit
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import itertools

#from show_confusion_matrix import show_confusion_matrix
from tester import dump_classifier_and_data
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix

In [3]:
data_dict = pickle.load(open("../final_project/final_project_dataset.pkl", "rb"))

df = pd.DataFrame.from_records(list(data_dict.values()))
employees = pd.Series(list(data_dict.keys()))
# set the index of df to be the employees series:
df.set_index(employees, inplace=True)

# DATA EXPLORATION

We assume that all the email-related data were collected using valid email addresses, therefore if some people have their email addresses missing, that implies the corresponding email features will be missing as well (NaN). The contrary is not true as we are about to see:

In [4]:
print("Number of people with missing email data: ",df['from_messages'].value_counts().max())
print("Number of people with missing email address: ",df['email_address'].value_counts().max())

Number of people with missing email data:  60
Number of people with missing email address:  35


Who are these people?

In [5]:
extrange_cases = df.index[(df['email_address'] != 'NaN') & (df['from_messages']=='NaN')].tolist()
print("People with at least one email address but without email-related features: ")
print()
for i, name in enumerate(extrange_cases):
    print(i + 1, name)

People with at least one email address but without email-related features: 

1 ELLIOTT STEVEN
2 MORDAUNT KRISTINA M
3 WESTFAHL RICHARD K
4 WODRASKA JOHN
5 ECHOLS JOHN B
6 KOPPER MICHAEL J
7 BERBERIAN DAVID
8 DETMERING TIMOTHY J
9 GOLD JOSEPH
10 KISHKILL JOSEPH G
11 LINDHOLM TOD A
12 BUTTS ROBERT H
13 HERMANN ROBERT J
14 SCRIMSHAW MATTHEW
15 FASTOW ANDREW S
16 OVERDYKE JR JERE C
17 STABLER FRANK
18 PRENTICE JAMES
19 WHITE JR THOMAS E
20 CHRISTODOULOU DIOMEDES
21 DIMICHELE RICHARD G
22 YEAGER F SCOTT
23 HIRKO JOSEPH
24 PAI LOU L
25 BAY FRANKLIN R


Those are 25 extrange cases, where actual Enron employees with a valid email address,  do not have email-related features.

In [6]:
emailless_people = df.index[df['email_address'] == 'NaN'].tolist()
print("People without an email address: ")
print()
for i, name in enumerate(emailless_people):
    print(i + 1, name)

People without an email address: 

1 BAXTER JOHN C
2 LOWRY CHARLES P
3 WALTERS GARETH W
4 CHAN RONNIE
5 BELFER ROBERT
6 URQUHART JOHN A
7 WHALEY DAVID A
8 MENDELSOHN JOHN
9 CLINE KENNETH W
10 WAKEHAM JOHN
11 DUNCAN JOHN H
12 LEMAISTRE CHARLES
13 SULLIVAN-SHAKLOVITZ COLLEEN
14 WROBEL BRUCE
15 MEYER JEROME J
16 CUMBERLAND MICHAEL S
17 GAHN ROBERT S
18 GATHMANN WILLIAM D
19 GILLIS JOHN
20 BAZELIDES PHILIP J
21 LOCKHART EUGENE E
22 PEREIRA PAULO V. FERRAZ
23 BLAKE JR. NORMAN P
24 GRAY RODNEY
25 THE TRAVEL AGENCY IN THE PARK
26 NOLES JAMES L
27 TOTAL
28 JAEDICKE ROBERT
29 WINOKUR JR. HERBERT S
30 BADUM JAMES P
31 REYNOLDS LAWRENCE
32 YEAP SOON
33 FUGH JOHN L
34 SAVAGE FRANK
35 GRAMM WENDY L


Here we found two entities, that are not real people: THE TRAVEL AGENCY IN THE PARK and TOTAL. These entities do not contribute in any meaninful way to the purpose of this study, so let say that from this moment on we mark them for deletion. 

The email address is the only field that could not be converted to numeric. We chose to remove it from the dataframe, because it is of no use for the purpose of identifying poi from the data given.
Also, in the case of the poi column only zeroes (0) and ones (1) are allowed: 1 = poi, 0 = non-poi

In [7]:
df=df.apply(lambda x: pd.to_numeric(x, errors='coerse'))
del df['email_address']
df['poi']=df['poi'].astype(int)

In [8]:
cols = ['poi', 'salary', 'total_payments', 'bonus', 'deferral_payments', 'deferred_income', 'other', 
        'director_fees', 'expenses', 'loan_advances', 'long_term_incentive', 'exercised_stock_options', 
        'restricted_stock', 'restricted_stock_deferred', 'total_stock_value', 'from_messages', 
        'from_poi_to_this_person', 'from_this_person_to_poi', 'shared_receipt_with_poi', 'to_messages']
df=df[cols]
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 146 entries, METTS MARK to GLISAN JR BEN F
Data columns (total 20 columns):
poi                          146 non-null int64
salary                       95 non-null float64
total_payments               125 non-null float64
bonus                        82 non-null float64
deferral_payments            39 non-null float64
deferred_income              49 non-null float64
other                        93 non-null float64
director_fees                17 non-null float64
expenses                     95 non-null float64
loan_advances                4 non-null float64
long_term_incentive          66 non-null float64
exercised_stock_options      102 non-null float64
restricted_stock             110 non-null float64
restricted_stock_deferred    18 non-null float64
total_stock_value            126 non-null float64
from_messages                86 non-null float64
from_poi_to_this_person      86 non-null float64
from_this_person_to_poi      86 non-null flo

Counting the number of poi and non-poi in the dataset.

In [9]:
df['poi'].value_counts()

0    128
1     18
Name: poi, dtype: int64

From the finantial data we learned that NaN means zero, therefore we proceed to make the corresponding changes in our dataframe.

In [10]:
df.iloc[:, 1:15] = df.iloc[:, 1:15].fillna(0)

After performing such an operation, the number of NaN values was dramatically reduced from 1323 up to 300, which ultimately is the amount of missing email-related entries.

In [11]:
print("Number of email-related missing data: ", df.isnull().sum().sum())

Number of email-related missing data:  300


We have a few amount of data available. In the case of poi, there are just 18 people in our dataset, four of them (over 20%) did not have email-related data.

In [12]:
print("Number of poi without email data: ", df[(df['poi']==1) & (~df.to_messages.notnull())].shape[0])

Number of poi without email data:  4


# DATA COMPLETION

We wondered what to do with those missing entries. As we were going to use the enron email data anyway to create new features, we decided to launch a search for those missing email addresses. That proved to be a time consuming task. After searching with a complex pattern of regular expresions and using specific search criteria based on the observed email addresses patterns, we were able to find up to 424 different email addresses linked to the people under study. Our search methods were far from optimal as they included final manual adjudications in many cases. That is why we have reasons to believe that there could be more email addresses than the ones we were able to find (but we decided to leave that as a subject of a more detailed study to be carried out in the future). In any case, our search allowed us to find some of the missing email addresses and with that information, we built the email-based existing features for the employees including those "extrange 25 cases" where no features were listed even tough there were email addresses available. The code is too large to be inserted here, but we provide a text file with the procedure followed alonside with the script files we used.

In [13]:
new_data_dict = pickle.load(open("../final_project/new_data_dict.pickle", "rb"))
df_new = pd.DataFrame.from_records(list(new_data_dict.values()))
employees = pd.Series(list(new_data_dict.keys()))
# set the index of df_new to be the employees series:
df_new.set_index(employees, inplace=True)

In [14]:
missing_data = df_new[((df_new.index.isin(extrange_cases)) | (
    df_new.index.isin(emailless_people))) &(df_new['from_messages']!= 'NaN') ][cols[15:]]

We were able to find email-related data for 44 out of our targeted 58 individuals.

In [15]:
print("Number of people with email data recovered", missing_data.shape[0])
missing_data

Number of people with email data recovered 44


Unnamed: 0,from_messages,from_poi_to_this_person,from_this_person_to_poi,shared_receipt_with_poi,to_messages
ELLIOTT STEVEN,0,8,0,87,194
MORDAUNT KRISTINA M,6,7,3,121,403
WESTFAHL RICHARD K,2,36,0,21,64
CHAN RONNIE,0,2,0,12,10
WODRASKA JOHN,0,0,0,13,96
URQUHART JOHN A,0,0,0,0,1
WHALEY DAVID A,0,4,0,0,5
ECHOLS JOHN B,8,8,5,78,90
CLINE KENNETH W,0,0,0,3,3
KOPPER MICHAEL J,0,11,0,183,192


Observations:

Except for Colleen Sullivan-Shaklovitz, the amount of messages sent by the other 43 people is suspiciously low (or inexistent) for the timeframe considered. One particular case is worth noticing: Andrew S. Fastow.

It is hard to believe that the chief financial officer of a corporation, (who received at least 1183 emails) just sent 9 emails in more than a year, including the time when the financial scandal shattered the company.
A similar disproportion between emails sent and received is observed for the other 3 poi in this portion of the dataset.

I believe that the process of emails removal from the dataset due to privacy protection issues that occurred at some point after the first release of the Enron email data, might have something to do not only with Fastow's sent emails low count, but also with the issues in the amount of email data belonging to the other 42 people as well. 

It is reasonable to asume that this particular situation could be at least in part responsible for the abcence of email-related features for those 25 "extrange cases" we found earlier.

When we applied those found values to the existing dataframe, the number of NaN entries was dramatically reduced.

In [16]:
df = df.combine_first(missing_data)
print("Number of email-related missing data: ", df.isnull().sum().sum())

Number of email-related missing data:  80


More importantly, we were able to find the missing email data for all poi in the dataset.

In [17]:
print("Number of poi without email data: ", df[(df['poi']==1) & (~df.to_messages.notnull())].shape[0])

Number of poi without email data:  0


A good way to procede with the remaining NaN values is to impute them with the median for non-poi people.

In [18]:
email_cols = cols[15:]
df[email_cols]=df[email_cols].fillna(df.groupby("poi")[email_cols].transform("median"))
print("Amount of remaining NaN entries in the dataframe:", df.isnull().sum().sum())

Amount of remaining NaN entries in the dataframe: 0


# DATA CLEANSING

As we have a relatively low number of data points, we have to proceed extra-carefully at removing them.
For the moment, we are going to do it just to the items we previously have marked for deletion and we will analyze any further need in a case by case manner as we proceed with our ML algorithms.

In [19]:
df=df.drop(['TOTAL'])
df=df.drop(['THE TRAVEL AGENCY IN THE PARK'])

# CREATING NEW FEATURES

We are going to create two different kind of new features. Some are going to be the rate of existing features (i.e. from_this_person_to_poi / from_messages), and others are going to be the result of working with the entire email dataset. 
In the second case, we created an intermediate feature, called pubIndex, that is not going to be used explicitly but it was involved in computing most of the new features; pubIndex accounts for the number of people involved in a given email (To and Cc fields) correcting for when people sent emails to themselves. The lowest possible value for this feature is zero (if someone sent an email just to him or herself with no Cc), it is equal to one if there is just a single person in the To field and none in the Cc field, and so on. It is worth noticing that there is in principle no upper limit for this feature.  

Our New Features:

- to_poi_rate: ratio of from_this_person_to_poi / from_messages

- from_poi_rate: ratio of from_poi_to_this_person / to_messages

- from_poi_cc_this_person: This is the complement of the existent from_poi_to_this_person, it takes into account the Cc field of the emails sent from poi accounts, it was corrected for any email that poi sent (Cc) to themselves.

- median_from_poi_cc_this_person_pubIndex: We grouped all the emails sent from poi with Cc to a given person and calculated the median of the pubIndex feature.

- median_from_poi_to_this_person_pubIndex: The same as above but for the field To.

- median_cc_this_person_pubIndex: We grouped all the emails where a person was in the Cc field and calculated the median of the pubIndex feature.

- median_from_this_person_pubIndex: We grouped all the emails sent by a given person and took the median of the pubIndex feature.

The rest of the new features could be easily understood

- median_from_this_person_cc_poi_pubIndex

- median_from_this_person_to_poi_pubIndex

- median_to_this_person_pubIndex



In [20]:
df['to_poi_rate'] = df['from_this_person_to_poi']/df['from_messages']
df['from_poi_rate'] = df['from_poi_to_this_person']/df['to_messages']
df[['to_poi_rate', 'from_poi_rate']]=df[['to_poi_rate', 'from_poi_rate']].fillna(
    df.groupby("poi")[['to_poi_rate', 'from_poi_rate']].transform("median")) # 19 NaN values imputed with the median
new_feat = ['from_poi_cc_this_person', 'median_from_poi_cc_this_person_pubIndex', 
            'median_from_poi_to_this_person_pubIndex', 'median_cc_this_person_pubIndex', 
            'median_from_this_person_pubIndex','median_from_this_person_cc_poi_pubIndex', 
           'median_from_this_person_to_poi_pubIndex', 'median_to_this_person_pubIndex']
nf_df = df_new[new_feat]
nf_df=nf_df.apply(lambda x: pd.to_numeric(x, errors='coerse'))
df = pd.concat([df, nf_df], axis=1)
# 127 NaN values imputed with the median
df[new_feat]=df[new_feat].fillna(df.groupby("poi")[new_feat].transform("median"))

In [28]:
full_cols = cols[1:] + ['to_poi_rate', 'from_poi_rate'] + new_feat


In [29]:
# Rename columns back to their original names when as a result of applying a scaler the column names become numbers
rename_cols={}
for i in range(29):
    rename_cols[i] = full_cols[i]

In [48]:
# Taken from internet for testing purposes
import tester
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif

scaler = StandardScaler()

df_norm = df[["poi"]+full_cols]
df_norm = scaler.fit_transform(df_norm.iloc[:,1:])

clf = GaussianNB()

kbest_feat = 8
features_list2 = ['poi']+list(range(kbest_feat))

selector = SelectKBest(f_classif, k=kbest_feat)
select_k_best_classifier = selector.fit_transform(df_norm, df.poi)

mask = selector.get_support() #list of booleans
kselect_features = [] # The list of your K best features
for bool, feature in zip(mask, full_cols):
    if bool:
        kselect_features.append(feature)

my_dataset = pd.DataFrame(select_k_best_classifier, index = df.index, columns = kselect_features)
my_dataset.insert(0, "poi", df.poi)

my_dataset = my_dataset.to_dict(orient = 'index')  

dump_classifier_and_data(clf, my_dataset, ["poi"] + kselect_features )
tester.main()

GaussianNB()
	Accuracy: 0.84467	Precision: 0.41791	Recall: 0.42000	F1: 0.41895	F2: 0.41958
	Total predictions: 15000	True positives:  840	False positives: 1170	False negatives: 1160	True negatives: 11830



In [47]:
my_dataset

{'ALLEN PHILLIP K': {'bonus': 2.8473426924782155,
  'deferred_income': -4.7811825199403435,
  'exercised_stock_options': -0.072457224518588756,
  'long_term_incentive': -0.046952675297334427,
  'median_from_this_person_to_poi_pubIndex': -0.2796690397332342,
  'poi': 0.0,
  'salary': 0.08407637944803549,
  'to_poi_rate': -0.69477885807657058,
  'total_stock_value': -0.19136543741091505},
 'BADUM JAMES P': {'bonus': -0.55009850558791185,
  'deferred_income': 0.32071903300808124,
  'exercised_stock_options': -0.38042445938875769,
  'long_term_incentive': -0.4920584003709369,
  'median_from_this_person_to_poi_pubIndex': -0.36169010230265525,
  'poi': 0.0,
  'salary': -0.94443418591633621,
  'to_poi_rate': -0.81374859861168569,
  'total_stock_value': -0.42999148522908853},
 'BANNANTINE JAMES M': {'bonus': -0.55009850558791185,
  'deferred_income': 0.31226734826819547,
  'exercised_stock_options': 0.41230882113670481,
  'long_term_incentive': -0.4920584003709369,
  'median_from_this_person_t

In [23]:
mydict = df.to_dict(orient = 'index')
clf = GaussianNB()
tester.dump_classifier_and_data(clf, mydict, cols)
tester.main();

GaussianNB()
	Accuracy: 0.74580	Precision: 0.23440	Recall: 0.40000	F1: 0.29558	F2: 0.35048
	Total predictions: 15000	True positives:  800	False positives: 2613	False negatives: 1200	True negatives: 10387



In [27]:
scaler = StandardScaler()
x_scaled = scaler.fit_transform(df.iloc[:,1:])
df3 = pd.DataFrame(x_scaled, index=df.index)
df3.insert(0, "poi", df.poi)
xdict = df3.to_dict(orient = 'index')
xcols = ["poi"] + list(range(19))
clf = GaussianNB()
tester.dump_classifier_and_data(clf, xdict, xcols)
tester.main();

GaussianNB()
	Accuracy: 0.99320	Precision: 1.00000	Recall: 0.94900	F1: 0.97383	F2: 0.95878
	Total predictions: 15000	True positives: 1898	False positives:    0	False negatives:  102	True negatives: 13000



In [24]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import RobustScaler
from sklearn.svm import SVC
scaler = RobustScaler()
x_scaled = scaler.fit_transform(df.iloc[:,1:])
df3 = pd.DataFrame(x_scaled, index=df.index)
df3.insert(0, "poi", df.poi)
xdict = df3.to_dict(orient = 'index')
xcols = ["poi"] + list(range(19))
clf = DecisionTreeClassifier()
tester.dump_classifier_and_data(clf, xdict, xcols)
tester.main();

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')
	Accuracy: 1.00000	Precision: 1.00000	Recall: 1.00000	F1: 1.00000	F2: 1.00000
	Total predictions: 15000	True positives: 2000	False positives:    0	False negatives:    0	True negatives: 13000



In [25]:
df3.columns

Index([u'poi',      0,      1,      2,      3,      4,      5,      6,      7,
            8,      9,     10,     11,     12,     13,     14,     15,     16,
           17,     18,     19,     20,     21,     22,     23,     24,     25,
           26,     27,     28],
      dtype='object')

In [30]:
ren_cols

{0: 'salary',
 1: 'total_payments',
 2: 'bonus',
 3: 'deferral_payments',
 4: 'deferred_income',
 5: 'other',
 6: 'director_fees',
 7: 'expenses',
 8: 'loan_advances',
 9: 'long_term_incentive',
 10: 'exercised_stock_options',
 11: 'restricted_stock',
 12: 'restricted_stock_deferred',
 13: 'total_stock_value',
 14: 'from_messages',
 15: 'from_poi_to_this_person',
 16: 'from_this_person_to_poi',
 17: 'shared_receipt_with_poi',
 18: 'to_messages',
 19: 'to_poi_rate',
 20: 'from_poi_rate',
 21: 'from_poi_cc_this_person',
 22: 'median_from_poi_cc_this_person_pubIndex',
 23: 'median_from_poi_to_this_person_pubIndex',
 24: 'median_cc_this_person_pubIndex',
 25: 'median_from_this_person_pubIndex',
 26: 'median_from_this_person_cc_poi_pubIndex',
 27: 'median_from_this_person_to_poi_pubIndex',
 28: 'median_to_this_person_pubIndex'}

In [33]:
from sklearn.svm import SVC
clf = SVC(kernel='linear')
tester.dump_classifier_and_data(clf, xdict, xcols)
tester.main();

KeyboardInterrupt: 

In [30]:
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier()
tester.dump_classifier_and_data(clf, xdict, xcols)
tester.main();

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')
	Accuracy: 1.00000	Precision: 1.00000	Recall: 1.00000	F1: 1.00000	F2: 1.00000
	Total predictions: 15000	True positives: 2000	False positives:    0	False negatives:    0	True negatives: 13000



In [None]:
from sklearn.ensemble import AdaBoostClassifier
clf = AdaBoostClassifier()
tester.dump_classifier_and_data(clf, xdict, xcols)
tester.main();

In [121]:
from sklearn.feature_selection import RFE

X = df.values[:,1:]
Y = df.values[:,0]

# create a base classifier used to evaluate a subset of attributes
clf = DecisionTreeClassifier()
# create the RFE model and select nfeat attributes
nfeat = 11
rfe = RFE(clf, nfeat)
rfe = rfe.fit(X, Y)
# summarize the selection of the attributes
#print(rfe.support_)
#print(rfe.ranking_)

In [122]:
best_feat = [full_cols[x] for x in np.where(rfe.ranking_==1)[0]]
bestcols = ["poi"] + best_feat
df4 = df[bestcols]
xdict = df4.to_dict(orient = 'index')
clf = DecisionTreeClassifier()
tester.dump_classifier_and_data(clf, xdict, bestcols)
tester.main();

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')
	Accuracy: 0.84140	Precision: 0.40885	Recall: 0.42500	F1: 0.41677	F2: 0.42167
	Total predictions: 15000	True positives:  850	False positives: 1229	False negatives: 1150	True negatives: 11771



In [94]:
# Nothing happens if the number of features is reduced (scaled case)
X = df3.values[:,1:]
Y = df3.values[:,0]
nfeat = 1
# create a base classifier used to evaluate a subset of attributes
clf = DecisionTreeClassifier()
# create the RFE model and select 3 attributes
rfe = RFE(clf, nfeat)
rfe = rfe.fit(X, Y)
best_feat = [x for x in np.where(rfe.ranking_==1)[0]]
best_feat_names = [full_cols[x] for x in np.where(rfe.ranking_==1)[0]]
best_feat_names

['restricted_stock']

In [95]:
bestcols = ["poi"] + best_feat
xcols = ["poi"] + list(range(nfeat))
df4 = df3[bestcols]
xdict = df4.to_dict(orient = 'index')
clf = DecisionTreeClassifier()
tester.dump_classifier_and_data(clf, xdict, bestcols)
tester.main();

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')
	Accuracy: 1.00000	Precision: 1.00000	Recall: 1.00000	F1: 1.00000	F2: 1.00000
	Total predictions: 15000	True positives: 2000	False positives:    0	False negatives:    0	True negatives: 13000



In [85]:
bestcols

['poi', 12, 19, 22, 23, 24, 25, 26, 27, 28, 29]

In [99]:
df

Unnamed: 0,bonus,deferral_payments,deferred_income,director_fees,exercised_stock_options,expenses,from_messages,from_poi_to_this_person,from_this_person_to_poi,loan_advances,...,to_poi_rate,from_poi_rate,from_poi_cc_this_person,median_from_poi_cc_this_person_pubIndex,median_from_poi_to_this_person_pubIndex,median_cc_this_person_pubIndex,median_from_this_person_pubIndex,median_from_this_person_cc_poi_pubIndex,median_from_this_person_to_poi_pubIndex,median_to_this_person_pubIndex
ALLEN PHILLIP K,4175000.0,2869717.0,-3081055.0,0.0,1729541.0,13868.0,2195.0,47.0,65.0,0.0,...,0.029613,0.016196,12.0,10.0,33.0,19.0,0.0,2.0,1.0,36.0
BADUM JAMES P,0.0,178980.0,0.0,0.0,257817.0,3486.0,2.0,0.0,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,44.0
BANNANTINE JAMES M,0.0,0.0,-5104.0,0.0,4046157.0,56301.0,29.0,39.0,0.0,0.0,...,0.000000,0.068905,0.0,0.0,13.0,7.0,0.0,32.0,0.0,74.0
BAXTER JOHN C,1200000.0,1295738.0,-1386055.0,0.0,6680544.0,11200.0,19.0,13.5,2.0,0.0,...,0.105263,0.025568,0.0,0.0,13.0,8.0,0.0,0.0,1.0,45.0
BAY FRANKLIN R,400000.0,260455.0,-201641.0,0.0,0.0,129142.0,1.0,10.0,0.0,0.0,...,0.000000,0.080645,0.0,0.0,47.0,4.0,0.0,0.0,0.0,29.0
BAZELIDES PHILIP J,0.0,684694.0,0.0,0.0,1599641.0,0.0,0.0,0.0,0.0,0.0,...,0.105263,0.000000,0.0,0.0,0.0,117.0,0.0,0.0,0.0,87.0
BECK SALLY W,700000.0,0.0,0.0,0.0,0.0,37172.0,4343.0,144.0,386.0,0.0,...,0.088879,0.019686,40.0,4.0,11.0,6.0,0.0,5.0,3.0,7.0
BELDEN TIMOTHY N,5249999.0,2144013.0,-2334434.0,0.0,953136.0,17355.0,484.0,228.0,108.0,0.0,...,0.223140,0.028532,42.0,6.0,7.0,15.0,0.0,6.0,9.0,45.0
BELFER ROBERT,0.0,-102500.0,0.0,3285.0,3285.0,0.0,19.0,13.5,2.0,0.0,...,0.105263,0.025568,0.0,0.0,13.0,8.0,0.0,0.0,1.0,45.0
BERBERIAN DAVID,0.0,0.0,0.0,0.0,1624396.0,11892.0,1.0,5.0,0.0,0.0,...,0.000000,0.031447,1.0,2.0,71.0,6.0,0.0,0.0,0.0,66.0


2.6984587404083666e-17