# Terry Stop Analysis
### In the 1968 Supreme Court case "Terry v. Ohio", the court found that a police officer was not in vilation of the "unresonable search and seizure" clause of the Fourth Amendment after he stopped and frisked suspects only because their behavior was suspicious. Thus the phrase "Terry Stops" are in reference to stops made of suspicious drivers. 

This is an analysis of over 48,000 Terry Stops, with a goal of predicting if an arrest will be made based off time of day, whether a suspect was frisked, and racial & gender demographics of both the suspects and officers. 

The overall goal of the analysis is to have the highest possible recall, to minimize false positives, of accidentally classifying subject who were not arrested as arrested. 




In [1]:
#import the necessary libraries. 


import pandas as pd
import numpy as np
import seaborn as sns
import datetime
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import plot_confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier

# Step 1: Cleaning the Data

In [2]:
# Import and look at the rows of our dataset. 

pd.set_option('display.max_columns', None)
df = pd.read_csv('Terry_Stops.csv')
df.head()


FileNotFoundError: [Errno 2] File b'Terry_Stops.csv' does not exist: b'Terry_Stops.csv'

In [None]:
# For ease of analysis, let's add underscores to each column name
renamed_columns = ['Subject_Age_Group','Subject_ID', 'GO_SC_Num', 'Terry_Stop_ID',
                   'Stop_Resolution', 'Weapon_Type', 'Officer_ID', 'Officer_YOB',
                  'Officer_Gender', 'Officer_Race', 'Subject_Perceived_Race',
                  'Subject_Perceived_Gender', 'Reported_Date', 'Reported_Time',
                   'Initial_Call_Type', 'Final_Call_Type', 'Call_Type', 'Officer_Squad',
                  'Arrest_Flag', 'Frisk_Flag','Precinct', 'Sector', 'Beat']
df.columns = renamed_columns
df.head()

In [None]:
# Looking at the initial data, 'Subject_ID, GO_SC_Num', 'Terry_Stop_ID', and 'Officer_ID' are purely 
# individual indentifiers so it's okay to drop those

In [None]:
df = df.drop(['Subject_ID','GO_SC_Num', 'Terry_Stop_ID', 'Officer_ID'], axis=1)

In [None]:
# Take a look to see if there are any null values in the data 

df.isna().sum()

In [None]:
# Interesting, because upon initial analysis, there are empty entries 
# in 'Subject Age Group', all of the 'Call Type' columns

df.info()

In [None]:
# investigate the columns that are mostly '-' to see if there is data 
# or if it's mostly null values

In [None]:
print('Subject Age Group:' , '\n', df['Subject_Age_Group'].value_counts(),'\n')
print('Initial Call Type:', '\n', df['Initial_Call_Type'].value_counts(),'\n')
print('Final Call Type:', '\n',  df['Final_Call_Type'].value_counts(), '\n')
print('Call Type:', "\n", df['Call_Type'].value_counts(),'\n')

In [None]:
# From this it seems like all "Call Type" is mostly unnecessary and can be dropped. Also 'Initial_Call_Type'
# and 'Final_Call_Type' are missing over 13k columns so it's okay to drop those

In [None]:
df = df.drop(['Call_Type','Initial_Call_Type', 'Final_Call_Type'], axis=1)

In [None]:
df['Subject_Age_Group'].value_counts()

In [None]:
df = df = df[df.Subject_Age_Group != '-']

In [None]:
df['Subject_Age_Group'].value_counts()

In [None]:
df['Officer_YOB'].value_counts()

In [None]:
def officer_yob_decade(x):
    if (x <= 1959):
        return '1900-1960'
    elif (x > 1959) and (x <= 1969):
        return '1960s'
    elif (x > 1969) and (x <= 1979):
        return '1970s'
    elif (x > 1979) and (x <= 1989):
        return '1980s'
    elif (x > 1989) and (x <= 1999):
        return '1990s'
    

In [None]:
df['Officer_Age_By_Decade'] = df['Officer_YOB'].apply(officer_yob_decade)
df['Officer_Age_By_Decade'].value_counts()

In [None]:
# Explore "Arrest Flag" and "Stop Resolution
#a s they both have arrest data that will serve
# as the target variable for this exploration. 

In [None]:
df['Arrest_Flag'].value_counts()

In [None]:
df['Stop_Resolution'].value_counts()

In [None]:
# It seems there is a discrepency in Arrest data between 'Stop Resolution'
# and "Arrest_Flag". I am making the executive decision to base this 
# exploration around "Stop Resolution" as it reads as more thorough
# in it's reporting of resoltion rather than a simple 'Yes'/'No' in 
# 'Arrest Flag'.

In [None]:
df['Stop_Resolution'] = df['Stop_Resolution'].apply(lambda x: 'Yes' if x == 'Arrest' else 'No')
df['Stop_Resolution'].value_counts()

In [None]:
df = df.drop('Arrest_Flag', axis=1)
df.info()

In [None]:
# Now explore factors like the races and genders of both the officers
# and the people pulled over

In [None]:
print('Officer Gender:' , '\n', df['Officer_Gender'].value_counts(),'\n')
print('Officer Race:', '\n', df['Officer_Race'].value_counts(),'\n')
print('Subject Perceived Gender:', '\n',  df['Subject_Perceived_Gender'].value_counts(), '\n')
print('Subject Perceived Race:', "\n", df['Subject_Perceived_Race'].value_counts(),'\n')

In [None]:
# There seem to be many different ways to say "unknown" in these columns
# Let's combine the redundant values together

In [None]:
df['Officer_Race'] = df['Officer_Race'].apply(lambda x: "Other" if x in ['Not Specified', 'Unknown'] else x)
df['Subject_Perceived_Gender'] = df['Subject_Perceived_Gender'].apply(lambda x: 'Unknown/GNC' if x in ['Unable to Determine', '-', 'Unknown', 'Gender Diverse (gender non-conforming and/or transgender)'] else x)
df['Subject_Perceived_Race'] = df['Subject_Perceived_Race'].apply(lambda x: 'Unknown' if x in ['-', 'Other'] else x)

In [None]:
print(df['Officer_Race'].value_counts(), '\n')
print(df['Subject_Perceived_Gender'].value_counts(), '\n')
print(df['Subject_Perceived_Race'].value_counts(), '\n')

In [None]:
# Also let's clean up the gender columns of both the officer and the 
# subject. Entries outside of the gender binary are miniscule compared
# to Male and Female. 

df.info()

In [None]:
df = df[df.Officer_Gender != 'N']
df = df[df.Subject_Perceived_Gender != 'Unknown/GNC']
df.info()

In [None]:
df['Weapon_Type'].value_counts()

In [None]:
df = df.drop('Weapon_Type', axis=1)
df.info()

In [None]:
# Drop Weapon type, there are far more null values than there are weapons

In [None]:
df['Reported_Time'].value_counts()

In [None]:
# There are 13K different times, let's create a new column that is just 
# the hours of the stops. 

In [None]:
df['Reported_Time'] = pd.to_datetime(df['Reported_Time'])
df['Reported_Hour'] = df['Reported_Time'].apply(lambda x: x.hour)


In [None]:
df['Reported_Hour'].value_counts()

In [None]:
def time_of_day(x):
    if (x > 4) and (x <= 11):
        return 'Morning'
    elif (x > 12) and (x <= 19):
        return 'Afternoon'
    else:
        return "Night"
        

In [None]:
df['Time_of_Day'] = df['Reported_Hour'].apply(time_of_day)

In [None]:
df['Time_of_Day'].value_counts()

In [None]:
# Now that those are divided, let's divide them into AM and PM and 
# create a new column. 0 is AM, 1 is PM

In [None]:
df['Reported_AM_or_PM'] = df['Reported_Hour'].apply(lambda x: 0 if x <12 else 1)
df['Reported_AM_or_PM'] = df['Reported_AM_or_PM'].astype(int)


In [None]:
df['Reported_AM_or_PM'].value_counts()

In [None]:
df = df.drop(['Reported_Time'], axis=1)

In [None]:
df['Frisk_Flag'].value_counts()

In [None]:
df = df[df.Frisk_Flag != '-']
df['Frisk_Flag'] = df['Frisk_Flag'].apply(lambda x: "0" if x =='N' else '1')


In [None]:
df['Frisk_Flag'].value_counts()

In [None]:
# Let's explore dates

In [None]:
df['Reported_Date'].value_counts()

In [None]:
df['Reported_Date'] = pd.DatetimeIndex(df['Reported_Date']).month

In [None]:
df['Reported_Date'].value_counts()

In [None]:
df['Precinct'].value_counts()

In [None]:
df = df[df.Precinct != '-']
df = df[df.Precinct != 'Unknown']
df = df[df.Precinct != 'OOJ']
df = df[df.Precinct != 'FK ERROR']
df['Precinct'] = df['Precinct'].apply(lambda x: 'SouthWest' if x in ['Southwest'] else x)
df['Precinct'].value_counts()


In [None]:
df['Beat'].value_counts()

In [None]:
df['Sector'].value_counts()

In [None]:
df['Officer_Squad'].value_counts()

In [None]:
# Dropping 'Officer Squad', 'Beat' and 'Sector' for reasons similar to Officer ID"

In [None]:
df = df.drop(['Beat', 'Sector', 'Officer_Squad'], axis=1)

In [None]:
df.columns

# Step 2: Visualize the Data

In [None]:
# Begin visualization of cleaned data, starting with a visualizaiton of 
# our target variable, Stop Resolution.

In [None]:
arrests = df['Stop_Resolution']=='Yes' 
non_arrests = df['Stop_Resolution']=='No'
y = df['Stop_Resolution']
num_of_arrests = df[arrests].shape[0]
num_of_nonarrests = df[non_arrests].shape[0]

print('Target Variable: Stop Resolution')
print('Total Arrests: ', num_of_arrests)
print('Total of Non-arrests: ', num_of_nonarrests)

sns.set_style('darkgrid');
plt.figure(figsize = (10,5));
sns.countplot(df['Stop_Resolution'], alpha = .80, palette= ['blue', 'red'])
plt.title('Arrests & Non-Arrests');

plt.ylabel('Number of people pulled over');
plt.show()

In [None]:
# Create bar charts to compare different columns against the arrrest data

In [None]:
subject_age_group = df.groupby(['Subject_Age_Group', 'Stop_Resolution']).Subject_Age_Group.count().unstack()
subject_age_group['% of Stops Ending In Arrest'] = (subject_age_group['Yes'] / (subject_age_group.sum(axis=1)))*100 
print('Subject Age Group\n')
print(subject_age_group)

viz_1 = subject_age_group.plot(kind = 'bar', stacked = True,
                           title = "People Pulled Over By Age: Arrested v Not Arrested",
                           color = ['blue', 'red', 'white'], alpha = .70, rot=30)


In [None]:
subject_perceived_race = df.groupby(['Subject_Perceived_Race', 'Stop_Resolution']).Subject_Perceived_Race.count().unstack()
subject_perceived_race['% of Stops Ending In Arrest'] = (subject_perceived_race['Yes'] / (subject_perceived_race.sum(axis=1)))*100 
print('Subject Perceived Race\n')
print(subject_perceived_race)

viz_2 = subject_perceived_race.plot(kind = 'bar', stacked = True,
                                   title = 'People Pulled Over by Perceived Race:\n Arrested v Not Arrested',
                                   color = ['blue', 'red','white'], alpha = .80, rot=90)


In [None]:
# It appears American Inidan/Native Alaskan have the highest percentage
# of arrests made after a Terry stop with nearly 37%. While important to note
# there were only 669 total stops of 36363 total in the data set. 
# The next highest was Asian with 34.2% , with Black, white, and Native Hawaiian
# at around 30% each.

In [None]:
subject_perceived_gender = df.groupby(['Subject_Perceived_Gender', 'Stop_Resolution']).Subject_Perceived_Gender.count().unstack()
subject_perceived_gender['% of Stops Ending In Arrest'] = (subject_perceived_gender['Yes'] / (subject_perceived_gender.sum(axis=1)))*100 


print('Subject Perceived Gender\n')
print(subject_perceived_gender)

viz_3 = subject_perceived_gender.plot(kind = 'bar', stacked = True,
                                   title = 'People Pulled Over by Perceived Gender:\n Arrested v Not Arrested',
                                   color = ['blue', 'red','white'], alpha = .80, rot = 30)


In [None]:
# Male subjects were arrest 31% of the arrests while female subjects were 
# arrested 28.65%. While the male subjects were stopped nearly 4 times the rate
# of female, their arrest percentage was surprisingly close

In [None]:
officer_gender = df.groupby(['Officer_Gender', 'Stop_Resolution']).Officer_Gender.count().unstack()
officer_gender['% of Stops Ending In Arrest'] = (officer_gender['Yes'] / (officer_gender.sum(axis=1)))*100 

print('Officer Gender\n')
print(officer_gender)

viz_4 = officer_gender.plot(kind = 'bar', stacked = True,
                                   title = 'Officers by Gender:\n Arrested v Not Arrested',
                                   color = ['blue', 'red','white'], alpha = .80, rot=0)


In [None]:
# Male cops arrested individuals 31% of those pulled over, while male officers
# arrested at a rate of 28%. 

In [None]:
officer_race = df.groupby(['Officer_Race', 'Stop_Resolution']).Subject_Perceived_Race.count().unstack()
officer_race['% of Stops Ending In Arrest'] = (officer_race['Yes'] / (officer_race.sum(axis=1)))*100 

print('Officer Race\n')
print(officer_race)

viz_5 = officer_race.plot(kind = 'bar', stacked = True,
                                   title = 'Officers by Race:\n Arrested v Not Arrested',
                                   color = ['blue', 'red','white'], alpha = .80, rot=90)


In [None]:
# While white officers make up nearly 3 times the cops of all other races
# combined, they were actually in the middle of the pack in terms of 
# arrests made. Officers of two + races, Hispanic/Latino, and Asian officers
# were arresting at around ~25%.

In [None]:
am_or_pm = df.groupby(['Reported_AM_or_PM', 'Stop_Resolution']).Reported_AM_or_PM.count().unstack()
am_or_pm['% of Stops Ending In Arrest'] = (am_or_pm['Yes'] / (am_or_pm.sum(axis=1)))*100 

print('Reported AM or PM\n')
print(am_or_pm)

viz_6 = am_or_pm.plot(kind = 'bar', stacked = True,
                                   title = 'Time of Day (AM or PM):\n Arrested v Not Arrested',
                                   color = ['blue', 'red','white'], alpha = .80, rot=0)


In [None]:
# There are more Terry stops in the PM hours than AM, however the percentages
# are less than 0.3% apart. 

In [None]:
hour_of_day = df.groupby(['Reported_Hour', 'Stop_Resolution']).Reported_Hour.count().unstack()
hour_of_day['% of Stops Ending In Arrest'] = (hour_of_day['Yes'] / (hour_of_day.sum(axis=1)))*100 

print('Reported Hour\n')
print(hour_of_day)

viz_7 = hour_of_day.plot(kind = 'bar', stacked = True,
                                   title = 'Hour of Day:\n Arrested v Not Arrested',
                                   color = ['blue', 'red','white'], alpha = .80, rot=0)


In [None]:
# Looking at the data, arrests ebb and flow depending on time of day, 
# with peaks at midnight to 3am and 4pm to 7pm. However the highest
# percentage of arrests made at 11am and 12pm, over 1 percentage point
#away from the next two closest hours at 9am and 8pm. 

# Step 3: Model the Data

In [None]:
# Now that we've cleaned and visualized the data, now let's fit it into
# model. First starting with converting the target variable to binary,
# OneHotEncoding, then normalizing the data with StandardScalar


In [None]:
df['Stop_Resolution'] = df['Stop_Resolution'].apply(lambda x: 0 if x=='No' else 1)

df['Stop_Resolution'].value_counts()

In [None]:
one_hot_df = pd.get_dummies(df)

y = one_hot_df['Stop_Resolution']
one_hot_df.drop('Stop_Resolution', axis=1, inplace=True)

X_train, X_test, y_train, y_test = train_test_split(one_hot_df, y, test_size=0.2, random_state=42)

In [None]:
scaler = StandardScaler()

scaled_data_train = scaler.fit_transform(X_train)
scaled_data_test = scaler.transform(X_test)

scaled_df_train = pd.DataFrame(scaled_data_train, columns=one_hot_df.columns)

In [None]:
scaled_df_train.head()

In [None]:
# Now that the data is OneHotEncoded and normalized, let's fit a model, 
# starting with a Logistic Regression model.

In [None]:
logreg = LogisticRegression(fit_intercept=False, C=1e12, solver='liblinear')
logreg.fit(scaled_data_train, y_train)

In [None]:
y_hat_train = logreg.predict(scaled_data_train)
y_hat_test = logreg.predict(scaled_data_test)

In [None]:
print('Testing Accuracy: ', accuracy_score(y_test, y_hat_test))

In [None]:
# Looking at the initial model, both the training and the test, both had an
# accuracy of 57%. We can definitely make this better using more sophisticated
# modeling. Next let's look at a confusion matrix to see where we can
# see how the model is performing



In [None]:
plot_confusion_matrix(logreg, scaled_data_test, y_test,
                     cmap=plt.cm.Blues)
plt.grid(False)
plt.show()

In [None]:
# The model is correctly predicting stops ending in no arrest 4896 times
# while inccorectly predicting stops that were arrests as noa arrest 2163
# times. 

# Next let's check the Precision, Recall, Accuracy, and F1 scores. 

In [None]:
print('Testing Accuracy: ', accuracy_score(y_test, y_hat_test))
print('Testing Precision: ', precision_score(y_test, y_hat_test))
print('Testing Recall: ', recall_score(y_test, y_hat_test))
print('Testing F1-Score: ', f1_score(y_test, y_hat_test))


In [None]:
# The recall and F1-score ar absolutely abysmal. The accuracy is 
# slightly better than a coinflip which isn't much to write home about.
# This time, we'll fit a K Nearest-Neighbors model to see if the data
# works better in that context. 

In [None]:
clf = KNeighborsClassifier()
clf.fit(scaled_data_train, y_train)
y_hat_train = clf.predict(scaled_data_train)
y_hat_test = clf.predict(scaled_data_test)


In [None]:
print('Testing Accuracy: ', accuracy_score(y_test, y_hat_test))
print('Testing Precision: ', precision_score(y_test, y_hat_test))
print('Testing Recall: ', recall_score(y_test, y_hat_test))
print('Testing F1-Score: ', f1_score(y_test, y_hat_test))

In [None]:
plot_confusion_matrix(clf, scaled_data_test, y_test,
                     cmap=plt.cm.Blues)
plt.grid(False)
plt.show()

In [None]:
# The accuracy and precision went down
# slightly in comparison to the Logistic Regression output.
# A confusion matrix shows that it has only predicted '0', not
# a great sign.
# Since KNN has a default "neighbors" of 5, let's use GridSearchCV to 
# see if we can find a the best number of neighbors and run that
# model one more time. 

# Source of following code: Eijaz Allibhai (https://towardsdatascience.com/building-a-k-nearest-neighbors-k-nn-model-with-scikit-learn-51209555453a)

In [None]:
knn2 = KNeighborsClassifier()
param_grid = {'n_neighbors': np.arange(1,25)}
knn_gscv = GridSearchCV(knn2, param_grid, cv=5)
knn_gscv.fit(scaled_data_train, y_train)

In [None]:
knn_gscv.best_params_

In [None]:
# Let's rerun the KNN model to see exactly how this 24 is the best neighbor

In [None]:
clf = KNeighborsClassifier(n_neighbors=24)
clf.fit(scaled_data_train, y_train)
y_hat_test = clf.predict(scaled_data_test)

In [None]:
print('Testing Accuracy: ', accuracy_score(y_test, y_hat_test))
print('Testing Precision: ', precision_score(y_test, y_hat_test))
print('Testing Recall: ', recall_score(y_test, y_hat_test))
print('Testing F1-Score: ', f1_score(y_test, y_hat_test))

In [None]:
# Accuracy and precision are up but recall and are not great. 
# Let's move on to XGBoost

In [None]:
xgbc = XGBClassifier()
xgbc.fit(scaled_data_train, y_train)
y_hat_test = xgbc.predict(scaled_data_test)

In [None]:
print('Testing Accuracy: ', accuracy_score(y_test, y_hat_test))
print('Testing Precision: ', precision_score(y_test, y_hat_test))
print('Testing Recall: ', recall_score(y_test, y_hat_test))
print('Testing F1-Score: ', f1_score(y_test, y_hat_test))

In [None]:
# Accuracy and Precision are up, but everything else is worse. 
# Let's try tuning XGBoost using GridSearchCV
# to see if we get better results. 

In [None]:
param_grid = {
    'learning rate': [0.1, 0.2, 0.3, 0.4, 0.5],
    'max_depth': [6],
    'min_child_weight': [1, 2, 3, 4, 5],
    'subsample': [0.5, 0.7, 0.9],
    'n_estimators': [100],
}

In [None]:
grid_clf = GridSearchCV(clf, param_grid, scoring='accuracy', cv=None, n_jobs=1)
grid_clf.fit(scaled_data_train, y_train)

best_parameters = grid_clf.best_params_

In [None]:
print(best_parameters)

In [None]:
xgbc = XGBClassifier(n_estimators=100, max_depth=6, learning_rate=0.1,
                    min_child_weight=2, subsample=0.9)
xgbc.fit(scaled_data_train, y_train)
y_hat_test = xgbc.predict(scaled_data_test)

In [None]:
print('Testing Accuracy: ', accuracy_score(y_test, y_hat_test))
print('Testing Precision: ', precision_score(y_test, y_hat_test))
print('Testing Recall: ', recall_score(y_test, y_hat_test))
print('Testing F1-Score: ', f1_score(y_test, y_hat_test))

In [None]:
# This is the highest our accuracy has been but barely. Was the 30
# minutes of loading this GridSearch worth it? 
# Let's move to a decision tree and see if we can get better than 
# 69% accuracy. 

In [None]:
SEED=1
dtc = DecisionTreeClassifier(criterion='entropy', random_state=SEED)
dtc.fit(scaled_data_train, y_train)

In [None]:
y_hat_test = dtc.predict(scaled_data_test)

print('Testing Accuracy: ', accuracy_score(y_test, y_hat_test))
print('Testing Precision: ', precision_score(y_test, y_hat_test))
print('Testing Recall: ', recall_score(y_test, y_hat_test))
print('Testing F1-Score: ', f1_score(y_test, y_hat_test))

In [None]:
SEED=1
dtc = DecisionTreeClassifier(criterion='gini', random_state=SEED)
dtc.fit(scaled_data_train, y_train)

In [None]:
y_hat_test = dtc.predict(scaled_data_test)

print('Testing Accuracy: ', accuracy_score(y_test, y_hat_test))
print('Testing Precision: ', precision_score(y_test, y_hat_test))
print('Testing Recall: ', recall_score(y_test, y_hat_test))
print('Testing F1-Score: ', f1_score(y_test, y_hat_test))

In [None]:
# The difference between 'gini' and 'entropy' were basically non existent.
# Let's try another GridSearch to see instead of manually changing parameters.
# Hopefully we can find the best decision tree.


In [None]:
dtc_param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 2, 3, 4, 5],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 3, 4, 5]
}

dtc_grid = GridSearchCV(dtc, dt_param_grid, cv=3, return_train_score=True)

dtc_grid.fit(scaled_data_train, y_train)


In [None]:
dtc_grid.best_params_

In [None]:
dtc = DecisionTreeClassifier(criterion='gini', max_depth=2,
                             min_samples_leaf=1, min_samples_split=2,
                             random_state=SEED)
dtc.fit(scaled_data_train, y_train)

In [None]:
y_hat_test = dtc.predict(scaled_data_test)

print('Testing Accuracy: ', accuracy_score(y_test, y_hat_test))
print('Testing Precision: ', precision_score(y_test, y_hat_test))
print('Testing Recall: ', recall_score(y_test, y_hat_test))
print('Testing F1-Score: ', f1_score(y_test, y_hat_test))

In [None]:
# It seems as though we were unable to get enough data for testing
# metrics outside of Accuracy, probably deue to the low samples.

In [None]:
# After running 7 models, the highest accuracy achieved was from an 
# XGBoost model with the best parameters found through GridSearchCV, 
# and even at that it was only able to be 69% accurate. 