In [22]:
import pandas as pd
from sklearn.metrics import confusion_matrix


In [49]:
# Given the following confusion matrix, evaluate (by hand) the model's performance.
#|               | pred dog   | pred cat   |
#|:------------  |-----------:|-----------:|
#| actual dog    |         46 |         7  |
#| actual cat    |         13 |         34 |

# In the context of this problem, what is a false positive?
# A false positive is a prediction that is incorrect.
# In this case, a false positive is a prediction of a dog when the actual label is a cat.

# In the context of this problem, what is a false negative?
# A false negative is a prediction that is incorrect.
# In this case, a false negative is a prediction of a cat when the actual label is a dog.

# How would you describe this model?
# Would be best utilized as recall.

TP = 46
FP = 7
FN = 13
TN = 34

accuracy = (TP + TN) / (TP + FP + FN + TN)
precision = TP / (TP + FP)
recall = TP / (TP + FN)

# print the results
print("Accuracy:", round(accuracy*100, 2),"%")
print("Precision:", round(precision*100, 2),"%")
print("Recall:", round(recall*100, 2),"%")


Accuracy: 80.0 %
Precision: 86.79 %
Recall: 77.97 %


You are working as a datascientist working for Codeup Cody Creator (C3 for short), a rubber-duck manufacturing plant.

Unfortunately, some of the rubber ducks that are produced will have defects. Your team has built several models that try to predict those defects, and the data from their predictions can be found here.

Use the predictions dataset and pandas to help answer the following questions:

An internal team wants to investigate the cause of the manufacturing defects. 

They tell you that they want to identify as many of the ducks that have a defect as possible. 

Which evaluation metric would be appropriate here? 

Which model would be the best fit for this use case?


Recently several stories in the local news have come out highlighting customers who received a rubber duck with a defect, and portraying C3 in a bad light. 

The PR team has decided to launch a program that gives customers with a defective duck a vacation to Hawaii. 

They need you to predict which ducks will have defects, but tell you the really don't want to accidentally give out a vacation package when the duck really doesn't have a defect. 

Which evaluation metric would be appropriate here? Which model would be the best fit for this use case?

In [50]:
# aquire and observe the data
df = pd.read_csv('c3.csv')
print(df.head())
print('------------------------------------------')
df.info()
print('------------------------------------------')
df.actual.value_counts()

      actual     model1  model2     model3
0  No Defect  No Defect  Defect  No Defect
1  No Defect  No Defect  Defect     Defect
2  No Defect  No Defect  Defect  No Defect
3  No Defect     Defect  Defect     Defect
4  No Defect  No Defect  Defect  No Defect
------------------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   actual  200 non-null    object
 1   model1  200 non-null    object
 2   model2  200 non-null    object
 3   model3  200 non-null    object
dtypes: object(4)
memory usage: 6.4+ KB
------------------------------------------


No Defect    184
Defect        16
Name: actual, dtype: int64

In [37]:
# An internal team wants to investigate the cause of the manufacturing defects. 
# They tell you that they want to identify as many of the ducks that have a defect as possible. 

In [51]:
# create a baseline_prediction of 'Defect'
df['baseline_prediction'] = 'Defect'
df


Unnamed: 0,actual,model1,model2,model3,baseline_prediction
0,No Defect,No Defect,Defect,No Defect,Defect
1,No Defect,No Defect,Defect,Defect,Defect
2,No Defect,No Defect,Defect,No Defect,Defect
3,No Defect,Defect,Defect,Defect,Defect
4,No Defect,No Defect,Defect,No Defect,Defect
...,...,...,...,...,...
195,No Defect,No Defect,Defect,Defect,Defect
196,Defect,Defect,No Defect,No Defect,Defect
197,No Defect,No Defect,No Defect,No Defect,Defect
198,No Defect,No Defect,Defect,Defect,Defect


In [52]:
# Find the accuracy of all the models.
model1_acc = (df.actual == df.model1).mean()
model2_acc = (df.actual == df.model2).mean()
model3_acc = (df.actual == df.model3).mean()
baseline_acc = (df.actual == df.baseline_prediction).mean()

# print the results
print(f'Model 1 Accuracy: {model1_acc:.2%}')
print(f'Model 2 Accuracy: {model2_acc:.2%}')
print(f'Model 3 Accuracy: {model3_acc:.2%}')
print(f'Baseline Accuracy: {baseline_acc:.2%}')

Model 1 Accuracy: 95.00%
Model 2 Accuracy: 56.00%
Model 3 Accuracy: 55.50%
Baseline Accuracy: 8.00%


In [53]:
# Create a subset where we assign 'Defect' as the 'positive' because that is what is being asked from us.
df_defect = df[df.actual == 'Defect']
df_defect.head()

Unnamed: 0,actual,model1,model2,model3,baseline_prediction
13,Defect,No Defect,Defect,Defect,Defect
30,Defect,Defect,No Defect,Defect,Defect
65,Defect,Defect,Defect,Defect,Defect
70,Defect,Defect,Defect,Defect,Defect
74,Defect,No Defect,No Defect,Defect,Defect


In [48]:
# Find the recall of all the models.
model1_recall = (df_defect.model1 == df_defect.actual).mean()
model2_recall = (df_defect.model2 == df_defect.actual).mean()
model3_recall = (df_defect.model3 == df_defect.actual).mean()
baseline_recall = (df_defect.baseline_prediction == df_defect.actual).mean()

#positive 

# print the results
print(f'Model 1 Recall: {model1_recall:.2%}')
print(f'Model 2 Recall: {model2_recall:.2%}')
print(f'Model 3 Recall: {model3_recall:.2%}')
print(f'Baseline Recall: {baseline_recall:.2%}')

Model 1 Recall: 98.91%
Model 2 Recall: 55.98%
Model 3 Recall: 53.26%
Baseline Recall: 0.00%


In [61]:
# create a precision subset data
model1_subset = df[df.model1 == 'Defect']
model2_subset = df[df.model2 == 'Defect']
model3_subset = df[df.model3 == 'Defect']
# Create a precision of all the models.
model1_precision = (model1_subset.model1 == model1_subset.actual).mean()
model2_precision = (model2_subset.model2 == model2_subset.actual).mean()
model3_precision = (model3_subset.model3 == model3_subset.actual).mean()


0.8

In [65]:
model1_subset = df[df.baseline_prediction == 'Defect']
model2_subset = df[df.baseline_prediction == 'Defect']
model3_subset = df[df.baseline_prediction == 'Defect']

baseline_percision1 = (model1_subset.baseline_prediction == model1_subset.actual).mean()
baseline_percision2 = (model2_subset.baseline_prediction == model2_subset.actual).mean()
baseline_percision3 = (model3_subset.baseline_prediction == model3_subset.actual).mean()

# print the results
print(f'Model 1 Precision: {model1_precision:.2%}')
print(f'Model 2 Precision: {model2_precision:.2%}')
print(f'Model 3 Precision: {model3_precision:.2%}')
print(f'Baseline Precision: {baseline_percision1:.2%}')



Model 1 Precision: 80.00%
Model 2 Precision: 10.00%
Model 3 Precision: 13.13%
Baseline Precision: 8.00%
