# Precision and Recall

Let's get to know these two metrics better. 


$$\text{Precision} = \frac{\text{True Positives}}{\text{True Positives + False Positives}}$$

$$\text{Recall} = \frac{\text{True Positives}}{\text{True Positives + False Negatives}}$$


In [1]:
import numpy as np
import pandas as pd
import itertools
from itertools import product
from sklearn.metrics import precision_score, recall_score, f1_score
import matplotlib.pyplot as plt

positives = np.ones(2)
negatives = np.zeros(3)
data = np.concatenate([positives, negatives])

In [2]:
plt.style.use('notes-base.mplstyle')

In [3]:
metrics = pd.DataFrame()
for i in product([1,0], repeat=5):
    i = np.array(i)
    
    idx = len(metrics)
    
    tp = np.where((data==i) & (data==1), 1, 0).sum()
    tn = np.where((data==i) & (data==0), 1, 0).sum()
    fp = np.where((data>i), 1, 0).sum()
    fn = np.where((data<i), 1, 0).sum()
    
    preds = ''.join([str(j) for j in i])
    preds = preds[0:2] + ' ' + preds[2:]
    metrics.loc[idx, 'predictions'] = preds
    
    metrics.loc[idx, 'fp'] = fp.sum()
    metrics.loc[idx, 'fn'] = fn.sum()
    
    metrics.loc[idx, 'tp'] = tp.sum()
    metrics.loc[idx, 'tn'] = tn.sum()


    metrics.loc[idx, 'precision'] = precision_score(data, i, zero_division=np.nan)
    metrics.loc[idx, 'recall'] = recall_score(data, i, zero_division=np.nan)
    metrics.loc[idx, 'f1'] = f1_score(data, i, zero_division=np.nan)


In [4]:
goods = ['tp', 'tn', 'precision', 'recall', 'f1']
bads = ['fp', 'fn']
numerics = goods + bads
s = metrics.sort_values("f1", ascending=False).reset_index(drop=True).style\
        .set_caption("Possible Classifications")\
        .format("{:.2f}", subset=numerics)\
        .background_gradient(cmap='Greens', subset=goods, axis=None)\
        .background_gradient(cmap='Oranges', subset=bads,axis=None)\
        .highlight_null(color='lightgray', subset=numerics)

html = s.to_html()
s

Unnamed: 0,predictions,fp,fn,tp,tn,precision,recall,f1
0,11 000,0.0,0.0,2.0,3.0,1.0,1.0,1.0
1,11 100,0.0,1.0,2.0,2.0,0.67,1.0,0.8
2,11 010,0.0,1.0,2.0,2.0,0.67,1.0,0.8
3,11 001,0.0,1.0,2.0,2.0,0.67,1.0,0.8
4,01 000,1.0,0.0,1.0,3.0,1.0,0.5,0.67
5,11 110,0.0,2.0,2.0,1.0,0.5,1.0,0.67
6,11 101,0.0,2.0,2.0,1.0,0.5,1.0,0.67
7,11 011,0.0,2.0,2.0,1.0,0.5,1.0,0.67
8,10 000,1.0,0.0,1.0,3.0,1.0,0.5,0.67
9,11 111,0.0,3.0,2.0,0.0,0.4,1.0,0.57


In [None]:
fig, ax = plt.subplots(figsize=(3.5, 3.0))  # Make figure narrower
ax.set_aspect(1)

x = np.linspace(0, 1, 1000)
y = np.linspace(0, 1, 1000)
for f1_val in metrics['f1'].unique():
    # get f1 surface
    # f1 = 2*x*y/x+y
    # ...
    # y = - f1*x/(f1-2x)
    
    small = metrics[metrics.f1==f1_val]
    ax.scatter(small.precision, small.recall, s=15*len(small), clip_on=False)
    
    if f1_val > 0:
        y = -(f1_val*x)/(f1_val-2*x)
        x2 = x[x>0.5*f1_val]
        y2 = y[x>0.5*f1_val]
        ax.plot(x2,y2)

# point for precision = recall = 0
ax.scatter([0], [0], s=15, clip_on=False)
        
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)

ax.set_ylabel("Recall")
ax.set_xlabel("Precision")
ax.set_title("Precision, Recall, and F1", pad=12)

plt.tight_layout()

# Save both versions
plt.savefig("../book/assets/scatter_precision_recall_f1.svg", transparent=True)  # For book
plt.savefig("../tex/images/scatter_precision_recall_f1.pdf", transparent=True)   # For LaTeX

In [None]:
# Apply styling with better formatting for Jupyter Book
styled_df = metrics.sort_values("f1", ascending=False).reset_index(drop=True).style\
    .set_caption("Possible Classifications")\
    .format("{:.2f}", subset=numerics)\
    .background_gradient(cmap='Greens', subset=goods, axis=None)\
    .background_gradient(cmap='Oranges', subset=bads, axis=None)\
    .highlight_null(color='lightgray', subset=numerics)\
    .set_table_styles([
        # Table styling
        {'selector': '',
         'props': [('border-collapse', 'separate'),
                   ('border-spacing', '2px'),
                   ('margin', '25px auto'),
                   ('font-family', 'Inter, -apple-system, BlinkMacSystemFont, sans-serif'),
                   ('font-size', '14px')]},
        # Header styling
        {'selector': 'th',
         'props': [('background-color', '#f0f0f0'),
                   ('color', '#000000'),
                   ('padding', '10px 12px'),
                   ('border', '1px solid #ccc'),
                   ('text-align', 'center'),
                   ('font-weight', '600')]},
        # Cell styling
        {'selector': 'td',
         'props': [('padding', '8px 10px'),
                   ('border', '1px solid #ccc'),
                   ('text-align', 'center')]},
        # Caption styling
        {'selector': 'caption',
         'props': [('font-size', '1.5rem'),
                   ('font-weight', '600'),
                   ('margin-bottom', '1rem'),
                   ('font-family', 'Inter, -apple-system, BlinkMacSystemFont, sans-serif')]}
    ])\
    .set_properties(**{'text-align': 'center'})\
    .hide(axis='index')  # Optional: hide the index column for cleaner look

# get html
hh = styled_df.to_html()

In [None]:
#styled_df
#print(hh)