In [1]:
import numpy as np
import matplotlib.pyplot as plt


# 1. Precision Recall


### Overview: 

In a binary decision problem, a classifier labels examples as either positive or negative. The decision made by the classifier can be represented in a structure known as a confusion matrix or contingency table. The confusion matrix has four categories: 
- True positives (TP) are examples correctly labeled as positives. 
- False positives (FP) refer to negative examples incorrectly labeled as positive. 
- True negatives (TN) correspond to negatives correctly labeled as negative.
- False negatives (FN) refer to positive examples incorrectly labeled as negative.

$$
\text{Precision} = \frac{\text{True Positives}}{\text{True Positives} + \text{False Positives}}
$$

$$
\text{Recall} = \frac{\text{True Positives}}{\text{True Positives} + \text{False Negatives}}
$$

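### Intuitive example: 

Suppose a spam filter flags 10 emails as spam and 8 of them really are spam: its precision is $8/10 = 0.8$. If the inbox actually contained 16 spam emails, the filter caught only 8 of them, so its recall is $8/16 = 0.5$.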

### What do we need to implement for binary classification

- A $Y_T$ (GT) array of your true labels
- A $Y_P$ (pred) array of our probabilities
- $Th$ - a decision threshold that turns each probability into a predicted label (positive if the probability is at least $Th$, negative otherwise)
 
or 

- A $Y_T$ (GT) array of your true labels
- A $Y_P$ (pred) array of our predicted labels


### What does a high precision mean?

- It means that when our model labels a sample as positive, it is usually right: few negative samples are falsely given a positive label (few false positives).

### What does a high recall mean?

- It means our model is good at **not missing out** on positive samples: few actual positives are labeled as negative (few false negatives).



In [3]:
##########  Implementation  ###################

# Toy binary data set (0 = negative class, 1 = positive class). Each column is
# one sample, so the true and predicted label of a sample line up vertically.
y_true, y_pred = np.array([
    [1, 0, 1, 1, 0, 1, 0, 0, 1, 0],  # true labels
    [1, 0, 1, 0, 1, 1, 1, 0, 1, 0],  # predicted labels
])

def precision_recall_binary(y_true, y_pred):
    """Calculate precision and recall for binary classification.

    Args:
        y_true: Array-like of ground-truth labels (1 = positive, 0 = negative).
        y_pred: Array-like of predicted labels with the same shape as ``y_true``.

    Returns:
        Tuple ``(precision, recall)`` of floats. A metric whose denominator is
        zero (no predicted positives / no actual positives) is reported as 0.0.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` do not have the same shape.
    """
    # Coerce to arrays first: on a plain list, `y_pred == 1` is a single False,
    # which would silently produce precision = recall = 0.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {y_true.shape} and {y_pred.shape}"
        )

    actual_pos = y_true == 1
    predicted_pos = y_pred == 1
    tp = int(np.sum(predicted_pos & actual_pos))
    fp = int(np.sum(predicted_pos & (y_true == 0)))
    fn = int(np.sum((y_pred == 0) & actual_pos))

    # Precision = TP / (TP + FP)
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0

    # Recall = TP / (TP + FN)
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0

    return precision, recall

# Score the toy predictions and report both metrics to three decimals.
precision, recall = precision_recall_binary(y_true=y_true, y_pred=y_pred)

print(f"Precision: {precision:.3f}", f"Recall: {recall:.3f}", sep="\n")


Precision: 0.667
Recall: 0.800


# 2. Multi class Classification (Precision-Recall)

### Multiclass Classification

For multiclass classification, where there are more than two classes, precision and recall are computed per class (one-vs-rest) and then combined. Three common averaging schemes are:

#### Micro-Averaging:

- Treats all classes as a single combined class.
- Calculates precision and recall globally by counting the total true positives, false positives, and false negatives across all classes.

**Micro-averaged Precision**:

$$
\text{Micro Precision} = \frac{\sum \text{True Positives}}{\sum (\text{True Positives} + \text{False Positives})}
$$

**Micro-averaged Recall**:

$$
\text{Micro Recall} = \frac{\sum \text{True Positives}}{\sum (\text{True Positives} + \text{False Negatives})}
$$

#### Macro-Averaging:

- Calculates precision and recall for each class separately and then takes the average across all classes.

**Macro-averaged Precision**:

$$
\text{Macro Precision} = \frac{1}{N} \sum_{i=1}^{N} \text{Precision}_i
$$

**Macro-averaged Recall**:

$$
\text{Macro Recall} = \frac{1}{N} \sum_{i=1}^{N} \text{Recall}_i
$$

Where $N$ is the number of classes.

#### Weighted Averaging:

- Similar to macro-averaging, but weights each class's precision and recall by the number of true samples of that class (its support), so frequent classes contribute more to the average.


In [5]:


# Toy multiclass data set: 10 samples, 3 classes (0, 1, 2). Each column is one
# sample, so the true and predicted label of a sample line up vertically.
y_true, y_pred = np.array([
    [0, 1, 2, 1, 0, 2, 1, 0, 1, 2],  # true labels
    [0, 2, 2, 1, 0, 0, 1, 1, 1, 2],  # predicted labels
])

# Number of classes
num_classes = 3

# Create confusion matrix
def confusion_matrix_multiclass(y_true, y_pred, num_classes):
    """Build a confusion matrix for integer class labels.

    Entry ``[t, p]`` counts the samples whose true class is ``t`` and whose
    predicted class is ``p`` (rows = true class, columns = predicted class).

    Args:
        y_true: Array-like of true class indices in ``[0, num_classes)``.
        y_pred: Array-like of predicted class indices, same shape as ``y_true``.
        num_classes: Number of classes; the result is ``num_classes`` square.

    Returns:
        Integer ndarray of shape ``(num_classes, num_classes)``.

    Raises:
        ValueError: If the inputs differ in shape or contain negative labels.
        IndexError: If a label is ``>= num_classes`` or is not an integer.
    """
    true_labels = np.asarray(y_true)
    pred_labels = np.asarray(y_pred)
    # zip() would silently drop the unmatched tail, so reject mismatches early.
    if true_labels.shape != pred_labels.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {true_labels.shape} and {pred_labels.shape}"
        )

    matrix = np.zeros((num_classes, num_classes), dtype=int)
    if true_labels.size == 0:
        return matrix

    # Negative indices would wrap around and be counted for the last classes.
    if true_labels.min() < 0 or pred_labels.min() < 0:
        raise ValueError("class labels must be non-negative integers")

    # Unbuffered add: repeated (t, p) pairs each contribute one count.
    np.add.at(matrix, (true_labels, pred_labels), 1)
    return matrix

# Build the matrix for the toy data, passing every argument by name.
conf_matrix = confusion_matrix_multiclass(
    y_true=y_true,
    y_pred=y_pred,
    num_classes=num_classes,
)

print("Confusion Matrix:\n", conf_matrix)

# Precision and recall calculation for each class
def precision_recall_per_class(conf_matrix):
    """Compute precision and recall for every class of a confusion matrix.

    The matrix is read with rows as the true class and columns as the
    predicted class, so for class ``i`` the column sum is TP + FP and the row
    sum is TP + FN. The number of classes is taken from the matrix itself
    rather than from a global variable.

    Args:
        conf_matrix: Square 2-D array-like of counts.

    Returns:
        Tuple ``(precision, recall)`` of float ndarrays with one entry per
        class. An entry whose denominator is zero is reported as 0.

    Raises:
        ValueError: If ``conf_matrix`` is not a square 2-D array.
    """
    conf_matrix = np.asarray(conf_matrix)
    if conf_matrix.ndim != 2 or conf_matrix.shape[0] != conf_matrix.shape[1]:
        raise ValueError(
            f"conf_matrix must be a square 2-D array, got shape {conf_matrix.shape}"
        )

    tp = np.diag(conf_matrix).astype(float)
    predicted_totals = conf_matrix.sum(axis=0)  # TP + FP for each class
    actual_totals = conf_matrix.sum(axis=1)     # TP + FN for each class

    # `where` skips classes with a zero denominator; `out` leaves them at 0.
    precision = np.divide(
        tp, predicted_totals, out=np.zeros_like(tp), where=predicted_totals > 0
    )
    recall = np.divide(
        tp, actual_totals, out=np.zeros_like(tp), where=actual_totals > 0
    )

    return precision, recall

# Per-class scores: one precision and one recall value per class index.
precision, recall = precision_recall_per_class(conf_matrix=conf_matrix)
print("Per-class Precision:", precision)
print("Per-class Recall:", recall)

# Macro averaging: the unweighted mean of the per-class scores.
macro_precision = precision.mean()
macro_recall = recall.mean()

print(
    f"Macro Precision: {macro_precision:.3f}",
    f"Macro Recall: {macro_recall:.3f}",
    sep="\n",
)

# Micro Averaging: Total true positives, false positives, and false negatives across all classes
def micro_precision_recall(conf_matrix):
    """Return micro-averaged ``(precision, recall)`` for a confusion matrix.

    Counts are pooled over all classes before dividing: the diagonal holds
    the true positives, column sums minus the diagonal are the false
    positives, and row sums minus the diagonal are the false negatives.
    A metric whose pooled denominator is zero is returned as 0.
    """
    diagonal = np.diag(conf_matrix)

    # Pooled counts over every class.
    true_pos_total = np.sum(diagonal)
    false_pos_total = np.sum(np.sum(conf_matrix, axis=0) - diagonal)
    false_neg_total = np.sum(np.sum(conf_matrix, axis=1) - diagonal)

    predicted_total = true_pos_total + false_pos_total  # TP + FP
    actual_total = true_pos_total + false_neg_total     # TP + FN

    micro_precision = true_pos_total / predicted_total if predicted_total > 0 else 0
    micro_recall = true_pos_total / actual_total if actual_total > 0 else 0

    return micro_precision, micro_recall

# Micro averaging: pool the counts over all classes, then report both scores.
micro_precision, micro_recall = micro_precision_recall(conf_matrix=conf_matrix)

print(
    f"Micro Precision: {micro_precision:.3f}",
    f"Micro Recall: {micro_recall:.3f}",
    sep="\n",
)


Confusion Matrix:
 [[2 1 0]
 [0 3 1]
 [1 0 2]]
Per-class Precision: [0.66666667 0.75       0.66666667]
Per-class Recall: [0.66666667 0.75       0.66666667]
Macro Precision: 0.694
Macro Recall: 0.694
Micro Precision: 0.700
Micro Recall: 0.700
