In [1]:
from IPython.display import display, Markdown, Math

# Function to display metric formulas
def display_metric(name, description, formula):
    """Render one metric entry in the notebook output.

    Two rich-display objects are emitted, in this order: a Markdown line
    with the metric name in bold followed by its description, and the
    formula rendered through ``Math``.

    Args:
        name: Metric title; shown in bold before the description.
        description: Short explanation placed after the bold title.
        formula: LaTeX source handed to ``Math`` (no ``$`` delimiters are
            added here).
    """
    heading = Markdown(f"**{name}:** {description}")
    display(heading)
    # The formula object is built only after the heading has been shown,
    # so a failure while building it still leaves the heading visible.
    equation = Math(formula)
    display(equation)

# Regression Metrics
display(Markdown("## Regression Metrics"))

# One (name, description, LaTeX formula) triple per metric. The entries are
# rendered top to bottom, so the list order is the display order.
regression_metrics = [
    (
        "Mean Absolute Error (MAE)",
        "Measures the average magnitude of the errors in a set of predictions, without considering their direction.",
        r'MAE = \frac{1}{n} \sum_{i=1}^{n} |y_i - \hat{y}_i|',
    ),
    (
        "Mean Squared Error (MSE)",
        "Measures the average of the squares of the errors. It is more sensitive to outliers than MAE.",
        r'MSE = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2',
    ),
    (
        "Root Mean Squared Error (RMSE)",
        "The square root of the mean squared error. It represents the standard deviation of the residuals.",
        r'RMSE = \sqrt{MSE}',
    ),
    (
        "Max Error",
        "Measures the maximum residual error.",
        r'\text{Max Error} = \max(|y_i - \hat{y}_i|)',
    ),
    (
        "Coverage Error",
        "Measures the average number of labels that need to be included in the prediction so that all true labels are predicted.",
        r'\text{Coverage Error} = \frac{1}{n} \sum_{i=1}^{n} \min(k : y_i \in \text{Top-k}(\hat{y}_i))',
    ),
    (
        "Mean Absolute Percentage Error (MAPE)",
        "Measures the average of the absolute percentage errors.",
        r'MAPE = \frac{1}{n} \sum_{i=1}^{n} \left| \frac{y_i - \hat{y}_i}{y_i} \right| \times 100',
    ),
    (
        "Median Absolute Error",
        "Measures the median of the absolute errors.",
        r'\text{Median Absolute Error} = \text{median}(|y_i - \hat{y}_i|)',
    ),
    (
        "Mean Squared Logarithmic Error (MSLE)",
        "Measures the mean of the squared logarithmic errors.",
        r'MSLE = \frac{1}{n} \sum_{i=1}^{n} (\log(1 + y_i) - \log(1 + \hat{y}_i))^2',
    ),
    (
        "Root Mean Squared Logarithmic Error (RMSLE)",
        "The square root of the mean squared logarithmic error.",
        r'RMSLE = \sqrt{MSLE}',
    ),
]

for metric_name, metric_summary, metric_latex in regression_metrics:
    display_metric(metric_name, metric_summary, metric_latex)

# Classification Metrics
# NOTE(review): besides classification metrics, this section also lists
# regression scores (R2, D2), ranking scores (NDCG, DCG, LRAP) and clustering
# scores (Rand, Silhouette, ...). The heading text and every name/description
# string are kept unchanged so the displayed text stays the same; only
# incorrect formulas are fixed below.
display(Markdown("## Classification Metrics"))

# One (name, description, LaTeX formula) triple per metric. The entries are
# rendered top to bottom, so the list order is the display order.
classification_metrics = [
    (
        "Accuracy Score",
        "Measures the ratio of correctly predicted instances to the total instances.",
        r'Accuracy = \frac{1}{n_{samples}} \sum_{i=0}^{n_{samples}-1} 1(\hat{y}_i = y_i)',
    ),
    (
        "Precision Score",
        "Measures the ratio of correctly predicted positive observations to the total predicted positives.",
        r'Precision = \frac{TP}{TP + FP}',
    ),
    (
        "Recall Score",
        "Measures the ratio of correctly predicted positive observations to all observations in the actual class.",
        r'Recall = \frac{TP}{TP + FN}',
    ),
    (
        "F1 Score",
        "The weighted average of Precision and Recall.",
        r'F1 = 2 \times \frac{Precision \times Recall}{Precision + Recall}',
    ),
    (
        "ROC AUC Score",
        "Measures the area under the ROC curve.",
        r'\text{ROC AUC} = \int_{0}^{1} \text{ROC}(t) \, dt',
    ),
    (
        "R2 Score",
        "Measures the proportion of the variance in the dependent variable that is predictable from the independent variables.",
        r'R^2 = 1 - \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{\sum_{i=1}^{n} (y_i - \bar{y})^2}',
    ),
    (
        "D2 Absolute Error Score",
        "Measures the degree of deviation from the absolute error model.",
        # Fixed: the zero-score baseline for absolute error is the constant
        # median prediction, not the mean.
        r'D^2 = 1 - \frac{\sum_{i=1}^{n} |y_i - \hat{y}_i|}{\sum_{i=1}^{n} |y_i - \text{median}(y)|}',
    ),
    (
        "NDCG Score",
        "Measures the quality of a ranking.",
        r'NDCG = \frac{DCG}{IDCG}',
    ),
    (
        "Rand Score",
        "Measures the similarity between two data clusterings.",
        r'Rand = \frac{TP + TN}{TP + FP + FN + TN}',
    ),
    (
        "DCG Score",
        "Measures the ranking quality.",
        r'DCG = \sum_{i=1}^{n} \frac{2^{rel_i} - 1}{\log_2(i + 1)}',
    ),
    (
        "Fbeta Score",
        "The weighted average of Precision and Recall with a weighting factor beta.",
        r'F_\beta = (1 + \beta^2) \times \frac{Precision \times Recall}{\beta^2 \times Precision + Recall}',
    ),
    (
        "Adjusted Rand Score",
        "Measures the similarity between two data clusterings with adjustment for chance.",
        r'Adjusted \, Rand = \frac{RI - Expected \, RI}{Max \, RI - Expected \, RI}',
    ),
    (
        "Silhouette Score",
        "Measures how similar an object is to its own cluster compared to other clusters.",
        r'Silhouette = \frac{b - a}{\max(a, b)}',
    ),
    (
        "Completeness Score",
        "Measures if all the data points that are members of a given class are elements of the same cluster.",
        # Fixed: completeness conditions the cluster entropy on the classes
        # (C = classes, K = clusters). The previous formula was the
        # homogeneity one.
        r'Completeness = 1 - \frac{H(K|C)}{H(K)}',
    ),
    (
        "Homogeneity Score",
        "Measures if each cluster contains only members of a single class.",
        # Fixed: homogeneity conditions the class entropy on the clusters.
        # The previous formula was the completeness one.
        r'Homogeneity = 1 - \frac{H(C|K)}{H(C)}',
    ),
    (
        "Jaccard Score",
        "Measures similarity between sample sets.",
        r'Jaccard = \frac{|A \cap B|}{|A \cup B|}',
    ),
    (
        "Consensus Score",
        "Measures the consensus of clustering results.",
        # Fixed: the bicluster consensus score sums the Jaccard similarity
        # J(a, b) over the best one-to-one matching M between the two sets
        # of biclusters A and B, divided by the size of the larger set.
        r'Consensus = \frac{1}{\max(|A|, |B|)} \sum_{(a, b) \in M} J(a, b)',
    ),
    (
        "V-measure Score",
        "Measures the harmonic mean between Homogeneity and Completeness.",
        r'V = 2 \times \frac{Homogeneity \times Completeness}{Homogeneity + Completeness}',
    ),
    (
        "Brier Score Loss",
        "Measures the mean squared difference between predicted probability and the actual outcome.",
        r'Brier = \frac{1}{n} \sum_{i=1}^{n} (\hat{p}_i - y_i)^2',
    ),
    (
        "D2 Tweedie Score",
        "Measures the proportion of Tweedie deviance explained.",
        r'D^2 = 1 - \frac{Deviance(y, \hat{y})}{Deviance(y, \bar{y})}',
    ),
    (
        "Cohen Kappa Score",
        "Measures the agreement between two raters.",
        r'\kappa = \frac{p_o - p_e}{1 - p_e}',
    ),
    (
        "D2 Pinball Score",
        "Measures the proportion of pinball loss explained.",
        # Fixed: the zero-score baseline for pinball loss is the constant
        # empirical alpha-quantile of y, written q_alpha(y), not the mean.
        r'D^2 = 1 - \frac{PinballLoss(y, \hat{y})}{PinballLoss(y, q_\alpha(y))}',
    ),
    (
        "Mutual Info Score",
        "Measures the mutual information between two labels.",
        r'I(X; Y) = \sum_{y \in Y} \sum_{x \in X} p(x,y) \log \frac{p(x,y)}{p(x)p(y)}',
    ),
    (
        "Adjusted Mutual Info Score",
        "Measures the mutual information between two labels with adjustment for chance.",
        r'AMI = \frac{MI - E[MI]}{\max(H(U), H(V)) - E[MI]}',
    ),
    (
        "Average Precision Score",
        "Measures the area under the precision-recall curve.",
        r'AP = \sum_n (R_n - R_{n-1}) P_n',
    ),
    (
        "Label Ranking Average Precision Score",
        "Measures the average precision score for label ranking.",
        # Fixed: for each true label j of sample i, take the fraction of
        # labels ranked at or above j that are themselves true labels,
        # average over the true labels, then average over the samples.
        r'LRAP = \frac{1}{n} \sum_{i=1}^{n} \frac{1}{\|y_i\|_0} \sum_{j : y_{ij} = 1} \frac{|\mathcal{L}_{ij}|}{\text{rank}_{ij}}, \quad \mathcal{L}_{ij} = \{k : y_{ik} = 1, \hat{f}_{ik} \geq \hat{f}_{ij}\}, \; \text{rank}_{ij} = |\{k : \hat{f}_{ik} \geq \hat{f}_{ij}\}|',
    ),
    (
        "Balanced Accuracy Score",
        "Measures the balanced accuracy.",
        r'Balanced \, Accuracy = \frac{Sensitivity + Specificity}{2}',
    ),
    (
        "Top K Accuracy Score",
        "Measures the accuracy considering the top k predictions.",
        r'Top \, K \, Accuracy = \frac{1}{n} \sum_{i=1}^{n} 1(y_i \in \text{Top-k}(\hat{y}_i))',
    ),
    (
        "Calinski Harabasz Score",
        "Measures the ratio of the sum of between-cluster dispersion and within-cluster dispersion.",
        r'CH = \frac{Tr(B_k)}{Tr(W_k)} \times \frac{n - k}{k - 1}',
    ),
]

for metric_name, metric_summary, metric_latex in classification_metrics:
    display_metric(metric_name, metric_summary, metric_latex)


## Regression Metrics

**Mean Absolute Error (MAE):** Measures the average magnitude of the errors in a set of predictions, without considering their direction.

<IPython.core.display.Math object>

**Mean Squared Error (MSE):** Measures the average of the squares of the errors. It is more sensitive to outliers than MAE.

<IPython.core.display.Math object>

**Root Mean Squared Error (RMSE):** The square root of the mean squared error. It represents the standard deviation of the residuals.

<IPython.core.display.Math object>

**Max Error:** Measures the maximum residual error.

<IPython.core.display.Math object>

**Coverage Error:** Measures the average number of labels that need to be included in the prediction so that all true labels are predicted.

<IPython.core.display.Math object>

**Mean Absolute Percentage Error (MAPE):** Measures the average of the absolute percentage errors.

<IPython.core.display.Math object>

**Median Absolute Error:** Measures the median of the absolute errors.

<IPython.core.display.Math object>

**Mean Squared Logarithmic Error (MSLE):** Measures the mean of the squared logarithmic errors.

<IPython.core.display.Math object>

**Root Mean Squared Logarithmic Error (RMSLE):** The square root of the mean squared logarithmic error.

<IPython.core.display.Math object>

## Classification Metrics

**Accuracy Score:** Measures the ratio of correctly predicted instances to the total instances.

<IPython.core.display.Math object>

**Precision Score:** Measures the ratio of correctly predicted positive observations to the total predicted positives.

<IPython.core.display.Math object>

**Recall Score:** Measures the ratio of correctly predicted positive observations to all observations in the actual class.

<IPython.core.display.Math object>

**F1 Score:** The weighted average of Precision and Recall.

<IPython.core.display.Math object>

**ROC AUC Score:** Measures the area under the ROC curve.

<IPython.core.display.Math object>

**R2 Score:** Measures the proportion of the variance in the dependent variable that is predictable from the independent variables.

<IPython.core.display.Math object>

**D2 Absolute Error Score:** Measures the degree of deviation from the absolute error model.

<IPython.core.display.Math object>

**NDCG Score:** Measures the quality of a ranking.

<IPython.core.display.Math object>

**Rand Score:** Measures the similarity between two data clusterings.

<IPython.core.display.Math object>

**DCG Score:** Measures the ranking quality.

<IPython.core.display.Math object>

**Fbeta Score:** The weighted average of Precision and Recall with a weighting factor beta.

<IPython.core.display.Math object>

**Adjusted Rand Score:** Measures the similarity between two data clusterings with adjustment for chance.

<IPython.core.display.Math object>

**Silhouette Score:** Measures how similar an object is to its own cluster compared to other clusters.

<IPython.core.display.Math object>

**Completeness Score:** Measures if all the data points that are members of a given class are elements of the same cluster.

<IPython.core.display.Math object>

**Homogeneity Score:** Measures if each cluster contains only members of a single class.

<IPython.core.display.Math object>

**Jaccard Score:** Measures similarity between sample sets.

<IPython.core.display.Math object>

**Consensus Score:** Measures the consensus of clustering results.

<IPython.core.display.Math object>

**V-measure Score:** Measures the harmonic mean between Homogeneity and Completeness.

<IPython.core.display.Math object>

**Brier Score Loss:** Measures the mean squared difference between predicted probability and the actual outcome.

<IPython.core.display.Math object>

**D2 Tweedie Score:** Measures the proportion of Tweedie deviance explained.

<IPython.core.display.Math object>

**Cohen Kappa Score:** Measures the agreement between two raters.

<IPython.core.display.Math object>

**D2 Pinball Score:** Measures the proportion of pinball loss explained.

<IPython.core.display.Math object>

**Mutual Info Score:** Measures the mutual information between two labels.

<IPython.core.display.Math object>

**Adjusted Mutual Info Score:** Measures the mutual information between two labels with adjustment for chance.

<IPython.core.display.Math object>

**Average Precision Score:** Measures the area under the precision-recall curve.

<IPython.core.display.Math object>

**Label Ranking Average Precision Score:** Measures the average precision score for label ranking.

<IPython.core.display.Math object>

**Balanced Accuracy Score:** Measures the balanced accuracy.

<IPython.core.display.Math object>

**Top K Accuracy Score:** Measures the accuracy considering the top k predictions.

<IPython.core.display.Math object>

**Calinski Harabasz Score:** Measures the ratio of the sum of between-cluster dispersion and within-cluster dispersion.

<IPython.core.display.Math object>