In [12]:
import numpy as np
import math
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score,recall_score,confusion_matrix

# <font color=grey> For Supervised Learning models</font>

### <font color=yellow> For Regression </font>

1. Mean square error:-

# $ MSE = \frac{1}{N} \sum \limits _{i=1} ^N (Y_{i} - \hat{Y}_{i})^2$
#### $Y_{i}$ = ground_truth
#### $\hat{Y}_{i}$ = predicted_values

In [13]:
# Toy regression example: observed targets and the model's predictions for them.
# Both arrays are reused by the MAE and R-squared cells further down.
ground_truth = np.array([2, 4, 5, 4, 5])
predicted_values = np.array([2.8, 3.4, 4, 4.6, 5.2])

# Mean of the squared differences between targets and predictions.
MSE = mean_squared_error(y_true=ground_truth, y_pred=predicted_values)
MSE

0.47999999999999987

observation: The lower the MSE, the better the model performance

***

2. Root Mean Square Error:-
# RMSE = $\sqrt{MSE}$

In [15]:
# RMSE is the square root of the MSE computed in the MSE cell, which puts the
# error back on the same scale as the target values.
RMSE = math.sqrt(MSE)
RMSE

0.6928203230275508

observation: The lower the RMSE, the better the model performance

***

3. Mean Absolute Error:-

# MAE = $ \frac{1}{N} \sum \limits _{i=1} ^N |y_{i} - \hat{y}_{i}| $

In [16]:

# Mean of the absolute differences between the targets and the predictions
# defined in the MSE cell.
MAE = mean_absolute_error(y_true=ground_truth, y_pred=predicted_values)
MAE

0.6399999999999999

observation: The lower the MAE, the better the model performance

4. R-squared
# $R^2 = 1- \frac{\text{sum of squared residuals (SSR)}}{\text{total sum of squares (SST)}} $
# $ R^2 = 1 - \frac{\sum (y_i - \hat{y}_i)^2}{\sum (y_i - \bar{y})^2}$

In [18]:
# Coefficient of determination for the same targets/predictions used above.
r2 = r2_score(y_true=ground_truth, y_pred=predicted_values)
# Display as a percentage.
r2 * 100

60.00000000000001

observation: A higher R-squared indicates a better model fit

### <font color=yellow> For Classification </font>

1. Accuracy:-

# $ Accuracy = \frac{TP + TN}{TP + TN + FP + FN}$

In [32]:
# Four-class toy example: two of the four predictions match the true label.
y_true = [0, 1, 2, 3]
y_pred = [0, 2, 1, 3]

# scikit-learn metrics take the true labels first, then the predictions.
# Accuracy happens to be symmetric, but keeping the documented order avoids
# copy-paste bugs with metrics that are not (precision, recall, confusion matrix).
acc = accuracy_score(y_true, y_pred)
acc

0.5

2. Precision & Recall :-
# $ Precision = \frac{TP}{TP + FP}$

# $ Recall = \frac{TP}{TP +FN}$

In [8]:
# Binary toy example: TP = 2, FP = 1 (third sample), FN = 0, TN = 2.
y_true = [0, 1, 0, 0, 1]
y_pred = [0, 1, 1, 0, 1]

# True labels go first, predictions second. With the arguments swapped the call
# silently computes recall instead of precision.
# Precision = TP / (TP + FP) = 2 / 3.
P = precision_score(y_true, y_pred)
P

0.6666666666666666

In [38]:
# True labels go first, predictions second. With the arguments swapped the call
# silently computes precision instead of recall.
# For y_true = [0, 1, 0, 0, 1] and y_pred = [0, 1, 1, 0, 1] no positive sample
# is missed, so Recall = TP / (TP + FN) = 2 / 2.
R = recall_score(y_true, y_pred)
R

1.0

3. F1 score:-

# $ F1 Score = \frac{2* P*R}{P+R} $

4. Confusion Matrix

In [40]:
# Rows are the true classes and columns the predicted classes, so the true
# labels must be passed first; swapping the arguments transposes the matrix
# (i.e. exchanges false positives and false negatives).
cm = confusion_matrix(y_true, y_pred)
cm

array([[2, 1],
       [0, 2]])

# <font color=grey> For Unsupervised Learning models</font>

1. Silhouette score:-

# $ s_i = \frac{b_i - a_i}{\max(a_i , b_i)}, \quad SS = \frac{1}{N} \sum \limits _{i=1} ^N s_i$

In [10]:
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.metrics import calinski_harabasz_score
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import f1_score

In [5]:
# Synthetic data: 500 points drawn around 4 blob centres (seeded for reproducibility).
X, y = make_blobs(n_samples=500, centers=4, random_state=42)

# Fit a 2-cluster KMeans; the fitted estimator is reused by the next cell
# through kmeans.labels_.
kmeans = KMeans(n_clusters=2, random_state=42)
cluster_labels = kmeans.fit_predict(X)

# Silhouette score of the resulting cluster assignment.
ss = silhouette_score(X, cluster_labels)
ss

0.5955207405315346

2. Calinski Harabasz score:-

### $ BGSS(inter cluster) = \sum \limits_{k=1} ^K n_k * ||C_k - C||^2 $ 
### $ WGSS_k(intra cluster) = \sum \limits_{i=1} ^{n_k} ||X_{ik} - C_k||^2$ 
### $ WGSS = \sum \limits_{k=1} ^K WGSS_k$ 

# $ CH = \frac{\frac{BGSS}{K-1}}{\frac{WGSS}{N-K}} = \frac{BGSS}{WGSS}* \frac{N-K}{K-1}$ 

In [6]:
# Calinski-Harabasz score for the cluster labels stored on the KMeans model
# fitted in the silhouette cell.
ch = calinski_harabasz_score(X, labels=kmeans.labels_)
ch

574.8890256995833

observation: The higher the calinski_harabasz_score, the better the result

3. Adjusted Rand Index:-
#  $ARI = \frac{\text{Index} - \text{Expected Index}}{\max(\text{Index}) - \text{Expected Index}}$

In [9]:
# Compares two labelings of the same samples; here the binary y_true / y_pred
# lists from the precision cell are reused. The documented argument order is
# (labels_true, labels_pred); the score is symmetric, so the value is the same
# either way, but the conventional order keeps the cell consistent with the
# other metric calls.
ARI = adjusted_rand_score(y_true, y_pred)
ARI

0.16666666666666666

observation: ARI ranges from -0.5 to 1. The higher the ARI value, the closer the two clusterings are to each other; 1 indicates perfect agreement and values near 0 indicate random labelling.

4. F1 score:-

# $ F1 Score = \frac{2* P*R}{P+R} $

In [11]:
# F1 is the harmonic mean of precision and recall. The original cell had an
# unterminated string for `average`, which is a syntax error; 'binary' (the
# library default) scores the positive class of the binary y_true / y_pred
# lists defined in the precision cell. True labels go first, predictions second.
f1 = f1_score(y_true, y_pred, average='binary')
f1

0.8

observation: A low F1 score implies that the model has trouble striking a balance between precision and recall.