# Machine Learning  Evaluation Metrics

In [5]:
# Import libraries
import warnings
warnings.filterwarnings('ignore')
from pandas import read_csv
# Machine Learning Module
from sklearn.model_selection import train_test_split
# Machine Learning Algorithm (classifier, model)
from sklearn.linear_model import LogisticRegression
# ML metrics
from sklearn.metrics import confusion_matrix, classification_report
filename = './datasets/pima-indians-diabetes.data.csv'
col_names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
data = read_csv(filename, names=col_names)

In [41]:
from sklearn.metrics import precision_score, accuracy_score, recall_score, f1_score, roc_auc_score
# Define metrics function
def fct_metrics(TN, FP, FN, TP):
    acc = (TP+TN)/(TP+TN+FP+FN)
    prec = TP / (TP+FP)
    rec = TP/(TP+FN)
    spec = TN/(TN+FP)
    f1_score = (2 * prec * rec)/(prec + rec)
    print(f'Acc: {acc.round(2)*100.0}% | Precision: {prec.round(2)*100.0}% |Recall: {rec.round(2)*100.0}% | Specificity: {spec.round(2)*100.0}% | F1_Score: {f1_score.round(2)*100.0}%')
# Get values from dataframe
array = data.values
# Get inputs (all variables except the class)
X = array[ : , 0:-1]
# Get the output (class)
Y = array[ : , -1]
test_proportion = 0.30
seed = 7 # Controler le fait aleatoire dans la division de donnees
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = test_proportion
                                                    , random_state = seed)
# Training
model = LogisticRegression(solver='newton-cg')
model.fit(X_train, Y_train)
print("Training completed successfully!")
predicted = model.predict(X_test)
matrix = confusion_matrix(Y_test, predicted)
print(f'Confusion Matrix\n----------------\n{matrix}')
TN, FP, FN, TP = matrix.ravel()
fct_metrics(TN, FP, FN, TP)
print(f"Accuracy Score: {accuracy_score(Y_test, predicted).round(2)*100.0}")
print(f"Precision Score: {precision_score(Y_test, predicted, average='binary').round(2)*100.0}")
print(f"Recall Score: {recall_score(Y_test, predicted, average='binary').round(2)*100.0}")
print(f"f1-Score: {f1_score(Y_test, predicted, average='binary').round(2)*100.0}")
print(f"AUC: {roc_auc_score(Y_test, predicted).round(2)*100.0}")



Training completed successfully!
Confusion Matrix
----------------
[[130  17]
 [ 34  50]]
Acc: 78.0% | Precision: 75.0% |Recall: 60.0% | Specificity: 88.0% | F1_Score: 66.0%
Accuracy Score: 78.0
Precision Score: 75.0
Recall Score: 60.0
f1-Score: 66.0
AUC: 74.0


## II. Classification Report

In [39]:
from sklearn.metrics import classification_report
report = classification_report(Y_test, predicted)
print(report)

              precision    recall  f1-score   support

         0.0       0.79      0.88      0.84       147
         1.0       0.75      0.60      0.66        84

    accuracy                           0.78       231
   macro avg       0.77      0.74      0.75       231
weighted avg       0.78      0.78      0.77       231

