In [None]:
from google.colab import drive

# Mount Google Drive inside the Colab VM; the dataset is read from a path under this mount point.
DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from sklearn.preprocessing import OrdinalEncoder
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report, precision_recall_curve
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Location of the credit-card default CSV on the mounted Drive.
DATA_CSV_PATH = r"/content/drive/MyDrive/Colab Notebooks/ML homework/Copy of default of credit card clients.csv"
data = pd.read_csv(DATA_CSV_PATH)

In [None]:
import warnings


def warn(*_ignored_args, **_ignored_kwargs):
    """No-op stand-in for ``warnings.warn``: accepts any arguments and does nothing."""
    return None


# Swap the stdlib entry point for the no-op so that code calling ``warnings.warn(...)``
# through the module attribute stays silent for the rest of the session.
# NOTE(review): this is a process-wide monkeypatch that hides every such warning,
# including ones that point at real problems.
warnings.warn = warn

In [None]:
# Preview the table without the ID column. The result is only displayed, never assigned,
# so `data` itself still carries ID into the following cells.
data.drop(columns=["ID"])

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default payment next month
0,20000,2,2,1,24,2,2,-1,-1,-2,-2,3913,3102,689,0,0,0,0,689,0,0,0,0,1
1,120000,2,2,2,26,-1,2,0,0,0,2,2682,1725,2682,3272,3455,3261,0,1000,1000,1000,0,2000,1
2,90000,2,2,2,34,0,0,0,0,0,0,29239,14027,13559,14331,14948,15549,1518,1500,1000,1000,1000,5000,0
3,50000,2,2,1,37,0,0,0,0,0,0,46990,48233,49291,28314,28959,29547,2000,2019,1200,1100,1069,1000,0
4,50000,1,2,1,57,-1,0,-1,0,0,0,8617,5670,35835,20940,19146,19131,2000,36681,10000,9000,689,679,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,220000,1,3,1,39,0,0,0,0,0,0,188948,192815,208365,88004,31237,15980,8500,20000,5003,3047,5000,1000,0
29996,150000,1,3,2,43,-1,-1,-1,-1,0,0,1683,1828,3502,8979,5190,0,1837,3526,8998,129,0,0,0
29997,30000,1,2,2,37,4,3,2,-1,0,0,3565,3356,2758,20878,20582,19357,0,0,22000,4200,2000,3100,1
29998,80000,1,3,1,41,1,-1,0,0,0,-1,-1645,78379,76304,52774,11855,48944,85900,3409,1178,1926,52964,1804,1


# Helper Functions

In [None]:
def preprocessing_(data):
    """Clean the credit-card frame and turn AGE into an ordinal bin code.

    Steps:
      1. Drop rows whose MARRIAGE or EDUCATION value is 0 (treated as missing).
      2. Discretize AGE into 6 intervals labelled AGE20 ... AGE70.
      3. Replace those labels with the codes produced by ``OrdinalEncoder``.

    Args:
        data: DataFrame with at least the MARRIAGE, EDUCATION and AGE columns.

    Returns:
        A new DataFrame; the frame passed in is left unmodified.
    """
    # Filter once and take an explicit copy: writing a column into a filtered slice
    # is what makes pandas raise SettingWithCopyWarning.
    valid_rows = (data["MARRIAGE"] != 0) & (data["EDUCATION"] != 0)
    data = data[valid_rows].copy()

    # Discretize the AGE attribute into 6 intervals.
    age_labels = ["AGE20", "AGE30", "AGE40", "AGE50", "AGE60", "AGE70"]
    age_bins = pd.cut(data["AGE"], 6, labels=age_labels)

    # Ordinal encoding of the interval labels.
    encoder = OrdinalEncoder()
    data["AGE"] = encoder.fit_transform(np.array(age_bins).reshape(-1, 1)).ravel()

    return data

def print_cm(cm, labels, hide_zeroes=False, hide_diagonal=False, hide_threshold=None):
    """Print a confusion matrix as a right-aligned text table.

    ``cm`` is read as ``cm[i, j]``; row ``i`` and column ``j`` are captioned with
    ``labels[i]`` and ``labels[j]``. Values are shown with one decimal place.
    A cell is blanked out when
      * ``hide_zeroes`` is set and the value equals 0,
      * ``hide_diagonal`` is set and the cell lies on the diagonal, or
      * a truthy ``hide_threshold`` is given and the value is not greater than it.
    Nothing is returned; the table goes to standard output.
    """
    width = max([5] + [len(name) for name in labels])  # 5 is value length
    blank = " " * width
    indent = "    "

    def emit(fields):
        # Every field, the last one included, is followed by a single space.
        print(" ".join(fields) + " ")

    # Header row: an empty corner cell followed by the column captions.
    emit([indent + blank] + ["%*s" % (width, name) for name in labels])

    # One line per true class.
    for i, row_name in enumerate(labels):
        fields = [indent + "%*s" % (width, row_name)]
        for j in range(len(labels)):
            value = cm[i, j]
            text = "%*.1f" % (width, value)
            hidden = (
                (hide_zeroes and float(value) == 0)
                or (hide_diagonal and i == j)
                or (hide_threshold and not value > hide_threshold)
            )
            fields.append(blank if hidden else text)
        emit(fields)

        
def train_evaluate_val(model, x_train, y_train, folds, metrics):
    """Cross-validate ``model`` and print per-fold and mean scores for every metric.

    Args:
        model: unfitted scikit-learn estimator.
        x_train: training features.
        y_train: training labels.
        folds: value forwarded to the ``cv`` argument (e.g. the number of folds).
        metrics: iterable of scikit-learn scoring names (strings).

    All metrics are scored in one cross-validation run, so the estimator is fitted
    once per fold rather than once per fold *per metric*, and every metric describes
    the same fitted models.
    """
    # Local import: the notebook's import cell only brings in cross_val_score.
    from sklearn.model_selection import cross_validate

    metrics = list(metrics)
    if not metrics:
        return  # nothing to score

    # Multi-metric scoring expects unique names; dict.fromkeys de-duplicates and keeps order.
    scoring = list(dict.fromkeys(metrics))
    results = cross_validate(model, x_train, y_train, cv=folds, scoring=scoring)

    for metric in metrics:
        scores = results[f"test_{metric}"]
        print(f'{metric}:')
        print(f'Cross-validation: {scores}')
        print(f"Average: {scores.mean()}")
        print("----------------------------------------------------------")
    
def test_evaluate(model, x_test, y_test):
    """Print held-out metrics for a fitted binary classifier.

    Prints the confusion matrix, accuracy, precision, recall, F1, ROC AUC and the
    scikit-learn classification report.

    Args:
        model: fitted classifier exposing ``predict`` (and ideally ``predict_proba``
            or ``decision_function``).
        x_test: test features.
        y_test: true test labels (0 / 1).
    """
    prediction = model.predict(x_test)

    # ROC AUC ranks samples by a continuous score. Feeding it hard 0/1 predictions
    # collapses the curve to a single operating point, so prefer probabilities
    # (column 1 = the second entry of model.classes_) or decision values.
    if hasattr(model, "predict_proba"):
        score = model.predict_proba(x_test)[:, 1]
    elif hasattr(model, "decision_function"):
        score = model.decision_function(x_test)
    else:
        score = prediction

    confusion = confusion_matrix(y_test, prediction)
    # print_cm writes the table itself and returns None, so it must not be nested in print().
    print("Confusion matrix:")
    print_cm(confusion, ['Not pay', 'pay'])
    print('Accuracy: {:.2f}'.format(accuracy_score(y_test, prediction)))
    print('Precision: {:.2f}'.format(precision_score(y_test, prediction)))
    print('Recall: {:.2f}'.format(recall_score(y_test, prediction)))
    print('F1: {:.2f}'.format(f1_score(y_test, prediction)))
    print('AUC: {:.2f}'.format(roc_auc_score(y_test, score)))
    print(classification_report(y_test, prediction, target_names=['not pay', 'pay']))

def optimize_model(model, x_train, y_train, parameters, metrics):
    """Grid-search ``parameters`` and print the best setting and score for every metric.

    Args:
        model: unfitted scikit-learn estimator.
        x_train: training features.
        y_train: training labels.
        parameters: parameter grid passed to ``GridSearchCV`` as ``param_grid``.
        metrics: iterable of scikit-learn scoring names (strings).

    The grid is searched once with all metrics scored together, instead of running
    one complete search (plus a refit of the winner) per metric.
    """
    metrics = list(metrics)
    if not metrics:
        return  # nothing to optimise for

    # Multi-metric scoring expects unique names; refit=False because only the
    # cross-validation table is needed, not a refitted estimator.
    scoring = list(dict.fromkeys(metrics))
    grid_clf = GridSearchCV(model, param_grid=parameters, scoring=scoring, refit=False)
    grid_clf.fit(x_train, y_train)
    results = grid_clf.cv_results_

    for metric in metrics:
        # Rank 1 marks the best candidate for this metric.
        best_index = results[f"rank_test_{metric}"].argmin()
        best_parameter = results["params"][best_index]
        best_score = results[f"mean_test_{metric}"][best_index]
        print(f'{metric}:')
        print(f"The best parameters: {best_parameter}\nThe best score: {best_score}\n")

# Prepare the data:

In [None]:
# Drop the rows with missing MARRIAGE/EDUCATION codes and encode AGE as a bin index.
# NOTE(review): `data` is overwritten, so re-running this cell bins the already
# encoded AGE values again.
data = data.pipe(preprocessing_)

In [None]:
# Show the preprocessed frame (AGE now holds the ordinal bin code).
data

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default payment next month
0,1,20000,2,2,1,0.0,2,2,-1,-1,-2,-2,3913,3102,689,0,0,0,0,689,0,0,0,0,1
1,2,120000,2,2,2,0.0,-1,2,0,0,0,2,2682,1725,2682,3272,3455,3261,0,1000,1000,1000,0,2000,1
2,3,90000,2,2,2,1.0,0,0,0,0,0,0,29239,14027,13559,14331,14948,15549,1518,1500,1000,1000,1000,5000,0
3,4,50000,2,2,1,1.0,0,0,0,0,0,0,46990,48233,49291,28314,28959,29547,2000,2019,1200,1100,1069,1000,0
4,5,50000,1,2,1,3.0,-1,0,-1,0,0,0,8617,5670,35835,20940,19146,19131,2000,36681,10000,9000,689,679,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,29996,220000,1,3,1,1.0,0,0,0,0,0,0,188948,192815,208365,88004,31237,15980,8500,20000,5003,3047,5000,1000,0
29996,29997,150000,1,3,2,2.0,-1,-1,-1,-1,0,0,1683,1828,3502,8979,5190,0,1837,3526,8998,129,0,0,0
29997,29998,30000,1,2,2,1.0,4,3,2,-1,0,0,3565,3356,2758,20878,20582,19357,0,0,22000,4200,2000,3100,1
29998,29999,80000,1,3,1,2.0,1,-1,0,0,0,-1,-1645,78379,76304,52774,11855,48944,85900,3409,1178,1926,52964,1804,1


# Split the data:

In [None]:
# Separate the target from the features, then hold out 20% of the rows for testing.
TARGET_COLUMN = "default payment next month"
y = data[TARGET_COLUMN]
data = data.drop(columns=[TARGET_COLUMN])

x_train, x_test, y_train, y_test = train_test_split(
    data,
    y,
    test_size=0.2,
    random_state=42,
)

Normalization:

In [None]:
# Standardise the monetary columns. Every scaler is fitted on ONE training column only
# (LIMIT_BAL, BILL_AMT1, PAY_AMT1); the BILL_AMT1 / PAY_AMT1 statistics are then reused
# for the other five columns of the same family.
scaler2 = preprocessing.StandardScaler().fit(np.array(x_train["LIMIT_BAL"]).reshape(-1, 1))
scaler3 = preprocessing.StandardScaler().fit(np.array(x_train["BILL_AMT1"]).reshape(-1, 1))
scaler4 = preprocessing.StandardScaler().fit(np.array(x_train["PAY_AMT1"]).reshape(-1, 1))

scaled_column_groups = [
    (scaler2, ["LIMIT_BAL"]),
    (scaler3, [f"BILL_AMT{idx}" for idx in range(1, 7)]),
    (scaler4, [f"PAY_AMT{idx}" for idx in range(1, 7)]),
]

# Apply the SAME scaler to the train and test copy of each column. The earlier version
# transformed the test BILL_AMT* columns with scaler2 (LIMIT_BAL statistics), which put
# train and test on different scales.
for scaler, columns in scaled_column_groups:
    for frame in (x_train, x_test):
        for column in columns:
            frame[column] = scaler.transform(np.array(frame[column]).reshape(-1, 1))

In [None]:
# Show the training features after standardisation.
x_train

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6
8759,8760,-1.135178,2,1,2,0.0,0,0,0,0,0,-1,-0.558595,-0.582250,-0.599997,-0.598109,-0.554967,-0.672282,-0.270961,-0.292380,-0.347201,-0.152203,-0.260507,-0.306404
29948,29949,0.945432,1,1,1,1.0,-1,-1,-1,-1,0,0,-0.261862,-0.169901,-0.689814,-0.666133,-0.680239,-0.692511,2.131999,-0.343313,-0.231120,-0.355871,-0.356126,-0.356126
29155,29156,-1.135178,2,2,1,3.0,1,2,0,0,0,-2,-0.431853,-0.440174,-0.454388,-0.422791,-0.692511,-0.692511,-0.356126,-0.247758,-0.164888,-0.356126,-0.356126,-0.356126
11522,11523,1.022492,2,1,1,1.0,-1,-1,-1,-1,-1,-1,-0.684204,-0.674036,-0.680158,-0.688250,-0.680401,-0.682424,-0.268794,-0.297735,-0.335982,-0.298882,-0.308444,-0.308444
8242,8243,0.328955,2,2,1,2.0,1,-2,-2,-2,-2,-2,-0.692511,-0.692511,-0.692511,-0.692511,-0.692511,-0.692511,-0.356126,-0.356126,-0.356126,-0.356126,-0.356126,-0.356126
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29870,29871,-1.212238,1,2,2,0.0,0,0,0,0,0,0,-0.570894,-0.557449,-0.556747,-0.556235,-0.564785,-0.562115,-0.282308,-0.278993,-0.279631,-0.334516,-0.333751,-0.331010
5402,5403,-0.903999,1,1,2,1.0,0,0,0,0,0,0,-0.037643,-0.058560,-0.484354,-0.554482,-0.569532,-0.581427,-0.192108,-0.228634,-0.324253,-0.326866,-0.324253,-0.324253
863,864,-1.212238,2,2,1,1.0,0,0,0,0,0,0,-0.477881,-0.570463,-0.558986,-0.557988,-0.561211,-0.575142,-0.207598,-0.215885,-0.292380,-0.334898,-0.336301,-0.335600
15824,15825,-0.672820,2,2,2,0.0,1,2,2,2,2,-2,-0.637583,-0.640415,-0.583153,-0.586660,-0.692511,-0.692511,-0.356126,-0.069270,-0.354787,-0.356126,-0.356126,-0.356126


# Undersampling:

In [None]:
# Split the training rows by class; the two classes are unbalanced.
class_0 = x_train[y_train == 0]
class_1 = x_train[y_train == 1]

print("before undersampling:\n")
print(f"not payed class: {len(class_0)}\n")

print(f"payed class: {len(class_1)}\n")

# Undersample the dominant class (not payed): keep only its first rows.
N_CLASS_0_KEPT = 6001
class_0 = class_0[0:N_CLASS_0_KEPT]
print("after undersampling:\n")
print(f"not payed class: {len(class_0)}\n")
# Label column for the not-payed rows.
y_0 = pd.DataFrame(np.zeros(len(class_0)), columns=["label"])

print(f"payed class: {len(class_1)}\n")
# Label column for the payed rows.
y_1 = pd.DataFrame(np.ones(len(class_1)), columns=["label"])

# Concatenate the new balanced data; both pieces are re-indexed 0..n-1 so the
# labels line up with their rows.
undersampling_data_samples = pd.concat([class_0, class_1], ignore_index=True)
undersampling_data_label = pd.concat([y_0, y_1], ignore_index=True)
undersampling_data_samples["label"] = undersampling_data_label["label"]

# Shuffle with a fixed seed (same value as the train/test split) so the row order,
# and everything trained on it, is reproducible.
undersampling_data = shuffle(undersampling_data_samples, random_state=42)
undersampling_data

before undersampling:

not payed class: 23301

payed class: 6631

after undersampling:

not payed class: 7000

payed class: 6631



In [None]:
# From here on the training set is the balanced, shuffled sample.
x_train = undersampling_data.drop(columns=["label"])
y_train = undersampling_data["label"]

# Train & Evaluate: 

In [None]:
# scikit-learn scoring names passed to the cross-validation and grid-search helpers.
metrics = [
    "accuracy",
    "precision",
    "recall",
    "f1",
    "roc_auc",
]

Logistic Regression:

In [None]:
# Candidate values for the C parameter of LogisticRegression.
l_g_grid_values = dict(C=[0.001, 0.01, 0.1, 1, 10, 100])

# Baseline estimator with default settings.
l_g = LogisticRegression()

train and evaluate using cross validation:

In [None]:
# 5-fold cross-validation of the default logistic regression on the training set.
train_evaluate_val(model=l_g, x_train=x_train, y_train=y_train, folds=5, metrics=metrics)

accuracy:
Cross-validation: [0.80538735 0.81290457 0.81227814 0.80726665 0.8081019 ]
Average: 0.8091877218626017
----------------------------------------------------------
precision:
Cross-validation: [0.65609756 0.73504274 0.73546512 0.67164179 0.6775    ]
Average: 0.6951494406684381
----------------------------------------------------------
recall:
Cross-validation: [0.25377358 0.24316682 0.23845429 0.25447691 0.25541942]
Average: 0.24905820425728664
----------------------------------------------------------
f1:
Cross-validation: [0.36598639 0.36543909 0.36014235 0.36910458 0.37097878]
Average: 0.3663302396167972
----------------------------------------------------------
roc_auc:
Cross-validation: [0.72152127 0.73963444 0.71084905 0.70806678 0.72435865]
Average: 0.7208860384950748
----------------------------------------------------------


optimize the model using grid search: 

In [None]:
# Start from a fresh, unfitted estimator and search the C grid for every metric.
l_g = LogisticRegression()
optimize_model(
    model=l_g,
    x_train=x_train,
    y_train=y_train,
    parameters=l_g_grid_values,
    metrics=metrics,
)

accuracy:
The best parameters: {'C': 1}
The best score: 0.8091877218626017

precision:
The best parameters: {'C': 0.01}
The best score: 0.7024681168341428

recall:
The best parameters: {'C': 1}
The best score: 0.24905820425728664

f1:
The best parameters: {'C': 1}
The best score: 0.3663302396167972

roc_auc:
The best parameters: {'C': 1}
The best score: 0.7208860384950748



Final evaluation on the test data:

In [None]:
# Refit on the whole training set with C=0.01, then score on the held-out test set.
l_g = LogisticRegression(C=0.01).fit(x_train, y_train)
test_evaluate(model=l_g, x_test=x_test, y_test=y_test)

            Not pay     pay 
    Not pay  4510.0   150.0 
        pay  1012.0   315.0 
Confusion matrix None
Accuracy: 0.81
Precision: 0.68
Recall: 0.24
F1: 0.35
AUC: 0.60
              precision    recall  f1-score   support

     not pay       0.82      0.97      0.89      4660
         pay       0.68      0.24      0.35      1327

    accuracy                           0.81      5987
   macro avg       0.75      0.60      0.62      5987
weighted avg       0.79      0.81      0.77      5987



The model barely distinguishes between the two classes (AUC ≈ 0.6, not far above the 0.5 of random guessing) and predicts most of the instances as "not payed", i.e. the majority class.

Decision Tree:

In [None]:
# Baseline decision tree with default settings.
d_t = DecisionTreeClassifier()

In [None]:
# Parameter grid searched for the decision tree.
d_t_grid_values = dict(
    criterion=["gini", "entropy"],
    splitter=["best", "random"],
)

train and evaluate using cross validation:

In [None]:
# 5-fold cross-validation of the default decision tree on the training set.
train_evaluate_val(model=d_t, x_train=x_train, y_train=y_train, folds=5, metrics=metrics)

accuracy:
Cross-validation: [0.72436834 0.74399666 0.71726874 0.71246607 0.73021508]
Average: 0.725662977657131
----------------------------------------------------------
precision:
Cross-validation: [0.39911111 0.41355932 0.37533274 0.37444543 0.38838475]
Average: 0.3901666720548711
----------------------------------------------------------
recall:
Cross-validation: [0.40754717 0.45711593 0.39585297 0.39773798 0.40999057]
Average: 0.41364892500844697
----------------------------------------------------------
f1:
Cross-validation: [0.40386385 0.44385027 0.37323623 0.38686131 0.40852246]
Average: 0.40326682440452466
----------------------------------------------------------
roc_auc:
Cross-validation: [0.61233777 0.64274305 0.60640596 0.60027967 0.61377158]
Average: 0.6151076054441105
----------------------------------------------------------


optimize the model using grid search: 

In [None]:
# Start from a fresh, unfitted tree and search the grid for every metric.
d_t = DecisionTreeClassifier()
optimize_model(
    model=d_t,
    x_train=x_train,
    y_train=y_train,
    parameters=d_t_grid_values,
    metrics=metrics,
)

accuracy:
The best parameters: {'criterion': 'entropy', 'splitter': 'best'}
The best score: 0.7291710169137607

precision:
The best parameters: {'criterion': 'entropy', 'splitter': 'best'}
The best score: 0.4005015247924744

recall:
The best parameters: {'criterion': 'gini', 'splitter': 'best'}
The best score: 0.41893087688723707

f1:
The best parameters: {'criterion': 'entropy', 'splitter': 'best'}
The best score: 0.40282686834571074

roc_auc:
The best parameters: {'criterion': 'entropy', 'splitter': 'best'}
The best score: 0.6204935638068203



Final evaluation on the test data:

In [None]:
# Refit on the whole training set with the chosen settings, then score on the held-out test set.
d_t = DecisionTreeClassifier(criterion="gini", splitter="best").fit(x_train, y_train)
test_evaluate(model=d_t, x_test=x_test, y_test=y_test)

            Not pay     pay 
    Not pay  3247.0  1413.0 
        pay   764.0   563.0 
Confusion matrix None
Accuracy: 0.64
Precision: 0.28
Recall: 0.42
F1: 0.34
AUC: 0.56
              precision    recall  f1-score   support

     not pay       0.81      0.70      0.75      4660
         pay       0.28      0.42      0.34      1327

    accuracy                           0.64      5987
   macro avg       0.55      0.56      0.54      5987
weighted avg       0.69      0.64      0.66      5987



Random forest:

In [None]:
# Baseline random forest with default settings.
r_f = RandomForestClassifier()

In [None]:
# Parameter grid searched for the random forest.
r_f_grid_values = dict(
    max_depth=[10, 20, 30, 50, 100],
    n_estimators=[10, 20, 40, 80, 100],
    criterion=["gini", "entropy"],
)

train and evaluate using cross validation:

In [None]:
# 5-fold cross-validation of the default random forest on the training set.
train_evaluate_val(model=r_f, x_train=x_train, y_train=y_train, folds=5, metrics=metrics)

accuracy:
Cross-validation: [0.81227814 0.82543328 0.81478388 0.81123408 0.82271873]
Average: 0.8172896220505326
----------------------------------------------------------
precision:
Cross-validation: [0.63414634 0.68204283 0.65942029 0.63265306 0.6726094 ]
Average: 0.6561743852950067
----------------------------------------------------------
recall:
Cross-validation: [0.37075472 0.399623   0.33930254 0.34967012 0.38548539]
Average: 0.3689671545178098
----------------------------------------------------------
f1:
Cross-validation: [0.47170935 0.50089659 0.45527452 0.44051251 0.48019208]
Average: 0.46971701002164956
----------------------------------------------------------
roc_auc:
Cross-validation: [0.76392224 0.78752293 0.76727293 0.75193823 0.77196449]
Average: 0.768524164614105
----------------------------------------------------------


optimize the model using grid search: 

In [None]:
# Start from a fresh, unfitted forest and search the grid for every metric.
r_f = RandomForestClassifier()
optimize_model(
    model=r_f,
    x_train=x_train,
    y_train=y_train,
    parameters=r_f_grid_values,
    metrics=metrics,
)

accuracy:
The best parameters: {'criterion': 'entropy', 'max_depth': 10, 'n_estimators': 80}
The best score: 0.8207558989350595

precision:
The best parameters: {'criterion': 'gini', 'max_depth': 10, 'n_estimators': 100}
The best score: 0.6810105013993653

recall:
The best parameters: {'criterion': 'gini', 'max_depth': 50, 'n_estimators': 80}
The best score: 0.3676480002845305

f1:
The best parameters: {'criterion': 'gini', 'max_depth': 50, 'n_estimators': 80}
The best score: 0.4733796387915398

roc_auc:
The best parameters: {'criterion': 'entropy', 'max_depth': 10, 'n_estimators': 80}
The best score: 0.7819567748574516



Final evaluation on the test data:

In [None]:
# Refit on the whole training set with the chosen settings, then score on the held-out test set.
# NOTE(review): confirm these settings against the grid-search results before relying on them.
r_f = RandomForestClassifier(
    max_depth=100,
    n_estimators=100,
    criterion="entropy",
).fit(x_train, y_train)
test_evaluate(model=r_f, x_test=x_test, y_test=y_test)

            Not pay     pay 
    Not pay  4523.0   137.0 
        pay  1064.0   263.0 
Confusion matrix None
Accuracy: 0.80
Precision: 0.66
Recall: 0.20
F1: 0.30
AUC: 0.58
              precision    recall  f1-score   support

     not pay       0.81      0.97      0.88      4660
         pay       0.66      0.20      0.30      1327

    accuracy                           0.80      5987
   macro avg       0.73      0.58      0.59      5987
weighted avg       0.78      0.80      0.75      5987



# Deep Neural network:

Metrics:

In [None]:
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense

def recall_m(y_true, y_pred):
    """Recall as a Keras metric: rounded true positives over rounded actual positives.

    Both counts come from clipping to [0, 1], rounding and summing; ``K.epsilon()``
    is added to the denominator so an input without positives does not divide by zero.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return hits / (actual_positives + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision as a Keras metric: rounded true positives over rounded predicted positives.

    Both counts come from clipping to [0, 1], rounding and summing; ``K.epsilon()``
    is added to the denominator so an input without positive predictions does not
    divide by zero.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flagged_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return hits / (flagged_positives + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 as a Keras metric: harmonic mean of ``precision_m`` and ``recall_m``.

    ``K.epsilon()`` in the denominator keeps the result finite when both are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio

In [None]:
# Same arrays for fitting and evaluation; the input width is read from the training
# matrix instead of being hard-coded, so the network keeps working if the feature
# set changes (e.g. a column is dropped).
train_features = np.array(x_train)
train_labels = np.array(y_train)
n_features = train_features.shape[1]

# Fully connected binary classifier: four ReLU hidden layers and one sigmoid output unit.
model = Sequential()
model.add(Dense(12, input_dim=n_features, activation='relu'))
model.add(Dense(20, activation='relu'))
model.add(Dense(30, activation='relu'))
model.add(Dense(20, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# compile the keras model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', precision_m, recall_m, f1_m,])
# fit the keras model on the dataset; Keras holds out 20% of the rows for validation
model.fit(train_features, train_labels, epochs=50, batch_size=100, validation_split=0.2)
# returns [loss, accuracy, precision_m, recall_m, f1_m] on the test set
model.evaluate(x=np.array(x_test), y=np.array(y_test))


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


[0.7576056718826294,
 0.5273091793060303,
 0.2705850899219513,
 0.6653037071228027,
 0.3740612864494324]