# Functions for creating training/test sets and evaluating metrics for ML

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
import matplotlib.pyplot as plt

In [5]:
class train_test():
    """Helper for producing a stratified, reproducible train/validation split."""

    def __init__(self):
        pass

    def train_test_sets(self, df_features, df_labels, test_size=0.30,
                        random_state=123):
        """Split features and labels into training and validation sets.

        The data is shuffled before splitting and the split is stratified
        on the labels, so the class distribution stays consistent between
        the two resulting sets.

        Args:
            df_features: Feature table passed to ``train_test_split`` as X.
            df_labels: Labels aligned with ``df_features``. If the object
                exposes a ``.values`` attribute (as pandas objects do), that
                array is used; otherwise the object is used as given.
            test_size: Fraction of the samples held out for validation.
                Defaults to 0.30 (a 70%/30% split).
            random_state: Seed for the shuffle, so repeated calls produce
                the same split. Defaults to 123.

        Returns:
            Tuple ``(X_train, X_val, y_train, y_val)``.
        """
        # Resolve the label array once; it serves both as y and as the
        # stratification key.
        labels = getattr(df_labels, "values", df_labels)

        X_train, X_val, y_train, y_val = train_test_split(
            df_features,
            labels,
            test_size=test_size,
            shuffle=True,
            stratify=labels,
            random_state=random_state,
        )

        # Report the size of the training and validation groups.
        print('training:', len(X_train), 'testing:', len(X_val))
        return X_train, X_val, y_train, y_val

In [None]:
class ML_metrics():
    """Cross-validated evaluation helpers for classifiers."""

    def __init__(self):
        pass

    def conf_matrix(self, test_Y, prediction):
        """Print and plot the confusion matrix of predictions vs. true labels.

        The matrix follows scikit-learn's convention: rows are the actual
        labels and columns are the predicted labels. The matrix is drawn
        with ``plt.matshow`` and every cell is annotated with its count;
        ``plt.show()`` is not called here.

        For a 2x2 (binary) matrix the four counts are additionally printed
        by name.

        Args:
            test_Y: True labels.
            prediction: Predicted labels, aligned with ``test_Y``.
        """
        con_matrix = confusion_matrix(test_Y, prediction)
        print('Confusion_matrix', con_matrix)

        plt.matshow(con_matrix, cmap='Pastel1')
        n_rows, n_cols = con_matrix.shape
        for row in range(n_rows):
            for col in range(n_cols):
                # matshow puts matrix rows on the y-axis and columns on the
                # x-axis, so the text position is (x=col, y=row).
                plt.text(col, row, con_matrix[row, col],
                         ha='center', va='center')
        plt.ylabel('Actual Label')
        plt.xlabel('Predicted Label')

        # The named counts are only meaningful for a binary problem.
        if con_matrix.shape == (2, 2):
            # Row = actual, column = predicted:
            # [0][1] is actual 0 / predicted 1 -> false positive,
            # [1][0] is actual 1 / predicted 0 -> false negative.
            print('True Negative = ', con_matrix[0][0])
            print('False Negative = ', con_matrix[1][0])
            print('False Positive = ', con_matrix[0][1])
            print('True Positive = ', con_matrix[1][1])

    def metrics(self, X_train, y_train, clf_names, classifiers, cv=10):
        """Evaluate each classifier with cross-validation and report scores.

        For every ``(name, clf)`` pair this prints the mean cross-validated
        default score (reported as accuracy), ROC AUC and F1, then prints
        and plots the confusion matrix of the cross-validated predictions.

        Args:
            X_train: Training features.
            y_train: Training labels.
            clf_names: Display names, paired positionally with
                ``classifiers``.
            classifiers: Estimators to evaluate.
            cv: Cross-validation setting forwarded to scikit-learn.
                Defaults to 10.

        Returns:
            Tuple ``(y_pred, acc, roc_auc, f1)`` for the LAST classifier
            evaluated only. All four are ``None`` when no classifier was
            supplied.
        """
        # Pre-bind the results so an empty classifier list does not raise
        # UnboundLocalError at the return statement.
        y_pred = acc = roc_auc = f1 = None

        for name, clf in zip(clf_names, classifiers):

            print("{}\n\n".format(name))

            # Out-of-fold predictions, used for the confusion matrix.
            y_pred = cross_val_predict(clf, X_train, y_train, cv=cv)

            # Mean score across the folds for each metric.
            acc = cross_val_score(clf, X_train, y_train, cv=cv).mean()
            roc_auc = cross_val_score(clf, X_train, y_train, cv=cv,
                                      scoring='roc_auc').mean()
            f1 = cross_val_score(clf, X_train, y_train, cv=cv,
                                 scoring='f1').mean()

            print("Accuracy of {}: {}".format(name, acc))
            print("Area Under Curve of {}: {}".format(name, roc_auc))
            print("f1 score of {}: {}".format(name, f1))

            self.conf_matrix(y_train, y_pred)

            print("==================================================================================================================\n\n")

        return y_pred, acc, roc_auc, f1