# Comparing Logistic Regression and SVM

In [0]:
import pandas as pd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn import svm
%matplotlib inline

In [0]:
class Model():
    """Train and evaluate a classifier on a data set loaded from a CSV file.

    Typical usage: construct with a CSV path, optionally rename the columns,
    choose the feature and target columns with ``set_features`` /
    ``set_target``, call one of the ``train_and_test_*`` methods, then use
    the ``print_*`` / ``plot_*`` methods to inspect the result.

    The reporting methods index the confusion matrix as 2x2 and use the
    default (binary) precision/recall settings, so a two-class target is
    assumed.
    """

    def __init__(self, filename):
        """Load the CSV at ``filename`` into the internal data frame."""
        # private variables
        self._df = pd.DataFrame()
        self._confusion_matrix = []
        self.load_data(filename)

    def load_data(self, filename):
        """Replace the internal data frame with the contents of ``filename``."""
        self._df = pd.read_csv(filename)

    def print_df_info(self):
        """Print the first rows of the loaded data frame."""
        print(self._df.head())

    def change_column_names(self, new_column_names):
        """Rename every column of the data frame, in positional order."""
        self._df.columns = new_column_names

    def set_features(self, feature_cols):
        """Select the columns named in ``feature_cols`` as the feature matrix."""
        self._feature_col = feature_cols
        self._X = self._df[self._feature_col]

    def set_target(self, target_col):
        """Select the column named ``target_col`` as the target vector."""
        self._target_col = target_col
        self._y = self._df[target_col]

    def _fit_and_evaluate(self, classifier):
        """Split the data, fit ``classifier`` and score it on the test split."""
        self.split_data()
        # The attribute keeps its historical name; it holds whichever
        # classifier was fitted most recently (logistic regression or SVM)
        # and is what plot_roc_curve reads.
        self._logreg = classifier
        self._logreg.fit(self._X_train, self._y_train)
        self._y_pred = self._logreg.predict(self._X_test)
        self._confusion_matrix = metrics.confusion_matrix(
            self._y_test, self._y_pred)

    def train_and_test_logreg(self):
        """Fit a default ``LogisticRegression`` and evaluate it."""
        self._fit_and_evaluate(LogisticRegression())

    def train_and_test_svm(self):
        """Fit an ``SVC`` (with probability estimates enabled) and evaluate it."""
        # probability=True is required so that plot_roc_curve can call
        # predict_proba on the fitted SVC.
        self._fit_and_evaluate(svm.SVC(gamma=0.001, C=100., probability=True))

    def print_confusion_matrix(self):
        """Print the four cells of the 2x2 confusion matrix.

        scikit-learn lays the matrix out with true labels on rows and
        predicted labels on columns, so ``[1][1]`` is TP and ``[0][1]`` is FP.
        """
        cm = self._confusion_matrix
        print('Confusion matrix:')
        print(f'TP: {cm[1][1]}, FP: {cm[0][1]}')
        print(f'FN: {cm[1][0]}, TN: {cm[0][0]}')

    def print_evaluation_metrics(self):
        """Print accuracy, precision and recall for the latest test predictions."""
        print("Accuracy:", metrics.accuracy_score(self._y_test, self._y_pred))
        print("Precision:", metrics.precision_score(self._y_test, self._y_pred))
        print("Recall:", metrics.recall_score(self._y_test, self._y_pred))

    def split_data(self, test_size=0.25, random_state=0):
        """Split the selected features/target into train and test subsets.

        Args:
            test_size: Passed to ``train_test_split``; defaults to 0.25.
            random_state: Seed passed to ``train_test_split``; defaults to 0
                so that repeated runs produce the same split.
        """
        (self._X_train, self._X_test,
         self._y_train, self._y_test) = train_test_split(
            self._X, self._y, test_size=test_size, random_state=random_state)

    def _draw_confusion_heatmap(self):
        """Draw the confusion matrix as an annotated heatmap."""
        class_names = [0, 1]  # name of classes, in the matrix's row/column order
        _, ax = plt.subplots()
        tick_marks = np.arange(len(class_names))
        plt.xticks(tick_marks, class_names)
        plt.yticks(tick_marks, class_names)
        # create heatmap
        sns.heatmap(pd.DataFrame(self._confusion_matrix),
                    annot=True, cmap="YlGnBu", fmt='g')
        ax.xaxis.set_label_position("top")
        plt.tight_layout()
        plt.title('Confusion matrix', y=1.1)
        # The matrix is plotted untransposed: rows (y-axis) are the actual
        # labels and columns (x-axis) are the predicted labels.
        plt.ylabel('Actual label')
        plt.xlabel('Predicted label')

    def plot_confusion_materixOLD(self):
        """Plot the confusion matrix heatmap (name kept for existing callers)."""
        self._draw_confusion_heatmap()

    def plot_confusion_materix(self):
        """Plot the confusion matrix heatmap.

        Actual labels are on the y-axis and predicted labels on the x-axis,
        matching the layout of the matrix being drawn.
        """
        self._draw_confusion_heatmap()

    def plot_roc_curve(self):
        """Plot the ROC curve of the most recently fitted classifier.

        Uses the predicted probability of the second class (column 1 of
        ``predict_proba``) on the test split; the AUC is shown in the legend.
        """
        y_pred_proba = self._logreg.predict_proba(self._X_test)[:, 1]
        fpr, tpr, _ = metrics.roc_curve(self._y_test, y_pred_proba)
        auc = metrics.roc_auc_score(self._y_test, y_pred_proba)
        plt.plot(fpr, tpr, label="data 1, auc=" + str(auc))
        plt.legend(loc=4)
        plt.show()

In [3]:
# Analysing the diabetes data: fit logistic regression and then an SVM on the
# same features and report the same diagnostics for each.
filename = './Data/diabetes.csv'
model = Model(filename)

column_names = [
    'pregnant', 'glucose', 'bp', 'skin',
    'insulin', 'bmi', 'pedigree', 'age', 'label',
]
model.change_column_names(column_names)
# model.print_df_info()

# 'skin' is deliberately left out of the feature set.
feature_cols = [
    'pregnant', 'insulin', 'bmi',
    'age', 'glucose', 'bp', 'pedigree',
]
model.set_features(feature_cols)
model.set_target('label')

# Each classifier goes through the same train -> report -> ROC sequence.
for banner, train_and_test in (
        ('Applying Logistic Regression', model.train_and_test_logreg),
        ('Applying SVM', model.train_and_test_svm),
):
    print(20 * '>', banner)
    train_and_test()
    model.print_confusion_matrix()
    model.print_evaluation_metrics()
    model.plot_roc_curve()

FileNotFoundError: ignored