## Initialization

In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder

In [3]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [32]:
model_accuracies_1 = {'KNN':1, 'LogReg':1, 'DT':1, 'RF':1, 'NB':1, 'LinearSVC':1, 'KernelSVC':1}
model_accuracies_2 = {'KNN':1, 'LogReg':1, 'DT':1, 'RF':1, 'NB':1, 'LinearSVC':1, 'KernelSVC':1}

## Importing the Data

In [5]:
dataset = pd.read_csv("diagnosis.data", header = None, delimiter = r"\s+")
X = dataset.iloc[:, 0:6].values
Y1 = dataset.iloc[:, 6:7].values
Y2 = dataset.iloc[:, 7:8].values

In [6]:
dataset.head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,35.5,no,yes,no,no,no,no,no
1,35.9,no,no,yes,yes,yes,yes,no
2,35.9,no,yes,no,no,no,no,no
3,36.0,no,no,yes,yes,yes,yes,no
4,36.0,no,yes,no,no,no,no,no


## Preprocess the Data

In [8]:
le_Y = LabelEncoder()
Y1 = le_Y.fit_transform(Y1)
Y2 = le_Y.transform(Y2)

In [9]:
Y1

array([0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 0], dtype=int64)

In [10]:
Y2

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 1, 1], dtype=int64)

In [11]:
le_X = LabelEncoder()
le_X.fit(X[:, 1])

X[:, 1] = le_X.transform(X[:, 1])
X[:, 2] = le_X.transform(X[:, 2])
X[:, 3] = le_X.transform(X[:, 3])
X[:, 4] = le_X.transform(X[:, 4])
X[:, 5] = le_X.transform(X[:, 5])

In [12]:
X

array([[35.5, 0, 1, 0, 0, 0],
       [35.9, 0, 0, 1, 1, 1],
       [35.9, 0, 1, 0, 0, 0],
       [36.0, 0, 0, 1, 1, 1],
       [36.0, 0, 1, 0, 0, 0],
       [36.0, 0, 1, 0, 0, 0],
       [36.2, 0, 0, 1, 1, 1],
       [36.2, 0, 1, 0, 0, 0],
       [36.3, 0, 0, 1, 1, 1],
       [36.6, 0, 0, 1, 1, 1],
       [36.6, 0, 0, 1, 1, 1],
       [36.6, 0, 1, 0, 0, 0],
       [36.6, 0, 1, 0, 0, 0],
       [36.7, 0, 0, 1, 1, 1],
       [36.7, 0, 1, 0, 0, 0],
       [36.7, 0, 1, 0, 0, 0],
       [36.8, 0, 0, 1, 1, 1],
       [36.8, 0, 0, 1, 1, 1],
       [36.9, 0, 0, 1, 1, 1],
       [36.9, 0, 1, 0, 0, 0],
       [37.0, 0, 0, 1, 1, 0],
       [37.0, 0, 0, 1, 1, 0],
       [37.0, 0, 1, 0, 0, 0],
       [37.0, 0, 0, 1, 1, 1],
       [37.0, 0, 0, 1, 1, 1],
       [37.0, 0, 0, 1, 1, 1],
       [37.0, 0, 0, 1, 1, 1],
       [37.0, 0, 0, 1, 0, 0],
       [37.1, 0, 1, 0, 0, 0],
       [37.1, 0, 0, 1, 1, 1],
       [37.1, 0, 0, 1, 0, 0],
       [37.2, 0, 0, 1, 1, 0],
       [37.2, 0, 1, 0, 0, 0],
       [37

## Create Train and Test Data

In [13]:
X1_train, X1_test, Y1_train, Y1_test = train_test_split(X, Y1, test_size = 0.2, random_state = 4)
X2_train, X2_test, Y2_train, Y2_test = train_test_split(X, Y2, test_size = 0.2, random_state = 4)

## Decision Tree Classifier Y1

In [14]:
clf_dt_1 = DecisionTreeClassifier(criterion = 'entropy')

In [15]:
clf_dt_1.fit(X1_train, Y1_train)

DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [16]:
Y1_pred_dt = clf_dt_1.predict(X1_test)

In [17]:
cm1_dt = confusion_matrix(Y1_pred_dt, Y1_test)
cm1_dt

array([[13,  0],
       [ 0, 11]], dtype=int64)

## Decision Tree Classifier Y2

In [18]:
clf_dt_2 = DecisionTreeClassifier(criterion = 'entropy')

In [19]:
clf_dt_2.fit(X2_train, Y2_train)

DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [20]:
Y2_pred_dt = clf_dt_2.predict(X2_test)

In [21]:
cm2_dt = confusion_matrix(Y2_pred_dt, Y2_test)
cm2_dt

array([[15,  0],
       [ 0,  9]], dtype=int64)

## Random Forest Classifier Y1

In [22]:
clf_rf_1 = RandomForestClassifier(n_estimators = 10, criterion = 'entropy')

In [23]:
clf_rf_1.fit(X1_train, Y1_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [25]:
Y1_pred_rf = clf_rf_1.predict(X1_test)

In [26]:
cm1_rf = confusion_matrix(Y1_pred_dt, Y1_test)
cm1_rf

array([[13,  0],
       [ 0, 11]], dtype=int64)

## Random Forest Classifier Y2

In [27]:
clf_rf_2 = RandomForestClassifier(n_estimators = 10, criterion = 'entropy')

In [28]:
clf_rf_2.fit(X2_train, Y2_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [29]:
Y2_pred_rf = clf_rf_2.predict(X2_test)

In [30]:
cm2_rf = confusion_matrix(Y2_pred_dt, Y2_test)
cm2_rf

array([[15,  0],
       [ 0,  9]], dtype=int64)

## Naive Bayes Classifier Y1

In [33]:
clf_nb_1 = GaussianNB()

In [34]:
clf_nb_1.fit(X1_train, Y1_train)

GaussianNB(priors=None)

In [35]:
Y1_pred_nb = clf_nb_1.predict(X1_test)

In [36]:
cm1_nb = confusion_matrix(Y1_pred_nb, Y1_test)
cm1_nb

array([[ 9,  0],
       [ 4, 11]], dtype=int64)

## Naive Bayes Classifier Y2

In [37]:
clf_nb_2 = GaussianNB()

In [38]:
clf_nb_2.fit(X2_train, Y2_train)

GaussianNB(priors=None)

In [39]:
Y2_pred_nb = clf_nb_2.predict(X2_test)

In [40]:
cm2_nb = confusion_matrix(Y2_pred_nb, Y2_test)
cm2_nb

array([[15,  0],
       [ 0,  9]], dtype=int64)

## KNN Classifier Y1

In [41]:
clf_knn_1 = KNeighborsClassifier(n_neighbors = 5)

In [42]:
clf_knn_1.fit(X1_train, Y1_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

In [43]:
Y1_pred_knn = clf_knn_1.predict(X1_test)

In [44]:
cm1_knn = confusion_matrix(Y1_pred_knn, Y1_test)
cm1_knn

array([[13,  0],
       [ 0, 11]], dtype=int64)

## KNN Classifier Y2

In [45]:
clf_knn_2 = KNeighborsClassifier(n_neighbors = 5)

In [46]:
clf_knn_2.fit(X2_train, Y2_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

In [47]:
Y2_pred_knn = clf_knn_2.predict(X2_test)

In [48]:
cm2_knn = confusion_matrix(Y2_pred_knn, Y2_test)
cm2_knn

array([[15,  0],
       [ 0,  9]], dtype=int64)

## Logistic Regression Classifier Y1

In [49]:
clf_lr_1 = LogisticRegression()

In [50]:
clf_lr_1.fit(X1_train, Y1_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [51]:
Y1_pred_lr = clf_lr_1.predict(X1_test)

In [52]:
cm1_lr = confusion_matrix(Y1_pred_lr, Y1_test)
cm1_lr

array([[13,  0],
       [ 0, 11]], dtype=int64)

## Logistic Regression Classifier Y2

In [53]:
clf_lr_2 = LogisticRegression()

In [54]:
clf_lr_2.fit(X2_train, Y2_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [55]:
Y2_pred_lr = clf_lr_2.predict(X2_test)

In [56]:
cm2_lr = confusion_matrix(Y2_pred_lr, Y2_test)
cm2_lr

array([[15,  0],
       [ 0,  9]], dtype=int64)

## Linear SVC Classifier Y1

In [57]:
clf_lsvc_1 = SVC(kernel = 'linear')

In [58]:
clf_lsvc_1.fit(X1_train, Y1_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [59]:
Y1_pred_lsvc = clf_lsvc_1.predict(X1_test)

In [61]:
cm1_lsvc = confusion_matrix(Y1_pred_lsvc, Y1_test)
cm1_lsvc

array([[13,  0],
       [ 0, 11]], dtype=int64)

## Linear SVC Classifier Y2

In [63]:
clf_lsvc_2 = SVC(kernel = 'linear')

In [64]:
clf_lsvc_2.fit(X2_train, Y2_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [65]:
Y2_pred_lsvc = clf_lsvc_2.predict(X2_test)

In [66]:
cm2_lsvc = confusion_matrix(Y2_pred_lsvc, Y2_test)
cm2_lsvc

array([[15,  0],
       [ 0,  9]], dtype=int64)

## Kernel SVC Classifier Y1

In [67]:
clf_ksvc_1 = SVC(kernel = 'rbf')

In [68]:
clf_ksvc_1.fit(X1_train, Y1_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [69]:
Y1_pred_ksvc = clf_ksvc_1.predict(X1_test)

In [70]:
cm1_ksvc = confusion_matrix(Y1_pred_ksvc, Y1_test)
cm1_ksvc

array([[13,  0],
       [ 0, 11]], dtype=int64)

## Kernel SVC Classifier Y2

In [71]:
clf_ksvc_2 = SVC(kernel = 'rbf')

In [72]:
clf_ksvc_2.fit(X2_train, Y2_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [73]:
Y2_pred_ksvc = clf_ksvc_2.predict(X2_test)

In [74]:
cm2_ksvc = confusion_matrix(Y2_pred_ksvc, Y2_test)
cm2_ksvc

array([[15,  0],
       [ 0,  9]], dtype=int64)

## Checking the Model Accuracies

In [75]:
model_accuracies_1['DT'] = accuracy_score(Y1_pred_dt, Y1_test)
model_accuracies_1['KNN'] = accuracy_score(Y1_pred_knn, Y1_test)
model_accuracies_1['KernelSVC'] = accuracy_score(Y1_pred_ksvc, Y1_test)
model_accuracies_1['LinearSVC'] = accuracy_score(Y1_pred_lsvc, Y1_test)
model_accuracies_1['LogReg'] = accuracy_score(Y1_pred_lr, Y1_test)
model_accuracies_1['NB'] = accuracy_score(Y1_pred_nb, Y1_test)
model_accuracies_1['RF'] = accuracy_score(Y1_pred_rf, Y1_test)
model_accuracies_1

{'DT': 1.0,
 'KNN': 1.0,
 'KernelSVC': 1.0,
 'LinearSVC': 1.0,
 'LogReg': 1.0,
 'NB': 0.83333333333333337,
 'RF': 1.0}

In [76]:
model_accuracies_2['DT'] = accuracy_score(Y2_pred_dt, Y2_test)
model_accuracies_2['KNN'] = accuracy_score(Y2_pred_knn, Y2_test)
model_accuracies_2['KernelSVC'] = accuracy_score(Y2_pred_ksvc, Y2_test)
model_accuracies_2['LinearSVC'] = accuracy_score(Y2_pred_lsvc, Y2_test)
model_accuracies_2['LogReg'] = accuracy_score(Y2_pred_lr, Y2_test)
model_accuracies_2['NB'] = accuracy_score(Y2_pred_nb, Y2_test)
model_accuracies_2['RF'] = accuracy_score(Y2_pred_rf, Y2_test)
model_accuracies_2

{'DT': 1.0,
 'KNN': 1.0,
 'KernelSVC': 1.0,
 'LinearSVC': 1.0,
 'LogReg': 1.0,
 'NB': 1.0,
 'RF': 1.0}