In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from itertools import cycle

from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC, SVC
from sklearn.decomposition import PCA
from sklearn import cross_validation
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, GradientBoostingClassifier, AdaBoostClassifier



In [2]:
data = np.genfromtxt('cleveland.data', dtype = float, delimiter = ',')
#Removing rows with missing values
data = data[~np.isnan(data).any(axis=1)]
X = data[:,:-1] #Feature set
Y = data[:, -1]    #label Set
''' 
In Y: 
Value 0: < 50% diameter narrowing
Value 1-4: > 50% diameter narrowing  , so covert all values >0 to 1
'''
Y[Y > 0] = 1   
target_names = ['0','1']

In [3]:
attr_names = ( "age", "sex", "cp", "trestbps", "chol","fbs", "restecg",
              "thalach","exang", "oldpeak","slope", "ca", "thal", "num")

In [4]:
# First normalise the data so that the data is now between -1 and 1

X = StandardScaler().fit_transform(X)

In [5]:
# Applying cross validation on the training and test set for validating our linear SVM model
X_train,X_test,Y_train,Y_test = cross_validation.train_test_split(X, Y, test_size = 0.2, train_size=0.8, random_state=0)

In [6]:
## naive bayes classifier
gnb = GaussianNB()
gnb.fit(X_train, Y_train)
gnb_predict = gnb.predict(X_test)

## find accuracy
gnb_conf_matrix = confusion_matrix(Y_test, gnb_predict)
gnb_accuracy_score = accuracy_score(Y_test, gnb_predict)

print("\nNaive Bayes Confusion Matrix:")
print(gnb_conf_matrix)
print("Naive Bayes accuracy")
print(gnb_accuracy_score)


Naive Bayes Confusion Matrix:
[[28  2]
 [ 7 23]]
Naive Bayes accuracy
0.85


In [7]:
## Decision tree CART
dt = DecisionTreeClassifier(max_depth=10)
dt.fit(X_train,Y_train)
dt_predict = dt.predict(X_test)

## find accuracy
dt_conf_matrix = confusion_matrix(Y_test, dt_predict)
dt_accuracy_score = accuracy_score(Y_test, dt_predict)

print("\nDecision Tree CART Confusion Matrix:")
print(dt_conf_matrix)
print("Decision Tree CART accuracy")
print(dt_accuracy_score)


Decision Tree CART Confusion Matrix:
[[19 11]
 [ 7 23]]
Decision Tree CART accuracy
0.7


In [8]:
## Decision tree ID3
id3 = DecisionTreeClassifier(criterion='entropy',max_depth=10)
id3.fit(X_train,Y_train)
id3_predict = id3.predict(X_test)

id3_conf_matrix = confusion_matrix(Y_test, id3_predict)
id3_accuracy_score = accuracy_score(Y_test, id3_predict)

print("\nDecision Tree ID3 Confusion Matrix:")
print(id3_conf_matrix)
print("Decision Tree ID3 Accuracy")
print(id3_accuracy_score)


Decision Tree ID3 Confusion Matrix:
[[21  9]
 [ 8 22]]
Decision Tree ID3 Accuracy
0.7166666666666667


In [23]:
## Randon forest classifier
rf = RandomForestClassifier(max_depth=6) #10 trees
rf.fit(X_train, Y_train)
rf_predict = rf.predict(X_test)

## find accuracy
rf_conf_matrix = confusion_matrix(Y_test,rf_predict)
rf_accuracy_score = accuracy_score(Y_test, rf_predict)

print("\nRandom Forest Confusion Matrix:")
print(rf_conf_matrix)
print("Random Forest accuracy")
print(rf_accuracy_score)


Random Forest Confusion Matrix:
[[27  3]
 [ 7 23]]
Random Forest accuracy
0.8333333333333334


In [10]:
## Bagging
cart = DecisionTreeClassifier()
bagging = BaggingClassifier(base_estimator=cart, n_estimators=20, random_state=7)
bagging.fit(X_train, Y_train)
bagging_predict = bagging.predict(X_test)

bagging_conf_matrix = confusion_matrix(Y_test,bagging_predict)
bagging_accuracy_score = accuracy_score(Y_test, bagging_predict)


print("\nBagging Confusion Matrix:")
print(bagging_conf_matrix)
print("Bagging Accuracy")
print(bagging_accuracy_score)


Bagging Confusion Matrix:
[[24  6]
 [ 9 21]]
Bagging Accuracy
0.75


In [11]:
##Gradient Boosting
gbm = GradientBoostingClassifier(learning_rate=0.01,random_state=1, n_estimators=1000)
gbm.fit(X_train, Y_train)
gbm_predict = gbm.predict(X_test)


gbm_conf_matrix = confusion_matrix(Y_test,gbm_predict)
gbm_accuracy_score = accuracy_score(Y_test, gbm_predict)

print("\nGradient Boosting Confusion Matrix:")
print(gbm_conf_matrix)
print("Gradient Boosting Accuracy")
print(gbm_accuracy_score)


Gradient Boosting Confusion Matrix:
[[23  7]
 [ 6 24]]
Gradient Boosting Accuracy
0.7833333333333333


In [12]:
## Adaboost
ada = AdaBoostClassifier(base_estimator=cart, random_state= 7)
ada.fit(X_train, Y_train)
ada_predict = ada.predict(X_test)


ada_conf_matrix = confusion_matrix(Y_test,ada_predict)
ada_accuracy_score = accuracy_score(Y_test, ada_predict)

print("\nAdaboost Confusion Matrix:")
print(ada_conf_matrix)
print("Adaboost Accuracy")
print(ada_accuracy_score)


Adaboost Confusion Matrix:
[[22  8]
 [ 6 24]]
Adaboost Accuracy
0.7666666666666667
