In [None]:
#importing libraries
import pandas as pd
import numpy as np
import matplotlib.pylab as plt

from sklearn import model_selection
from sklearn.model_selection import train_test_split, KFold, cross_val_score, StratifiedKFold, learning_curve, GridSearchCV
from sklearn.metrics import confusion_matrix, make_scorer, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from keras import models
# Empty Sequential model created at module level; none of the cells visible in this
# notebook read `model` again.
model = models.Sequential()

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import to_categorical
from keras.preprocessing import image

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from keras.utils import to_categorical
from scikeras.wrappers import KerasClassifier, KerasRegressor

In [None]:
# Read the image array and its label array from the .npy files in the working directory.
# Label convention: 0 = no IDC, 1 = IDC.
X = np.load(file='X.npy')
Y = np.load(file='Y.npy')

In [None]:
# Sanity check: show the first three images to confirm X loaded correctly.
print(X[:3])

In [None]:
# Sanity check: show the first five labels to confirm Y loaded correctly.
print(Y[:5])

In [None]:
# Report how many images the dataset contains.
print(f'Total number of images: {len(X)}')

In [None]:
# Count the images labelled 0 (no IDC).
print(f'Number of negative IDC Images: {np.sum(Y == 0)}')

In [None]:
# Count the images labelled 1 (IDC).
print(f'Number of positive IDC Images: {np.sum(Y == 1)}')

In [None]:
# Report the shape of a single image (taken from the first sample).
print(f'Image shape (Width, Height, Channels): {X[0].shape}')

In [None]:
# Hold out 20% of the images for testing.
# random_state pins the shuffle so that every "Restart & Run All" produces the same split
# (and therefore comparable scores); stratify=Y keeps the IDC / non-IDC ratio the same in
# both partitions.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42, stratify=Y
)

In [None]:
# Cap both partitions at 30,000 samples; a slice that runs past the end keeps everything.
x_train, y_train = x_train[:30000], y_train[:30000]
x_test, y_test = x_test[:30000], y_test[:30000]

In [None]:
# Rescale pixel intensity: divide every value by 256.0.
x_train = np.divide(x_train, 256.0)
x_test = np.divide(x_test, 256.0)

In [None]:
# Confirm the array shapes after splitting and truncating.
print(f"Training Data Shape: {x_train.shape}")
print(f"Testing Data Shape: {x_test.shape}")

In [None]:
# Show the first five training images, each titled with its label.
for i in range(5):
    # NOTE(review): matplotlib ignores cmap for RGB(A) input, so 'gray' presumably has
    # no visible effect on these multi-channel images — confirm.
    plt.imshow(x_train[i], cmap='gray')
    plt.axis("off")
    plt.title('IDC = %d' % y_train[i])
    plt.show()

In [None]:
# Print the first three rescaled training images (values were divided by 256.0 above)
# to show that the data is now on a 0-1 scale.
print(x_train[:3])

In [None]:
# Flatten each image into one feature vector, giving 2-D (samples, features) arrays
# for the dense network and the scikit-learn estimators.
x_train_r = x_train.reshape(len(x_train), -1)
x_test_r = x_test.reshape(len(x_test), -1)

print("x_train shape: ", x_train_r.shape)
print("x_test shape: ", x_test_r.shape)

In [None]:
def buildclassifier():
    """Build and compile the fully connected binary classifier.

    Architecture: two hidden ReLU layers of 8 units each, followed by a single
    sigmoid output unit. All layers use the 'uniform' kernel initializer. The
    input width is read from the module-level ``x_train_r`` (its number of
    columns), so that array must exist before this function is called.

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer,
        binary cross-entropy loss and the accuracy metric.
    """
    n_features = x_train_r.shape[1]
    layers = [
        Dense(units=8, kernel_initializer='uniform', activation='relu', input_dim=n_features),
        Dense(units=8, kernel_initializer='uniform', activation='relu'),
        Dense(units=1, kernel_initializer='uniform', activation='sigmoid'),
    ]

    network = Sequential()  # initialize neural network
    for layer in layers:
        network.add(layer)

    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return network

In [None]:
# 6-fold cross-validation of the Keras network through the scikeras wrapper.
# `model=` is the scikeras constructor argument for the build function; `build_fn=` is
# only a deprecated alias that emits a warning.
classifier = KerasClassifier(model = buildclassifier, epochs = 200)
accuracies = cross_val_score(estimator = classifier, X = x_train_r, y = y_train, cv = 6)
mean = accuracies.mean()
# var(), not std(): this value is stored and reported as the accuracy *variance*.
variance = accuracies.var()

In [None]:
# Summarise the cross-validation accuracies of the Keras classifier.
print("Accuracy mean:", mean)
print("Accuracy variance:", variance)

In [None]:
# Feature scaler with default settings; used as the 'scaler' step of the pipeline below.
minmax = MinMaxScaler()

In [None]:
# Candidate estimators for the pipeline / grid search.
# The decision trees are seeded like the forest and the SVC: tree construction is
# randomised, so without a fixed random_state repeated runs give different models.
dtc = DecisionTreeClassifier(random_state=42)
rfc = RandomForestClassifier(n_estimators = 100, random_state=42)
svc = SVC(random_state=42)
lr = LogisticRegression()
knn = KNeighborsClassifier()
dtr = DecisionTreeRegressor(random_state=42)

In [None]:
# Min-max scale the features, then classify.
# The final step is the decision-tree *classifier*: the labels are binary classes, and a
# regressor in this slot would make pipe.score (and GridSearchCV's default scoring)
# report R^2 instead of accuracy.
pipe = Pipeline(steps = [('scaler', minmax), ('classifier', dtc)])

In [None]:
# Fit the scaler + estimator pipeline on the flattened training images.
pipe.fit(x_train_r, y_train)

In [None]:
# Search space for GridSearchCV: one dict per estimator, each listing only
# hyper-parameters that the estimator actually exposes. An unknown name (for example
# max_depth on an SVC) makes set_params raise "Invalid parameter" and aborts the search.
# Only classifiers are swapped into the 'classifier' step, so all swapped-in candidates
# are ranked by the same default score (accuracy) rather than a mix of accuracy and R^2.
param_grid = [
    # Estimator the pipeline was constructed with (a decision tree).
    {'classifier__max_depth': [2,6,8,10],
     'classifier__min_samples_split': [2,5,10,15]},
    # Decision tree classifier.
    {'classifier': [dtc],
     'classifier__max_depth': [2,6,8,10],
     'classifier__min_samples_split': [2,5,10,15],
     'classifier__max_leaf_nodes': [None,10,20,50,100]},
    # Random forest classifier.
    {'classifier': [rfc],
     'classifier__max_depth': [2,6,8,10],
     'classifier__min_samples_split': [2,5,10,15],
     'classifier__max_features': [2,3,4,5,6]},
    # Support vector classifier: regularisation strength and kernel.
    {'classifier': [svc],
     'classifier__C': [0.1,1,10],
     'classifier__kernel': ['linear','rbf']},
    # Logistic regression: inverse regularisation strength.
    {'classifier': [lr],
     'classifier__C': [0.01,0.1,1,10]},
    # k-nearest neighbours: neighbourhood size and vote weighting.
    {'classifier': [knn],
     'classifier__n_neighbors': [3,5,7,9],
     'classifier__weights': ['uniform','distance']},
]

In [None]:
# Exhaustive 5-fold search over param_grid using the pipeline as base estimator; no progress output.
grid_search = GridSearchCV(estimator=pipe, param_grid=param_grid, cv=5, verbose=0)

In [None]:
# Run the grid search on the flattened training data.
# GridSearchCV.fit returns the fitted search object itself, so `best_model` is the
# GridSearchCV instance; the winning pipeline is available as best_model.best_estimator_.
best_model = grid_search.fit(x_train_r, y_train)

In [None]:
# Create the decision tree classifier and fit it on the training data.
# random_state is fixed (same seed as the random forest and SVC) because tree construction
# is randomised; without it the reported score changes from run to run.
dtc = DecisionTreeClassifier(random_state=42)
dtc.fit(x_train_r,y_train)

In [None]:
# Mean accuracy of the decision tree on the held-out test set.
dscore = dtc.score(X=x_test_r, y=y_test)
print(f"Decision Tree Score:  {dscore}")

In [None]:
# Create a seeded 100-tree random forest and fit it on the training data.
rfc = RandomForestClassifier(n_estimators=100, random_state=42)
rfc.fit(X=x_train_r, y=y_train)

In [None]:
# Mean accuracy of the random forest on the held-out test set.
rscore = rfc.score(X=x_test_r, y=y_test)
print(f"Random Forest Score:  {rscore}")

In [None]:
# Create the support vector classifier and fit it on the training data.
svc = SVC(random_state=42)
svc.fit(X=x_train_r, y=y_train)

In [None]:
# Mean accuracy of the SVC on the held-out test set.
sscore = svc.score(X=x_test_r, y=y_test)
print(f"SVM Accuracy: {sscore}")

In [None]:
# Create the logistic regression classifier and fit it on the training data.
lr = LogisticRegression()
lr.fit(X=x_train_r, y=y_train)

In [None]:
# Mean accuracy of the logistic regression on the held-out test set.
lscore = lr.score(X=x_test_r, y=y_test)
print(f"logistic Regression accuracy {lscore}")

In [None]:
# Create the k-nearest-neighbours classifier and fit it on the training data.
knn = KNeighborsClassifier()
knn.fit(X=x_train_r, y=y_train)

In [None]:
# Mean accuracy of the k-nearest-neighbours classifier on the held-out test set.
kscore = knn.score(X=x_test_r, y=y_test)
print(f"KNeighbors accuracy {kscore}")

In [None]:
# Collect every model's score in one list, in this order:
# Keras network, decision tree, random forest, SVC, logistic regression, k-NN.
# Note: `mean` is a cross-validation mean on the training data, whereas the other
# five values are accuracies on the held-out test set.
results = [mean, dscore, rscore, sscore, lscore, kscore]
print(results)

In [None]:
# (name, estimator) pairs; every estimator is a fresh instance with default settings.
# NOTE(review): this rebinds `models`, which the import cell bound to the `keras.models`
# module — later use of `models.<something>` would hit this list instead.
# NOTE(review): KerasClassifier() is created without a build function here; presumably it
# needs one before it can be fitted — confirm against how this list is used.
models = [
    ('classifier', KerasClassifier()),
    ('dtc', DecisionTreeClassifier()),
    ('rfc', RandomForestClassifier()),
    ('svc', SVC()),
    ('ls', LogisticRegression()),
    ('knn', KNeighborsClassifier()),
]
print(models)