In [1]:
import numpy as np
import matplotlib.pyplot as plt
from define_dataset import define_dataset
import time
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

In [2]:
def build_model(X, y):
    """Grid-search a logistic-regression classifier and report the winner.

    Runs a 6-fold cross-validated ``GridSearchCV`` over the regularisation
    parameter ``C`` and the iteration cap ``max_iter``, prints the best
    parameter combination together with its ``best_score_``, and returns
    that score along with the estimator refitted by the search.

    Parameters
    ----------
    X
        Feature matrix, handed unchanged to ``GridSearchCV.fit``.
    y
        Target labels, handed unchanged to ``GridSearchCV.fit``.

    Returns
    -------
    tuple
        ``(best_score_, best_estimator_)`` of the fitted search.
    """
    search_space = [{
        'C': [0.1, 0.2, 0.5, 1],
        'max_iter': [500, 1000, 2000],
    }]
    # refit=True makes best_estimator_ available after the search completes.
    search = GridSearchCV(LogisticRegression(), search_space, refit=True, cv=6)
    search.fit(X, y)

    # Report each result as a heading line followed by its value.
    report = (
        ("Best parameters set found:", search.best_params_),
        ("Best score found:", search.best_score_),
    )
    for heading, value in report:
        print(heading)
        print(value)

    return search.best_score_, search.best_estimator_

In [5]:
# Model selection: for every (ROI threshold, number of bins) feature
# configuration, load the pre-computed training features, grid-search a
# classifier with build_model, and remember the best-scoring configuration.
dataset = define_dataset()
tr_set = dataset.tr_set
test_set = dataset.test_set
results_dict = dataset.results_dict
num_bins_arr = [40, 60, 100]
ROI_threshold_arr = [0.85, 0.9, 0.95]

# The training labels depend only on tr_set and results_dict, not on the
# feature configuration, so build them once instead of on every iteration.
y = np.array([results_dict[slide_name] for slide_name in tr_set], dtype='int')

# Initialise every "best so far" variable up front so that the names always
# exist after this cell runs and never silently carry values from an earlier
# kernel run.
best_clf_score = 0
best_clf = None
best_bins = None
best_ROI = None

for ROI_threshold in ROI_threshold_arr:
    for num_bins in num_bins_arr:
        X = np.load('bins/bins_arr_'+str(num_bins)+'r'+str(ROI_threshold)+'.npy')
        print('Num bins:', num_bins, 'ROI threshold:', ROI_threshold)

        (score, clf) = build_model(X, y)
        # NOTE(review): with `>=` a configuration that ties the current best
        # replaces it, so ties resolve to the configuration visited last.
        # Confirm this is the intended tie-break (use `>` to keep the first).
        if score >= best_clf_score:
            best_clf_score = score
            best_clf = clf
            best_bins = num_bins
            best_ROI = ROI_threshold

Num bins: 40 ROI threshold: 0.85
Best parameters set found:
{'C': 0.5, 'max_iter': 500}
Best score found:
0.48333333333333334
Num bins: 60 ROI threshold: 0.85
Best parameters set found:
{'C': 0.2, 'max_iter': 500}
Best score found:
0.5111111111111111
Num bins: 100 ROI threshold: 0.85
Best parameters set found:
{'C': 0.1, 'max_iter': 500}
Best score found:
0.5055555555555555
Num bins: 40 ROI threshold: 0.9
Best parameters set found:
{'C': 0.5, 'max_iter': 500}
Best score found:
0.48333333333333334
Num bins: 60 ROI threshold: 0.9
Best parameters set found:
{'C': 0.2, 'max_iter': 500}
Best score found:
0.5111111111111111
Num bins: 100 ROI threshold: 0.9
Best parameters set found:
{'C': 0.1, 'max_iter': 500}
Best score found:
0.5055555555555555
Num bins: 40 ROI threshold: 0.95
Best parameters set found:
{'C': 0.1, 'max_iter': 500}
Best score found:
0.5722222222222223
Num bins: 60 ROI threshold: 0.95
Best parameters set found:
{'C': 0.2, 'max_iter': 500}
Best score found:
0.5444444444444444

Test set

In [8]:
# Evaluate the classifier chosen during model selection on the test slides.
print('best bins:', best_bins, ', best ROI threshold:', best_ROI, ', best score:', best_clf_score)

# Test features are stored per configuration; load the file that matches the
# selected number of bins and ROI threshold.
test_bins_path = './bins/bins_arr_test_' + str(best_bins) + 'r' + str(best_ROI) + '.npy'
X_test = np.load(test_bins_path)

# Look up the label of every test slide, in test_set order.
y_test = np.array([results_dict[slide_name] for slide_name in test_set], dtype='int')

y_pred = best_clf.predict(X_test)

print('predictions:',y_pred)
print('real y:', y_test)

score = accuracy_score(y_test, y_pred)
# normalize=False is passed so the result can be subtracted from the number
# of test slides to obtain the error count.
num_correct = accuracy_score(y_test, y_pred, normalize=False)
errors = np.size(test_set) - num_correct
print('Test result:', score)
print('Errors:', errors)

best bins: 100 , best ROI threshold: 0.95 , best score: 0.5722222222222223
predictions: [0 1 0 0 0 0]
real y: [0 0 0 1 1 1]
Test result: 0.3333333333333333
Errors: 4
