In [1]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import MinMaxScaler
from statistics import mean, stdev

In [2]:
df = datasets.load_breast_cancer()

In [3]:
df.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [4]:
X = df.data
y = df.target

In [5]:
X.shape

(569, 30)

In [6]:
scale = MinMaxScaler()

In [7]:
x_scaled = scale.fit_transform(X)

In [8]:
model = LogisticRegression()

In [9]:
sss = StratifiedShuffleSplit(n_splits=10, test_size=0.4, random_state=1)

In [10]:
lst_accu_stratified = []

for train_index, test_index in sss.split(X=X,y=y):
    x_train, x_test, y_train, y_test = x_scaled[train_index], x_scaled[test_index], y[train_index], y[test_index]
    model.fit(x_train, y_train)
    lst_accu_stratified.append(model.score(x_test, y_test))

In [11]:
# Print the output.

print('List of possible accuracy:\n',lst_accu_stratified)

print('\nMaximum Accuracy That can be obtained from this model is:',max(lst_accu_stratified)*100, '%')

print('\nMinimum Accuracy:', min(lst_accu_stratified)*100, '%')

print('\nOverall Accuracy:',mean(lst_accu_stratified)*100, '%')

print('\nStandard Deviation is:', stdev(lst_accu_stratified))

List of possible accuracy:
 [0.956140350877193, 0.9473684210526315, 0.9517543859649122, 0.9736842105263158, 0.9517543859649122, 0.956140350877193, 0.956140350877193, 0.9692982456140351, 0.9605263157894737, 0.9692982456140351]

Maximum Accuracy That can be obtained from this model is: 97.36842105263158 %

Minimum Accuracy: 94.73684210526315 %

Overall Accuracy: 95.92105263157895 %

Standard Deviation is: 0.00878410461157886


In [12]:
## Just for understanding purpose

for train_index, test_index in sss.split(X=X,y=y):
    print("\nTRAIN:", train_index)
    print("\nTotal train data points: ", len(train_index))
    print("*"*80)
    print("\nTEST:", test_index)
    print("\nTotal test data points: ", len(test_index))
#     X_train, X_test = X[train_index], X[test_index]
#     y_train, y_test = y[train_index], y[test_index]


TRAIN: [407  48 532 468 243 190 126 491 257 566  66 489 546 142 425 559 526 375
 123 358 324  19 225  36 370 108  44 347 549 378 333 322 351 436 392  84
 162 506 449  67 305 427  16 350 275 528 181 459   5 524 232 276 443 330
 133 233 229 444 466  72 544 282 182 269 475 452 137 310 473 148 568 556
 470  24  18 105  47 297 238 421  89 171  56 114 286 445 516   4 365 291
 522 189 536 227 379   0 140 496 118  90  64 254 453  51 220 136 146 198
 151 104 414 293  39  93 352 184 298 340 547 134  98 224 400 552 319 296
 212 159 418 476 485 250  55 328 308 307 419 163 488 535 155 499 542  42
 223 217 187 309 204 336 541 461 205  82  38 294  76 500 244 271 558 385
 172 388  46 486  14  49 120 451  13 497 543 245  68 438  62 353 503 467
 115 239 555 150 176 265 390 540 313 230 518 478 348  40 218 551 119 389
 525 393 495 331 417 208 563 364 316 454 430 334 377  97 507 337 199 345
 433  52 194 397 251 202 165 169 214 168  58 368 216  80  17 362 538 455
   6 562 287 539 369   2 139  85 567 193 38