In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

In [2]:
from sklearn.model_selection import ShuffleSplit

In [4]:
# Load the iris dataset and pull out the feature matrix (X) and the
# class labels (y) that every later cell indexes into.
data = load_iris()
X, y = data.data, data.target


In [32]:
# Ten random 50/50 train/test partitions; the fixed seed makes the
# splits (and therefore the scores below) repeatable.
ss = ShuffleSplit(
    n_splits=10,
    train_size=0.5,
    test_size=0.5,
    random_state=0,
)

In [33]:
# Classifier shared by every experiment below.  Per the scikit-learn
# docs, multi_class='auto' resolves to one-vs-rest when the solver is
# liblinear.
clf = LogisticRegression(
    solver='liblinear',
    multi_class='auto',
)

In [34]:
# Fit on each training split and collect the accuracy on the matching
# held-out split.
scores = []

for train_index, test_index in ss.split(X):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # fit() returns the estimator itself, so scoring can be chained.
    score = clf.fit(X_train, y_train).score(X_test, y_test)
    scores.append(score)

# Convert to an array so .mean() / .std() are available afterwards.
scores = np.array(scores)
print(scores)

[0.84       0.93333333 0.90666667 1.         0.90666667 0.93333333
 0.94666667 1.         0.90666667 0.88      ]


[]

In [35]:
# Average accuracy over all splits.
np.mean(scores)

0.9253333333333333

In [36]:
# Spread (population standard deviation) of the accuracy across splits.
np.std(scores)

0.04702717890279574

In [37]:
# Report accuracy as "mean +/- std", both expressed in percent.
print(
    "{0:4.2f} +/- {1:4.2f}".format(
        100 * scores.mean(),
        100 * scores.std(),
    )
)

92.53 +/- 4.70


In [38]:
import matplotlib.pyplot as plt
%matplotlib inline

In [39]:
# Training-set fractions 10%, 20%, ..., 90%.
# linspace fixes the number of points and both endpoints explicitly;
# arange with a float step can gain or lose the last element through
# rounding, which would silently change how many sizes are evaluated.
train_sizes = np.linspace(0.1, 0.9, 9)

In [40]:
# Learning curve: for each training fraction, run 100 random splits and
# record the mean and standard deviation of the test accuracy (in %).
all_mean = []
all_std = []

for train_size in train_sizes:

    # Seed the splitter so the 100 partitions, and therefore the numbers
    # reported below, are identical on every run of this cell.
    ss = ShuffleSplit(n_splits=100, train_size=train_size,
                      test_size=1 - train_size, random_state=0)

    scores = []

    for train_index, test_index in ss.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        scores.append(score)

    scores = np.array(scores)

    # Compute the summary statistics once and reuse them for both the
    # printed report and the accumulated lists.
    mean_pct = scores.mean() * 100
    std_pct = scores.std() * 100

    print("train_size {0:.0f}%: {1:4.2f} +- {2:4.2f} %".format(train_size * 100,
                                                              mean_pct,
                                                              std_pct))

    all_mean.append(mean_pct)
    all_std.append(std_pct)
    

train_size 10%: 75.04 +- 10.28 %
train_size 20%: 84.36 +- 9.61 %
train_size 30%: 88.97 +- 6.13 %
train_size 40%: 91.32 +- 5.84 %
train_size 50%: 92.71 +- 4.24 %
train_size 60%: 93.78 +- 3.68 %
train_size 70%: 94.93 +- 3.85 %
train_size 80%: 94.93 +- 3.67 %
train_size 90%: 94.40 +- 5.87 %


In [17]:
# Display the estimator's hyperparameters.
# NOTE(review): the saved output under this cell carries execution count 17
# and shows solver='warn' / multi_class='warn', which does not match the
# estimator constructed with solver='liblinear', multi_class='auto' earlier
# in the notebook -- it looks stale; re-run the notebook top to bottom to
# refresh it.
clf

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)