In [14]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load the default wine dataset
data = load_wine()
X = data.data
y = data.target


In [15]:
def split_train_evaluate(X, y, test_size_step1, test_size_step2):
    """
    Run logistic regression with a two-step split:
      1) Hold out test_size_step1 of total data as TEST
      2) From remaining, hold out test_size_step2 as VALIDATION

    Examples:
      - 70/15/15  -> run_experiment(X, y, 0.15, 0.1765)
      - 60/20/20  -> run_experiment(X, y, 0.20, 0.25)
    """
    random_state = 42

    # Step 1: Split off test set
    X_train_val, X_test, y_train_val, y_test = train_test_split(
        X, y, test_size=test_size_step1, random_state=random_state, stratify=y
    )

    # Step 2: Split validation from remaining
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val, y_train_val, test_size=test_size_step2,
        random_state=random_state, stratify=y_train_val
    )

    # Fit logistic regression
    model = LogisticRegression(max_iter=1000, random_state=random_state)
    model.fit(X_train, y_train)

    # Evaluate validation set
    val_preds = model.predict(X_val)
    val_accuracy = accuracy_score(y_val, val_preds)
    print(f"Validation Accuracy: {val_accuracy:.4f}")

    # Evaluate test set
    test_preds = model.predict(X_test)
    test_accuracy = accuracy_score(y_test, test_preds)
    print(f"Test Accuracy: {test_accuracy:.4f}")

    # Detailed classification report on test set
    print("\nClassification Report on Test Set:")
    print(classification_report(y_test, test_preds, target_names=data.target_names))

In [16]:
# 70/15/15 split
split_train_evaluate(X, y, 0.15, 0.1765)


=== 70/15/15 split ===
Validation Accuracy: 1.0000
Test Accuracy: 0.9630

Classification Report on Test Set:
              precision    recall  f1-score   support

     class_0       1.00      1.00      1.00         9
     class_1       0.92      1.00      0.96        11
     class_2       1.00      0.86      0.92         7

    accuracy                           0.96        27
   macro avg       0.97      0.95      0.96        27
weighted avg       0.97      0.96      0.96        27



STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [18]:
# 60/20/20 split
split_train_evaluate(X, y, 0.20, 0.25)


=== 60/20/20 split ===
Validation Accuracy: 0.9444
Test Accuracy: 0.9722

Classification Report on Test Set:
              precision    recall  f1-score   support

     class_0       1.00      1.00      1.00        12
     class_1       0.93      1.00      0.97        14
     class_2       1.00      0.90      0.95        10

    accuracy                           0.97        36
   macro avg       0.98      0.97      0.97        36
weighted avg       0.97      0.97      0.97        36



STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
