In [22]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Load scikit-learn's bundled wine dataset, then unpack the feature matrix
# and the class labels in one step. `data` is kept because later code reads
# `data.target_names` for the classification report.
data = load_wine()
X, y = data.data, data.target


In [31]:
def split_train_evaluate(test_size_step1, test_size_step2, scale=False, random_state=42):
    """
    Train and evaluate logistic regression using a two-step split:
      1) Hold out test_size_step1 of the total data as TEST
      2) From the remainder, hold out test_size_step2 as VALIDATION

    The function reads the module-level ``X``, ``y`` and ``data`` (for
    ``data.target_names``); they must be defined before it is called.
    Both splits are stratified on the class labels. The model is fitted on
    the training part only, then validation accuracy, test accuracy and a
    per-class classification report for the test set are printed.

    Args:
        test_size_step1 (float): test size proportion (of the total data)
        test_size_step2 (float): validation size proportion (of what remains
            after the test set has been removed, NOT of the total)
        scale (bool): if True, apply StandardScaler before training
        random_state (int): seed passed to both splits and to the classifier.
            Defaults to 42.

    Returns:
        None. Results are printed only.

    Examples:
      - 70/15/15  -> split_train_evaluate(0.15, 0.1765, scale=True)
                     (0.1765 of the remaining 85% is about 15% of the total)
      - 60/20/20  -> split_train_evaluate(0.20, 0.25, scale=True)
                     (0.25 of the remaining 80% is 20% of the total)
    """
    # Step 1: Split off the test set, keeping class proportions via stratify
    X_train_val, X_test, y_train_val, y_test = train_test_split(
        X, y, test_size=test_size_step1, random_state=random_state, stratify=y
    )

    # Step 2: Split validation from the remaining data, again stratified
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val, y_train_val, test_size=test_size_step2,
        random_state=random_state, stratify=y_train_val
    )

    # Choose the model. With scale=True the scaler lives inside the pipeline,
    # so it is fitted on X_train only and merely applied to X_val / X_test.
    # NOTE(review): the two branches use different max_iter values (100 vs
    # 1000) -- presumably because unscaled features need more iterations to
    # converge; confirm this is intentional.
    if scale:
        model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=100, random_state=random_state))
    else:
        model = LogisticRegression(max_iter=1000, random_state=random_state)

    model.fit(X_train, y_train)

    # Evaluate validation set
    val_preds = model.predict(X_val)
    val_accuracy = accuracy_score(y_val, val_preds)
    print(f"Validation Accuracy: {val_accuracy:.4f}")

    # Evaluate test set
    test_preds = model.predict(X_test)
    test_accuracy = accuracy_score(y_test, test_preds)
    print(f"Test Accuracy: {test_accuracy:.4f}")

    # Detailed classification report on test set
    print("\nClassification Report on Test Set:")
    print(classification_report(y_test, test_preds, target_names=data.target_names))

# 70/15/15 split



In [32]:
# 70/15/15 split: 15% is held out as test, then 0.1765 of the remaining 85%
# (about 15% of the total) is held out as validation.
split_train_evaluate(test_size_step1=0.15, test_size_step2=0.1765, scale=True)

Validation Accuracy: 1.0000
Test Accuracy: 1.0000

Classification Report on Test Set:
              precision    recall  f1-score   support

     class_0       1.00      1.00      1.00         9
     class_1       1.00      1.00      1.00        11
     class_2       1.00      1.00      1.00         7

    accuracy                           1.00        27
   macro avg       1.00      1.00      1.00        27
weighted avg       1.00      1.00      1.00        27



# 60/20/20 split

In [28]:
# 60/20/20 split: 20% is held out as test, then 0.25 of the remaining 80%
# (20% of the total) is held out as validation.
split_train_evaluate(test_size_step1=0.20, test_size_step2=0.25, scale=True)

Validation Accuracy: 1.0000
Test Accuracy: 0.9722

Classification Report on Test Set:
              precision    recall  f1-score   support

     class_0       1.00      1.00      1.00        12
     class_1       0.93      1.00      0.97        14
     class_2       1.00      0.90      0.95        10

    accuracy                           0.97        36
   macro avg       0.98      0.97      0.97        36
weighted avg       0.97      0.97      0.97        36

