In [2]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load the default wine dataset
data = load_wine()
X = data.data
y = data.target

print(f"Original dataset shape: X={X.shape}, y={y.shape}")


# Adjusting the hyperparameter ratio to 60:20:20 ---

# First split: Separate out 20% for the test dataset
# X_train_val 80% of the data, y_train_val 80% of the targets
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42, stratify=y
)

# Second split: From the X_train_val (80% of the original data),
# split 25% to be the validation set (20% of total), 60% left for training
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=0.25, random_state=42, stratify=y_train_val
)

print("\n--- Data Split Shapes (60:20:20) ---")
print(f"Training set shape: X={X_train.shape}, y={y_train.shape}")
print(f"Validation set shape: X={X_val.shape}, y={y_val.shape}")
print(f"Test set shape: X={X_test.shape}, y={y_test.shape}")

# Fit the logistic regression model on the training set
# max_iter is increased to ensure convergence for this dataset
model = LogisticRegression(max_iter=1000, random_state=42)
print("\nFitting Logistic Regression model on the training data...")
model.fit(X_train, y_train)
print("Model fitted successfully.")

# Evaluate the validation set
val_preds = model.predict(X_val)
val_accuracy = accuracy_score(y_val, val_preds)
print(f"\nValidation Accuracy: {val_accuracy:.4f}")

# Evaluate the test set
test_preds = model.predict(X_test)
test_accuracy = accuracy_score(y_test, test_preds)
print(f"\nTest Accuracy: {test_accuracy:.4f}")

# Detailed classification report on test set
print("\nClassification Report on Test Set:")
print(classification_report(y_test, test_preds, target_names=data.target_names))

Original dataset shape: X=(178, 13), y=(178,)

--- Data Split Shapes (60:20:20) ---
Training set shape: X=(106, 13), y=(106,)
Validation set shape: X=(36, 13), y=(36,)
Test set shape: X=(36, 13), y=(36,)

Fitting Logistic Regression model on the training data...
\Model fitted successfully.

Validation Accuracy: 0.9444

Test Accuracy: 0.9722

Classification Report on Test Set:
              precision    recall  f1-score   support

     class_0       1.00      1.00      1.00        12
     class_1       0.93      1.00      0.97        14
     class_2       1.00      0.90      0.95        10

    accuracy                           0.97        36
   macro avg       0.98      0.97      0.97        36
weighted avg       0.97      0.97      0.97        36



STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
