In [2]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC # Import Support Vector Classifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler # Often good practice for SVMs

# Fetch the wine dataset via scikit-learn's loader and pull out the feature
# matrix (X) and the class labels (y). `data` is kept around because its
# `target_names` are needed later for the classification report.
data = load_wine()
X, y = data.data, data.target

print(f"Original dataset shape: X={X.shape}, y={y.shape}")

# --- Train / validation / test split at a 70:15:15 ratio ---

# Step 1: hold out 15% of all samples as the test set. The remaining 85%
# (X_train_val) is divided again below. Stratifying on y asks the splitter to
# keep the class proportions consistent across the two parts.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=42,
    stratify=y,
)

# Step 2: carve the validation set out of the 85% remainder. Validation should
# be 15% of the ORIGINAL data, so relative to the remainder the fraction is
# 0.15 / 0.85 (roughly 0.17647); what is left over is the 70% training set.
val_share_of_remainder = 0.15 / 0.85
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=val_share_of_remainder,
    random_state=42,
    stratify=y_train_val,
)

# Report the resulting sizes; for the 178-sample dataset the expectation is
# roughly 124 training, 27 validation and 27 test samples.
print("\n--- Data Split Shapes (70:15:15) ---")
print(f"Training set shape: X={X_train.shape}, y={y_train.shape}")
print(f"Validation set shape: X={X_val.shape}, y={y_val.shape}")
print(f"Test set shape: X={X_test.shape}, y={y_test.shape}")

# --- Standardize the features before fitting the SVM ---
# The scaler is fitted on the training split only; the same fitted scaler is
# then applied to all three splits, so the validation and test data never
# influence the scaling statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)


# Train a linear-kernel support vector classifier on the standardized
# training split (an RBF kernel would be a common alternative).
# NOTE(review): per the scikit-learn docs, SVC's default max_iter is -1
# (no limit), so max_iter=5000 acts as a cap on solver iterations rather than
# an increase -- confirm this is intended.
# NOTE(review): the docs also state random_state only matters for probability
# estimates (probability=True); it presumably has no effect here -- confirm.
model_svm = SVC(
    kernel='linear',
    random_state=42,
    max_iter=5000,
)
print("\nFitting SVC model with StandardScaler on the training data...")
model_svm.fit(X_train_scaled, y_train)  # fit on the scaled features
print("Model fitted successfully.")

# Predict on both held-out splits, always feeding the scaled features so the
# inputs match what the model was trained on.
val_preds_svm = model_svm.predict(X_val_scaled)
test_preds_svm = model_svm.predict(X_test_scaled)

# Accuracy on the validation and test splits.
val_accuracy_svm = accuracy_score(y_val, val_preds_svm)
test_accuracy_svm = accuracy_score(y_test, test_preds_svm)
print(f"\nSVM Validation Accuracy: {val_accuracy_svm:.4f}")
print(f"SVM Test Accuracy: {test_accuracy_svm:.4f}")

# Per-class breakdown on the test split, labelled with the dataset's own
# class names.
print("\nSVM Classification Report on Test Set:")
test_report_svm = classification_report(
    y_test,
    test_preds_svm,
    target_names=data.target_names,
)
print(test_report_svm)

Original dataset shape: X=(178, 13), y=(178,)

--- Data Split Shapes (70:15:15) ---
Training set shape: X=(124, 13), y=(124,)
Validation set shape: X=(27, 13), y=(27,)
Test set shape: X=(27, 13), y=(27,)

Fitting SVC model with StandardScaler on the training data...
Model fitted successfully.

SVM Validation Accuracy: 0.9630
SVM Test Accuracy: 0.9630

SVM Classification Report on Test Set:
              precision    recall  f1-score   support

     class_0       1.00      1.00      1.00         9
     class_1       0.92      1.00      0.96        11
     class_2       1.00      0.86      0.92         7

    accuracy                           0.96        27
   macro avg       0.97      0.95      0.96        27
weighted avg       0.97      0.96      0.96        27

