In [1]:
import numpy as np
from pathlib import Path
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
import joblib

In [2]:
# 1. Load dataset
wine = load_wine()
feature_names = wine.feature_names

# 3. Select exactly 6 input features
feature_cols = [
    "alcohol",
    "malic_acid",
    "ash",
    "alcalinity_of_ash",
    "flavanoids",
    "proline",
]

# Translate the chosen names into column positions of wine.data.
# list.index raises ValueError if a name is not present in feature_names.
feature_indices = [feature_names.index(name) for name in feature_cols]

# astype(float) returns a new array, so wine.data itself is never modified.
X_all = wine.data.astype(float)
X = X_all[:, feature_indices]
y = wine.target

# Stratified 80/20 hold-out split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 2. Check and handle missing values
# Done *after* the split on purpose: the per-column medians are estimated from
# the training rows only and then reused for the test rows, so no statistic of
# the held-out data leaks into what the model is trained on.
# X_all and X are left as loaded (not imputed).
if np.isnan(X_train).any() or np.isnan(X_test).any():
    train_medians = np.nanmedian(X_train, axis=0)
    # Broadcasting fills each NaN with the training median of its own column.
    X_train = np.where(np.isnan(X_train), train_medians, X_train)
    X_test = np.where(np.isnan(X_test), train_medians, X_test)

In [5]:
# 4. Feature scaling + 5. Train model (Logistic Regression)
# Both steps live in one Pipeline so the scaler is fitted on the training
# split only and the exact same transformation is applied at predict time.
scaler_step = ("scaler", StandardScaler())
model_step = ("model", LogisticRegression(max_iter=1000))
pipeline = Pipeline(steps=[scaler_step, model_step])

pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

# 6. Evaluation
# Precision/recall/F1 are support-weighted averages over the classes;
# the fourth return value (support) is not needed here and is discarded.
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1, _ = precision_recall_fscore_support(
    y_test, y_pred, average="weighted"
)
report = classification_report(y_test, y_pred)

# Headline metrics first, each to four decimals, then the per-class table.
headline_metrics = (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
)
for metric_label, metric_value in headline_metrics:
    print(f"{metric_label}: {metric_value:.4f}")
print("\nClassification Report:\n", report)

Accuracy: 0.9722
Precision: 0.9741
Recall: 0.9722
F1 Score: 0.9720

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.93      1.00      0.97        14
           2       1.00      0.90      0.95        10

    accuracy                           0.97        36
   macro avg       0.98      0.97      0.97        36
weighted avg       0.97      0.97      0.97        36



In [None]:
# 7. Save model
# Persist the whole fitted pipeline (scaler + classifier) as a single artifact
# under ./model, creating the directory if it does not exist yet.
model_dir = Path("model")
model_path = model_dir.joinpath("wine_cultivar_model.pkl")
model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(pipeline, model_path)

# Last expression of the cell: display where the artifact was written.
model_path

WindowsPath('model/wine_cultivar_model.pkl')

: 