In [1]:

import os
import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
from joblib import dump

# Where the fitted pipeline and its evaluation summary are written.
OUTPUT_MODEL_PATH = os.path.join("model", "wine_cultivar_model.pkl")
METRICS_PATH = os.path.join("model", "metrics.txt")

# The six input columns used to train the classifier.
FEATURES = [
    "alcohol",
    "malic_acid",
    "alcalinity_of_ash",
    "flavanoids",
    "color_intensity",
    "proline",
]

def load_data_external(csv_path="data/wine.csv"):
    """Load features and labels from an external CSV (optional alternative to the sklearn dataset).

    The CSV must contain every column listed in FEATURES plus a label column
    named either 'cultivar' or 'target'. When both are present, 'cultivar' is used.

    Parameters
    ----------
    csv_path : str
        Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns
    -------
    tuple
        ``(X, y)`` where X is the frame restricted to the FEATURES columns
        (in FEATURES order) and y is the label column.

    Raises
    ------
    KeyError
        If any FEATURES column, or both candidate label columns, are missing.
    """
    df = pd.read_csv(csv_path)

    # Check the schema up front so the error names the file and every missing column.
    missing = [name for name in FEATURES if name not in df.columns]
    if missing:
        raise KeyError(f"{csv_path} is missing required feature column(s): {missing}")

    # 'cultivar' takes precedence over 'target', matching the original lookup order.
    label_column = next((name for name in ("cultivar", "target") if name in df.columns), None)
    if label_column is None:
        raise KeyError(f"{csv_path} must contain a 'cultivar' or 'target' label column")

    return df[FEATURES], df[label_column]

def load_data_sklearn():
    """Return ``(X, y)`` built from scikit-learn's bundled wine dataset.

    X is the dataset frame restricted to the FEATURES columns; y is its
    'target' column.
    """
    wine_frame = load_wine(as_frame=True).frame
    # 'target' holds the class labels 0, 1, 2.
    return wine_frame[FEATURES], wine_frame["target"]

def train_and_save():
    """Train the wine classifier, evaluate it on a hold-out split, and persist the results.

    Steps:
      1. Load (X, y) via ``load_data_sklearn``.
      2. Make a stratified 80/20 train/test split (``random_state=42``).
      3. Fit a pipeline of median imputation, standard scaling and a
         200-tree random forest.
      4. Compute accuracy, macro precision/recall/F1 and a classification
         report on the test split.
      5. Dump the fitted pipeline to OUTPUT_MODEL_PATH and write the metrics
         to METRICS_PATH, creating their parent directories when needed.
      6. Print the same metrics to stdout.

    Returns
    -------
    None
    """
    X, y = load_data_sklearn()  # change to load_data_external(...) if using your CSV
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    pipeline = Pipeline([
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
        ("clf", RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1))
    ])

    pipeline.fit(X_train, y_train)
    preds = pipeline.predict(X_test)

    acc = accuracy_score(y_test, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(y_test, preds, average="macro")
    report = classification_report(y_test, preds, digits=4)

    # Derive the output directories from the path constants rather than
    # hard-coding "model", so editing either constant keeps the save working.
    # `or "."` covers a bare filename, whose dirname is the empty string.
    for path in (OUTPUT_MODEL_PATH, METRICS_PATH):
        os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
    dump(pipeline, OUTPUT_MODEL_PATH)

    # Explicit encoding keeps the metrics file identical across platforms.
    with open(METRICS_PATH, "w", encoding="utf-8") as f:
        f.write(f"Accuracy: {acc:.4f}\n")
        f.write(f"Precision (macro): {precision:.4f}\n")
        f.write(f"Recall (macro): {recall:.4f}\n")
        f.write(f"F1 (macro): {f1:.4f}\n\n")
        f.write("Classification report:\n")
        f.write(report)

    print("Model saved to:", OUTPUT_MODEL_PATH)
    print("Metrics saved to:", METRICS_PATH)
    print("Accuracy:", acc)
    print("Precision (macro):", precision)
    print("Recall (macro):", recall)
    print("F1 (macro):", f1)
    print("\nClassification report:\n", report)

# Entry point: run the full train / evaluate / save workflow when this code is
# executed directly rather than imported as a module.
if __name__ == "__main__":
    train_and_save()


Model saved to: model/wine_cultivar_model.pkl
Metrics saved to: model/metrics.txt
Accuracy: 0.9722222222222222
Precision (macro): 0.9777777777777779
Recall (macro): 0.9666666666666667
F1 (macro): 0.9709618874773139

Classification report:
               precision    recall  f1-score   support

           0     1.0000    1.0000    1.0000        12
           1     0.9333    1.0000    0.9655        14
           2     1.0000    0.9000    0.9474        10

    accuracy                         0.9722        36
   macro avg     0.9778    0.9667    0.9710        36
weighted avg     0.9741    0.9722    0.9720        36

