# In [None]:
# The following code trains a model to predict the cultivator of a wine
# based on its chemical analysis

# Split arrays or matrices into random train and test subsets
# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html
from sklearn.model_selection import train_test_split

# Evaluate a score by cross-validation.
# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_val_score.html
from sklearn.model_selection import cross_val_score

# Construct a Pipeline from the given estimators.
# https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.make_pipeline.html
from sklearn.pipeline import make_pipeline

# Standardize features by removing the mean and scaling to unit variance
# https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
from sklearn.preprocessing import StandardScaler

# Linear Support Vector Classification
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html
from sklearn.svm import LinearSVC

# Load and return the wine dataset (classification).
# https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_wine.html
from sklearn.datasets import load_wine

# Fetch the wine dataset; as_frame=True exposes the features and target as pandas objects
wines = load_wine(as_frame=True)
# Uncomment to inspect the feature table:
# print(wines.data)
print(wines.DESCR)

# Hold out a test subset; the fixed random_state makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    wines.data,
    wines.target,
    random_state=42,
)

# Build a scale-then-classify pipeline and train it on the training split.
# fit() returns the fitted pipeline itself, so construction and training can be chained.
svm_clf = make_pipeline(
    StandardScaler(),
    LinearSVC(C=1, random_state=42),
).fit(X_train, y_train)
# make_pipeline: Construct a pipeline
# Pipeline allows you to sequentially apply a list of transformers to preprocess the data and,
# if desired, conclude the sequence with a final predictor for predictive modeling.
# https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.make_pipeline.html
# https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html#sklearn.pipeline.Pipeline

# StandardScaler(): Standardize features by removing the mean and scaling to unit variance
# https://scikit-learn.org/1.6/modules/generated/sklearn.preprocessing.StandardScaler.html

# LinearSVC(C=1, random_state=42): Linear Support Vector Classification
# https://scikit-learn.org/1.6/modules/generated/sklearn.svm.LinearSVC.html

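# Use the model to make a prediction
# NOTE: the pipeline is fitted on every column of wines.data (13 features), so each row
# passed to predict() must contain 13 values; the two-value rows below only work with a
# model trained on two features.
# X_new = [[3.06, 1065.0], [0.61, 740.0]]
# print(svm_clf.predict(X_new))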

# Cross-validate the pipeline on the training data (default folds and scoring)
# and report the average score across folds
cv_scores = cross_val_score(svm_clf, X_train, y_train)
print(cv_scores.mean())