# Notebook from Data Scientist with E2E scenario for Wine dataset

Expected Steps:
- download
- clean/preprocess
- train/hyperparameter tuning with results in MLflow + MinIO
- seldon serving
- example inference

Artefacts:
- raw data, preprocessed
- model per experiment
- experiment metadata and results

In [None]:
!pip install mlflow==1.30 boto3 awscli pyarrow scikit-learn

# Download data

In [None]:
!wget https://raw.githubusercontent.com/Barteus/kubeflow-examples/main/e2e-wine-kfp-mlflow/winequality-red.csv

# Preprocess data

In [None]:
import pandas as pd

# CSV file read by this cell.
file_path = 'winequality-red.csv'

# The file is parsed as semicolon-delimited, with the first row as the header.
df = pd.read_csv(file_path, sep=";", header=0)

# Normalise column names: lower-case, with spaces replaced by underscores.
df = df.rename(columns=lambda name: name.lower().replace(' ', '_'))

# Train model

In [None]:
import mlflow
import pandas as pd
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split

# Hint! To load the data from a parquet file instead, use:
# df = pd.read_parquet(file_path)

# Column to predict; every other column is used as a feature.
target_column = 'quality'

features = df.drop(columns=[target_column])
labels = df[target_column]

# 75/25 split, stratified on the target, with a fixed seed for a repeatable split.
train_x, test_x, train_y, test_y = train_test_split(
    features,
    labels,
    test_size=.25,
    random_state=1337,
    stratify=labels,
)

# Enable MLflow autologging for scikit-learn.
mlflow.sklearn.autolog()

# ElasticNet hyperparameters.
alpha = 0.5
l1_ratio = 0.5

with mlflow.start_run(run_name='elastic_net_models'):
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)

    # Log the fitted model explicitly, passing a registered model name.
    result = mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="model",
        registered_model_name="wine-elasticnet",
    )

    # Print the run's artifact URI joined with the logged model's artifact path.
    model_uri = f"{mlflow.get_artifact_uri()}/{result.artifact_path}"
    print(model_uri)

In [None]:
# Write the output of `pip freeze` to nb-requirements.txt.
!pip freeze > nb-requirements.txt