## Train a Ridge Regression Model on the Diabetes Dataset

This notebook loads the Diabetes dataset from sklearn, splits the data into training and validation sets, trains a Ridge regression model, validates the model on the validation set, and saves the model.


In [None]:
# Reference: https://github.com/microsoft/MLOpsPython/tree/master/experimentation

In [1]:
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import joblib
import pandas as pd

In [None]:
# Load Data
sample_data = load_diabetes()

# One frame holding every feature column, with the regression target
# appended as the final column 'Y'.
df = pd.DataFrame(
    sample_data.data, columns=sample_data.feature_names
).assign(Y=sample_data.target)
print(df.shape)

In [None]:
# Summary statistics for df, the single dataframe that holds the feature
# columns together with the target column 'Y'.
df.describe()

In [None]:
# Split Data into Training and Validation Sets
# Target vector is column 'Y'; every remaining column is a feature.
y = df['Y'].values
X = df.drop(columns='Y').values

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)

# Bundle both splits so later cells can address them by name.
data = dict(
    train=dict(X=X_train, y=y_train),
    test=dict(X=X_test, y=y_test),
)

In [None]:
# Train Model on Training Set
# Experiment parameters forwarded to the Ridge constructor.
args = dict(alpha=0.5)

# Build the estimator, then fit it on the training split only.
reg_model = Ridge(**args)
reg_model.fit(X=data["train"]["X"], y=data["train"]["y"])

In [None]:
# Validate Model on Validation Set

# Predict on the held-out split and compare against its true targets.
preds = reg_model.predict(data["test"]["X"])

# scikit-learn metrics take (y_true, y_pred) in that order; keeping the
# documented order matters if this metric is ever swapped for an
# asymmetric one.
mse = mean_squared_error(data["test"]["y"], preds)
metrics = {"mse": mse}
print(metrics)

In [None]:
# Save Model
# File name the fitted estimator is serialized to (relative to the
# notebook's working directory).
model_name = "sklearn_regression_model.pkl"

# Persist the fitted Ridge estimator created in the training cell.
joblib.dump(value=reg_model, filename=model_name)

## Score Data with a Ridge Regression Model Trained on the Diabetes Dataset

This notebook loads the model trained in the Diabetes Ridge Regression Training notebook, prepares the data, and scores the data.

In [None]:
import json
import numpy
from azureml.core.model import Model
import joblib

In [None]:
# Load Model
# Ask Azure ML for the path of the model file by name, then
# deserialize the estimator from that path.
model_path = Model.get_model_path(
    model_name="sklearn_regression_model.pkl",
)
model = joblib.load(filename=model_path)

In [None]:
# Prepare Data
# Example request body: a JSON object whose "data" entry is a list of
# rows, each row holding ten numeric values.
raw_data = '{"data":[[1,2,3,4,5,6,7,8,9,10],[10,9,8,7,6,5,4,3,2,1]]}'

# Decode the JSON payload and convert its rows straight into an array.
data = numpy.array(json.loads(raw_data)["data"])

In [None]:
# Score Data
request_headers = dict()  # empty placeholder; not read anywhere in this cell

# Run the loaded model on the prepared rows and show the predictions as
# plain Python lists.
result = model.predict(data)
print("Test result: ", dict(result=result.tolist()))