
# ML Workflow - Supervised Learning (Regression)

![Image](./img/scikit_learn.png)


In [None]:
# imports 

import os
import pickle

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn import datasets
from sklearn import linear_model
from sklearn.datasets import make_regression
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

## [Train-Test-Split](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html)

![Image](./img/train_valid_test.JPG)

In [None]:
# Load datasets
# Synthetic regression problem: 1000 samples x 10 features, seeded so every run
# produces the same arrays. The commented line is a drop-in real-data alternative.

X, y = make_regression(
    n_samples=1000,
    n_features=10,
    random_state=42,
)
#X, y = datasets.load_diabetes(return_X_y=True)
print(X.shape, y.shape)

In [None]:
# Train and test
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print(
    f"X_train: {X_train.shape}, X_test: {X_test.shape}, "
    f"y_train: {y_train.shape}, y_test: {y_test.shape}"
)
#print(f"X_train: {type(X_train)}, X_test: {type(X_test)}, y_train: {type(y_train)}, y_test: {type(y_test)}")

---

## [Model Selection](https://scikit-learn.org/stable/supervised_learning.html)

![Image](./img/ml_map_regression.JPG)

[See full map](https://scikit-learn.org/stable/tutorial/machine_learning_map/index.html)

---

### Regression Estimators

- [Lasso](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html)

- [ElasticNet](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html)

- [Ridge Regression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html)

- [SVR](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html)

- [SGD Regressor](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDRegressor.html)

- [Linear Regression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html)

In [None]:
%%time

# Model definition
# Lasso is the active estimator; the commented lines are alternatives that can be
# swapped in by uncommenting one of them.

model = linear_model.Lasso()
#model = ElasticNet()
#model = Ridge()
#model = SVR()
#model = SGDRegressor()
#model = LinearRegression()

# Snapshot of the estimator's constructor parameters (all defaults here).
hyperparameters = model.get_params()

print(f"{type(model)} \n")
print(f"Model hyperparameters: {hyperparameters} \n")

In [None]:
%%time

# Model training
# Fit the selected estimator on the training split and report what was learned.

model.fit(X_train, y_train)

print('Model:', model, '\n')
# Read the hyperparameters from the estimator that was just fitted instead of a
# variable captured in an earlier cell, so the report cannot go stale when the
# model definition is edited and only this cell is re-run.
print('Model hyperparameters:', model.get_params(), '\n')
# Not every estimator offered in this notebook exposes coef_: SVR with its default
# RBF kernel raises AttributeError on access, so check before printing.
if hasattr(model, 'coef_'):
    print('Model coefficients:', model.coef_, '\n')
else:
    print('Model coefficients: not available for', type(model).__name__, '\n')

In [None]:
%%time

# Model predictions
# Score the held-out features with the fitted estimator.

predictions = model.predict(X_test)

# Show the container type returned by predict().
print(f"{type(predictions)}")

In [None]:
# Visual check
# Side-by-side table of targets and predictions, plus their difference per row.

check = pd.DataFrame({'Ground truth': y_test, 'Predictions': predictions})
check['Diff'] = check['Ground truth'] - check['Predictions']
check

In [None]:
# Plot ground truth against predictions, one point per test row.
# reset_index() returns a new frame with the row position as an 'index' column.
# Doing this without inplace=True keeps `check` unchanged, so the cell can be
# re-run safely (repeated in-place resets add a 'level_0' column and then raise).
check_indexed = check.reset_index()

check_indexed.plot(x='index', y=['Ground truth', 'Predictions'], kind='line', figsize=(15, 10));

---

### Model persistence

- __pickle.__ Python's standard-library serializer. It converts a trained model into a byte stream that you can save to a file and load back later.

- __joblib.__ Efficiently saves and loads Python objects that make use of NumPy data structures. It is a better option for machine learning algorithms that require a lot of parameters or store the entire dataset (like K-Nearest Neighbors).

In [None]:
# Save model using pickle

filename = './models/linearregression_001.sav'
# Create the target directory if it is missing so the save works on a fresh checkout.
os.makedirs(os.path.dirname(filename), exist_ok=True)
# The context manager closes the file handle (flushing the bytes to disk) even if
# pickling raises; a bare open() passed to dump() would leave it open.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)
print('Your model has been saved with  pickle!!!')

In [None]:
# Load model using pickle
# NOTE: unpickling can execute arbitrary code, so only load files you created or trust.

filename = './models/linearregression_001.sav'
# The context manager guarantees the file handle is closed once the model is read.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
# Print both sets of coefficients so the round trip can be compared by eye.
print('Model coefficients:', model.coef_, '\n')
print('Loaded model coefficients:', loaded_model.coef_)

In [None]:
# Save model using joblib

filename = './models/linearregression_002.sav'
# Create the target directory if it is missing so the save works on a fresh checkout.
os.makedirs(os.path.dirname(filename), exist_ok=True)
# joblib.dump takes the destination path directly and manages the file itself.
joblib.dump(model, filename)
print('Your model has been saved with joblib!!!')

In [None]:
# Load model using joblib
# Restore the estimator from disk and print its coefficients next to the
# in-memory model's so the round trip can be compared by eye.

filename = './models/linearregression_002.sav'
loaded_model = joblib.load(filename)
print(f"Model coefficients: {model.coef_} \n")
print(f"Loaded model coefficients: {loaded_model.coef_}")