In [42]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import BayesianRidge
import pickle

In [8]:
csv_url =\
        'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv'
try:
    data = pd.read_csv(csv_url, sep=';')
except Exception as e:
    logger.exception(
        "Unable to download training & test CSV, check your internet connection. Error: %s", e)

In [9]:
data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [10]:
data.shape

(4898, 12)

In [27]:
# Split into training and test data
y = data.quality
X = data.drop('quality', axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.5)

In [31]:
# Parameters
alpha_1=1e-06
alpha_2=1e-06
n_iter=500

In [33]:
# Create and train model
model = BayesianRidge(alpha_1=alpha_1, alpha_2=alpha_2, n_iter=n_iter)
model.fit(X_train, y_train)

BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True,
              fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=500,
              normalize=False, tol=0.001, verbose=False)

In [46]:
X_test.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
2714,6.8,0.24,0.37,7.45,0.043,59.0,188.0,0.99579,3.2,0.5,9.4
2316,7.4,0.2,0.31,1.6,0.038,34.0,116.0,0.9912,3.25,0.39,12.0
965,8.3,0.23,0.43,3.2,0.035,14.0,101.0,0.9928,3.15,0.36,11.5
2157,8.5,0.25,0.27,4.7,0.031,31.0,92.0,0.9922,3.01,0.33,12.0
4099,6.4,0.5,0.2,2.4,0.059,19.0,112.0,0.99314,3.18,0.4,9.2


In [36]:
predicted_qualities = model.predict(X_test)

In [37]:
predicted_qualities

array([5.69516903, 6.41381109, 6.0253786 , ..., 5.34225972, 6.34697361,
       6.69437413])

In [38]:
# Metrics
r2_score(y_test, predicted_qualities)

0.269229226736417

In [39]:
# Metrics
mean_squared_error(y_test, predicted_qualities)

0.5784097956150277

In [40]:
# Metrics
mean_absolute_error(y_test, predicted_qualities)

0.5916633253997684

In [43]:
# save the model to disk
filename = 'finalized_model.sav'
pickle.dump(model, open(filename, 'wb'))

In [44]:
# some time later...
 
# load the model from disk
loaded_model = pickle.load(open(filename, 'rb'))
predicted_qualities_loaded_model = loaded_model.predict(X_test)
predicted_qualities_loaded_model

array([5.69516903, 6.41381109, 6.0253786 , ..., 5.34225972, 6.34697361,
       6.69437413])