In [None]:
from sklearn import datasets, linear_model, model_selection

# fetch the diabetes dataset bundled with scikit-learn
diabetes = datasets.load_diabetes()

# unpack it into the features (X) and the regression target (y)
X, y = diabetes.data, diabetes.target

In [None]:
# rows are zero-indexed, so the first patient's target sits at position 0
first_target = y[0]
print(first_target)

In [None]:
# let's look at the first patient's data: each feature name paired with its value
print({
  feature: value
  for feature, value in zip(diabetes.feature_names, X[0])
})

In [None]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# grid of 100 evenly spaced alpha values between 1 and 20
x = np.linspace(start=1, stop=20, num=100)

# gamma density (a=1, loc=1, scale=2) evaluated on that grid
p_X = stats.gamma(a=1, loc=1, scale=2).pdf(x)

# plot the density against alpha
plt.plot(x, p_X)
plt.xlabel('alpha')
plt.ylabel('P(alpha)')

In [None]:
# how many alpha values to draw
n_iter = 100

# draw n_iter values from the gamma distribution (a=1, loc=1, scale=2);
# the fixed random_state makes the draw repeatable
samples = stats.gamma(a=1, loc=1, scale=2).rvs(size=n_iter, random_state=100)

In [None]:
# histogram of the drawn alpha values, to see how the sample is distributed
plt.hist(x=samples)
plt.xlabel(xlabel='alpha')
plt.ylabel(ylabel='sample count')

In [None]:
# cross-validation settings shared by every run:
# 10 folds, scored on negative mean squared error
cv_settings = dict(cv=10, scoring='neg_mean_squared_error')

# map each sampled alpha to a one-element list holding the mean
# cross-validation score of a ridge regressor built with that alpha
result = {
  alpha: [
    model_selection.cross_val_score(
      linear_model.Ridge(alpha=alpha), X, y, **cv_settings
    ).mean()
  ]
  for alpha in samples
}

In [None]:
import pandas as pd

# turn the result dictionary into a dataframe with one row per alpha:
# the dictionary keys become the index, which is then moved into a regular column
df_result = pd.DataFrame.from_dict(result, orient='index').reset_index()

# name the two columns: the alpha value and its mean cross-validation score
df_result.columns = ['alpha', 'mean_neg_mean_squared_error']

# show the five rows with the highest (least negative) mean score
print(
  df_result
  .sort_values(by='mean_neg_mean_squared_error', ascending=False)
  .head(5)
)

In [None]:
# scatter of the mean cross-validation score against the alpha that produced it
plt.scatter(
  x=df_result['alpha'],
  y=df_result['mean_neg_mean_squared_error'],
)
plt.xlabel(xlabel='alpha')
plt.ylabel(ylabel='-MSE')