Example from http://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html
    
Load the diabetes test dataset and predict the target using a single input feature

Barry's Test for Rob C

In [None]:
# Print the module docstring.
# NOTE(review): presumably carried over from the original scikit-learn example script;
# this file defines no docstring of its own, so confirm the output is still wanted.
print(__doc__)


# Code source: Jaques Grobler
# License: BSD 3 clause


import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

# Barry
import pandas as pd


# Barry
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge

In [None]:
# Load the diabetes dataset via sklearn.datasets
diabetes = datasets.load_diabetes()

# Bare expression: as the last statement of a notebook cell it displays the loaded object
diabetes

In [None]:
# Show the feature matrix as a table (one column per input feature)
pd.DataFrame(data=diabetes.data)


In [None]:
# Show the regression target values as a table
pd.DataFrame(data=diabetes.target)


In [None]:
# Keep a single feature (column index 2); the one-wide slice keeps the
# result two-dimensional, i.e. one column with a row per sample
diabetes_X = diabetes.data[:, 2:3]

# pd.DataFrame(diabetes_X)


In [None]:
# Hold out the final 20 rows of the feature column for testing and
# train on everything before them
diabetes_X_train, diabetes_X_test = diabetes_X[:-20], diabetes_X[-20:]

# Report the sizes of the two partitions
print(len(diabetes_X_train), len(diabetes_X_test))


In [None]:
# Partition the targets the same way as the features: last 20 values
# for testing, the rest for training
diabetes_y_train, diabetes_y_test = diabetes.target[:-20], diabetes.target[-20:]

# Report the sizes of the two partitions
print(len(diabetes_y_train), len(diabetes_y_test))


In [None]:
# Create linear regression object (not yet fitted; training happens in a later cell)
regr = linear_model.LinearRegression()


In [None]:
# Fit the regression on the training partition only
regr.fit(X=diabetes_X_train, y=diabetes_y_train)


In [None]:
# Predict targets for the held-out test rows
diabetes_y_pred = regr.predict(X=diabetes_X_test)


In [None]:
# Coefficient(s) learned by the fitted model
print('Coefficients: \n', regr.coef_)

# Mean squared error between held-out targets and predictions
test_mse = mean_squared_error(diabetes_y_test, diabetes_y_pred)
print("Mean squared error: %.2f" % test_mse)

# r2_score is the coefficient of determination (R^2): 1 is perfect prediction
test_r2 = r2_score(diabetes_y_test, diabetes_y_pred)
print('Variance score: %.2f' % test_r2)

# Held-out observations as black points, model predictions as a blue line
plt.scatter(diabetes_X_test, diabetes_y_test, color='black')
plt.plot(diabetes_X_test, diabetes_y_pred, color='blue', linewidth=3)

# Empty tuples are passed as the tick positions for both axes
plt.xticks(())
plt.yticks(())

plt.show()



# Polynomial Model

In [None]:
# Set polynomial order used when expanding the input feature
var_poly_order = 2

# Step 1: polynomial feature expansion without the constant (bias) column
polynomial_features = PolynomialFeatures(
    degree=var_poly_order,
    include_bias=False,
)

# Step 2: linear regression on the expanded features
# (alternative estimator to try: linear_model.Ridge(alpha=0.2))
linear_regression = linear_model.LinearRegression()

# Chain both steps into a single estimator
regr_new = Pipeline(
    steps=[
        ("polynomial_features", polynomial_features),
        ("linear_regression", linear_regression),
    ]
)



In [None]:
# Synthetic data set for the polynomial model: x = 0..399 and the
# quadratic target x ** 2
my_df = pd.DataFrame({'x_value': np.arange(0, 400, 1)})
my_df['x_squared'] = my_df['x_value'] ** 2

# Hold out the last 20 rows for testing, mirroring the diabetes split.
# Inputs come from 'x_value'; X_train / X_test stay one-dimensional here.
X_train = my_df['x_value'].values[:-20]
X_test = my_df['x_value'].values[-20:]

# Targets come from 'x_squared'. Previously they were sliced from
# 'x_value', which made y identical to X and left 'x_squared' unused.
y_train = my_df['x_squared'].values[:-20]
y_test = my_df['x_squared'].values[-20:]



In [None]:
# Shape of X_train when laid out as a single row.
# NOTE(review): scikit-learn estimators normally expect X as
# (n_samples, n_features), i.e. reshape(-1, 1) for one feature - confirm intent.
np.reshape(X_train, (1, -1)).shape

In [None]:
# Shape of X_test when laid out as a single row.
# NOTE(review): scikit-learn estimators normally expect X as
# (n_samples, n_features), i.e. reshape(-1, 1) for one feature - confirm intent.
np.reshape(X_test, (1, -1)).shape

In [None]:
# Shape of y_train after flattening to one dimension
np.reshape(y_train, (-1,)).shape