In [None]:
%matplotlib inline


# Linear Regression Example

This example uses only a single feature of the `diabetes` dataset (column
index 2), in order to illustrate a two-dimensional plot of this regression technique. The
straight line can be seen in the plot, showing how linear regression attempts
to draw a straight line that will best minimize the residual sum of squares
between the observed responses in the dataset, and the responses predicted by
the linear approximation.

The coefficients, the intercept, and the mean squared error on the held-out validation set are also reported.


In [None]:
# NOTE: no `print(__doc__)` here. In a notebook kernel `__doc__` is the
# interactive namespace's auto-generated docstring, not the markdown
# description of this example, so printing it adds no information.

# Code source adapted from: Jaques Grobler
# License: BSD 3 clause

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error

# Fetch the diabetes dataset; the feature matrix is read from `.data` and the
# responses from `.target` below.
diabetes = datasets.load_diabetes()

# Report the shape of the feature matrix, followed by one blank separator line.
print("Diabetes dataset dimensions:", diabetes.data.shape, end="\n\n")

# Wrap the features and the target in DataFrames, then print both
# (features first, target second).
diabetes_features_df = pd.DataFrame(diabetes.data)
diabetes_target_df = pd.DataFrame(diabetes.target)
print(diabetes_features_df, diabetes_target_df, sep="\n")

In [None]:
# Tunable settings for this cell:
#   FEATURE_INDEX - column of `diabetes.data` used as the single explanatory variable
#   N_VALIDATION  - number of trailing samples held out for validation
FEATURE_INDEX = 2
N_VALIDATION = 20

# Use only one feature. `np.newaxis` keeps the result two-dimensional
# (n_samples, 1) instead of collapsing it to a 1-D vector.
diabetes_X = diabetes.data[:, np.newaxis, FEATURE_INDEX]

# To use all features instead, replace the assignment above with:
#     diabetes_X = diabetes.data

diabetes_X_df = pd.DataFrame(diabetes_X)

# Split the features into training/validation sets. The split is positional
# (the last N_VALIDATION rows are held out), not random.
diabetes_X_train = diabetes_X[:-N_VALIDATION]
diabetes_X_validation = diabetes_X[-N_VALIDATION:]

# Print the feature split dimensions. `.shape` is read directly from the
# arrays rather than through the `.data` buffer view.
print("Diabetes X train dimension:", diabetes_X_train.shape)
print("Diabetes X validation dimension:", diabetes_X_validation.shape)
print()

# Split the targets with the same positional rule so rows stay aligned
# with the feature split.
diabetes_y_train = diabetes.target[:-N_VALIDATION]
diabetes_y_validation = diabetes.target[-N_VALIDATION:]

# Print the target split dimensions
print("Diabetes y train dimension:", diabetes_y_train.shape)
print("Diabetes y validation dimension:", diabetes_y_validation.shape)
print()

# Sanity checks: features and targets must stay row-aligned after the split.
assert len(diabetes_X_train) == len(diabetes_y_train)
assert len(diabetes_X_validation) == len(diabetes_y_validation)
assert len(diabetes_X_train) + len(diabetes_X_validation) == len(diabetes_X)


In [None]:
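# Alternative (disabled): ordinary least squares via LinearRegression,
# kept for comparison with the SGDRegressor model in the next cell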
#regr = linear_model.LinearRegression()

# Train the model using the training sets
#regr.fit(diabetes_X_train, diabetes_y_train)

# Make predictions using the validation set
#diabetes_y_pred = regr.predict(diabetes_X_validation)

# Print predictions 
#print(diabetes_y_pred)

In [None]:
# Seed for the estimator's internal data shuffling, so that a fresh
# "Restart Kernel -> Run All" reproduces the same coefficients and predictions.
SGD_RANDOM_STATE = 42

# Create a linear model that is fitted by stochastic gradient descent
regr = linear_model.SGDRegressor(
    max_iter=10000,
    eta0=0.001,
    random_state=SGD_RANDOM_STATE,
)

# Train the model using the training sets
regr.fit(diabetes_X_train, diabetes_y_train)

# Make predictions using the validation set
diabetes_y_pred = regr.predict(diabetes_X_validation)

# Print predictions
print(diabetes_y_pred)

In [None]:
# The fitted parameters
print('Coefficients: \n', regr.coef_)
print('Intercept: \n', regr.intercept_)

# The mean squared error between observed and predicted validation responses
validation_mse = mean_squared_error(diabetes_y_validation, diabetes_y_pred)
print("Mean squared error: %.2f" % validation_mse)

# Plot outputs: observed validation points and the fitted line.
# The argument-less plt.xticks()/plt.yticks() calls were dropped: without
# arguments they only return the current ticks and change nothing.
fig, ax = plt.subplots()
ax.scatter(diabetes_X_validation, diabetes_y_validation,
           color='black', label='Observed')
ax.plot(diabetes_X_validation, diabetes_y_pred,
        color='blue', linewidth=3, label='Predicted')

# Label the figure so it can be read on its own
ax.set_xlabel('Selected diabetes feature')
ax.set_ylabel('Target')
ax.set_title('Validation set: observed vs. predicted')
ax.legend()

plt.show()