**Basic Linear Regression on Diabetes Dataset**

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets,linear_model
from sklearn.metrics import mean_squared_error,r2_score

In [4]:
data = datasets.load_diabetes()

In [9]:
data

{'data': array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
          0.01990749, -0.01764613],
        [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
         -0.06833155, -0.09220405],
        [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
          0.00286131, -0.02593034],
        ...,
        [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
         -0.04688253,  0.01549073],
        [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
          0.04452873, -0.02593034],
        [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
         -0.00422151,  0.00306441]]),
 'target': array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
         69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
         68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
         87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
        259.,  53., 190., 142.,  75., 142., 155., 225.,  59

In [10]:
data.feature_names

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']

In [12]:
print(data.DESCR)

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

:Number of Instances: 442

:Number of Attributes: First 10 columns are numeric predictive values

:Target: Column 11 is a quantitative measure of disease progression one year after baseline

:Attribute Information:
    - age     age in years
    - sex
    - bmi     body mass index
    - bp      average blood pressure
    - s1      tc, total serum cholesterol
    - s2      ldl, low-density lipoproteins
    - s3      hdl, high-density lipoproteins
    - s4      tch, total cholesterol / HDL
    - s5      ltg, possibly log of serum triglycerides level
    - s6      glu, blood sugar level

Note: Each of these 10 feature variables have bee

In [13]:
print(data.target)

[151.  75. 141. 206. 135.  97. 138.  63. 110. 310. 101.  69. 179. 185.
 118. 171. 166. 144.  97. 168.  68.  49.  68. 245. 184. 202. 137.  85.
 131. 283. 129.  59. 341.  87.  65. 102. 265. 276. 252.  90. 100.  55.
  61.  92. 259.  53. 190. 142.  75. 142. 155. 225.  59. 104. 182. 128.
  52.  37. 170. 170.  61. 144.  52. 128.  71. 163. 150.  97. 160. 178.
  48. 270. 202. 111.  85.  42. 170. 200. 252. 113. 143.  51.  52. 210.
  65. 141.  55. 134.  42. 111.  98. 164.  48.  96.  90. 162. 150. 279.
  92.  83. 128. 102. 302. 198.  95.  53. 134. 144. 232.  81. 104.  59.
 246. 297. 258. 229. 275. 281. 179. 200. 200. 173. 180.  84. 121. 161.
  99. 109. 115. 268. 274. 158. 107.  83. 103. 272.  85. 280. 336. 281.
 118. 317. 235.  60. 174. 259. 178. 128.  96. 126. 288.  88. 292.  71.
 197. 186.  25.  84.  96. 195.  53. 217. 172. 131. 214.  59.  70. 220.
 268. 152.  47.  74. 295. 101. 151. 127. 237. 225.  81. 151. 107.  64.
 138. 185. 265. 101. 137. 143. 141.  79. 292. 178.  91. 116.  86. 122.
  72. 

In [15]:
x_dia = data.data

In [16]:
X_dia_train = x_dia[:-30]
x_dia_test = x_dia[-30:]


In [17]:
y_dia_train = x_dia[:-30]
y_dia_test = x_dia[-30:]

In [18]:
print(len(X_dia_train),len(x_dia_test),len(y_dia_train),len(y_dia_test))

412 30 412 30


In [19]:
Model = linear_model.LinearRegression()

In [20]:
Model.fit(X_dia_train,y_dia_train)

In [22]:
print(Model.predict(x_dia_test))

[[ 7.44012909e-02 -4.46416365e-02  8.54080721e-02  6.31865972e-02
   1.49424745e-02  1.30909518e-02  1.55053592e-02 -2.59226200e-03
   6.20673545e-03  8.59065477e-02]
 [-5.27375548e-02 -4.46416365e-02 -8.16893766e-04 -2.63275281e-02
   1.08146159e-02  7.14113104e-03  4.86400995e-02 -3.94933829e-02
  -3.58161926e-02  1.96328371e-02]
 [ 8.16663678e-02  5.06801187e-02  6.72779075e-03 -4.53428147e-03
   1.09883222e-01  1.17056241e-01 -3.23559322e-02  9.18746074e-02
   5.47199725e-02  7.20651633e-03]
 [-5.51455498e-03 -4.46416365e-02  8.88341490e-03 -5.04274850e-02
   2.59500973e-02  4.72241342e-02 -4.34008457e-02  7.12099798e-02
   1.48209799e-02  3.06440941e-03]
 [-2.73097857e-02 -4.46416365e-02  8.00190118e-02  9.87512478e-02
  -2.94491268e-03  1.81013272e-02 -1.76293810e-02  3.31191734e-03
  -2.95264268e-02  3.62012647e-02]
 [-5.27375548e-02 -4.46416365e-02  7.13965152e-02 -7.45274418e-02
  -1.53284884e-02 -1.31387743e-03  4.46044580e-03 -2.14118336e-02
  -4.68825342e-02  3.06440941e-03

In [25]:
Dia_y_pred = Model.predict(x_dia_test)

In [26]:
mean_squared_error = mean_squared_error(y_dia_test,Dia_y_pred)
print("mean_squared_error " , mean_squared_error)

mean_squared_error  1.6094703714907663e-33


In [24]:
print("Weights ",Model.coef_)
print("Intercept ",Model.intercept_)

Weights  [[ 1.00000000e+00 -1.93228961e-16 -1.98281908e-16 -2.10824953e-16
   2.97631606e-16  8.49767980e-17  1.90628181e-16 -6.16908516e-17
  -4.74610496e-17 -1.17764332e-16]
 [-3.57887569e-16  1.00000000e+00 -2.22044605e-16  3.88578059e-16
  -5.55111512e-17  1.66533454e-16 -4.16333634e-17  5.55111512e-17
  -3.33066907e-16  1.66533454e-16]
 [ 2.57862618e-16 -4.44089210e-16  1.00000000e+00  1.80411242e-16
  -6.38378239e-16  5.68989300e-16 -6.93889390e-18  7.63278329e-16
  -1.52655666e-16  5.55111512e-17]
 [-6.47110379e-16  4.44089210e-16 -1.11022302e-16  1.00000000e+00
   2.49800181e-16 -3.05311332e-16  2.04697370e-16 -4.30211422e-16
   8.32667268e-17  3.33066907e-16]
 [-2.24415525e-17  8.32667268e-16  0.00000000e+00  1.66533454e-16
   1.00000000e+00  4.57966998e-16  2.11636264e-16 -4.16333634e-16
   4.02455846e-16  1.66533454e-16]
 [ 6.40947471e-17  1.66533454e-16 -1.80411242e-16 -2.91433544e-16
   1.94289029e-16  1.00000000e+00  1.11022302e-16  4.02455846e-16
  -1.38777878e-16 -1.526