# 1. Import Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn import model_selection, linear_model, datasets
from sklearn.metrics import mean_squared_error

# 2. Read Data

In [3]:
# Load scikit-learn's bundled diabetes dataset as a dict-like container
# (return_X_y=False is the default; spelled out to make the return type explicit).
diabetes = datasets.load_diabetes(return_X_y=False)

In [5]:
# List the fields available on the loaded dataset object.
diabetes.keys()

dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename'])

In [10]:
# Show the dataset's bundled description text.
print(diabetes.DESCR)

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

  :Number of Instances: 442

  :Number of Attributes: First 10 columns are numeric predictive values

  :Target: Column 11 is a quantitative measure of disease progression one year after baseline

  :Attribute Information:
      - age     age in years
      - sex
      - bmi     body mass index
      - bp      average blood pressure
      - s1      tc, T-Cells (a type of white blood cells)
      - s2      ldl, low-density lipoproteins
      - s3      hdl, high-density lipoproteins
      - s4      tch, thyroid stimulating hormone
      - s5      ltg, lamotrigine
      - s6      glu, blood sugar level

Note: Each of these 10 feature va

In [11]:
# Column names of the feature matrix, in column order.
diabetes.feature_names

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']

In [14]:
# diabetes['data']

In [16]:
# Feature matrix of the dataset.
diabetes_feature = diabetes['data']

In [17]:
# Regression target of the dataset.
diabetes_target = diabetes['target']

In [18]:
# Report rank and shape of the feature matrix, then of the target vector,
# one value per line with a dashed separator in between.
print(diabetes_feature.ndim, diabetes_feature.shape, sep='\n')
print('----------------------')
print(diabetes_target.ndim, diabetes_target.shape, sep='\n')

2
(442, 10)
----------------------
1
(442,)


# 3. Feature Selection

In [30]:
# Train on a single feature, selected by name instead of a bare column index
# ('s3' is column 6 in diabetes.feature_names).
SELECTED_FEATURE = 's3'
selected_col = diabetes.feature_names.index(SELECTED_FEATURE)
# Slice (col:col + 1) rather than index, so the result stays 2-D: (n_samples, 1).
diabetes_X = diabetes_feature[:, selected_col:selected_col + 1]

In [29]:
# diabetes_target = diabetes_target.reshape(len(diabetes_target),1)

In [31]:
# Plain assignment: diabetes_Y is another name for the same array as diabetes_target (no copy is made).
diabetes_Y = diabetes_target

In [32]:
# Confirm the selected feature is 2-D and the target is 1-D before splitting.
print(diabetes_X.shape, diabetes_Y.shape, sep='\n')

(442, 1)
(442,)


# 4. Split into Training & Test data

In [33]:
# Hold out 30% of the rows for testing; the fixed random_state makes the split repeatable.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    diabetes_X,
    diabetes_Y,
    test_size=0.3,
    random_state=42,
)

# 5. Create Model object

In [34]:
# Create an unfitted linear regression estimator with its default settings.
model = linear_model.LinearRegression()

# 6. Train Model object

In [35]:
# Fit the regression on the training split only; the test split stays unseen.
model.fit(X=x_train, y=y_train)

LinearRegression()

# 7. Test Model object

In [38]:
# Mean squared error on the training split.
# scikit-learn metrics take (y_true, y_pred) in that order.
print('MSE(Train data): ', mean_squared_error(y_train, model.predict(x_train)))

MSE(Train data):  5153.998507994996


In [39]:
# Mean squared error on the held-out test split.
# scikit-learn metrics take (y_true, y_pred) in that order.
print('MSE(Test data): ', mean_squared_error(y_test, model.predict(x_test)))

MSE(Test data):  4672.8083916932055


In [40]:
# Root mean squared error (RMSE) on the test split, i.e. sqrt of the test MSE.
# The printed label spells it "MSRE"; it is kept unchanged so the saved output still matches.
# scikit-learn metrics take (y_true, y_pred) in that order.
print('MSRE(Test data): ', np.sqrt(mean_squared_error(y_test, model.predict(x_test))))

MSRE(Test data):  68.3579431499603


In [41]:
from tqdm import tqdm


def single_feature_rmse(features, target, column, test_size=0.3, random_state=42):
    """Fit a one-feature linear regression and return its test-set RMSE.

    All intermediate objects (split arrays, fitted estimator) stay local to
    this function, so the notebook-level `x_train`, `x_test`, `y_train`,
    `y_test`, `diabetes_X` and `model` are not overwritten by the sweep.

    Parameters
    ----------
    features : 2-D array
        Feature matrix; only column `column` is used.
    target : 1-D array
        Regression target, one value per row of `features`.
    column : int
        Index of the feature column to train on.
    test_size, random_state
        Forwarded to `model_selection.train_test_split`; the defaults
        (0.3 and 42) are the values this notebook uses for its splits.

    Returns
    -------
    float
        Square root of the mean squared error on the held-out split.
    """
    # Slice column:column + 1 (not a bare index) so the input stays 2-D.
    single_X = features[:, column:column + 1]
    fx_train, fx_test, fy_train, fy_test = model_selection.train_test_split(
        single_X, target, test_size=test_size, random_state=random_state
    )
    regressor = linear_model.LinearRegression()
    regressor.fit(fx_train, fy_train)
    # scikit-learn metrics take (y_true, y_pred) in that order.
    return np.sqrt(mean_squared_error(fy_test, regressor.predict(fx_test)))


# Score every feature on its own. The label "MSRE" denotes the RMSE; it is
# printed unchanged so the text of the output stays the same.
for i in tqdm(range(len(diabetes.feature_names))):
    print(diabetes.feature_names[i] + ' Score')
    print('MSRE(Test data): ', single_feature_rmse(diabetes_feature, diabetes_target, i))

100%|██████████| 10/10 [00:00<00:00, 247.26it/s]

age Score
MSRE(Test data):  72.49891644833504
sex Score
MSRE(Test data):  73.66993085687035
bmi Score
MSRE(Test data):  62.32926055201547
bp Score
MSRE(Test data):  66.61026730638127
s1 Score
MSRE(Test data):  71.07200560112936
s2 Score
MSRE(Test data):  72.08739740608164
s3 Score
MSRE(Test data):  68.3579431499603
s4 Score
MSRE(Test data):  66.43038449578502
s5 Score
MSRE(Test data):  55.17706473439357
s6 Score
MSRE(Test data):  68.09047955273391





In [None]:
$ pip install tqdm