In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

from sklearn.pipeline import make_pipeline
from category_encoders import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error

In [2]:
# %matplotlib notebook
%matplotlib widget

- ref from [Interactive results with jupyter notebooks](https://blog.neurotech.africa/interactive-results-with-jupyter-notebooks/)

In [3]:
df = pd.read_csv('insurance.csv')

In [4]:
print (df.shape)
df.head()

(1338, 7)


Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [5]:
# %building model pipeline

model = make_pipeline(
    OneHotEncoder(use_cat_names=True),
    LGBMRegressor()
#     LinearRegression()
)

In [6]:
X = df.drop(columns=['region', 'charges'])
y = df['charges']

In [7]:
# Slittin into Train & test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [8]:
# Checking if the data is splitted properly
print(f'X_train',X_train.shape)  #Printing the X_train Features Shape
print(f'y_train',y_train.shape)  #Printing the y_train Labels Shape
print(f'X_test',X_test.shape)    #Printing the X_test Features Shape
print(f'y_test',y_test.shape)    #Printing the y_test Labels  Shape

X_train (1070, 5)
y_train (1070,)
X_test (268, 5)
y_test (268,)


In [9]:
# # % training model to forecast medical costs

# model.fit(X,target)
model.fit(X_train,y_train)

In [10]:
# # evaluating model

# y_pred_training = model.predict(X)
# print("Training MAE:", mean_absolute_error(target,y_pred_training))
y_train_pred = model.predict(X_train)
print("Training MAE:", mean_absolute_error(y_train,y_train_pred))

Training MAE: 1891.4203513009877


In [11]:
# %import ipywidgests 
import ipywidgets as widgets 

### Widget 1

In [12]:
def make_prediction(age, gender, bmi, children, smoker):
    """
        Function to perform predictions of (medical costs)insurance charges depending on:
        1. age - age of primary beneficiary
        2. sex - insurance contractor gender, female, male
        3. bmi - Body mass index (kg / m ^ 2) using the ratio of height to weight, ideally 18.5 to 24.9
        4. children - Number of children covered by health insurance / Number of dependents
        5. smoker - smoking or not
    """
    df = pd.DataFrame({"age":age,"sex":gender,"bmi":bmi,"children":children, "smoker":smoker},index=[0])
    prediction = model.predict(df).round(2)[0]
    return f"Predicted total insurance cost: ${prediction}"

In [13]:
make_prediction(39,"male", 20, 3, "yes")
# outputs
# 'Predicted total insurance cost: $29564.47'

'Predicted total insurance cost: $17676.13'

In [14]:
widgets.interact(
                make_prediction,
                age=widgets.IntSlider(
                    min=X["age"].min(),
                    max=X["age"].max(),
                    value=X["age"].mean(),
                ),
    
                gender=widgets.Dropdown(options=sorted(X["sex"].unique())),

                bmi=widgets.FloatSlider(
                    min=X["bmi"].min(),
                    max=X["bmi"].max(),
                    step=0.01,
                    value=X["bmi"].mean(),
                ),

                children=widgets.IntSlider(
                    min=X["children"].min(),
                    max=X["children"].max(),
                    value=X["children"].mean(),
                ),

                smoker=widgets.Dropdown(options=sorted(X["smoker"].unique())),
            );

interactive(children=(IntSlider(value=39, description='age', max=64, min=18), Dropdown(description='gender', o…

### Widget 2

In [15]:
features = X.columns.tolist()
num_features = X.select_dtypes(exclude=["object"]).columns.tolist()
# features

In [16]:
def make_prediction_plot(test_tag, age, bmi, children, sex='male', smoker='yes'):
    """
        Function to perform predictions of (medical costs)insurance charges depending on:
        1. age - age of primary beneficiary
        2. sex - insurance contractor gender, female, male
        3. bmi - Body mass index (kg / m ^ 2) using the ratio of height to weight, ideally 18.5 to 24.9
        4. children - Number of children covered by health insurance / Number of dependents
        5. smoker - smoking or not
    """
    df = pd.DataFrame()
    for tag in num_features:
        df[tag] = [X[num_features].mean()]    
   
    num_points = 150
    tag = test_tag

    if test_tag == 'age':
        tag_start = 5
    elif test_tag =='bmi':
        tag_start= 10
    else:
        tag_start = X[tag].quantile(0.03)

        
    if test_tag == 'age':
        tag_stop = 120        
    elif test_tag == 'bmi':
        tag_stop = 50
    else:
        tag_stop = X[tag].quantile(0.97)

    tag_name = 'Test tag'
    
    sens_df = pd.concat([df.copy()] * num_points, ignore_index=False)
    sens_input_ranges = list(np.linspace(start=tag_start, stop=tag_stop, num=num_points))
    sens_df[tag] = sens_input_ranges.copy()
    
    # Get values from sliders:
    if test_tag != 'age':
        sens_df['age'] = age
    if test_tag != 'bmi':
        sens_df['bmi'] = bmi
    if test_tag != 'children':
        sens_df['children'] = children
    if test_tag != 'sex':
        sens_df['sex'] = sex
    if test_tag != 'smoker':
        sens_df['smoker'] = smoker
        
#     sens_df['DUL_norm'] = sens_df['DUL']/height
    
    prediction = model.predict(sens_df[features])
    df_res = pd.DataFrame()
    df_res['tag'] = sens_df[tag]
    df_res['target'] = prediction

    fig, ax = plt.subplots()
    ax.scatter(df_res['tag'], df_res['target'])
    ax.set_xlabel('{}'.format(test_tag))
    ax.set_ylabel('insurance cost')
    ax.set_title('Insurance cost simulation')      
    plt.show()

In [17]:
widgets.interact(
         make_prediction_plot, 
         test_tag=widgets.Dropdown(options=[
                                     'age', 'bmi', 'children', 'sex', 'smoker',
                                           ],
                               value='bmi',
                               description='Test tag:'
                             ),
         age=widgets.FloatSlider(min=1, max=120, step=1, value=30, description='Age'),
         bmi=widgets.FloatSlider(min=5, max=50, step=1, value=25, description='Bmi'),
         children=widgets.FloatSlider(min=1, max=100, step=1, value=10, description='Children'),
         sex=widgets.Dropdown(options=sorted(X["sex"].unique()), description='Sex'),
         smoker=widgets.Dropdown(options=sorted(X["smoker"].unique()), description='Smoker')
        );

interactive(children=(Dropdown(description='Test tag:', index=1, options=('age', 'bmi', 'children', 'sex', 'sm…