In [146]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from pandas_profiling import ProfileReport
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor
from sklearn.model_selection import train_test_split,  GridSearchCV
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_error as MSE
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from pandas.plotting import scatter_matrix
from sklearn.metrics import r2_score, classification_report
from sklearn.linear_model import ElasticNet, ElasticNetCV
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA

import ipywidgets as widgets
from IPython.display import display, clear_output
from pandas.io.formats import style
from IPython.core.display import display
from ipywidgets import Button, GridBox, Layout, ButtonStyle


%matplotlib inline

# Load the raw insurance dataset.
initial_df = pd.read_csv("insurance.csv")

# One-hot encode the categorical columns; drop_first=True drops the first
# level of each category.
df = pd.get_dummies(initial_df, drop_first=True)
# Obesity flag: BMI of 30 or above (the original note attributes this cutoff
# to a US medical institute).
df["obese"] = (df["bmi"] >= 30).astype("int64")
# Combined flag: obese AND smoker. Computed column-wise instead of with a
# row-by-row apply; the resulting 0/1 values are the same.
df["obese_smoker"] = ((df["obese"] == 1) & (df["smoker_yes"] == 1)).astype("int64")

  from IPython.core.display import display


In [147]:
# Column names of the raw frame as read from insurance.csv (before encoding).
initial_df.columns

Index(['age', 'sex', 'bmi', 'children', 'smoker', 'region', 'charges'], dtype='object')

In [148]:
# Column names after one-hot encoding and the added obese / obese_smoker flags.
df.columns

Index(['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes',
       'region_northwest', 'region_southeast', 'region_southwest', 'obese',
       'obese_smoker'],
      dtype='object')

In [149]:
# Input widgets for the prediction form.
# NOTE(review): this rebinds the name `style`, which the import cell also binds
# to the pandas.io.formats.style module.
style = {'description_width': 'initial'}

# Keyword arguments shared by the two list-style selectors.
_select_kwargs = dict(style=style, disabled=False, layout={'width': 'max-content'})

# Categorical inputs: options are the sorted distinct values found in the raw data.
widget_sex = widgets.Select(
    description='Sex',
    options=sorted(initial_df['sex'].unique().tolist()),
    **_select_kwargs,
)
widget_region = widgets.Select(
    description='Region',
    options=sorted(initial_df['region'].unique().tolist()),
    **_select_kwargs,
)

# Numeric inputs. Age and children start at the minimum seen in the data;
# height and weight are combined into a BMI by the Predict callback.
widget_age = widgets.IntSlider(
    description='Age', min=initial_df['age'].min(), max=100, style=style
)
widget_height = widgets.IntSlider(
    description='Height [cm]', min=0, max=200, style=style
)
widget_weight = widgets.FloatSlider(
    description='Weight [kg]', min=0, max=200, style=style
)
widget_children = widgets.IntSlider(
    description='Children', min=initial_df['children'].min(), max=8, style=style
)

# The radio labels map to 1/0 so the value can be used directly as smoker_yes.
widget_smoker = widgets.RadioButtons(
    description='Smoker',
    options=[('Yes',1), ('No',0)],
    style=style,
    disabled=False,
)

In [154]:
#widget handler

# Shared snapshot of the form inputs; button_callback reads its features from
# this dict. It is seeded with the widgets' current values.
values = {'sex_dropdown': widget_sex.value,
          'region_dropdown': widget_region.value,
          'age_slider': widget_age.value,
          'height_slider': widget_height.value,
          'weight_slider': widget_weight.value,
          'children_slider': widget_children.value,
          'smoker_button': widget_smoker.value}


def widgets_handler(sex_dropdown_val,
                    region_dropdown_val,
                    age_slider_val,
                    height_slider_val,
                    weight_slider_val,
                    children_slider_val,
                    smoker_button_val):
    """Copy the latest widget values into the shared ``values`` dict.

    Each argument is the current value of the correspondingly named widget.
    Returns None; the only effect is updating ``values`` in place.
    """
    values.update(
        sex_dropdown=sex_dropdown_val,
        region_dropdown=region_dropdown_val,
        age_slider=age_slider_val,
        height_slider=height_slider_val,
        weight_slider=weight_slider_val,
        children_slider=children_slider_val,
        smoker_button=smoker_button_val,
    )


# Connect the handler to the widgets. Without this, `values` keeps the
# snapshot taken above (height 0) no matter what the user selects, and the
# Predict callback divides by zero. interactive_output calls the handler once
# immediately and again on every value change.
widgets_sync = widgets.interactive_output(
    widgets_handler,
    {'sex_dropdown_val': widget_sex,
     'region_dropdown_val': widget_region,
     'age_slider_val': widget_age,
     'height_slider_val': widget_height,
     'weight_slider_val': widget_weight,
     'children_slider_val': widget_children,
     'smoker_button_val': widget_smoker},
)

In [155]:
#run button
# Button labelled "Predict"; its click handler is registered further down in
# this cell, after button_callback is defined.
run_button = widgets.Button(description="Predict")
# NOTE(review): clear_output is already imported in the first cell and is not
# used anywhere in this cell.
from IPython.display import clear_output

def button_callback(button):
    """Build one feature row from the shared ``values`` dict and print the prediction.

    Parameters
    ----------
    button : object
        The clicked button, as passed by ``on_click``; it is not used.

    Returns
    -------
    None
        The prediction of ``xgb_model`` is printed, not returned.

    Notes
    -----
    ``xgb_model`` is loaded in a later cell of this notebook, so that cell must
    have been run before the button is clicked.
    """
    height_m = values['height_slider'] / 100
    if height_m <= 0:
        # BMI is undefined for a zero height; report it instead of raising
        # ZeroDivisionError inside the widget callback.
        print("Height must be greater than 0 cm to compute BMI.")
        return None

    bmi = values['weight_slider'] / height_m ** 2
    smoker_yes = values['smoker_button']
    region = values['region_dropdown']
    obese = int(bmi >= 30)

    # Key order matches the feature columns of the training frame.
    features = {
        'age': values['age_slider'],
        'bmi': bmi,
        'children': values['children_slider'],
        'sex_male': int(values['sex_dropdown'] == 'male'),
        'smoker_yes': smoker_yes,
        # Any region other than these three leaves all three indicators at 0.
        'region_northwest': int(region == 'northwest'),
        'region_southeast': int(region == 'southeast'),
        'region_southwest': int(region == 'southwest'),
        'obese': obese,
        # Was `bmi>=30 & smoker_yes==1`: `&` binds tighter than the
        # comparisons, so that expression was always False and the flag
        # was always 0.
        'obese_smoker': int(obese == 1 and smoker_yes == 1),
    }
    to_predict = pd.DataFrame([features])
    print(xgb_model.predict(to_predict))
    return None
# Run button_callback each time the Predict button is clicked.
run_button.on_click(button_callback)

In [156]:
# Lay out the seven input widgets and the Predict button in a grid with
# three 300px-wide columns.
_form_controls = [
    widget_sex, widget_region, widget_smoker,
    widget_age, widget_height, widget_weight,
    widget_children, run_button,
]
_form_layout = Layout(
    width='100%',
    grid_template_columns='300px 300px 300px',
    grid_template_rows='auto auto auto',
    grid_gap='5px 10px',
)
filters = GridBox(children=_form_controls, layout=_form_layout)

In [157]:
# Render the input form; predictions printed by the Predict button appear below it.
display(filters)

GridBox(children=(Select(description='Sex', layout=Layout(width='max-content'), options=('female', 'male'), st…

[38011.125]


In [118]:
import pickle

# File names of the pickled artifacts, resolved relative to the working directory.
Pkl_Ridge = 'pickled_ridge_model.pkl'
Pkl_EN = 'pickled_en_model.pkl'
Pkl_XGB = 'pickled_xgb_model.pkl'
Pkl_Stacked = 'pickled_stacked_model.pkl'
Pkl_data = 'pickled_data'


def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    SECURITY: ``pickle.load`` can execute arbitrary code contained in the
    file, so only load files that come from a trusted source.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Load the models back from file.
ridge_model = _load_pickle(Pkl_Ridge)
en_model = _load_pickle(Pkl_EN)
xgb_model = _load_pickle(Pkl_XGB)
stacked_model = _load_pickle(Pkl_Stacked)

# NOTE: this rebinds `df`, replacing the frame built from insurance.csv in the
# first cell with the pickled one.
df = _load_pickle(Pkl_data)


In [15]:
from sklearn.model_selection import train_test_split

# Target is `charges`; the features are every other column of df.
y = df['charges']
X = df.drop(columns='charges')

# Hold out 30% of the rows for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
X

Unnamed: 0,age,bmi,children,sex_male,smoker_yes,region_northwest,region_southeast,region_southwest,obese,obese_smoker
0,19,27.900,0,0,1,0,0,1,0,0
1,18,33.770,1,1,0,0,1,0,1,0
2,28,33.000,3,1,0,0,1,0,1,0
3,33,22.705,0,1,0,1,0,0,0,0
4,32,28.880,0,1,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
1333,50,30.970,3,1,0,1,0,0,1,0
1334,18,31.920,0,0,0,0,0,0,1,0
1335,18,36.850,0,0,0,0,1,0,1,0
1336,21,25.800,0,0,0,0,0,1,0,0


In [11]:
# Score of the ridge model on the held-out test split
# (presumably R^2, if this is a scikit-learn regressor — TODO confirm).
ridge_model.score(X_test,y_test)

0.8712125654141842

In [12]:
# Score of en_model on the held-out test split
# (presumably R^2, if this is a scikit-learn regressor — TODO confirm).
en_model.score(X_test,y_test)

0.8711719305610985

In [13]:
# Score of xgb_model on the held-out test split
# (presumably R^2 — TODO confirm against the model's type).
xgb_model.score(X_test,y_test)

0.8730627049627262

In [14]:
# more repeatable, better result with CV
# Score of the stacked model on the held-out test split.
stacked_model.score(X_test,y_test)

0.8728794330786501

In [None]:
# Predictions of en_model for every row of the training split
# (this cell has no recorded execution count or output).
en_model.predict(X_train)

In [30]:
# Ad-hoc single-row prediction with en_model.
# NOTE(review): bmi is 25 here while obese and obese_smoker are both 1; the
# feature engineering only sets obese for bmi >= 30 — confirm this input is intended.
_sample_row = {
    'age': 20,
    'bmi': 25,
    'children': 3,
    'sex_male': 0,
    'smoker_yes': 1,
    'region_northwest': 1,
    'region_southeast': 0,
    'region_southwest': 0,
    'obese': 1,
    'obese_smoker': 1,
}
en_model.predict(pd.DataFrame([_sample_row]))

array([37101.0327961])

In [None]:
({'age':20,'bmi':25, 'children':3,'sex_male':0,'smoker_yes':1,
                  'region_northwest':1,'region_southeast':0,'region_southwest':0,'obese':1,'obese_smoker':1}