In [4]:
import streamlit as st
import pandas as pd
import numpy as np
import time

import matplotlib.pyplot as plt # plotting library
import seaborn as sns # pretty plotting

sns.set(rc={'figure.figsize':(20,10)})

from sklearn.linear_model import LinearRegression # linear regression package
from sklearn.model_selection import train_test_split # split dataset

from sklearn.linear_model import Lasso

from sklearn.linear_model import Ridge

from sklearn.metrics import mean_squared_error as mse

In [6]:
# insurance_df = pd.read_csv('insurance_regression.csv')
insurance_df = pd.read_csv('data/insurance_regression.csv').reset_index()

insurance_df.head()

Unnamed: 0,index,age,sex,bmi,children,smoker,region,charges
0,0,19,female,27.9,0,yes,southwest,16884.924
1,1,18,male,33.77,1,no,southeast,1725.5523
2,2,28,male,33.0,3,no,southeast,4449.462
3,3,33,male,22.705,0,no,northwest,21984.47061
4,4,32,male,28.88,0,no,northwest,3866.8552


In [7]:
columns = ['age','sex', 'bmi', 'children', 'smoker', 'region']
target = ['charges']

X = insurance_df[columns]
y = insurance_df[target]

X_dummies = pd.get_dummies(X)

X_final = X_dummies.drop(['sex_male', 'smoker_no'], axis=1)

X_final.head()

Unnamed: 0,age,bmi,children,sex_female,smoker_yes,region_northeast,region_northwest,region_southeast,region_southwest
0,19,27.9,0,1,1,0,0,0,1
1,18,33.77,1,0,0,0,0,1,0
2,28,33.0,3,0,0,0,0,1,0
3,33,22.705,0,0,0,0,1,0,0
4,32,28.88,0,0,0,0,1,0,0


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X_final, y, test_size = 0.3, random_state = 42)

In [9]:
model = LinearRegression() 

In [10]:
model.fit(X_train, y_train)

LinearRegression()

In [11]:
print(model.score(X_test, y_test))

0.7696118054369008


In [12]:
import joblib

In [13]:
# saving the model
filename = 'model/finalized_model.sav'
joblib.dump(model, filename)

['model/finalized_model.sav']

In [14]:
loaded_model = joblib.load(filename)

In [15]:
loaded_model.predict(X_test)

array([[ 9.01636753e+03],
       [ 7.01997635e+03],
       [ 3.68708343e+04],
       [ 9.51853774e+03],
       [ 2.69740308e+04],
       [ 1.10808462e+04],
       [-4.06177600e+01],
       [ 1.71922331e+04],
       [ 9.74950536e+02],
       [ 1.13299594e+04],
       [ 2.80574861e+04],
       [ 9.47838799e+03],
       [ 4.98095281e+03],
       [ 3.84564701e+04],
       [ 4.03433327e+04],
       [ 3.71038753e+04],
       [ 1.51982019e+04],
       [ 3.57689693e+04],
       [ 8.88568387e+03],
       [ 3.13835989e+04],
       [ 3.79641903e+03],
       [ 1.02628964e+04],
       [ 2.37593679e+03],
       [ 7.26280819e+03],
       [ 1.12875675e+04],
       [ 1.31383480e+04],
       [ 1.46241614e+04],
       [ 6.07324604e+03],
       [ 1.00361537e+04],
       [ 1.97927814e+03],
       [ 9.07103928e+03],
       [ 1.31369887e+04],
       [ 4.30868680e+03],
       [ 3.21105955e+03],
       [ 4.48957162e+03],
       [ 1.33774493e+04],
       [ 1.82221028e+03],
       [ 8.79191682e+03],
       [ 3.3

In [14]:
# get coefficients
coefficient = model.coef_

# get intercept
intercept = model.intercept_

In [15]:
coefficient

array([[  261.29692414,   348.90691516,   424.11912829,  -104.81182299,
        23628.36722236,   596.05658924,   109.12197876,  -374.91224933,
         -330.26631867]])

In [16]:
intercept

array([-12865.20866928])

In [18]:
model.score(X_final, y)

0.7506272930769431

In [19]:
y_predicted = model.predict(X_test)

In [24]:
# put coefficients into dataframe
# pair the feature names with the coefficients
coefficients_pd = pd.DataFrame((list(zip(columns, model.coef_ * 1000))), columns = ['Features', 'Coefficients'])

# print table of coefficients for contribution chart
coefficients_pd

Unnamed: 0,Features,Coefficients
0,age,"[261296.92414191976, 348906.91516017495, 42411..."


In [25]:
coefficients_pd.sort_values('Coefficients').set_index('Features').plot(kind = 'barh', color = 'g')

TypeError: no numeric data to plot

In [1]:
# We input new advertising data into the model to predict future sales

# Sample TV, Radio, Newspaper
new_data = [[10, 30, 100]]
model.predict(new_data)

NameError: name 'model' is not defined

In [14]:
!pip install pipreqs



In [16]:
!pipreqs

