<a href="https://colab.research.google.com/github/hamzehdehghan/DeepLearnign_1/blob/main/insurance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [8]:
# Read the insurance CSV into the working DataFrame used by every later cell.
DATA_PATH = '/insurance.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Quick structural overview of the dataset.
# A notebook cell only renders its *last* bare expression, so the earlier
# summaries are sent to display() explicitly; without it they would be
# evaluated and silently thrown away.
display(df.shape)       # (rows, columns)
display(df.head())      # first few records
display(df.describe())  # summary statistics of the numeric columns
display(df.dtypes)      # dtype of every column
df.isnull().sum()       # missing-value count per column (rendered as cell output)


In [None]:
# Distribution of the target on the raw scale and on a log10 scale.
# sns.distplot is deprecated and scheduled for removal; histplot with
# kde=True and stat='density' is the documented replacement and yields the
# same "density histogram + KDE curve" figure. Each plot is drawn on the
# Axes created just above it instead of relying on the implicit current axes.
sns.set(style='whitegrid')

f, ax = plt.subplots(1, 1, figsize=(12, 8))
ax = sns.histplot(x=df['charges'], kde=True, stat='density', color='c', ax=ax)
ax.set_title('Distribution of Charges')

f, ax = plt.subplots(1, 1, figsize=(12, 8))
ax = sns.histplot(x=np.log10(df['charges']), kde=True, stat='density', color='r', ax=ax)
ax.set_title('Distribution of Log Charges');  # trailing ';' hides the Text repr

In [None]:
# Total charges per region, sorted from the smallest to the largest total.
charges = df['charges'].groupby(df.region).sum().sort_values(ascending=True)
f, ax = plt.subplots(1, 1, figsize=(12, 8))

# head() keeps at most the first five regions of the sorted totals.
top_charges = charges.head()

# x and y must be passed by keyword: recent seaborn releases accept only
# `data` positionally, so the positional form raises a TypeError there.
# Totals go on x and region names on y, giving horizontal bars.
ax = sns.barplot(x=top_charges, y=top_charges.index, palette='Blues', ax=ax)

In [None]:
# Charges per region, with separate bars for each value of `sex`.
f, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 8))
ax = sns.barplot(
    data=df,
    x='region',
    y='charges',
    hue='sex',
    palette='cool',
)

In [None]:
# Charges per region, with separate bars for each value of `smoker`.
f, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 8))
smoker_bar_options = dict(x='region', y='charges', hue='smoker', palette='Reds_r')
ax = sns.barplot(data=df, **smoker_bar_options)

In [None]:
# Charges per region, with separate bars for each value of `children`.
f, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 8))
ax = sns.barplot(
    data=df,
    palette='Set1',
    hue='children',
    y='charges',
    x='region',
)

In [None]:
# One regression plot of charges per predictor, coloured by smoker status.
# Each predictor is paired with its own palette; after the loop `ax` refers
# to the grid returned by the last lmplot call ('children').
for predictor, colours in (('age', 'Set1'), ('bmi', 'Set2'), ('children', 'Set3')):
    ax = sns.lmplot(data=df, x=predictor, y='charges', hue='smoker', palette=colours)

In [None]:
# Vertical violin plot of charges for each number of children, split by smoker.
f, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
violin_options = {
    'x': 'children',
    'y': 'charges',
    'hue': 'smoker',
    'orient': 'v',
    'palette': 'inferno',
}
ax = sns.violinplot(data=df, **violin_options)

## Converting object labels into categorical

In [None]:
# Cast sex, smoker and region to the 'category' dtype in a single assignment,
# then show the resulting column dtypes.
categorical_columns = ['sex', 'smoker', 'region']
df[categorical_columns] = df[categorical_columns].astype('category')
df.dtypes

In [None]:
# Convert the category labels into integer codes with LabelEncoder.
from sklearn.preprocessing import LabelEncoder

lblencoder = LabelEncoder()

# The same encoder object is re-fitted for every column: it learns the
# column's distinct labels, then the column is overwritten with the codes.
# After the loop the encoder is left fitted on the last column ('region').
for column in ('sex', 'smoker', 'region'):
    lblencoder.fit(df[column].drop_duplicates())
    df[column] = lblencoder.transform(df[column])

df.dtypes

In [None]:
# Annotated heatmap of the pairwise correlations between all columns.
f, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
correlations = df.corr()
ax = sns.heatmap(correlations, cmap='cool', annot=True)

Linear Regression

In [None]:
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split as holdout

# Target is `charges`; every other column is used as a feature.
y = df['charges']
x = df.drop(columns='charges')

# 80/20 train/test split with a fixed seed so the split is repeatable.
x_train, x_test, y_train, y_test = holdout(x, y, random_state=0, test_size=0.2)

# fit() returns the estimator itself, so construction and fitting can be chained.
Lin_reg = LinearRegression().fit(x_train, y_train)

# Report the intercept, the coefficients and the score on the held-out split.
for reported_value in (Lin_reg.intercept_, Lin_reg.coef_, Lin_reg.score(x_test, y_test)):
    print(reported_value)

**Ridge Regression**

In [None]:
from sklearn.linear_model import Ridge

# The fitted model gets its own name (mirroring `Lin_reg`) instead of being
# bound to `Ridge`: rebinding the class name to an instance makes any later
# `Ridge(...)` call fail with "object is not callable".
Ridge_reg = Ridge(alpha=0.2)
Ridge_reg.fit(x_train, y_train)

# Intercept, coefficients and score on the held-out split.
print(Ridge_reg.intercept_)
print(Ridge_reg.coef_)
print(Ridge_reg.score(x_test, y_test))

Lasso Regression

In [None]:
from sklearn.linear_model import Lasso

# The fitted model gets its own name (mirroring `Lin_reg`) instead of being
# bound to `Lasso`: rebinding the class name to an instance makes any later
# `Lasso(...)` call fail with "object is not callable".
Lasso_reg = Lasso(alpha=0.2)
Lasso_reg.fit(x_train, y_train)

# Intercept, coefficients and score on the held-out split.
print(Lasso_reg.intercept_)
print(Lasso_reg.coef_)
print(Lasso_reg.score(x_test, y_test))

Random Forest Regressor

In [None]:
from sklearn.ensemble import RandomForestRegressor as rfr

# `criterion` is deliberately left at its default (squared error). The old
# spelling 'mse' was removed from scikit-learn and now raises, while the new
# spelling 'squared_error' is rejected by older releases; the default means
# the same thing on both.
Rfr = rfr(n_estimators=100, random_state=1, n_jobs=-1)
Rfr.fit(x_train, y_train)

# NOTE: despite the `x_` prefix these hold the model's *predicted charges*
# for the train and test rows; the names are kept because later cells use them.
x_train_pred = Rfr.predict(x_train)
x_test_pred = Rfr.predict(x_test)

# Metrics are called in the documented (y_true, y_pred) order. MSE is
# symmetric so its value is unaffected, but this keeps it consistent with
# r2_score, where the order does matter.
print('MSE train data: %.3f, MSE test data: %.3f' %
      (metrics.mean_squared_error(y_train, x_train_pred),
       metrics.mean_squared_error(y_test, x_test_pred)))
print('R2 train data: %.3f, R2 test data: %.3f' %
      (metrics.r2_score(y_train, x_train_pred),
       metrics.r2_score(y_test, x_test_pred)))


In [None]:
# Residual plot for the random forest fitted above.
plt.figure(figsize=(8,6))

# x_train_pred holds the forest's predictions on the training rows, so each
# point is (predicted value, predicted minus actual) for one training sample.
plt.scatter(x_train_pred, x_train_pred - y_train,
          c = 'gray', marker = 'o', s = 35, alpha = 0.5,
          label = 'Train data')