## Problem Statement
- Problem statement: predict whether a person will buy life insurance based on their age, using logistic regression.
This is a logistic regression (binary classification) problem because there are only two possible outcomes: the person either buys insurance or does not.
Predict the outcome for a person aged 58.

### Import required packages

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Get the data from the file

In [5]:
# Read the age / bought_insurance records from the CSV in the working directory.
csv_path = 'insurance_data.csv'
df = pd.read_csv(csv_path)
print(df)

    age  bought_insurance
0    22                 0
1    25                 0
2    47                 1
3    52                 0
4    46                 1
5    56                 1
6    55                 0
7    60                 1
8    62                 1
9    61                 1
10   18                 0
11   28                 0
12   27                 0
13   29                 0
14   49                 1
15   55                 1
16   25                 1
17   58                 1
18   19                 0
19   18                 0
20   21                 0
21   26                 0
22   40                 1
23   45                 1
24   50                 1
25   54                 1
26   23                 0


### Check for NA values in file

In [6]:
# Count the missing entries in each column.
df.isnull().sum()

age                 0
bought_insurance    0
dtype: int64

### EDA

In [7]:
# Peek at the first five rows.
df.head(n=5)

Unnamed: 0,age,bought_insurance
0,22,0
1,25,0
2,47,1
3,52,0
4,46,1


In [8]:
# Peek at the last five rows.
df.tail(n=5)

Unnamed: 0,age,bought_insurance
22,40,1
23,45,1
24,50,1
25,54,1
26,23,0


In [9]:
# Print the index range, column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype
---  ------            --------------  -----
 0   age               27 non-null     int64
 1   bought_insurance  27 non-null     int64
dtypes: int64(2)
memory usage: 560.0 bytes


In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) for both columns.
df.describe()

Unnamed: 0,age,bought_insurance
count,27.0,27.0
mean,39.666667,0.518519
std,15.745573,0.509175
min,18.0,0.0
25%,25.0,0.0
50%,45.0,1.0
75%,54.5,1.0
max,62.0,1.0


In [11]:
# Pairwise covariance between age and bought_insurance.
df.cov()

Unnamed: 0,age,bought_insurance
age,247.923077,5.871795
bought_insurance,5.871795,0.259259


In [12]:
# Pairwise Pearson correlation between age and bought_insurance.
df.corr(method="pearson")

Unnamed: 0,age,bought_insurance
age,1.0,0.732395
bought_insurance,0.732395,1.0


### Prepare the data

In [13]:
# Name the columns once so the feature/target choice is easy to change.
feature_columns = ['age']
target_column = 'bought_insurance'

# Independent variable(s): selecting with a list keeps x as a DataFrame.
x = df[feature_columns]
# Dependent variable: selecting with a single label yields a Series.
y = df[target_column]

### Split the data

In [14]:
from sklearn.model_selection import train_test_split

# Keep 80% of the rows for training and hold the rest out for evaluation;
# the fixed random_state makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    random_state=12345,
)

### Create the model

In [15]:
from sklearn.linear_model import LogisticRegressionCV

# Create an unfitted logistic regression model with built-in cross-validation.
model = LogisticRegressionCV()

# Train on the training split only. Fitting on the full x, y would let the
# model see the test rows, so the evaluation below would no longer measure
# performance on unseen data.
model.fit(x_train, y_train)


### Evaluate the model

In [16]:
# Predict labels for the held-out features.
y_pred = model.predict(x_test)

# The held-out labels act as the ground truth for every metric below.
y_true = y_test


In [17]:
# Display the actual labels of the test rows for comparison with the predictions.
y_true

23    1
8     1
10    0
18    0
12    0
21    0
Name: bought_insurance, dtype: int64

In [18]:
# Display the labels the model predicted for the test rows.
y_pred

array([1, 1, 0, 0, 0, 0], dtype=int64)

In [19]:
# Import r2_score under an alias: the result below is stored in a variable
# named `r2_score` (read by the print cell), and importing the function under
# that same name would leave it overwritten by a float after this cell runs.
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import r2_score as compute_r2_score

# With 0/1 labels, MAE and MSE both reduce to the fraction of misclassified rows.
mae = mean_absolute_error(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
r2_score = compute_r2_score(y_true, y_pred)

In [20]:
# Report every error metric on its own line.
print(
    f"mae = {mae}",
    f"mse = {mse}",
    f"rmse = {rmse}",
    f"r2_score = {r2_score}",
    sep="\n",
)

mae = 0.0
mse = 0.0
rmse = 0.0
r2_score = 1.0


In [21]:
# confusion matrix
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true, y_pred)
cm

array([[4, 0],
       [0, 2]], dtype=int64)

In [22]:
# Accuracy by hand: correct predictions (the diagonal) over all predictions.
(true_neg, false_pos), (false_neg, true_pos) = cm
correct = true_neg + true_pos
total = true_neg + false_pos + false_neg + true_pos
accuracy = correct / total
accuracy

1.0

In [23]:
from sklearn.metrics import accuracy_score

# Same accuracy figure, this time computed by scikit-learn.
accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
accuracy

1.0

In [24]:
# Flatten the 2x2 matrix row by row into its four cells.
tn, fp, fn, tp = cm.ravel()

# Print each count on its own line, in the order tn, fp, fn, tp.
for cell_count in (tn, fp, fn, tp):
    print(cell_count)

4
0
0
2


### Predict the outcome for age 58

In [25]:
age = 58

# The model was fitted on a DataFrame, so pass the query with the same column
# labels. A bare nested list makes recent scikit-learn versions warn that X
# does not have valid feature names.
age_frame = pd.DataFrame([[age]], columns=x.columns)
insurance_prediction = model.predict(age_frame)
print(insurance_prediction[0])

# Translate the predicted class label (1 = buys, 0 = does not) into a sentence.
if insurance_prediction[0] == 1:
    print("This person will buy an insurance")
else:
    print("This person will not buy an insurance")

1
This person will buy an insurance


