In [1]:
# Importing Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
# Importing Data
df = pd.read_csv('data.csv')

In [3]:
# Data
df.head()

Unnamed: 0,ID,City_Code,Region_Code,Accomodation_Type,Reco_Insurance_Type,Upper_Age,Lower_Age,Is_Spouse,Health_Indicator,Holding_Policy_Duration,Holding_Policy_Type,Reco_Policy_Cat,Reco_Policy_Premium,Response
0,1,C3,3213,Rented,Individual,36,36,No,X1,14+,3.0,22,11628.0,0
1,2,C5,1117,Owned,Joint,75,22,No,X2,,,22,30510.0,0
2,3,C5,3732,Owned,Individual,32,32,No,,1.0,1.0,19,7450.0,1
3,4,C24,4378,Owned,Joint,52,48,No,X1,14+,3.0,19,17780.0,0
4,5,C8,2190,Rented,Individual,44,44,No,X2,3.0,1.0,16,10404.0,0


In [4]:
# Data Dimensions
df.shape

(50882, 14)

In [5]:
# Data Info
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50882 entries, 0 to 50881
Data columns (total 14 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   ID                       50882 non-null  int64  
 1   City_Code                50882 non-null  object 
 2   Region_Code              50882 non-null  int64  
 3   Accomodation_Type        50882 non-null  object 
 4   Reco_Insurance_Type      50882 non-null  object 
 5   Upper_Age                50882 non-null  int64  
 6   Lower_Age                50882 non-null  int64  
 7   Is_Spouse                50882 non-null  object 
 8   Health_Indicator         39191 non-null  object 
 9   Holding_Policy_Duration  30631 non-null  object 
 10  Holding_Policy_Type      30631 non-null  float64
 11  Reco_Policy_Cat          50882 non-null  int64  
 12  Reco_Policy_Premium      50882 non-null  float64
 13  Response                 50882 non-null  int64  
dtypes: float64(2), int64(6

In [6]:
# Null Count
df.isnull().sum()

ID                             0
City_Code                      0
Region_Code                    0
Accomodation_Type              0
Reco_Insurance_Type            0
Upper_Age                      0
Lower_Age                      0
Is_Spouse                      0
Health_Indicator           11691
Holding_Policy_Duration    20251
Holding_Policy_Type        20251
Reco_Policy_Cat                0
Reco_Policy_Premium            0
Response                       0
dtype: int64

In [7]:
# Dropping Irrelevant Columns and those with high percentage of Null Values
df = df.drop(columns=['ID', 'Holding_Policy_Duration', 'Holding_Policy_Type', 'Lower_Age'], axis=1)

In [8]:
# Checking for Null Count
df.isnull().sum()

City_Code                  0
Region_Code                0
Accomodation_Type          0
Reco_Insurance_Type        0
Upper_Age                  0
Is_Spouse                  0
Health_Indicator       11691
Reco_Policy_Cat            0
Reco_Policy_Premium        0
Response                   0
dtype: int64

In [9]:
# Dropping Null Values
df.dropna(axis=0, how='any', inplace=True)

In [10]:
# Checking for Null Count after removing all Null containing Rows
df.isnull().sum()

City_Code              0
Region_Code            0
Accomodation_Type      0
Reco_Insurance_Type    0
Upper_Age              0
Is_Spouse              0
Health_Indicator       0
Reco_Policy_Cat        0
Reco_Policy_Premium    0
Response               0
dtype: int64

In [11]:
# Dimensions of Dataset after removing all Null containing Rows
df.shape

(39191, 10)

In [12]:
# Data
df.head()

Unnamed: 0,City_Code,Region_Code,Accomodation_Type,Reco_Insurance_Type,Upper_Age,Is_Spouse,Health_Indicator,Reco_Policy_Cat,Reco_Policy_Premium,Response
0,C3,3213,Rented,Individual,36,No,X1,22,11628.0,0
1,C5,1117,Owned,Joint,75,No,X2,22,30510.0,0
3,C24,4378,Owned,Joint,52,No,X1,19,17780.0,0
4,C8,2190,Rented,Individual,44,No,X2,16,10404.0,0
5,C9,1785,Rented,Individual,52,No,X2,22,15264.0,1


In [13]:
# Resetting Index
df.reset_index(drop=True, inplace=True)

In [14]:
# Data after Resetting Index
df.head()

Unnamed: 0,City_Code,Region_Code,Accomodation_Type,Reco_Insurance_Type,Upper_Age,Is_Spouse,Health_Indicator,Reco_Policy_Cat,Reco_Policy_Premium,Response
0,C3,3213,Rented,Individual,36,No,X1,22,11628.0,0
1,C5,1117,Owned,Joint,75,No,X2,22,30510.0,0
2,C24,4378,Owned,Joint,52,No,X1,19,17780.0,0
3,C8,2190,Rented,Individual,44,No,X2,16,10404.0,0
4,C9,1785,Rented,Individual,52,No,X2,22,15264.0,1


In [15]:
# Dataset Information
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39191 entries, 0 to 39190
Data columns (total 10 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   City_Code            39191 non-null  object 
 1   Region_Code          39191 non-null  int64  
 2   Accomodation_Type    39191 non-null  object 
 3   Reco_Insurance_Type  39191 non-null  object 
 4   Upper_Age            39191 non-null  int64  
 5   Is_Spouse            39191 non-null  object 
 6   Health_Indicator     39191 non-null  object 
 7   Reco_Policy_Cat      39191 non-null  int64  
 8   Reco_Policy_Premium  39191 non-null  float64
 9   Response             39191 non-null  int64  
dtypes: float64(1), int64(4), object(5)
memory usage: 3.0+ MB


In [16]:
# Features and Class
X = df.iloc[:, :-1]
y = df.iloc[:, 9]

In [17]:
# Feature Set
X.head()

Unnamed: 0,City_Code,Region_Code,Accomodation_Type,Reco_Insurance_Type,Upper_Age,Is_Spouse,Health_Indicator,Reco_Policy_Cat,Reco_Policy_Premium
0,C3,3213,Rented,Individual,36,No,X1,22,11628.0
1,C5,1117,Owned,Joint,75,No,X2,22,30510.0
2,C24,4378,Owned,Joint,52,No,X1,19,17780.0
3,C8,2190,Rented,Individual,44,No,X2,16,10404.0
4,C9,1785,Rented,Individual,52,No,X2,22,15264.0


In [18]:
# Class Set
y.head()

0    0
1    0
2    0
3    0
4    1
Name: Response, dtype: int64

### Categorical Encoding
1. Label Encoding - Accomodation_Type, Reco_Insurance_Type, Is_Spouse
2. Dummy Encoding - City_Code, Health_Indicator

In [19]:
# Label Encoding
enc = {
    'Accomodation_Type': {'Owned': 0, 'Rented': 1},
    'Reco_Insurance_Type': {'Individual': 0, 'Joint': 1},
    'Is_Spouse': {'No': 0, 'Yes': 1}
}

X.replace(enc, inplace=True)

In [20]:
# Dummy Encoding
cols = ['City_Code', 'Health_Indicator']
X = pd.get_dummies(X, columns=cols, drop_first=True)

In [21]:
# New Data
X.head()

Unnamed: 0,Region_Code,Accomodation_Type,Reco_Insurance_Type,Upper_Age,Is_Spouse,Reco_Policy_Cat,Reco_Policy_Premium,City_Code_C10,City_Code_C11,City_Code_C12,...,City_Code_C8,City_Code_C9,Health_Indicator_X2,Health_Indicator_X3,Health_Indicator_X4,Health_Indicator_X5,Health_Indicator_X6,Health_Indicator_X7,Health_Indicator_X8,Health_Indicator_X9
0,3213,1,0,36,0,22,11628.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1117,0,1,75,0,22,30510.0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
2,4378,0,1,52,0,19,17780.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2190,1,0,44,0,16,10404.0,0,0,0,...,1,0,1,0,0,0,0,0,0,0
4,1785,1,0,52,0,22,15264.0,0,0,0,...,0,1,1,0,0,0,0,0,0,0


In [22]:
# New Data Info
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39191 entries, 0 to 39190
Data columns (total 50 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Region_Code          39191 non-null  int64  
 1   Accomodation_Type    39191 non-null  int64  
 2   Reco_Insurance_Type  39191 non-null  int64  
 3   Upper_Age            39191 non-null  int64  
 4   Is_Spouse            39191 non-null  int64  
 5   Reco_Policy_Cat      39191 non-null  int64  
 6   Reco_Policy_Premium  39191 non-null  float64
 7   City_Code_C10        39191 non-null  uint8  
 8   City_Code_C11        39191 non-null  uint8  
 9   City_Code_C12        39191 non-null  uint8  
 10  City_Code_C13        39191 non-null  uint8  
 11  City_Code_C14        39191 non-null  uint8  
 12  City_Code_C15        39191 non-null  uint8  
 13  City_Code_C16        39191 non-null  uint8  
 14  City_Code_C17        39191 non-null  uint8  
 15  City_Code_C18        39191 non-null 

In [23]:
# Feature Scaling
mm_scaler = MinMaxScaler()
X = mm_scaler.fit_transform(X)

In [24]:
# Splitting into Training and Testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

### Modelling

In [25]:
# Artificial Neural Network
model = Sequential()

# Input Layer
input_layer = Dense(X_train.shape[0], input_shape=(X_train.shape[1], ), activation='relu', kernel_initializer='normal')

# Hidden Layers
hidden_layer_1 = Dense(256, activation='relu', kernel_initializer='normal')
hidden_layer_2 = Dense(128, activation='relu', kernel_initializer='normal')

# Output Layer
output_layer = Dense(1, activation='sigmoid', kernel_initializer='normal')

# Adding Layers to Neural Network
model.add(input_layer)
model.add(hidden_layer_1)
model.add(hidden_layer_2)
model.add(output_layer)

In [26]:
# Compiling Model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [27]:
# Model Summary
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 31352)             1598952   
_________________________________________________________________
dense_1 (Dense)              (None, 256)               8026368   
_________________________________________________________________
dense_2 (Dense)              (None, 128)               32896     
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 129       
Total params: 9,658,345
Trainable params: 9,658,345
Non-trainable params: 0
_________________________________________________________________


In [28]:
# Setting Up Callback
stopper = EarlyStopping(
    monitor='accuracy', 
    min_delta=0, patience=3, 
    verbose=0, 
    mode='auto', 
    baseline=None, 
    restore_best_weights=True
)

In [29]:
# Fitting Data
history = model.fit(X_train, y_train, epochs=32, batch_size=128, callbacks=[stopper])

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32


In [30]:
# Model History
history.history

{'loss': [0.0, 0.0, 0.0, 0.0],
 'accuracy': [0.7591222524642944,
  0.7591222524642944,
  0.7591222524642944,
  0.7591222524642944]}

In [31]:
# Evaluation
model.evaluate(X_test, y_test)



[0.0, 0.7551983594894409]