## Loading the libraries

In [1]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from keras.utils import np_utils
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import LabelEncoder
import seaborn as sns

## Loading the data

In [2]:
# Download the dataset from Google Drive by file id. In this run the file was
# saved as /content/Premium_Prediction.csv (see the download log below).
!gdown 148pw0gWc4N2tCLGiMw_yW3pXbN1uso4N

Downloading...
From: https://drive.google.com/uc?id=148pw0gWc4N2tCLGiMw_yW3pXbN1uso4N
To: /content/Premium_Prediction.csv
100% 89.6M/89.6M [00:01<00:00, 77.7MB/s]


In [3]:
# Load the downloaded CSV. gdown saves into the current working directory, so a
# relative path keeps this cell working outside Colab's /content directory.
df = pd.read_csv('Premium_Prediction.csv')

## Train and Test Split

In [4]:
# Positional (unshuffled) 80/20 split: the first 80% of rows are used for
# training and the remaining rows are held out for testing.
# The split point is computed once as an integer so it can be used directly
# as a slice bound and so train_len/test_len are real row counts.
train_len = int(df.shape[0] * 0.80)
test_len = df.shape[0] - train_len
train_set = df.iloc[:train_len]
test_set = df.iloc[train_len:]

In [5]:
# Display the (rows, columns) of the training and test splits as a sanity check
train_set.shape,test_set.shape

((783404, 20), (195851, 20))

In [6]:
# Pull the two prediction targets out of the training split:
# the policy type (categorical) and the total premium (continuous).
labels_categorical, labels_continuous = (
    train_set['POLICY_TYPE'],
    train_set['TOTAL_PREMIUM'],
)

In [7]:
# Feature matrix: every training column except the two targets
X = train_set.drop(['TOTAL_PREMIUM', 'POLICY_TYPE'], axis='columns')

In [8]:
# Display the shapes of the feature matrix and of both target vectors
X.shape,labels_categorical.shape,labels_continuous.shape

((783404, 18), (783404,), (783404,))

## Encoding the data

In [9]:
# Turn the policy-type strings into integer class ids; the fitted encoder
# keeps the label <-> id mapping.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(labels_categorical)
# Expand the integer ids into one-hot rows (one column per class)
dummy_y = np_utils.to_categorical(encoded_Y)

In [10]:
# Display the shapes of the features, the one-hot policy-type targets and the premium target
X.shape,dummy_y.shape,labels_continuous.shape

((783404, 18), (783404, 15), (783404,))

## Model Architecture and Training

In [11]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, concatenate
from tensorflow.keras.models import Model

# Size the network from the data rather than hard-coding the feature and
# class counts, so the cell keeps working if the columns or label set change.
n_features = X.shape[1]
n_classes = dummy_y.shape[1]

# Define inputs
input1 = Input(shape=(n_features,))

# Shared fully-connected trunk
dense1 = Dense(256, activation='relu')(input1)
dense2 = Dense(64, activation='relu')(dense1)
dense3 = Dense(32, activation='relu')(dense2)

# Concatenate the activations of all three trunk layers so both heads see them
merge = concatenate([dense1, dense2, dense3])

# Output layer for continuous variable (premium); relu keeps predictions >= 0
output1 = Dense(1, activation='relu', name='output1')(merge)

# Output layer for categorical variable (policy type); one probability per class
output2 = Dense(n_classes, activation='softmax', name='output2')(merge)

# Define the model: one input, two named outputs
model = Model(inputs=[input1], outputs=[output1, output2])

# Define loss functions for both outputs, keyed by output layer name
losses = {
    "output1": "mse",
    "output2": "categorical_crossentropy"
}

# Report only the metric that is meaningful for each head: MAE for the
# regression output and accuracy for the classification output. A flat list
# would apply both metrics to both outputs.
metrics = {
    "output1": ["mae"],
    "output2": ["accuracy"]
}

# Compile the model with the per-output losses and metrics.
# NOTE(review): no loss_weights are set, so the two losses are summed as-is.
model.compile(optimizer='adam', loss=losses, metrics=metrics)

# Train the model; targets are keyed by output name so they cannot be
# mismatched by list position. Keep the History object for later inspection.
history = model.fit(
    X,
    {"output1": labels_continuous, "output2": dummy_y},
    epochs=10,
    batch_size=32,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f6da6982e50>

## Model testing

In [12]:
# Pull the same two targets (policy type, total premium) out of the held-out split
labels_categorical_test, labels_continuous_test = (
    test_set['POLICY_TYPE'],
    test_set['TOTAL_PREMIUM'],
)

In [13]:
# Held-out feature matrix: every test column except the two targets
X_test = test_set.drop(['TOTAL_PREMIUM', 'POLICY_TYPE'], axis='columns')

In [14]:
# Encode the test labels with the encoder fitted on the TRAINING labels.
# Fitting a fresh encoder on the test split would renumber the classes whenever
# the two splits do not contain exactly the same label set, so the ids would
# no longer line up with the model's output columns.
encoder1 = encoder  # alias kept so any cell that refers to encoder1 still works
# transform() raises ValueError if the test split contains a label unseen in training
encoded_Y_test = encoder1.transform(labels_categorical_test)
# convert integers to dummy variables (i.e. one hot encoded); num_classes pins
# the width to the training class count even if some classes are absent here
dummy_y_test = np_utils.to_categorical(encoded_Y_test, num_classes=len(encoder1.classes_))

In [15]:
# Predict on the held-out features. The model was built with
# outputs=[output1, output2], so y1 is the premium head's output and
# y2 is the policy-type softmax output.
y1,y2 = model.predict(X_test)



## Evaluation

In [16]:
from sklearn.metrics import confusion_matrix,classification_report,f1_score

In [17]:
# Weighted F1 on hard class labels: take the arg-max of the one-hot targets
# and of the predicted class probabilities, then compare.
true_classes = dummy_y_test.argmax(axis=1)
predicted_classes = y2.argmax(axis=1)
print(f1_score(true_classes, predicted_classes, average='weighted'))

0.7482378629717167


In [38]:
# Lookup from predicted class index to policy-type code.
# NOTE(review): presumably mirrors the label encoder's class order - verify
# against encoder.classes_.
l_policy_type_classes = dict(enumerate([
    'DP-1', 'DP-3', "DW-2", 'HO-3', 'HO-4',
    'HO-6', 'HO-8', 'HW-2', 'HW-4', 'HW-6',
    'MD-1', 'MDP-1', 'MHO-3', 'MHO-4', 'MW-2',
]))

In [51]:
# Draw one held-out row for a demo prediction. The fixed random_state makes
# the same row come back on every Restart & Run All.
sample_test_case = X_test.sample(n=1, random_state=42)
sample_test_case

Unnamed: 0,HAS_POOL,IS_DWELLING_ON_MASONARY,IS_MOBILE_HOME,CONSTRUCTION,TOTAL_AREA_OF_BUILDING_SQFEET,REPLACEMENT_COST,OCCUPANCY,COVERAGE_A_DWELLING,COVERAGE_A_LOSS_SETTLEMENT,COVERAGE_C_PERSONAL_PROPERTY_AMOUNT,COVERAGE_C_LOSS_SETTLEMENT,COVERAGE_D_LOSS_OF_USE,SINKHOLE_LOSS_COVERAGE,HURRICANE_DEDUCTIBLE_AMOUNT,SINKHOLE_DEDUCTIBLE_AMOUNT,WIND_HAIL_DEDUCTIBLE_AMOUNT,MEDIAN_HOUSEHOLD_INCOME,PROPERTY_AGE
800363,0,1,0,1,1107.0,212300.0,0,208100.0,1,52030.0,0,20810.0,0,4162.0,0.0,0.0,46806.0,68


In [55]:
# Feature names, in the order they are fed to the model
sample_test_case.columns.tolist()

['HAS_POOL',
 'IS_DWELLING_ON_MASONARY',
 'IS_MOBILE_HOME',
 'CONSTRUCTION',
 'TOTAL_AREA_OF_BUILDING_SQFEET',
 'REPLACEMENT_COST',
 'OCCUPANCY',
 'COVERAGE_A_DWELLING',
 'COVERAGE_A_LOSS_SETTLEMENT',
 'COVERAGE_C_PERSONAL_PROPERTY_AMOUNT',
 'COVERAGE_C_LOSS_SETTLEMENT',
 'COVERAGE_D_LOSS_OF_USE',
 'SINKHOLE_LOSS_COVERAGE',
 'HURRICANE_DEDUCTIBLE_AMOUNT',
 'SINKHOLE_DEDUCTIBLE_AMOUNT',
 'WIND_HAIL_DEDUCTIBLE_AMOUNT',
 'MEDIAN_HOUSEHOLD_INCOME',
 'PROPERTY_AGE']

In [42]:
## test the prediction
# The model returns one array per output head: premiums first, class probabilities second.
premium_predict, policy_type_predict = model.predict(sample_test_case)
# Take the arg-max over the first row only, matching premium_predict[0][0].
# np.argmax over the whole 2-D array would return a flattened index, which is
# only a valid class id when exactly one row was predicted.
predicted_class = int(np.argmax(policy_type_predict[0]))
print(f'Premium Predicted is: {premium_predict[0][0]}')
print(f'Policy Type Predicted is: {l_policy_type_classes[predicted_class]}')


Premium Predicted is: 2439.609619140625
Policy Type Predicted is: HO-3


## Save the model

In [59]:
# Write the trained model to an HDF5 file in the working directory
model.save(filepath='final_model.h5')

In [60]:
## load the saved model
from tensorflow.keras.models import load_model

# Restore the model from the HDF5 file written above and print its layer summary
model = load_model('final_model.h5')
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 18)]         0           []                               
                                                                                                  
 dense (Dense)                  (None, 256)          4864        ['input_1[0][0]']                
                                                                                                  
 dense_1 (Dense)                (None, 64)           16448       ['dense[0][0]']                  
                                                                                                  
 dense_2 (Dense)                (None, 32)           2080        ['dense_1[0][0]']                
                                                                                              