# **Model Deployment to Production**

# 10.1. Insurance Class

In [1]:
import os 
import pickle
import pandas as pd
import numpy as np

class Insurance():
    """Preprocessing and scoring pipeline for the insurance propensity model.

    The fitted scalers and encoders are unpickled from ``<home_path>/features/``
    when the object is created. The stages are meant to be called in order:
    ``data_cleaning`` -> ``feature_engineering`` -> ``data_preparation`` ->
    ``get_prediction``. Every stage works on a copy, so the frame passed in by
    the caller is never modified.
    """

    def __init__(self, home_path='/home/caroline/repos/pa-004/src/'):
        """Load every fitted transformer used by ``data_preparation``.

        Parameters
        ----------
        home_path : str, optional
            Directory that contains the ``features/`` folder with the pickled
            artifacts. Defaults to the original development location.
        """
        self.home_path = home_path
        self.annual_premium_scaler = self._load_feature_artifact('annual_premium_scaler.pkl')
        self.age_scaler = self._load_feature_artifact('age_scaler.pkl')
        self.days_associated_scaler = self._load_feature_artifact('days_associated_scaler.pkl')
        self.target_encoder_gender = self._load_feature_artifact('target_encoder_gender.pkl')
        self.fe_region_code = self._load_feature_artifact('fe_region_code.pkl')
        self.fe_vehicle_age = self._load_feature_artifact('fe_vehicle_age.pkl')
        self.fe_policy_sales_channel = self._load_feature_artifact('fe_policy_sales_channel.pkl')

    def _load_feature_artifact(self, filename):
        """Unpickle ``<home_path>/features/<filename>`` and close the file afterwards."""
        artifact_path = os.path.join(self.home_path, 'features', filename)
        # NOTE: pickle.load can execute arbitrary code; only load trusted artifacts.
        with open(artifact_path, 'rb') as artifact_file:
            return pickle.load(artifact_file)

    def data_cleaning(self, df1):
        """Return a copy of ``df1`` with lower-cased column names.

        The ``vintage`` column (after lower-casing) is renamed to
        ``days_associated``.
        """
        # rename columns (rename returns a new frame, the input keeps its names)
        df1 = df1.rename(columns=str.lower)
        df1 = df1.rename(columns={'vintage' :'days_associated'})

        return df1

    def feature_engineering(self, df2):
        """Return a copy of ``df2`` with recoded ``vehicle_age`` and ``vehicle_damage``.

        ``vehicle_age`` becomes ``over_2_years`` / ``between_1_2_year`` /
        ``below_1_year`` (any value other than the first two labels falls into
        ``below_1_year``). ``vehicle_damage`` becomes 1 for ``'Yes'`` and 0 otherwise.
        """
        df2 = df2.copy()

        # vehicle_age
        df2['vehicle_age'] = df2['vehicle_age'].apply(lambda x: 'over_2_years' if x == '> 2 Years'
                                                         else 'between_1_2_year' if x == '1-2 Year'
                                                         else 'below_1_year')
        # vehicle_damage
        df2['vehicle_damage'] = df2['vehicle_damage'].apply(lambda x: 1 if x == 'Yes' else 0)

        return df2

    def data_preparation(self, df3):
        """Apply the loaded scalers/encoders and return only the model's input columns.

        Values that are missing from an encoder mapping become NaN, because
        ``Series.map`` yields NaN for unmatched keys.
        """
        df3 = df3.copy()

        # annual_premium
        df3['annual_premium'] = self.annual_premium_scaler.transform(df3[['annual_premium']].values)

        # age
        df3['age'] = self.age_scaler.transform(df3[['age']].values)

        # days_associated
        # NOTE(review): days_associated and gender are transformed but are not in cols_selected.
        df3['days_associated'] = self.days_associated_scaler.transform(df3[['days_associated']].values)

        # Plain column assignment replaces the column, so the mapped values keep
        # their own dtype. `df.loc[:, col] = values` writes into the existing
        # column in recent pandas versions and can leave a numeric encoding
        # stored in an object-dtype column.

        # gender
        df3['gender'] = df3['gender'].map(self.target_encoder_gender)

        # region_code
        df3['region_code'] = df3['region_code'].map(self.fe_region_code)

        # vehicle_age
        df3['vehicle_age'] = df3['vehicle_age'].map(self.fe_vehicle_age)

        # policy_sales_channel
        df3['policy_sales_channel'] = df3['policy_sales_channel'].map(self.fe_policy_sales_channel)

        cols_selected = ['age', 'region_code', 'previously_insured', 'vehicle_age', 'vehicle_damage', 'annual_premium', 'policy_sales_channel']

        return df3[cols_selected]

    def get_prediction( self, model, original_data, test_data ):
        """Score ``test_data`` and return ``original_data`` plus a ``Score`` column as JSON.

        Parameters
        ----------
        model : object exposing ``predict_proba``
            Column 1 of its output is used as the score.
        original_data : pandas.DataFrame
            Rows as received from the caller; left unmodified.
        test_data : pandas.DataFrame
            Prepared features, row-aligned with ``original_data``.

        Returns
        -------
        str
            JSON array of records sorted by ``Score`` in descending order.
        """
        # model prediction
        pred = model.predict_proba( test_data )

        # join prediction into a copy of the original data and sort
        scored = original_data.assign(Score=pred[:, 1].tolist())
        scored = scored.sort_values('Score', ascending=False)

        return scored.to_json( orient= 'records', date_format = 'iso' )

# 10.2. API Handler

In [None]:
import pickle
import os
import pandas as pd
from flask import Flask, request, Response
from insurance.Insurance import Insurance

# load model
path = '/home/caroline/repos/pa-004/src/'
# The with-block closes the file handle as soon as the model is unpickled.
# NOTE: pickle.load can execute arbitrary code; only load trusted model files.
with open(path + 'models/lgbm.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# initialize API
app = Flask(__name__)

# create endpoint
@app.route('/insurance/predict', methods=['POST'])
def insurance_predict():
    """Score the rows posted as JSON and return them ranked by propensity.

    The request body is either a single JSON object (one row) or a JSON array
    of objects (several rows). The rows are run through the ``Insurance``
    pipeline and returned as a JSON array that includes a ``Score`` field.
    An empty or missing payload yields ``{}``. Both outcomes are sent with
    status 200 and the ``application/json`` mimetype.
    """
    test_json = request.get_json()

    # nothing to score
    if not test_json:
        return Response('{}', status = 200, mimetype = 'application/json')

    if isinstance(test_json, dict): # unique row
        test_raw = pd.DataFrame(test_json, index = [0])
    else: # multiple rows; the column set is taken from the first record
        test_raw = pd.DataFrame(test_json, columns = test_json[0].keys())

    # keep an untouched copy: the response must carry the rows as received
    test_raw_original = test_raw.copy()

    # instantiate Insurance class
    # NOTE(review): this unpickles every transformer on each request; consider
    # creating the pipeline once at module level.
    pipeline = Insurance()

    # data cleaning
    df1 = pipeline.data_cleaning(test_raw)

    # feature engineering
    df2 = pipeline.feature_engineering(df1)

    # data preparation
    df3 = pipeline.data_preparation(df2)

    # prediction (JSON string)
    df_response = pipeline.get_prediction(model, test_raw_original, df3)

    # Wrap the JSON string so the reply is labelled application/json, matching
    # the empty-payload branch, instead of Flask's default mimetype for bare strings.
    return Response(df_response, status = 200, mimetype = 'application/json')

if __name__ == '__main__':
    # Environment variables are strings; cast so app.run always receives an
    # integer port, whether PORT is set or the 5000 fallback is used.
    port = int(os.environ.get( 'PORT', 5000 ))
    # 0.0.0.0 binds every network interface.
    app.run( host='0.0.0.0', port=port )

# 10.3. API Tester

In [2]:
import requests
import json
import pandas as pd

# Read the processed test split and keep a separate working copy for the API call.
df = pd.read_csv('../data/processed/test.csv')
df_test = df.copy()

# Report the size of the loaded frame.
n_rows, n_cols = df.shape
print ('Number os Rows: {}'.format (n_rows))
print ('Number os Columns: {}'.format (n_cols))

Number os Rows: 60978
Number os Columns: 12


In [3]:
# Preview the first five rows that will be sent to the API.
df_test.head(n=5)

Unnamed: 0,id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,138265,Male,42,1,36.0,0,1-2 Year,Yes,2630.0,156.0,258,0
1,273548,Male,45,1,15.0,0,1-2 Year,Yes,30531.0,26.0,274,1
2,256446,Female,45,1,29.0,0,1-2 Year,Yes,21104.0,154.0,144,0
3,139601,Male,49,1,46.0,1,1-2 Year,No,29448.0,26.0,140,0
4,13805,Male,34,1,28.0,0,1-2 Year,No,52677.0,124.0,165,0


In [4]:
# Build the request payload: every test row as one JSON array of records.
records = df_test.to_dict(orient = 'records')
data = json.dumps(records)

In [5]:
# # local (uncomment this cell to call an API instance running on this machine)
 
# url = 'http://0.0.0.0:5000/insurance/predict'
# header = {'Content-type':'application/json'}

# r = requests.post( url, data = data, headers = header )
# print( 'Status Code {}'.format( r.status_code ))

In [6]:
# render
url = 'https://insurance-propensity-score.onrender.com/insurance/predict'
header = {'Content-type':'application/json'}

# requests applies no timeout by default, so an unresponsive service would block
# this cell forever. (connect, read) limits in seconds; the read limit is
# deliberately generous for the full test set - raise it if scoring takes longer.
r = requests.post( url, data = data, headers = header, timeout = (10, 300) )
print( 'Status Code {}'.format( r.status_code ))

Status Code 200


In [23]:
# Decode the response body once; each r.json() call re-parses the whole payload.
scored_records = r.json()
df1 = pd.DataFrame(scored_records, columns=scored_records[0].keys())

In [24]:
# Show the ten highest-scored customers (the API returns rows sorted by Score, descending).
df1.head(n=10)

Unnamed: 0,id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response,Score
0,39769,Male,32,1,11.0,0,1-2 Year,Yes,30263.0,163.0,61,1,0.865356
1,248795,Female,31,1,24.0,0,1-2 Year,Yes,28134.0,163.0,113,1,0.863579
2,148643,Male,31,1,41.0,0,1-2 Year,Yes,27072.0,163.0,295,1,0.863546
3,159024,Male,34,1,24.0,0,1-2 Year,Yes,25535.0,163.0,202,1,0.859393
4,164122,Male,35,1,18.0,0,1-2 Year,Yes,19847.0,163.0,106,1,0.857947
5,8851,Male,32,1,11.0,0,< 1 Year,Yes,28906.0,124.0,122,1,0.856599
6,171070,Male,33,1,11.0,0,< 1 Year,Yes,29220.0,124.0,94,1,0.856599
7,227476,Female,31,1,35.0,0,< 1 Year,Yes,25609.0,163.0,21,1,0.854932
8,311863,Male,33,1,41.0,0,< 1 Year,Yes,26052.0,124.0,89,0,0.854705
9,371879,Female,31,1,41.0,0,< 1 Year,Yes,28605.0,124.0,148,1,0.854705
