## Next Best Contract Type (Policy Type Recommendation)
## Data used for analysis is based on Integral Life Admin System

### The SQL query in the cells below will retrieve data using the database credentials provided. Right now these credentials have been left empty for security reasons. Please enter the relevant details before running this notebook further.

In [1]:
# Import Packages
import pyodbc
import numpy as np
import pandas as pd
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity

# Raise the pandas display limits so wide/long frames are shown in full
for option_name in ('display.max_rows', 'display.max_columns'):
    pd.set_option(option_name, 1000)

## Import Data

In [2]:
# Enter ip address and port number of the system where the database resides.
# Login credentials are deliberately NOT stored in this notebook: they are read
# from the DB_USERNAME / DB_PASSWORD environment variables and default to empty
# strings, so they must be supplied before the connection cell is run.
import os

server   = os.environ.get('DB_SERVER', '10.0.3.98')
database = os.environ.get('DB_NAME', 'INT77DB_R212')
username = os.environ.get('DB_USERNAME', '')
password = os.environ.get('DB_PASSWORD', '')

In [3]:
# Add appropriate driver name
# Assemble the ODBC connection string from its ';'-separated key=value parts
connection_string = ';'.join([
    'DRIVER={SQL Server}',
    'SERVER=' + server,
    'DATABASE=' + database,
    'UID=' + username,
    'PWD=' + password,
])
cnxn = pyodbc.connect(connection_string)
cursor = cnxn.cursor()

OperationalError: ('08001', '[08001] [Microsoft][ODBC SQL Server Driver][DBNETLIB]SQL Server does not exist or access denied. (17) (SQLDriverConnect); [08001] [Microsoft][ODBC SQL Server Driver][DBNETLIB]ConnectionOpen (Connect()). (53)')

### The query below fetches details only for those customers who have purchased a policy. This dataset is used to understand the purchase pattern, which is then applied to all the customers.

In [None]:
# Build the SQL text (it is executed later via pd.read_sql).
#
# Shape of the result: one row per contract owner (COWNNUM, right-trimmed and
# returned as CLIENT_NUMBER) and one column per contract type (CNTTYPE) listed
# in the outer SELECT; NULLs produced by the PIVOT are turned into 0.
#
# How it is built:
#   * SourceTable reads VM1DTA.CHDRPF rows with SERVUNIT = 'LP' and
#     VALIDFLAG = '1', grouped by (COWNNUM, CNTTYPE). Because CNTTYPE is a
#     grouping column, COUNT(DISTINCT CNTTYPE) is at most 1 per group, so the
#     pivoted values act as "owns this contract type" flags.
#   * PIVOT spreads CNTTYPE into columns, summing COUNTING.
#   * Q2 keeps owners that have at least one distinct CNTTYPE and is
#     inner-joined back on COWNNUM.
#
# NOTE(review): the PIVOT IN-list names more contract types (LPL ... MHB) than
# the outer SELECT returns, so an owner holding only those types appears to
# come back as an all-zero row - confirm whether that is intended.
query = '''SELECT 
rtrim(Q1.COWNNUM) AS 'CLIENT_NUMBER',
COALESCE(Q1.[AE1], 0) AS 'AE1',
COALESCE(Q1.[AEN], 0) AS 'AEN',
COALESCE(Q1.[CCI], 0) AS 'CCI',
COALESCE(Q1.[CDA], 0) AS 'CDA', 
COALESCE(Q1.[CED], 0) AS 'CED', 
COALESCE(Q1.[CEL], 0) AS 'CEL', 
COALESCE(Q1.[DAN], 0) AS 'DAN',
COALESCE(Q1.[DAT], 0) AS 'DAT',
COALESCE(Q1.[DAW], 0) AS 'DAW',
COALESCE(Q1.[DAX], 0) AS 'DAX', 
COALESCE(Q1.[END], 0) AS 'END', 
COALESCE(Q1.[ENT], 0) AS 'ENT',
COALESCE(Q1.[ENW], 0) AS 'ENW',
COALESCE(Q1.[FLX], 0) AS 'FLX', 
COALESCE(Q1.[FWL], 0) AS 'FWL', 
COALESCE(Q1.[GCS], 0) AS 'GCS', 
COALESCE(Q1.[IAN], 0) AS 'IAN', 
COALESCE(Q1.[IAX], 0) AS 'IAX',
COALESCE(Q1.[IPP], 0) AS 'IPP',
COALESCE(Q1.[LCE], 0) AS 'LCE',
COALESCE(Q1.[MRT], 0) AS 'MRT',
COALESCE(Q1.[OUL], 0) AS 'OUL',
COALESCE(Q1.[OWL], 0) AS 'OWL',
COALESCE(Q1.[PHI], 0) AS 'PHI',
COALESCE(Q1.[RSB], 0) AS 'RSB',
COALESCE(Q1.[RTA], 0) AS 'RTA',
COALESCE(Q1.[RTX], 0) AS 'RTX',
COALESCE(Q1.[RUL], 0) AS 'RUL',
COALESCE(Q1.[RUX], 0) AS 'RUX',
COALESCE(Q1.[SAP], 0) AS 'SAP',
COALESCE(Q1.[SFL], 0) AS 'SFL',
COALESCE(Q1.[SFT], 0) AS 'SFT',
COALESCE(Q1.[SHP], 0) AS 'SHP',
COALESCE(Q1.[SPB], 0) AS 'SPB',
COALESCE(Q1.[SUM], 0) AS 'SUM',
COALESCE(Q1.[SUS], 0) AS 'SUS',
COALESCE(Q1.[TEN], 0) AS 'TEN',
COALESCE(Q1.[TEX], 0) AS 'TEX',
COALESCE(Q1.[TPD], 0) AS 'TPD',
COALESCE(Q1.[TRM], 0) AS 'TRM',
COALESCE(Q1.[TRX], 0) AS 'TRX',
COALESCE(Q1.[TWL], 0) AS 'TWL',
COALESCE(Q1.[TWX], 0) AS 'TWX',
COALESCE(Q1.[ULP], 0) AS 'ULP',
COALESCE(Q1.[ULX], 0) AS 'ULX',
COALESCE(Q1.[UNI], 0) AS 'UNI',
COALESCE(Q1.[VAP], 0) AS 'VAP'
FROM 

(SELECT *
FROM
(
  SELECT DISTINCT COWNNUM, CNTTYPE, COUNT(DISTINCT CNTTYPE) AS COUNTING FROM VM1DTA.CHDRPF WHERE SERVUNIT = 'LP' AND VALIDFLAG = '1'   
  GROUP BY COWNNUM, CNTTYPE
) AS SourceTable PIVOT( SUM(COUNTING) FOR [CNTTYPE] IN([AE1],[AEN],[CCI],[CDA],[CED],[CEL],[DAN],[DAT],[DAW],[DAX],[END],[ENT],
[ENW],[FLX],[FWL],[GCS],[IAN],[IAX],[IPP],[LCE],[MRT],[OUL],[OWL],[PHI],[RSB],[RTA],
[RTX],[RUL],[RUX],[SAP],[SFL],[SFT],[SHP],[SPB],[SUM],[SUS],[TEN],[TEX],
[TPD],[TRM],[TRX],[TWL],[TWX],[ULP],[ULX],[UNI],[VAP],[LPL],[LPP],[LPR],
[LWH],[MAR],[MCD],[MCG],[MCM],[MCP],[MF1],[MGI],[MHB])) AS PivotTable ) Q1

INNER JOIN 
(SELECT  COWNNUM, COUNT( DISTINCT CNTTYPE) UNIQUE_PRODUCTS FROM VM1DTA.CHDRPF WHERE SERVUNIT = 'LP' AND VALIDFLAG = '1' 
GROUP BY COWNNUM
HAVING COUNT( DISTINCT CNTTYPE) > 0
) Q2
ON Q2.COWNNUM = Q1.COWNNUM
'''

In [None]:
# Run the query over the open connection and load the result into a dataframe
df_orig = pd.read_sql(sql=query, con=cnxn)

In [None]:
# Preview the first five rows of the query result
df_orig.head(n=5)

In [None]:
# Keep the client numbers in their own single-column dataframe
df_client = df_orig['CLIENT_NUMBER'].to_frame()

In [None]:
# Leave only the contract-type columns in the original dataframe
df_orig = df_orig.drop(columns=['CLIENT_NUMBER'])

In [None]:
# df_client already holds the CLIENT_NUMBER column taken from df_orig, so its
# row labels line up with df_orig by construction. The query is intentionally
# NOT executed a second time: it has no ORDER BY, so a fresh result set is not
# guaranteed to come back in the same row order, and re-reading would also
# overwrite df_client with every contract-type column.

# Release the database resources; nothing below needs the connection.
cursor.close()
cnxn.close()

In [None]:
# Print a summary of df_client (columns, non-null counts, dtypes, memory usage)
df_client.info()

## Data Analysis

In [None]:
# Check data shape: (number of rows, number of columns) of df_orig
df_orig.shape

In [None]:
# Check data attributes of the columns (dtype and non-null count per column)
df_orig.info()

In [None]:
# Normalize the entire dataset: scale every customer row to unit Euclidean length

magnitude = np.sqrt(np.square(df_orig).sum(axis=1))
# A row whose contract-type columns are all zero has magnitude 0; dividing by it
# would give 0/0 = NaN and the NaNs would flow into the similarity computation.
# Dividing such rows by 1 instead leaves them as all zeros.
df_orig = df_orig.divide(magnitude.replace(0, 1), axis='index')

### Recommendations using cosine similarity

In [None]:
#------------------------
# ITEM-ITEM CALCULATIONS
#------------------------
def calculate_similarity(data_items):
    """Return the item-item cosine similarity of ``data_items``.

    ``data_items`` is a dataframe whose columns are the items to compare.
    The result is a square dataframe labelled on both axes with those
    column names, holding the cosine similarity of every pair of columns.
    """
    item_labels = data_items.columns
    # Items are stored as columns, while cosine_similarity compares rows,
    # so the sparse representation is transposed before the call.
    items_as_rows = sparse.csr_matrix(data_items).transpose()
    return pd.DataFrame(
        data=cosine_similarity(items_as_rows),
        index=item_labels,
        columns=item_labels,
    )



In [None]:
# Item-item similarity matrix computed from the whole normalized dataframe
data_matrix = calculate_similarity(data_items=df_orig)

In [None]:
#------------------------------------------------------------------------#
# Loop through all the customers and get their individual recommendations#
#------------------------------------------------------------------------#
# Number of recommendations written per customer.
TOP_N = 3

# Not populated by this cell; kept only in case other cells reference the names.
recommendations = []
percentages = []

# The row sums of the similarity matrix are the same for every customer,
# so they are computed once instead of once per iteration.
similarity_totals = data_matrix.sum(axis=1)

# Rows are collected in a plain list and turned into a dataframe once at the
# end: DataFrame.append copied the whole frame on every call and no longer
# exists in current pandas.
result_rows = []
for customer_index, client_number in df_client['CLIENT_NUMBER'].items():
    # Contract-type vector of this customer. The row label of df_client is
    # used to find the matching row in df_orig (.loc replaces the removed .ix).
    customer_contract_vector = df_orig.loc[customer_index]

    # Contract types the customer has already purchased.
    known_customer_contract = customer_contract_vector[customer_contract_vector > 0].index

    # Calculate the score: similarity-weighted sum of the customer's contracts,
    # scaled by each contract type's total similarity.
    score = data_matrix.dot(customer_contract_vector).div(similarity_totals)

    # Remove the known contracts from the recommendation.
    score = score.drop(known_customer_contract)

    # Emit up to TOP_N best-scoring contract types; fewer rows are written
    # when fewer candidates remain instead of raising IndexError.
    for contract_type, percentage in score.nlargest(TOP_N).items():
        result_rows.append((client_number, contract_type, percentage))

results = pd.DataFrame(result_rows, columns=['ClientNumber', 'Recommendation', 'Percentage'])


In [None]:
# Save results to csv (header row included, dataframe index omitted)
results.to_csv('EX_SOURCE_PRODUCT_RECOMMENDATION_LIFE.csv', index=False, header=True)