In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor, XGBClassifier
import numpy as np

from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.retrievers.multi_query import MultiQueryRetriever

from pymongo import MongoClient
from dotenv import load_dotenv
import os
import joblib
from glob import glob

from langchain.embeddings.huggingface import HuggingFaceInstructEmbeddings
from langchain.vectorstores import MongoDBAtlasVectorSearch
from pprint import pprint

load_dotenv()

True

# Initialize the MongoDB Atlas connector to perform Vector Search queries

In [3]:
MONGO_CONN=os.environ.get("MONGO_CONNECTION_STRING")
client = MongoClient(MONGO_CONN)
col = client["bfsi-genai"]["credit_history"]
vcol = client["bfsi-genai"]["cc_products"]

llm = ChatOpenAI(temperature=0.2, model_name="gpt-3.5-turbo")
llm_large= ChatOpenAI(temperature=0, model_name="gpt-4")
repo_id = "hkunlp/instructor-base"
hf = HuggingFaceInstructEmbeddings(model_name=repo_id, cache_folder="tmp/")
hf.embed_instruction = "Represent the document for retrieval of personalized credit cards:"
vectorstore = MongoDBAtlasVectorSearch(vcol, hf)

  from tqdm.autonotebook import trange


load INSTRUCTOR_Transformer
max_seq_length  512


# Train a ML Model to predict Candidate deliquency and save model as ckpt

In [5]:
df = pd.DataFrame.from_records(col.find({}, {"_id":0,"Unnamed: 0":0}))

# Separate target from predictors
y = df.SeriousDlqin2yrs
X = df.drop(['SeriousDlqin2yrs'], axis=1)

if "../model/classifier.jlb" in glob("../model/*"):
    print("i am here")
    # Divide data into training and validation subsets
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


    model = XGBClassifier(learning_rate = 0.1, n_estimators = 1000, verbose = 1)

    model.fit(X_train, y_train)
    joblib.dump(model, "../model/classifier.jlb")
else:
    model = joblib.load("../model/classifier.jlb")

i am here


Parameters: { "verbose" } are not used.



# Load saved Classifier model

In [48]:
#do not run each time
model = joblib.load("../model/classifier.jlb")
imp_idx = np.argsort(-1 * model.feature_importances_)

Percentage change the user is going to miss his payments:  0.7082840800285339


# Predict the Chance of Delinquency

In [50]:
user_id = 241

df = pd.DataFrame.from_records((col.find({"Unnamed: 0":user_id}, {"_id":0,"Unnamed: 0":0, "SeriousDlqin2yrs":0})))
feature_importance = "\n".join(i for i in list(map(lambda x:f"Columns:{x[0]}  Prob score for decision making:{x[1]}" ,zip(df.columns[imp_idx], model.feature_importances_[imp_idx]))))
user_profile_ip = df.to_dict(orient="records")[0]
pred = model.predict_proba(df)[:,1][0]
print(f"Percentage change the user is going to miss his payments:  {pred}")

Percentage change the user is going to miss his payments:  0.7082840800285339


# Approved Credit Limit

In [49]:
allowed_credit_limit = int(np.ceil(df.MonthlyIncome*6*(1-pred)))
print(f"Allowed Credit Limit for the user: {allowed_credit_limit}")

Allowed Credit Limit for the user: 240666


# Features important to classify by the model

In [41]:

print(feature_importance)

Columns:NumberOfTimes90DaysLate  Prob score for decision making:0.45220547914505005
Columns:NumberOfTime30-59DaysPastDueNotWorse  Prob score for decision making:0.12773236632347107
Columns:NumberOfTime60-89DaysPastDueNotWorse  Prob score for decision making:0.12348417937755585
Columns:RevolvingUtilizationOfUnsecuredLines  Prob score for decision making:0.0951855406165123
Columns:NumberRealEstateLoansOrLines  Prob score for decision making:0.034801777452230453
Columns:age  Prob score for decision making:0.03436748683452606
Columns:MonthlyIncome  Prob score for decision making:0.03352366387844086
Columns:NumberOfOpenCreditLinesAndLoans  Prob score for decision making:0.033431872725486755
Columns:NumberOfDependents  Prob score for decision making:0.03271704167127609
Columns:DebtRatio  Prob score for decision making:0.03255053237080574


# Custom Prompt To explain the approved credit limit

In [51]:
prompt = f"""
##Instruction: Taking into account the Definitions of various fields and their respective values a model is trained to predict weather a person will expericen delinquency or not in the next 2 years. Below both the values that was input to the model and the result produced by the model are provided. As a bank employee response to the candidate, It is expected to provide a detailed reason in layman language as to why a Credit request was rejected or processed given the profile of the candidate. Also while providing reason do not mention the use of AI model being used for decision making.

##Definitions
RevolvingUtilizationOfUnsecuredLines=Total balance on credit cards and personal lines of credit except real estate and no installment debt like car loans divided by the sum of credit limits DataType=percentage
age=Age of borrower in years DataType=integer
NumberOfTime30-59DaysPastDueNotWorse=Number of times borrower has been 30-59 days past due but no worse in the last 2 years. DataType=integer
DebtRatio=Monthly debt payments, alimony,living costs divided by monthy gross income DataType=percentage
MonthlyIncome=Monthly income in INR DataType=real
NumberOfOpenCreditLinesAndLoans=Number of Open loans (installment like car loan or mortgage) and Lines of credit (e.g. credit cards) DataType=integer
NumberOfTimes90DaysLate=Number of times borrower has been 90 days or more past due. DataType=integer
NumberRealEstateLoansOrLines=Number of mortgage and real estate loans including home equity lines of credit DataType=integer
NumberOfTime60-89DaysPastDueNotWorse=Number of times borrower has been 60-89 days past due but no worse in the last 2 years. DataType=integer
NumberOfDependents=Number of dependents in family excluding themselves (spouse, children etc.) DataType=integer
SeriousDlqin2yrs=Person experienced 90 days past due delinquency or worse  DataType=Percentage

##Feature importace of the model used:
{feature_importance}

##Values for given profile to be use to predict the Result(SeriousDlqin2yrs) with a reason
{user_profile_ip}

## Model Result
Allowed Credit Limit for the user={allowed_credit_limit}

##Reason in step by step points:
"""
print(prompt)


##Instruction: Taking into account the Definitions of various fields and their respective values a model is trained to predict weather a person will expericen delinquency or not in the next 2 years. Below both the values that was input to the model and the result produced by the model are provided. As a bank employee response to the candidate, It is expected to provide a detailed reason in layman language as to why a Credit request was rejected or processed given the profile of the candidate. Also while providing reason do not mention the use of AI model being used for decision making.

##Definitions
RevolvingUtilizationOfUnsecuredLines=Total balance on credit cards and personal lines of credit except real estate and no installment debt like car loans divided by the sum of credit limits DataType=percentage
age=Age of borrower in years DataType=integer
NumberOfTime30-59DaysPastDueNotWorse=Number of times borrower has been 30-59 days past due but no worse in the last 2 years. DataType=i

In [52]:
response = llm.predict(prompt)
print(response)

1. The person's revolving utilization of unsecured lines is 0.973, which means they have a high balance on their credit cards and personal lines of credit compared to their credit limits. This indicates that they may be relying heavily on credit and may have difficulty managing their debt.

2. The person's age is 61, which suggests that they may be closer to retirement age. This could be a factor in the decision as it may affect their ability to repay the credit in the future.

3. The person has been 30-59 days past due but no worse in the last 2 years. While this is not a severe delinquency, it still indicates a potential risk of late payments.

4. The person's debt ratio is 0.209, which means their monthly debt payments, alimony, and living costs are relatively high compared to their monthly gross income. This indicates a higher financial burden and may make it difficult for them to make timely payments.

5. The person's monthly income is 137,500 INR, which is a relatively high incom

# Credit Card suggestions

In [None]:
# LLM powered retriver for product suggestions
retriever = vectorstore.as_retriever(search_type='similarity',search_kwargs={'k': 3})
recommender_retriever = MultiQueryRetriever.from_llm(retriever=retriever,llm=llm_large)

In [54]:
user_profile_based_card_template=f"""
##Instruction: Given the user profile recommended credit cards that will best fit the user profile. Provide reason as to why the credit card is suggested to the user for each card.

## User profile:
{response}

## Recommendations with reasons point by point:
"""
rec = recommender_retriever.get_relevant_documents(user_profile_based_card_template)
card_suggestions= ""
for r in rec:
    card_suggestions += f'- Card name:{" ".join(r.metadata["title"].split("-"))} card \n  Card Features:{r.page_content} +\n'

recomendations_template=f"""
##Instruction:
-If the the user is considerd High/Moderate risk of default and suggestion on user profile in rejection of credit request then return "No Credit Card Recomended"
-Given the user profile recommended credit cards that will best fit the user profile. 
-Provide card by card reasons(concise) as to why the credit card is suggested to the user.  

## User profile:
{response}

## Recommended Credit cards if Eligible:
{card_suggestions}

## Recommendations=Output as Json with card name as Key and concise reasons point by point as Value:
"""
resp = llm.predict(recomendations_template)

for ele in list(filter(lambda x:x!="",resp.split("\n")[1:-1])):
    pprint(ele)

('  "titanium edge card": "1. Offers reward points for every ₹150 spent\\n2. '
 'Provides dining privileges with 50% more reward points\\n3. Offers a fuel '
 'surcharge waiver\\n4. EMV Chip technology for fraud protection\\n5. 50 days '
 'of interest-free period for purchases\\n6. Revolving credit at a nominal '
 'interest rate",')
('  "platinum edge card": "1. Earns 2 reward points for every ₹150 spent\\n2. '
 '50% more reward points at premium restaurants\\n3. Fuel surcharge '
 'waiver\\n4. EMV Chip Card technology for enhanced security\\n5. Zero '
 'liability on lost cards\\n6. 50 interest-free days from the date of '
 'purchase",')
