In [37]:
import pandas as pd
from pymongo import MongoClient
from dotenv import load_dotenv
import os
from tqdm import tqdm
from langchain.chat_models import ChatOpenAI
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.mapreduce import MapReduceChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import MapReduceDocumentsChain,ReduceDocumentsChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
import requests
import glob
from bs4 import BeautifulSoup

load_dotenv()
MONGO_CONN=os.environ.get("MONGO_CONNECTION_STRING")
client = MongoClient(MONGO_CONN)
col = client["bfsi-genai"]["credit_history"]
vcol = client["bfsi-genai"]["cc_products"]

# Define prompt
prompt_template = """Write a concise summary of the following in exactly 4000 words:
"{text}"
Point by Point SUMMARY of Credit card mentioned Above:"""
prompt = PromptTemplate.from_template(prompt_template)
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
llm_chain = LLMChain(llm=llm, prompt=prompt)


# Define StuffDocumentsChain
stuff_chain = StuffDocumentsChain(
    llm_chain=llm_chain, document_variable_name="text"
)

In [5]:
df = pd.read_csv("data/cs-training.csv")
df["MonthlyIncome"] = df["MonthlyIncome"]*25

recs = df.to_dict(orient="records")
col.insert_many(recs)

<pymongo.results.InsertManyResult at 0x12fc80ca0>

In [38]:
from langchain.docstore.document import Document
url = "https://www.tengenbank.com/personal/pay/cards/credit-cards/marriott-bonvoy-credit-card"
def parser(url,i=-1):
    response = requests.get(url)
    page_content = response.content
    soup = BeautifulSoup(page_content, 'html.parser')
    products = soup.find('div', {"class": "main-content"})
    return [Document(metadata={"source": url,"title": url.split("/")[i]},page_content=products.text)]

In [40]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap  = 100,
)

def parse_n_load(url):
    op = []
    try:
        op = parser(url)
        try:
            op += parser(url+"/eligibility",-2)
        except:
            print(f"Failed to Parse Eligibility {url}")
        op += parser(url+"/fees-and-charges",-2)
    except:
        print(f"Failed to Parse Entire {url}")
    return op

def summarize_docs(docs):
    return stuff_chain.run(docs)

def get_data(url):
    data = parse_n_load(url)
    docs = text_splitter.split_documents(data)
    summary = summarize_docs(docs)
    doc = [Document(metadata={"source": url,"title": url.split("/")[-1]},page_content=summary)]
    return doc

# Split the long text and Summarize

In [41]:

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 4000,
    chunk_overlap  = 500,
)
op = parser(url)
op += parser(url+"/eligibility",-2)
op += parser(url+"/fees-and-charges",-2)
details = text_splitter.split_documents(op)
for detail in details:
    print(detail.page_content.replace(".", ".\n"))


Features Reward Type Reward Type Marriott Bonvoy Points Features Features 1 Free Night Award on Joining fee realization10 Elite Night Credits (ENCs) under the Marriott Bonvoy ProgramComplimentary Marriott Bonvoy Silver Elite StatusEarn 8 Marriott Bonvoy Points per Rs.
 150 spent at hotels participating in Marriott Bonvoy*Earn 4 Marriott Bonvoy Points per Rs.
 150 spent on travel, dining & entertainment*Earn 2 Marriott Bonvoy Points per Rs.
 150 spent on all other applicable purchases*Complimentary Golf Access 2 per quarter across the world (Green Fee Waiver)Marriott Bonvoy Points will not be accrued for the following non-core product features categories:FuelSmart EMI / Dial an EMI transactionWallet loads / Gift or Prepaid Card load/ Voucher purchaseCash AdvancesPayment of Outstanding BalancesPayment of card fees and other chargesGovernment related transactions & Rental transactions*Click here to view detailed Terms and Conditions**Click here to view detailed Terms and Conditions on Gol

In [42]:
print(summarize_docs(details).replace(".", ".\n"))


The Marriott Bonvoy 10gen Bank Credit Card offers various features and rewards.
 Upon joining, cardholders receive a free night award, 10 Elite Night Credits, and complimentary Marriott Bonvoy Silver Elite Status.
 They can earn 8 Marriott Bonvoy Points per Rs.
 150 spent at participating hotels, 4 points per Rs.
 150 spent on travel, dining, and entertainment, and 2 points per Rs.
 150 spent on other purchases.
 Cardholders also receive complimentary golf access and lounge access within India and internationally.
 Additional features include zero lost card liability and a foreign currency markup of 3.
5%.
 Milestone benefits include free night awards based on eligible spend.
 The card also offers insurance coverage and the ability to redeem points for stays, experiences, and travel.
 Cardholders can access concierge services, use Smart EMI for converting big spends into EMI, and make contactless payments.
 The eligibility criteria for the card include a minimum age of 21 years and a g

In [2]:
docs = []
results = ["https://www.tengenbank.com/personal/pay/cards/credit-cards/marriott-bonvoy-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/swiggy-hdfc-bank-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/regalia-gold-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/irctc-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/hdfc-bank-upi-rupay-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/diners-privilege","https://www.tengenbank.com/personal/pay/cards/credit-cards/moneyback-plus","https://www.tengenbank.com/personal/pay/cards/credit-cards/freedom-card-new","https://www.tengenbank.com/personal/pay/cards/credit-cards/indianoil-hdfc-bank-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/infinia-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/diners-club-black","https://www.tengenbank.com/personal/pay/cards/credit-cards/tata-neu-infinity-hdfc-bank-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/tata-neu-plus-hdfc-bank-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/shoppers-stop-hdfc-bank-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/shoppers-stop-black-hdfc-bank-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/paytm-hdfc-bank-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/paytm-hdfc-bank-select-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/paytm-hdfc-bank-mobile-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/paytm-hdfc-bank-digital-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/platinum-times-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/titanium-times-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/6e-rewards-indigo-hdfc-bank-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/6e-rewards-xl-indigo-hdfc-bank-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/all-miles-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/freedom-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/bharat-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/diners-club-premium","https://www.tengenbank.com/personal/pay/cards/credit-cards/diners-club-rewardz","https://www.tengenbank.com/personal/pay/cards/credit-cards/doctors-regalia","https://www.tengenbank.com/personal/pay/cards/credit-cards/doctors-superia","https://www.tengenbank.com/personal/pay/cards/credit-cards/jetprivilege-hdfc-bank-titanium-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/platinum-edge","https://www.tengenbank.com/personal/pay/cards/credit-cards/platinum-plus","https://www.tengenbank.com/personal/pay/cards/credit-cards/solitaire","https://www.tengenbank.com/personal/pay/cards/credit-cards/superia","https://www.tengenbank.com/personal/pay/cards/credit-cards/teachers-platinum","https://www.tengenbank.com/personal/pay/cards/credit-cards/titanium-edge","https://www.tengenbank.com/personal/pay/cards/credit-cards/visa-signature","https://www.tengenbank.com/personal/pay/cards/credit-cards/world-mastercard","https://www.tengenbank.com/personal/pay/cards/credit-cards/money-back","https://www.tengenbank.com/personal/pay/cards/credit-cards/regalia-first","https://www.tengenbank.com/personal/pay/cards/credit-cards/diners-club-miles","https://www.tengenbank.com/personal/pay/cards/credit-cards/regalia","https://www.tengenbank.com/personal/pay/cards/credit-cards/jet-privilege-hdfc-bank-diners-club","https://www.tengenbank.com/personal/pay/cards/credit-cards/jet-privilege-hdfc-bank-world","https://www.tengenbank.com/personal/pay/cards/credit-cards/jetprivilege-hdfc-bank-platinum-credit-card","https://www.tengenbank.com/personal/pay/cards/credit-cards/diners-privilege-old"]
for result in tqdm(results):
    docs += get_data(result)

# Generate embedding to the summarized documents

In [44]:
from langchain.embeddings.huggingface import HuggingFaceInstructEmbeddings
from langchain.vectorstores import MongoDBAtlasVectorSearch
repo_id = "hkunlp/instructor-base"
hf = HuggingFaceInstructEmbeddings(model_name=repo_id, cache_folder="tmp/")
hf.embed_instruction = "Represent the document for retrieval of credit cards:"
vectorstore = MongoDBAtlasVectorSearch(vcol, hf)

load INSTRUCTOR_Transformer
max_seq_length  512


In [45]:
vcol.delete_many({})
vectorstore.add_documents(docs)

[ObjectId('6517ebd2aba949c4b64d3bbe'),
 ObjectId('6517ebd2aba949c4b64d3bbf'),
 ObjectId('6517ebd2aba949c4b64d3bc0'),
 ObjectId('6517ebd2aba949c4b64d3bc1'),
 ObjectId('6517ebd2aba949c4b64d3bc2'),
 ObjectId('6517ebd2aba949c4b64d3bc3'),
 ObjectId('6517ebd2aba949c4b64d3bc4'),
 ObjectId('6517ebd2aba949c4b64d3bc5'),
 ObjectId('6517ebd2aba949c4b64d3bc6'),
 ObjectId('6517ebd2aba949c4b64d3bc7'),
 ObjectId('6517ebd2aba949c4b64d3bc8'),
 ObjectId('6517ebd2aba949c4b64d3bc9'),
 ObjectId('6517ebd2aba949c4b64d3bca'),
 ObjectId('6517ebd2aba949c4b64d3bcb'),
 ObjectId('6517ebd2aba949c4b64d3bcc'),
 ObjectId('6517ebd2aba949c4b64d3bcd'),
 ObjectId('6517ebd2aba949c4b64d3bce'),
 ObjectId('6517ebd2aba949c4b64d3bcf'),
 ObjectId('6517ebd2aba949c4b64d3bd0'),
 ObjectId('6517ebd2aba949c4b64d3bd1'),
 ObjectId('6517ebd2aba949c4b64d3bd2'),
 ObjectId('6517ebd2aba949c4b64d3bd3'),
 ObjectId('6517ebd2aba949c4b64d3bd4'),
 ObjectId('6517ebd2aba949c4b64d3bd5'),
 ObjectId('6517ebd2aba949c4b64d3bd6'),
 ObjectId('6517ebd2aba949