In [22]:
import yaml
import json
import re
import pandas as pd
import dotenv
dotenv.load_dotenv()


True

In [14]:
def read_topics(file):
    with open(file) as f:
        lines = f.read().strip().split('\n')
        dict_data = {}

        key = ""
        for line in lines:
            line = line.strip()
            if line.endswith(":"):
                key = line[3:-1]
                dict_data[key] = []
            elif line.startswith('-'):
                dict_data[key].append(line[2:])
    return dict_data


In [15]:
topics = read_topics("topics.txt")
credit_topics = read_topics("credit_topics.txt")

In [16]:
credit_topics

{'Types of Credit Cards': ['Standard Credit Cards',
  'Reward Cards (Cash Back, Points, Travel Rewards)',
  'Secured Credit Cards',
  'Student Credit Cards',
  'Business Credit Cards'],
 'Credit Card Application': ['Eligibility criteria',
  'Required documentation',
  'Application process'],
 'Credit Card Terms and Conditions': ['Interest rates (APR)',
  'Credit limits',
  'Fees (annual, late payment, foreign transaction, etc.)',
  'Grace period'],
 'Credit Card Rewards and Benefits': ['Explanation of reward programs',
  'How to earn and redeem rewards',
  'Special benefits (travel insurance, extended warranties, etc.)'],
 'Credit Card Management': ['How to avoid interest charges',
  'Paying credit card bills',
  'Managing credit card debt',
  'Credit card safety and fraud protection'],
 'Credit Score and Credit Card Use': ['How credit card usage affects credit scores',
  'How to use a credit card to build credit'],
 'Types of Loans': ['Personal Loans',
  'Home Loans/Mortgages',
  'Aut

In [17]:
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI

In [23]:
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3, max_tokens=150)

In [103]:
lc_data = pd.read_csv("10K_Lending_Club_Loans.csv")
lc_data["desc"] = lc_data["desc"].fillna("")
lc_data["customer_id"] = lc_data.index+1000000

In [104]:
important_features = pd.read_csv("lending_club_features.csv")  

In [105]:
important_features

Unnamed: 0,feature_name,importance
0,desc,1.0
1,annual_inc,0.911351
2,int_rate,0.759657
3,title,0.665553
4,term,0.647081
5,inq_last_6mths,0.415404
6,grade,0.277711
7,revol_util,0.239155
8,purpose,0.228078
9,sub_grade,0.215034


In [106]:
example_customers = lc_data[important_features["feature_name"].tolist() + ['customer_id']].to_dict(orient="records")

In [107]:
from langchain.prompts import PromptTemplate

In [108]:
def make_description_template():
    description = """
    The customer (ID: {customer_id}), with the employment title of {emp_title}, resides in the area with the zip code {zip_code}. They currently have a loan amounting to ${loan_amnt} with a term of {term}. This loan, described as '{desc}', carries an interest rate of {int_rate}.

    The purpose of the loan is for '{purpose}', and it's classified under the grade '{grade}' with a sub-grade of '{sub_grade}'. The title of this loan is '{title}'.

    This customer has an annual income of ${annual_inc}, and their revolving line utilization rate (the amount of credit they're using relative to all their available revolving credit or their 'revol_util') stands at {revol_util}.

    Over the last 6 months, they've had {inq_last_6mths} inquiries on their credit report. It's important to note that too many hard inquiries might negatively impact a credit score.
    """
    return PromptTemplate(template=description, input_variables=["desc", "annual_inc", "int_rate", "title", "term", "inq_last_6mths", "grade", "revol_util", "purpose", "sub_grade", "loan_amnt", "emp_title", "zip_code", "customer_id"])


In [109]:
template = make_description_template()

In [110]:
example_customers[0]

{'desc': '',
 'annual_inc': 50000.0,
 'int_rate': '7.29%',
 'title': 'Medical',
 'term': ' 60 months',
 'inq_last_6mths': 0.0,
 'grade': 'A',
 'revol_util': 12.1,
 'purpose': 'medical',
 'sub_grade': 'A4',
 'loan_amnt': 4000,
 'emp_title': 'Time Warner Cable',
 'zip_code': '766xx',
 'customer_id': 1000000}

In [111]:
print(template.format(**example_customers[0]))


    The customer (ID: 1000000), with the employment title of Time Warner Cable, resides in the area with the zip code 766xx. They currently have a loan amounting to $4000 with a term of  60 months. This loan, described as '', carries an interest rate of 7.29%.

    The purpose of the loan is for 'medical', and it's classified under the grade 'A' with a sub-grade of 'A4'. The title of this loan is 'Medical'.

    This customer has an annual income of $50000.0, and their revolving line utilization rate (the amount of credit they're using relative to all their available revolving credit or their 'revol_util') stands at 12.1.

    Over the last 6 months, they've had 0.0 inquiries on their credit report. It's important to note that too many hard inquiries might negatively impact a credit score.
    


In [None]:
history_template = PromptTemplate(template="""

f"Give a a fictional conversation between a bank and a customer. the customer's data is given as {example_customers[0]}. 
""")

In [None]:
def generate_customer_history(customer_dict, topics, credit_topics, template, llm):
    customer_history = []
    for topic in topics:
        customer_history.append(f"Customer: {topic}")
        customer_history.append(f"Bank: {llm.generate(prompt=template.format(**customer_dict) + f"\n\nCustomer: {topic}").choices[0].text}")
    for topic in credit_topics:
        customer_history.append(f"Customer: {topic}")
        customer_history.append(f"Bank: {llm.generate(prompt=template.format(**customer_dict) + f"\n\nCustomer: {topic}").choices[0].text}")
    return customer_history

In [87]:
salary_out = llm.predict(f"Give a a fictional conversation between a bank and a customer. the customer's data is given as {example_customers[0]}. The customer is making changes to some of his data")
" ,".join(important_features["feature_name"])
salary_change = llm.predict(f"""
    given the customer {example_customers[0]} and the conversation below, can you identify any changes to these input variables to a dataset:
      {" ,".join(important_features["feature_name"])}
    Please only respond in json with the affected fetures and their old and new values.
    Here is the conversation: {salary_out}
    """)
json.loads(salary_change)

{'emp_title': {'old_value': 'Time Warner Cable', 'new_value': 'Verizon'},
 'purpose': {'old_value': 'medical', 'new_value': 'home improvement'}}

In [85]:
example_customers[0]

{'desc': '',
 'annual_inc': 50000.0,
 'int_rate': '7.29%',
 'title': 'Medical',
 'term': ' 60 months',
 'inq_last_6mths': 0.0,
 'grade': 'A',
 'revol_util': 12.1,
 'purpose': 'medical',
 'sub_grade': 'A4',
 'loan_amnt': 4000,
 'emp_title': 'Time Warner Cable',
 'zip_code': '766xx'}

In [88]:
print(salary_out)

Bank: Good morning, how may I assist you today?

Customer: Hi, I would like to make some changes to my account information.

Bank: Sure, can you please provide me with your account details?

Customer: My loan amount is $4000, my annual income is $50000, and my loan term is 60 months.

Bank: Okay, what changes would you like to make?

Customer: I would like to change my employment title from Time Warner Cable to Verizon.

Bank: Alright, I have updated your employment title to Verizon. Is there anything else you would like to change?

Customer: Yes, I would also like to change my purpose for the loan from medical to home improvement.

Bank: Noted. Your loan purpose


In [76]:
salary_out = llm.predict("Give a a fictional conversation between a bank and a customer. They are discussing the address and the customer makes a change to it")
" ,".join(important_features["feature_name"])
salary_change = llm.predict(f"""
    given the conversation below, can you identify any changes to these input variables to a dataset:
      {" ,".join(important_features["feature_name"])}
    Please only respond in json with the affected fetures and their old and new values.
    Here is the conversation: {salary_out}
    """)
json.loads(salary_change)

KeyboardInterrupt: 

In [79]:
salary_out = llm.predict("Give a a fictional conversation between a bank and a customer. They are discussing an existing mediacl loan and the annual income and the purpose for a loan and the customer makes a change to both")
" ,".join(important_features["feature_name"])
salary_change = llm.predict(f"""
    given the conversation below, can you identify any changes to these input variables to a dataset:
      {" ,".join(important_features["feature_name"])}
    Please only respond in json with the affected features and their old and new values.
    Here is the conversation: {salary_out}
    """)
json.loads(salary_change)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [80]:
print(salary_out)

Bank: Good morning, how can I assist you today?

Customer: Hi, I would like to discuss my existing medical loan.

Bank: Sure, I can pull up your account. Can you please verify your name and account number?

Customer: Yes, my name is John Smith and my account number is 123456.

Bank: Thank you, Mr. Smith. I see that you have a medical loan with us. How can I assist you with that?

Customer: I would like to change the annual income on my loan application.

Bank: Okay, can you tell me what your new annual income is?

Customer: Yes, my new annual income is $70,000.

Bank: Noted. Is there anything else you would like to


In [24]:
for topic_cat, topic_list in topics.items():
    
    print(f"Topic: {topic_cat}")
    for topic in topic_list:
        print(f"  Subtopic: {topic}")
        print()
        # print(llm.generate_chat(topic_cat, topic, num_lines=10))
        print()
        print()

Topic: Account Management
  Subtopic: Opening a new account



  Subtopic: Closing an account



  Subtopic: Updating account information



  Subtopic: Understanding different types of accounts (savings, checking, joint, business, etc.)



Topic: Online and Mobile Banking
  Subtopic: Setup and usage



  Subtopic: Online security



  Subtopic: Mobile app features



Topic: Financial Products
  Subtopic: Credit cards



  Subtopic: Loans and mortgages



  Subtopic: Investment products (stocks, bonds, mutual funds)



  Subtopic: Retirement accounts (IRAs, 401k)



  Subtopic: Insurance products



Topic: Transactions
  Subtopic: Deposits and withdrawals



  Subtopic: Money transfers and wire services



  Subtopic: Check cashing



  Subtopic: Foreign currency exchange



Topic: Fees and Charges
  Subtopic: Explanation of any fees associated with accounts or transactions



  Subtopic: How to avoid fees



Topic: Interest Rates
  Subtopic: Current interest rates for savings and loan