Leveraging LLM Reasoning Enhances Personalized Recommender Systems(2024)에서 제안된 LLM을 이용한 추천시스템 제작 방안임

[필요한 파일]

총 6개

대출상품별 데이터 : [**products_name**]_loan_data_[**NAME_ID**].csv (products_name: credit, mortgage, rent)

대출상품별 사용자의 데이터 : [**products_name**]_user_data_[**NAME_ID**].csv (products_name: credit, mortgage, rent)

In [1]:
# Mount Google Drive into the Colab runtime at /content/drive; the CSV inputs
# used below are read from paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 1. 사용자별 데이터 가져오기

In [None]:
### Put your own name (in Korean) into NAME and enter your own Gemini API key!!!
### NAME must be one of the keys of name_2_id below.
# NOTE(review): avoid committing or sharing the notebook with a real API key filled in.

NAME = 'YOUR_NAME' # TODO
GOOGLE_API_KEY = 'YOUR_API_KEY' # TODO

# Maps each team member's name to the ID string used as the suffix of that
# member's input/output CSV file names.
name_2_id = {'재혁':'0', '지원':'1', '소정':'2', '지연':'3', '혜원':'4'}

# Raises KeyError while NAME is still the 'YOUR_NAME' placeholder (or any name
# that is not a key of name_2_id).
NAME_ID = name_2_id[NAME]

각 대출 상품별로 필요한 컬럼들의 조합을 다르게 한 사용자들의 데이터

In [29]:
import pandas as pd

# Load this member's three user tables (credit, mortgage, rent) from Drive.
# The NAME_ID suffix selects the member's own CSV for each product.
credit_user_df, mortgage_user_df, rent_user_df = [
    pd.read_csv(csv_path)
    for csv_path in (
        f'/content/drive/MyDrive/BOAZ/mini_project_2/credit_user_data_{NAME_ID}.csv',
        f'/content/drive/MyDrive/BOAZ/mini_project_2/mortgage_user_data_{NAME_ID}.csv',
        f'/content/drive/MyDrive/BOAZ/mini_project_2/rent_user_data_{NAME_ID}.csv',
    )
]

# Sanity check: expect 125, 125 and 100 rows respectively.
# NOTE(review): the 'mortgate' spelling in the second label is a typo in the
# printed text; it is kept so the output matches earlier recorded runs.
print(
    f'credit_user_df shape : {credit_user_df.shape}',
    f'mortgate_user_df shape : {mortgage_user_df.shape}',
    f'rent_user_df shape : {rent_user_df.shape}',
    sep='\n',
)

credit_user_df shape : (125, 13)
mortgate_user_df shape : (125, 13)
rent_user_df shape : (100, 13)


## 2. 대출상품 데이터 가져오기

In [30]:
# Load this member's three loan-product tables (credit, mortgage, rent) from Drive.
# The NAME_ID suffix selects the member's own CSV for each product.
credit_loan_df, mortgage_loan_df, rent_loan_df = [
    pd.read_csv(csv_path)
    for csv_path in (
        f'/content/drive/MyDrive/BOAZ/mini_project_2/credit_loan_data_{NAME_ID}.csv',
        f'/content/drive/MyDrive/BOAZ/mini_project_2/mortgage_loan_data_{NAME_ID}.csv',
        f'/content/drive/MyDrive/BOAZ/mini_project_2/rent_loan_data_{NAME_ID}.csv',
    )
]

# Sanity check: expect 4, 4 and 5 products respectively.
print(
    f'credit_loan_df shape : {credit_loan_df.shape}',
    f'mortgage_loan_df shape : {mortgage_loan_df.shape}',
    f'rent_loan_df shape : {rent_loan_df.shape}',
    sep='\n',
)

credit_loan_df shape : (4, 3)
mortgage_loan_df shape : (4, 4)
rent_loan_df shape : (5, 4)


In [31]:
# Display the credit loan product table to inspect its columns and values.
credit_loan_df

Unnamed: 0,fin_prdt_cd,crdt_grad_avg,min_credit_score
0,1300,4.16,600
1,3,1.55,400
2,WR0002F,12.09,500
3,KB200200000001,5.1,500


## 3. UserData와 LoanProductData를 LoanApproved value에 기반하여 reasoning하기

In [6]:
!pip install google-generativeai



In [32]:
import google.generativeai as genai

# Configure the SDK with the personal key set in GOOGLE_API_KEY.
genai.configure(api_key=GOOGLE_API_KEY)

# Module-level model handle; generate_reasoning() calls model.generate_content() on it.
model = genai.GenerativeModel('gemini-1.5-flash')

### Gemini 1.5 Flash Limitation ###
# (quota figures as recorded by the notebook author -- TODO confirm against the
#  current quota page before relying on them)
# 1. 15 Requests per Minute
# 2. 1M Tokens per Minute
# 3. 1500 Requests per Day(24H)

In [33]:
import time

def generate_reasoning(loan_type, user_data, loan_data, ground_truth, max_retries=5, min_interval=5.0):
    """Ask the Gemini model to explain a known loan decision for one (user, product) pair.

    The prompt contains the user's metadata, the loan product's metadata and the
    ground-truth label, and asks for an explanation in a fixed
    "### Reason ### / ### Loan Approved ###" layout.

    Args:
        loan_type: Product family. 'credit' selects the credit-score-focused hint;
            any other value selects the home-ownership-focused hint.
        user_data: User metadata, interpolated into the prompt as-is.
        loan_data: Loan product metadata, interpolated into the prompt as-is.
        ground_truth: Approval label shown to the model and echoed in the
            requested output format.
        max_retries: How many times a request rejected with HTTP 429 is retried
            before the error is re-raised. 0 disables retrying.
        min_interval: Minimum number of seconds a successful request is made to
            take (pacing for the per-minute request limit); also the first
            back-off delay, which doubles after every 429.

    Returns:
        The text of the model's response.

    Raises:
        Exception: Whatever ``model.generate_content(...).text`` raises. Errors
            that are not HTTP 429 propagate immediately; a 429 propagates once
            ``max_retries`` retries have been used up (for example when the
            daily quota, rather than the per-minute one, is exhausted).
    """
    base_prompt = f"Base on given user's metadata {user_data}, Will user be able to get approved for following loan product? \n\n [Loan Product's Metadata]\n{loan_data}\n\n"
    # NOTE(review): this sentence says "has been approved" even when ground_truth
    # is 'NotApproved'; wording left unchanged so new outputs stay comparable with
    # the ones already generated -- confirm whether it should be reworded.
    description_prompt = "We will also provide you the ground truth that user has been approved for a loan.\nWe want you to generate a reasonable explanation with given informations\n"

    # Product-specific hint. The stray zero-width characters that had been pasted
    # into these two literals (between "values" and "of") are removed.
    if loan_type == 'credit':
        optional_prompt = "In particular, create an explanation focusing on the values of the (user's 'CreditScore', product's 'min_credit_score') pair and user's 'LoanAmount'."
    else:
        optional_prompt = "In particular, create an explanation focusing on the values of the 'HomeOwnershipStatus', and 'LoanAmount'."

    format_prompt = f"===Please follow the format below:===\n ### Reason ### \n Write your reasoning explanation here. \n ### Loan Approved ### \n {ground_truth}"

    reasoning_prompt = f"{base_prompt}{description_prompt}{optional_prompt}{format_prompt}"

    retries_left = max(0, max_retries)
    backoff = min_interval
    while True:
        start_time = time.time()
        try:
            reasoning_response = model.generate_content(reasoning_prompt).text
        except Exception as exc:
            # google-api-core errors carry the HTTP status in `.code`; only a
            # 429 (rate limit / quota) is worth waiting out. Anything else, or a
            # 429 after the last retry, is re-raised unchanged.
            if getattr(exc, 'code', None) != 429 or retries_left == 0:
                raise
            retries_left -= 1
            if backoff > 0:
                time.sleep(backoff)
            backoff *= 2
            continue

        # Keep each successful request at least `min_interval` seconds long so a
        # long loop stays under the per-minute request limit (15/min -> 5 s each
        # leaves some headroom).
        remaining = min_interval - (time.time() - start_time)
        if remaining > 0:
            time.sleep(remaining)

        return reasoning_response

In [34]:
# Accumulator for the credit-loan run: generated explanations and the label
# used for each, stored as two parallel lists.
RecSAVER_credit = dict(reasoning_sentences=[], loan_approved=[])

In [35]:
# Accumulator for the mortgage-loan run: generated explanations and the label
# used for each, stored as two parallel lists.
RecSAVER_mortgage = dict(reasoning_sentences=[], loan_approved=[])

In [36]:
# Accumulator for the rent-loan run: generated explanations and the label
# used for each, stored as two parallel lists.
RecSAVER_rent = dict(reasoning_sentences=[], loan_approved=[])

In [37]:
from tqdm import tqdm

In [38]:
## Personal credit loan reasoning - 500 per person (125 users x 4 products)
for _, user_row in tqdm(
    credit_user_df.iterrows(),
    desc="credit reasoning",
    total=len(credit_user_df),
):
    # Prompt payload: the user's row without the label, which is passed to the
    # model separately as the ground truth. It is the same for every product.
    user_data = {
        column: value
        for column, value in user_row.to_dict().items()
        if column != 'LoanApproved'
    }

    for _, loan_row in credit_loan_df.iterrows():
        loan_data = loan_row.to_dict()

        # A user whose scaled credit score is under the product's minimum is
        # always labelled 'NotApproved'; otherwise the user's own label is used.
        meets_min_score = user_row['ScaledCreditScore'] >= loan_row['min_credit_score']
        loan_approved = user_row['LoanApproved'] if meets_min_score else 'NotApproved'

        reasoning_response = generate_reasoning('credit', user_data, loan_data, loan_approved)

        RecSAVER_credit['reasoning_sentences'].append(reasoning_response)
        RecSAVER_credit['loan_approved'].append(loan_approved)

credit reasoning: 100%|██████████| 125/125 [41:49<00:00, 20.08s/it]


In [39]:
## Mortgage loan reasoning - 500 per person (125 users x 4 products)
for _, user_row in tqdm(
    mortgage_user_df.iterrows(),
    desc='mortgage reasoning',
    total=len(mortgage_user_df),
):
    # Prompt payload: the user's row without the label, which is passed to the
    # model separately as the ground truth. It is the same for every product.
    user_data = {
        column: value
        for column, value in user_row.to_dict().items()
        if column != 'LoanApproved'
    }

    for _, loan_row in mortgage_loan_df.iterrows():
        loan_data = loan_row.to_dict()

        # The user's own label is used unchanged for every mortgage product.
        loan_approved = user_row['LoanApproved']
        reasoning_response = generate_reasoning('mortgage', user_data, loan_data, loan_approved)

        RecSAVER_mortgage['reasoning_sentences'].append(reasoning_response)
        RecSAVER_mortgage['loan_approved'].append(loan_approved)

mortgage reasoning: 100%|██████████| 125/125 [41:44<00:00, 20.04s/it]


In [40]:
## Rent (jeonse) deposit loan reasoning - 500 per person (100 users x 5 products)
for _, user_row in tqdm(
    rent_user_df.iterrows(),
    desc='rent reasoning',
    total=len(rent_user_df),
):
    # Prompt payload: the user's row without the label, which is passed to the
    # model separately as the ground truth. It is the same for every product.
    user_data = {
        column: value
        for column, value in user_row.to_dict().items()
        if column != 'LoanApproved'
    }

    for _, loan_row in rent_loan_df.iterrows():
        loan_data = loan_row.to_dict()

        # The user's own label is used unchanged for every rent product.
        loan_approved = user_row['LoanApproved']
        reasoning_response = generate_reasoning('rent', user_data, loan_data, loan_approved)

        RecSAVER_rent['reasoning_sentences'].append(reasoning_response)
        RecSAVER_rent['loan_approved'].append(loan_approved)

rent reasoning:  98%|█████████▊| 98/100 [41:07<00:50, 25.18s/it]


TooManyRequests: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).

In [41]:
## 500 rows are expected per product, 1500 in total.
# Each accumulator is turned into a two-column frame and written straight away
# (without the index) to a CSV in the current working directory.
credit_reasoning_df = pd.DataFrame(data=RecSAVER_credit)
credit_reasoning_df.to_csv(path_or_buf=f'credit_reasoning_{NAME_ID}.csv', index=False)

mortgage_reasoning_df = pd.DataFrame(data=RecSAVER_mortgage)
mortgage_reasoning_df.to_csv(path_or_buf=f'mortgage_reasoning_{NAME_ID}.csv', index=False)

rent_reasoning_df = pd.DataFrame(data=RecSAVER_rent)
rent_reasoning_df.to_csv(path_or_buf=f'rent_reasoning_{NAME_ID}.csv', index=False)

In [42]:
# Report how many explanations were collected per product; a row count below
# users x products means that run stopped before finishing.
print(
    f'credit_reasoning_df shape : {credit_reasoning_df.shape}',
    f'mortgage_reasoning_df shape : {mortgage_reasoning_df.shape}',
    f'rent_reasoning_df shape : {rent_reasoning_df.shape}',
    sep='\n',
)

credit_reasoning_df shape : (500, 2)
mortgage_reasoning_df shape : (500, 2)
rent_reasoning_df shape : (493, 2)
