In [7]:
# -------------------------------
# 0) Imports
# -------------------------------
import pandas as pd
# from langchain_openai import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
import os
import getpass
# from IPython.display import display

In [2]:
# -------------------------------
# 1) Load CSV into DataFrame
# -------------------------------
file_path = "banking_issues_all.csv" 
df_banking_issues = pd.read_csv(file_path)

In [4]:
display(df_banking_issues)

Unnamed: 0,emp_id,issue_description
0,11234,"Associate mistakenly credited INR 50,000 twice..."
1,11567,Employee entered wrong IFSC code while process...
2,11982,"Associate posted INR 4,350 as loan repayment a..."
3,12345,Teller forgot to obtain KYC verification docum...
4,12891,Staff miscalculated fixed deposit maturity amo...
5,13208,Employee delayed processing of RTGS transactio...
6,13677,"Wrong debit of INR 7,200 to a corporate accoun..."
7,14112,Associate incorrectly applied service charge o...
8,14753,Staff entered cheque number incorrectly while ...
9,15240,Employee uploaded an outdated interest rate ta...


In [8]:
# Setup Google Generative AI
if "GOOGLE_API_KEY" not in os.environ:
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Provide your Google API key here ")

In [9]:
# -------------------------------
# 2) Setup LLM (requires OPENAI_API_KEY or GOOGLE_API_KEY)
# -------------------------------
# llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0)

# Prompt template for issue_summary and multi-label risk_type
prompt_template = ChatPromptTemplate.from_template("""
You are an expert in banking risk management.

Issue description: {issue_description}

1. Provide a concise 1-2 line summary of the issue.
2. Assign one or more risk types from the following categories (multi-label allowed):
   - Operational Risk
   - Technology Risk
   - Compliance Risk
   - Financial Risk
   - Reputational Risk
   - Other

Return your answer in JSON format with keys: issue_summary (string), risk_type (list of strings).
""")

chain = LLMChain(llm=llm, prompt=prompt_template)

  chain = LLMChain(llm=llm, prompt=prompt_template)


In [None]:
# # RunnableSequence style
# # Define LLM
# # llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
# llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0)

# # Define prompt
# prompt_template = ChatPromptTemplate.from_template("""
# You are an expert in banking risk management.

# Issue description: {issue_description}

# 1. Provide a concise 1-2 line summary of the issue.
# 2. Assign one or more risk types from the following categories (multi-label allowed):
#    - Operational Risk
#    - Technology Risk
#    - Compliance Risk
#    - Financial Risk
#    - Reputational Risk
#    - Other

# Return your answer in JSON format with keys: issue_summary (string), risk_type (list of strings).
# """)

# # Build chain using RunnableSequence
# chain = prompt_template | llm

# # # Run on multiple inputs
# # response = chain.invoke({"issue_description": "Banking operations were halted for 30 minutes due to server outage."})

# # print(response.content)  # LLM response text


In [10]:
# -------------------------------
# 3) Function - Rule-based multi-label fallback classifier
# -------------------------------
def rule_based_risk_classification(issue_description: str) -> list:
    desc = issue_description.lower()
    labels = []

    if any(word in desc for word in ["server", "system", "app", "software", "technology", "portal", "atm", "network"]):
        labels.append("Technology Risk")
    if any(word in desc for word in ["delay", "manual", "clerical", "error", "batch", "process", "job failure"]):
        labels.append("Operational Risk")
    if any(word in desc for word in ["compliance", "kyc", "regulation", "audit"]):
        labels.append("Compliance Risk")
    if any(word in desc for word in ["interest", "charges", "payment", "debit", "credit", "funds", "amount"]):
        labels.append("Financial Risk")
    if any(word in desc for word in ["complaint", "apology", "customer dissatisfaction", "trust", "reputation"]):
        labels.append("Reputational Risk")

    if not labels:
        labels.append("Other")

    return labels

In [12]:
# -------------------------------
# 4) Get issue_description from DataFrame and LLM calls
# -------------------------------
summaries = []
risks = []

for desc in df_banking_issues["issue_description"]:
    try:
        # Try LLM first
        response = chain.run(issue_description=desc)
        # response = chain.invoke({"issue_description": desc})
        result = eval(response)  # assume JSON-like output
        issue_summary = result.get("issue_summary", "").strip()
        risk_type = result.get("risk_type", [])

        # Fallback if LLM gives empty response
        if not issue_summary:
            issue_summary = desc[:100] + "..."
        if not risk_type:
            risk_type = rule_based_risk_classification(desc)

    except Exception:
        # If LLM fails, use rule-based fallback
        issue_summary = desc[:100] + "..."
        risk_type = rule_based_risk_classification(desc)

    summaries.append(issue_summary)
    risks.append(", ".join(risk_type))  # store as comma-separated string

df_banking_issues["issue_summary"] = summaries
df_banking_issues["risk_type"] = risks

  response = chain.run(issue_description=desc)


In [13]:
display(df_banking_issues)

Unnamed: 0,emp_id,issue_description,issue_summary,risk_type
0,11234,"Associate mistakenly credited INR 50,000 twice...","Associate mistakenly credited INR 50,000 twice...",Financial Risk
1,11567,Employee entered wrong IFSC code while process...,Employee entered wrong IFSC code while process...,"Operational Risk, Reputational Risk"
2,11982,"Associate posted INR 4,350 as loan repayment a...","Associate posted INR 4,350 as loan repayment a...","Technology Risk, Compliance Risk, Financial Risk"
3,12345,Teller forgot to obtain KYC verification docum...,Teller forgot to obtain KYC verification docum...,Compliance Risk
4,12891,Staff miscalculated fixed deposit maturity amo...,Staff miscalculated fixed deposit maturity amo...,Financial Risk
5,13208,Employee delayed processing of RTGS transactio...,Employee delayed processing of RTGS transactio...,Operational Risk
6,13677,"Wrong debit of INR 7,200 to a corporate accoun...","Wrong debit of INR 7,200 to a corporate accoun...","Operational Risk, Financial Risk"
7,14112,Associate incorrectly applied service charge o...,Associate incorrectly applied service charge o...,"Technology Risk, Financial Risk, Reputational ..."
8,14753,Staff entered cheque number incorrectly while ...,Staff entered cheque number incorrectly while ...,Reputational Risk
9,15240,Employee uploaded an outdated interest rate ta...,Employee uploaded an outdated interest rate ta...,"Technology Risk, Financial Risk"


In [14]:
# -------------------------------
# 5) Save updated DataFrame
# -------------------------------
df_banking_issues.to_csv("banking_issues_with_multi_risks_tagging.csv", index=False)

print("✅ Processing complete! File saved as 'banking_issues_with_multi_risks_tagging.csv'")

✅ Processing complete! File saved as 'banking_issues_with_multi_risks_tagging.csv'
