In [1]:
import os
import boto3
from botocore.exceptions import ClientError

# Gather the file names under test/ and put them in lexicographic order so that
# positional indexes into the list are stable between runs.
testing_list = os.listdir("test")
testing_list.sort()

print("{} number  of testing".format(len(testing_list)))

6458 number  of testing


In [20]:
# Spot-check which files sit at two positions of the sorted list.
for position in (1600, 2230):
    print(testing_list[position])

ACCT16914.json
ACCT19572.json


In [2]:
# Bedrock Runtime client. No region or credentials are passed here, so boto3's
# default configuration is used.
brt = boto3.client(service_name="bedrock-runtime")

# Inference-profile ARN passed as `modelId` to every converse() call below.
model_id = "arn:aws:bedrock:us-west-2:455772463109:inference-profile/us.anthropic.claude-3-5-sonnet-20241022-v2:0"

In [3]:
# Prompt template for the account-risk review. It is sent as the text of a single
# user turn, with one account file's contents appended directly after it (see the
# loops that build `conversation`). The "Conclusion" line requested by the output
# format is what `extract_conclusion` later searches for in the saved reports.
#
# NOTE(review): the worked example at the end is internally inconsistent
# ("3-day span" vs. "Total 7-day volume"; 7 total transactions / 2 incoming vs.
# 3 deposits + 4 withdrawals + 1 final withdrawal under "Channels"). The text is
# left untouched because any edit changes what the model sees - confirm before
# reusing this prompt.
user_message = '''
You are a professional banking and finance officer. Your current task is to determine, based on a given account’s transaction history, whether the account should be flagged as an alert (suspicious) account. Please judge carefully so as not to misclassify a normal account as suspicious.

## Notes:

1. Because ACCT31429 and ACCT31433 are internal bank intermediary accounts or virtual-number platform receiving accounts, most counterparties will be these accounts. If the account’s transactions are only with these accounts, do not regard that as problematic. Only consider it if the pattern of transactions with these accounts is abnormal.
2. Only consider transactions from the past one month.
3. Large transactions by themselves should not be deemed abnormal if they align with the customer’s asset level, income, or usual behavior.
4. Clearly distinguish between an alert account and a non-alert account by evaluating **combinations** of the following risk factors, not individual ones.

## Key Combined Features of Alert Accounts (must meet multiple criteria):

1. **Few large deposits followed by many small withdrawals**:
    - Presence of “a small number of large deposits” and “subsequent multiple small withdrawals”
    - Deposit and withdrawal totals nearly equal, indicating funds are “in–out balanced” or “quickly drained”
    - Withdrawal count significantly exceeds deposit count (usually ≥5:1)
2. **Abnormally active account**:
    - Daily average transaction count unusually high (usually >5/day)
    - High proportion of transactions outside business hours
    - Within a short period (e.g. 7 days), total transaction amount far exceeds account assets (AUM)
3. **Balance-reset behavior**:
    - After large transactions, the account balance repeatedly drops back to very low levels (e.g. around 1,000 TWD)
    - Multiple cycles of “large deposit → large withdrawal → balance zeroed out”
4. **Circular/hedging transactions**:
    - Similar amounts deposited and withdrawn on the same day or within a short timeframe
    - Net inflow ≈ net outflow, but large gross volumes
5. **Transaction patterns inconsistent with customer profile**:
    - Transaction amounts grossly mismatched with customer’s age or income level
    - New account (<6 months) immediately showing abnormal large fund flows
6. **Unidirectional Fund Flow Pattern**:
    - Only outgoing transactions during the observation period, no incoming transactions
    - Multiple consecutive outflows without corresponding income sources
    - Continuous outflows with unclear funding sources
7. **Non-standard Transaction Time Patterns**:
    - Early morning transactions (0:00-6:00), especially large or frequent ones
    - Combination patterns of late night and weekend transactions
    - Key account operations conducted during non-working hours
8. **Data Appearing Multiple Times or Repeatedly**
    - Consider all data we give is correct, no system error.
9. **Using Multiple Different Devices or IP Addresses for Transactions**

## Typical Features of Non-Alert Accounts:

1. Moderate transaction frequency (daily average <3)
2. Funds movements have reasonable explanations (e.g. salary credits, regular payments)
3. Large transactions may occur but infrequently and with clear purposes
4. Low proportion of transactions outside business hours, or if present, small amounts/frequency
5. Transaction patterns consistent with customer profile (age, income)
6. No obvious balance resets or hedging behavior

## Decision Criteria:

- **Alert Account**: Meets multiple alert criteria simultaneously, with patterns recurring or sustained.
- **Non-Alert Account**: May exhibit isolated risk factors, but overall transaction behavior is normal without multiple combined risk factors.

Using this framework, please provide an objective, comprehensive risk analysis report for the account, and indicate your conclusion as [Alert Account / Non-Alert Account], formatted as follows:

1. **Account Overview**
    
    [Review and present basic account information]
    
2. **Recent Transaction Summary**
    
    [Summarize transaction counts and key characteristics]
    
3. **Channels & Transaction Types Analysis**
    
    [Analyze channels and codes]
    
4. **Time Distribution Characteristics**
    
    [Analyze transaction timing patterns]
    
5. **Risk Factor Assessment**
    
    [Evaluate each risk factor in detail]
    
6. **Summary**
    
    [Overall evaluation]
    
7. **Conclusion**: [Alert Account / Non-Alert Account]

## Example:

- **Account Overview**
    - **Account Number**: ACCT4242
    - **Customer ID**: ID3986
    - **Age / Income / AUM**: 48 years old / 510K–2M TWD annual income / 6 TWD balance
    - **Digital Account**: Yes
    - **Account Tenure**: > 6 months
- **Recent Transaction Summary** (Last 30 Days)
    - **Total Transactions**: 7
    - **Incoming vs. Outgoing**: 2 incoming, 5 outgoing (incoming ratio 29 %)
    - **Total Volume**: 701,167 TWD
    - **Average Txn Amount**: 100,167 TWD
    - **Maximum Txn Amount**: 299,996 TWD
    - **Daily Average**: 2.33 txns/day over a 3-day span
- **Channels & Transaction Types Analysis**
    - **ATM (FEP-ATM-TXN)**: 3 deposits (2 × CD Transfer In via ACCT31429; 1 × large ATM deposit)
    - **Stored-Value Platform**: 4 withdrawals of exactly 49,995 TWD each via ACCT31450
    - **Digital-Bank-TXN**: Final large withdrawal (150,596 TWD) to ACCT31429
    - **Intermediary Accounts**: ACCT31429 / ACCT31450—normal counterparties but patterns matter
- **Time Distribution Characteristics**
    - **Friday**: 3 txns at 20:00–21:00
    - **Saturday**: 3 txns at 21:00–22:00
    - **Sunday**: 1 txn at 15:00
    - **After-Hours Concentration**: 6 of 7 txns occurred ≥ 20:00
- **Risk Factor Assessment**
    
    
    | Risk Factor | Observed | Explanation |
    | --- | --- | --- |
    | **1. Few large deposits → many smaller withdrawals** | Yes | Single large ATM deposit (299,996 TWD) followed by four equal withdrawals (~49,995 TWD) in quick succession |
    | **2. Abnormally active account** | No | Peaks at 3 txns/day (< 5/day threshold) |
    | **3. Balance-reset behavior** | Yes | Final digital-bank withdrawal (150,596 TWD) drains balance to 0 TWD shortly after large inflows |
    | **4. Circular/hedging transactions** | No | In/out amounts are not closely matched on the same day |
    | **5. Profile mismatch (volumes vs. AUM/income)** | Yes | Total 7-day volume (~701K TWD) is > 100 K × AUM (6 TWD); transaction sizes far exceed account balance |
    | **6. Unidirectional fund-flow** | No | Both inflows and outflows are present |
    | **7. Non-standard transaction times** | Yes | 86 % of txns took place between 20:00–06:00 |
    | **8. Repeated identical data entries** | N/A | No duplicate records |
    | **9. Multiple devices/IPs** | Yes | `is_same_device` and `is_same_ip` flags vary, indicating use of different endpoints |
- **Summary**
    - **Multiple red-flags** are present:
        - Large one-off deposit quickly split into multiple withdrawals (Pattern 1)
        - Balance entirely drained after large inflows (Pattern 3)
        - Transaction volumes hugely out of line with the tiny account balance (Pattern 5)
        - Concentration of activity in late-night hours (Pattern 7)
    - These combined patterns are classic indicators of layering or rapid fund movements that warrant scrutiny.
- **Conclusion**: **Alert Account**
'''


In [None]:

# Make sure the output folder exists before any report is written.
os.makedirs("test_report", exist_ok=True)

# see discord for more details

# File names whose model call failed; reviewed in a later cell.
error_list = []

# Only the first 800 files are handled by this cell. Slicing (instead of
# range(0, 800) + indexing) avoids an IndexError if fewer files are present.
for i, file_name in enumerate(testing_list[0:800]):

    # Read the account file and close it before the slow network call.
    with open(f"test/{file_name}", "r", encoding="utf-8") as acc:
        # NOTE(review): this appends the Python list repr of the lines
        # (brackets, quotes, escaped newlines) rather than the raw file text.
        # Left as is because changing the prompt changes model output.
        account_text = str(acc.readlines())

    conversation = [
        {
            "role": "user",
            "content": [{"text": user_message + account_text}],
        }
    ]

    try:
        print("running test ", i, " ", file_name)
        response = brt.converse(
            modelId=model_id,
            messages=conversation,
            inferenceConfig={"maxTokens": 2000, "temperature": 0.5, "topP": 0.9},
        )
        response_text = response["output"]["message"]["content"][0]["text"]

    except Exception as e:  # ClientError is an Exception subclass, so this covers it
        # Record the failure and move on, so that one bad call does not abandon
        # the remaining files and error_list actually lists every failure.
        print(f"ERROR: Can't invoke '{model_id}'. Reason: {e}")
        error_list.append(file_name)
        continue

    # str.title() re-cases the name (e.g. 'ACCT100.json' -> 'Acct100.Json');
    # kept so file names stay the same as in previously generated reports.
    s_pth = f"test_report/{file_name.title()}.txt"

    # Written as UTF-8 explicitly because the tally cells read reports back as UTF-8.
    with open(s_pth, "w", encoding="utf-8") as f:
        f.write(response_text)

running test  0   ACCT100.json
running test  1   ACCT10007.json
running test  2   ACCT10008.json
running test  3   ACCT1001.json
running test  4   ACCT10016.json


In [7]:
# Show the file names whose model call failed in the batch loop (empty list = no failures).
error_list

[]

In [5]:
# Show which file closes out the 800-file batch (index 799 is the last one in range(0, 800)).
batch_end = 800
print(testing_list[batch_end - 1])

ACCT13386.json


In [12]:
# Sorted file names of the two labelled folders (suspicious first, then non-suspicious).
sus_list, non_list = (
    sorted(os.listdir(folder)) for folder in ("suspicious", "non_suspicious_400")
)
# Only the suspicious-folder count is reported here.
print(len(sus_list), "number  of testing")

400 number  of testing


In [None]:
# Make sure both output folders exist before any report is written.
os.makedirs("sus_report", exist_ok=True)
os.makedirs("non_report", exist_ok=True)

# File names whose model call failed.
error_list = []


def _write_report(label, i, src_dir, file_name, out_dir):
    """Request a risk report for one account file and save it as text.

    Parameters
    ----------
    label : str
        Tag shown in the progress line ("sus" or "non").
    i : int
        Position of the file in its list; only used for the progress line.
    src_dir : str
        Folder containing the account file.
    file_name : str
        Name of the account file inside ``src_dir``.
    out_dir : str
        Folder the report is written to.

    Returns
    -------
    bool
        True when a report was written. False when the call failed, in which
        case ``file_name`` is appended to the module-level ``error_list``.
    """
    # Read the account file and close it before the slow network call.
    with open(f"{src_dir}/{file_name}", "r", encoding="utf-8") as acc:
        # NOTE(review): this appends the Python list repr of the lines
        # (brackets, quotes, escaped newlines) rather than the raw file text.
        # Left as is because changing the prompt changes model output.
        account_text = str(acc.readlines())

    conversation = [
        {
            "role": "user",
            "content": [{"text": user_message + account_text}],
        }
    ]

    try:
        print(f"running {label} ", i, " ", file_name)
        response = brt.converse(
            modelId=model_id,
            messages=conversation,
            inferenceConfig={"maxTokens": 2000, "temperature": 0.5, "topP": 0.9},
        )
        response_text = response["output"]["message"]["content"][0]["text"]
    except Exception as e:  # ClientError is an Exception subclass, so this covers it
        # Record the failure and keep going, so that one bad call does not
        # abandon the remaining files.
        print(f"ERROR: Can't invoke '{model_id}'. Reason: {e}")
        error_list.append(file_name)
        return False

    # str.title() re-cases the name (e.g. '.json' -> '.Json'); kept so file
    # names stay the same as in previously generated reports.
    s_pth = f"{out_dir}/{file_name.title()}.txt"

    # Written as UTF-8 explicitly because the tally cells read reports back as UTF-8.
    with open(s_pth, "w", encoding="utf-8") as f:
        f.write(response_text)
    return True


# (progress label, input folder, file list, output folder) for each labelled set.
batches = (
    ("sus", "suspicious", sus_list, "sus_report"),
    ("non", "non_suspicious_400", non_list, "non_report"),
)

# Alternate between the two sets, one file from each per step.
for i in range(0, 400):
    for label, src_dir, names, out_dir in batches:
        # Skip a set that has fewer than 400 files instead of raising IndexError.
        if i < len(names):
            _write_report(label, i, src_dir, names[i], out_dir)

running sus  0   suspicious_accounts_ACCT10004.json
running sus  0   non_suspicious_accounts_ACCT10217.json


In [14]:
import re

def extract_conclusion(text):
    """Extract the verdict that follows the word "Conclusion" in a report.

    The match is case-insensitive and tolerates markdown decoration between
    "Conclusion" and the verdict: any mix of whitespace, ``*``, ``:`` and ``[``
    (e.g. ``**Conclusion**: **X**``, ``**Conclusion:** **X**``,
    ``Conclusion: [X]``). The first occurrence in ``text`` is used.

    Parameters
    ----------
    text : str
        Report text to search.

    Returns
    -------
    str or None
        Exactly ``'Alert Account'`` or ``'Non-Alert Account'`` (canonical
        casing, whatever the casing in ``text``), or ``None`` if no verdict
        is found.
    """
    match = re.search(
        r'Conclusion[\s*:\[]*(Non-Alert Account|Alert Account)',
        text,
        re.IGNORECASE,
    )
    if match is None:
        return None
    # Normalise the casing so callers can compare with == against the two
    # canonical labels even when the report used different capitalisation.
    if match.group(1).lower().startswith('non'):
        return 'Non-Alert Account'
    return 'Alert Account'

# Try the parser on a range of verdict-line formats.
test_cases = [
    "7. **Conclusion**: **Alert Account**",
    "7. **Conclusion**: [Alert Account]",
    "7. Conclusion: Alert Account",
    "7. Conclusion: [Non-Alert Account]",
    "7) Conclusion: **Non-Alert Account**",
    "Conclusion: Alert Account",
    "Conclusion: [Non-Alert Account]",
]

# Number the cases from 1 in the printed output.
for case_no, sample in enumerate(test_cases, start=1):
    print(f"Test case {case_no}: {extract_conclusion(sample)}")

Test case 1: Alert Account
Test case 2: Alert Account
Test case 3: Alert Account
Test case 4: Non-Alert Account
Test case 5: Non-Alert Account
Test case 6: Alert Account
Test case 7: Non-Alert Account


In [15]:
import pandas as pd
folder_path = './sus_report'

# One entry per report file: the parsed verdict, or None when nothing was parsed.
results = []
non = 0
sus = 0
# Report files without a recognisable verdict. They are kept out of both
# counters so that a parsing miss is not silently counted as an alert.
unparsed = []

# Read every report in the folder; sorted() makes the order of `results` reproducible.
for filename in sorted(os.listdir(folder_path)):
    # Adjust the accepted extensions to the report file format in use.
    if not filename.endswith(('.txt', '.md', '.json')):
        continue
    file_path = os.path.join(folder_path, filename)
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()
    conclusion = extract_conclusion(content)
    results.append(conclusion)
    # Compare case-insensitively: the parser matches with IGNORECASE.
    verdict = conclusion.lower() if conclusion else None
    if verdict == 'non-alert account':
        non += 1
    elif verdict == 'alert account':
        sus += 1
    else:
        unparsed.append(filename)

print(non)
print(sus)
# Extra line only when something could not be parsed, so the usual output is unchanged.
if unparsed:
    print(f"{len(unparsed)} report(s) had no parsable conclusion: {unparsed}")

96
304


In [16]:
folder_path = './non_report'

# One entry per report file: the parsed verdict, or None when nothing was parsed.
results = []
non = 0
sus = 0
# Report files without a recognisable verdict. They are kept out of both
# counters so that a parsing miss is not silently counted as an alert.
unparsed = []

# Read every report in the folder; sorted() makes the order of `results` reproducible.
for filename in sorted(os.listdir(folder_path)):
    # Adjust the accepted extensions to the report file format in use.
    if not filename.endswith(('.txt', '.md', '.json')):
        continue
    file_path = os.path.join(folder_path, filename)
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()
    conclusion = extract_conclusion(content)
    results.append(conclusion)
    # Compare case-insensitively: the parser matches with IGNORECASE.
    verdict = conclusion.lower() if conclusion else None
    if verdict == 'non-alert account':
        non += 1
    elif verdict == 'alert account':
        sus += 1
    else:
        unparsed.append(filename)

print(non)
print(sus)
# Extra line only when something could not be parsed, so the usual output is unchanged.
if unparsed:
    print(f"{len(unparsed)} report(s) had no parsable conclusion: {unparsed}")

384
16
