# **Agricultural Bank of China NY Branch - Suspicious Activity Report (SAR) Generator Using GenAI**

This Generator will focus on generating the SAR based on the **1st** Alert Narrative. 

-----

## Configuration Steps

In [1]:
#Only run this one time 
#pip install boto3

In [3]:
#Establishing acccess
{
    "Effect": "Allow",
    "Action": [
        "bedrock:InvokeModel",
        "bedrock:InvokeModelWithResponseStream",
        "bedrock:ListFoundationModels",
        "bedrock:GetFoundationModel"
    ],
    "Resource": "*"
}

{'Effect': 'Allow',
 'Action': ['bedrock:InvokeModel',
  'bedrock:InvokeModelWithResponseStream',
  'bedrock:ListFoundationModels',
  'bedrock:GetFoundationModel'],
 'Resource': '*'}

In [4]:
#Detailing all models available for use
!aws bedrock list-foundation-models --region us-east-1

{
    "modelSummaries": [
        {
            "modelArn": "arn:aws:bedrock:us-east-1::foundation-model/amazon.titan-tg1-large",
            "modelId": "amazon.titan-tg1-large",
            "modelName": "Titan Text Large",
            "providerName": "Amazon",
            "inputModalities": [
                "TEXT"
            ],
            "outputModalities": [
                "TEXT"
            ],
            "responseStreamingSupported": true,
            "customizationsSupported": [],
            "inferenceTypesSupported": [
                "ON_DEMAND"
            ],
            "modelLifecycle": {
                "status": "ACTIVE"
            }
        },
        {
            "modelArn": "arn:aws:bedrock:us-east-1::foundation-model/amazon.titan-image-generator-v1:0",
            "modelId": "amazon.titan-image-generator-v1:0",
            "modelName": "Titan Image Generator G1",
            "providerName": "Amazon",
            "inputModalities": [
                "TEXT",
     

### Setting up connection to LLM via Bedrock

In [6]:
import boto3
import json

#Initializing Bedrock client
bedrock = boto3.client("bedrock-runtime", region_name="us-east-1")

#Using the Model Inference ID from AWS Console
inference_id = "us.meta.llama3-3-70b-instruct-v1:0" 

#Inference ID for DeepSeek is us.deepseek.r1-v1:0 
#If they interested in Anthropic or Nova?

In [8]:
#Setting up a function to call the LLM
def call_llm(prompt):
    """Invoke AWS Bedrock using Model Inference ID with the correct request format."""
    try:
        response = bedrock.invoke_model(
            modelId=inference_id,  
            body=json.dumps({
                "prompt": prompt  
            }),
            contentType="application/json",
            accept="application/json"
        )

        #Parsing response
        response_body = json.loads(response['body'].read().decode('utf-8'))
        return response_body 
        
    except json.JSONDecodeError:
        return {"error": "Failed to parse response from Bedrock."}
    except Exception as e:
        return {"error": f"An unexpected error occurred: {str(e)}"}

## Database Creation

In [10]:
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT

#Connecting to the PostgreSQL default database
conn = psycopg2.connect(
    dbname="postgres",
    user="postgres",
    password="yourpassword",
    host="localhost",
    port="5432"
)

#Set ISOLATION_LEVEL_AUTOCOMMIT
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)

cursor = conn.cursor()

#Creating a Database
cursor.execute("CREATE DATABASE aml_database;")

#Closing the connection
cursor.close()
conn.close()

print("Database created successfully!")

DuplicateDatabase: database "aml_database" already exists


### Check whether the database is connected normally: SELECT version()

In [13]:
conn = psycopg2.connect(
    dbname="aml_database",  # Connect to the newly created database
    user="postgres",
    password="yourpassword",
    host="localhost",
    port="5432"
)
cursor = conn.cursor()

#Checking whether the database is connected normally
cursor.execute("SELECT version();")
print(cursor.fetchone())

#Closing the connection
cursor.close()
conn.close()

('PostgreSQL 17.4 on x86_64-windows, compiled by msvc-19.42.34436, 64-bit',)


### Creating the 4 Applicable Tables

In [16]:
# Reconnect to aml_database
conn = psycopg2.connect(
    dbname="aml_database",
    user="postgres",
    password="yourpassword",
    host="localhost",
    port="5432"
)
cursor = conn.cursor()

# SQL Statements
create_tables_query = '''
DROP TABLE IF EXISTS Transaction;
DROP TABLE IF EXISTS Alert;
DROP TABLE IF EXISTS Account;
DROP TABLE IF EXISTS Customer;

CREATE TABLE Customer (
    customer_id VARCHAR(50) PRIMARY KEY,
    customer_type VARCHAR(50) NOT NULL,
    customer_name VARCHAR(100) NOT NULL,
    customer_line_of_business VARCHAR(100),
    customer_expected_products TEXT,
    customer_expected_geographies TEXT,
    customer_incorporation_residence_country VARCHAR(10) NOT NULL
);

CREATE TABLE Account (
    account_id VARCHAR(50) PRIMARY KEY,
    customer_id VARCHAR(50) REFERENCES Customer(customer_id) ON DELETE CASCADE,
    date_of_opening DATE NOT NULL,
    expected_incoming_activity NUMERIC(15,2),
    expected_outgoing_activity NUMERIC(15,2)
);

CREATE TABLE Alert (
    detection_id VARCHAR(50),
    alert_id VARCHAR(50) NOT NULL,
    alert_date DATE NOT NULL,
    customer_id VARCHAR(50) REFERENCES Customer(customer_id) ON DELETE SET NULL,
    rule_name VARCHAR(255) NOT NULL,
    alerted_transactions VARCHAR(50),
    false_positive_true_positive VARCHAR(20),
    alert_narrative TEXT,
    PRIMARY KEY (detection_id, alerted_transactions)  -- Composite Primary Key
);

CREATE TABLE Transaction (
    transaction_id VARCHAR(50) PRIMARY KEY,
    transaction_date DATE NOT NULL,
    transaction_type VARCHAR(100) NOT NULL,
    customer_id VARCHAR(50) REFERENCES Customer(customer_id) ON DELETE SET NULL,
    account_id VARCHAR(50) REFERENCES Account(account_id) ON DELETE CASCADE,
    incoming_outgoing VARCHAR(20) CHECK (incoming_outgoing IN ('Incoming', 'Outgoing')),
    amount NUMERIC(15,2) NOT NULL,
    originator VARCHAR(100),
    originator_country VARCHAR(10),
    beneficiary VARCHAR(100),
    beneficiary_country VARCHAR(10)
);
'''

# Execute SQL statements
cursor.execute(create_tables_query)
conn.commit()

print("All tables created successfully!")

# Close the connection
cursor.close()
conn.close()

All tables created successfully!


### Importing four local CSV Files into database

In [18]:
import pandas as pd

# Reestablishing a connection
conn = psycopg2.connect(
    dbname="aml_database",
    user="postgres",
    password="yourpassword",
    host="localhost",
    port="5432"
)
cursor = conn.cursor()

# ------------------- 1. Import Customer table ------------------- #
print("Starting import of Customer data...")
customer_df = pd.read_csv("/Users/moria/Documents/Mirands/ABC/Database/Customer Table.csv")

# Remove duplicate customer_id within the CSV itself
customer_df = customer_df.drop_duplicates(subset=["Customer ID"])

for _, row in customer_df.iterrows():
    cursor.execute(
        """INSERT INTO Customer (customer_id, customer_type, customer_name, customer_line_of_business, 
                                 customer_expected_products, customer_expected_geographies, customer_incorporation_residence_country) 
           VALUES (%s, %s, %s, %s, %s, %s, %s)
           ON CONFLICT (customer_id) DO NOTHING""",  # <- Skip if exists
        (row["Customer ID"], row["Customer Type"], row["Customer Name"], row["Customer Line of Business"], 
         row["Customer Expected Products"], row["Customer Expected Geographies"], row["Customer Incorporation/Residence Country"])
    )

conn.commit()
print("Customer data import completed!")

# ------------------- 2. Import Account table ------------------- #
print("\nStart importing Account data...")
account_df = pd.read_csv("/Users/moria/Documents/Mirands/ABC/Database/Account Table.csv")
# Remove duplicate account_id
account_df = account_df.drop_duplicates(subset=["Account ID"])

# Import only customer_ids that exist in the Customer table
cursor.execute("SELECT customer_id FROM Customer")
valid_customers = {row[0] for row in cursor.fetchall()}

for _, row in account_df.iterrows():
    if row["Customer ID"] in valid_customers:
        cursor.execute(
            "INSERT INTO Account (account_id, customer_id, date_of_opening, expected_incoming_activity, expected_outgoing_activity) VALUES (%s, %s, DATE '1900-01-01' + INTERVAL '1 day' * %s, %s, %s) ON CONFLICT (account_id) DO NOTHING",
            (row["Account ID"], row["Customer ID"], row["Date of Opening"], row["Expected Incoming Activity"], row["Expected Outgoing Activity"])
        )
conn.commit()
print("Account data import completed!")

# ------------------- 3. Import Alert table ------------------- #
print("\nStart importing Alert data...")
alert_df = pd.read_csv("/Users/moria/Documents/Mirands/ABC/Database/Alert Table.csv")

# Convert Alert Date from Excel Serial Date format to YYYY-MM-DD
alert_df["Alert Date"] = pd.to_datetime(alert_df["Alert Date"], origin="1899-12-30", unit="D")

for _, row in alert_df.iterrows():
    customer_id = row["Customer ID"] if pd.notna(row["Customer ID"]) and row["Customer ID"] in valid_customers else None

    cursor.execute(
        """INSERT INTO Alert (detection_id, alert_id, alert_date, customer_id, rule_name, alerted_transactions, 
                              false_positive_true_positive, alert_narrative) 
           VALUES (%s, %s, %s, %s, %s, %s, %s, %s) 
           ON CONFLICT (detection_id, alerted_transactions) DO NOTHING""",  
        (row["Detection ID"], row["Alert ID"], row["Alert Date"].date(), customer_id, row["Rule Name"],
         row["Alerted Transactions per Detection"], row["False Positive / True Positive"], row["Alert Narrative"])
    )

conn.commit()
print("Alert data import completed!")

# ------------------- 4. Import Transaction table ------------------- #

# Load Transaction CSV
transaction_df = pd.read_csv("/Users/moria/Documents/Mirands/ABC/Database/Transaction Table.csv")

# 🔹 Fix: Convert Excel Serial Date to YYYY-MM-DD
transaction_df["Transaction Date"] = pd.to_datetime(transaction_df["Transaction Date"], origin="1899-12-30", unit="D")

print("\nStart importing transaction data...")

# Remove duplicate transaction_id
transaction_df = transaction_df.drop_duplicates(subset=["Transaction ID"])

# Get a valid customer_id and account_id
cursor.execute("SELECT customer_id FROM Customer")
valid_customers = {row[0] for row in cursor.fetchall()}

cursor.execute("SELECT account_id FROM Account")
valid_accounts = {row[0] for row in cursor.fetchall()}

for _, row in transaction_df.iterrows():
    customer_id = row["Customer ID"] if pd.notna(row["Customer ID"]) and row["Customer ID"] in valid_customers else None
    account_id = row["Account"] if pd.notna(row["Account"]) and row["Account"] in valid_accounts else None

    if account_id is not None:  # account_id is required because it is a foreign key
        # Make sure all None are handled correctly
        transaction_values = (
            row["Transaction ID"],
            row["Transaction Date"],
            row["Transaction Type"],
            customer_id,
            account_id,
            row["Incoming/Outgoing"] if pd.notna(row["Incoming/Outgoing"]) else None,
            row["Amount"] if pd.notna(row["Amount"]) else 0,  # If the numeric column is empty, replace it with 0, or use None as needed
            row["Originator"] if pd.notna(row["Originator"]) else None,
            row["Originator Country"] if pd.notna(row["Originator Country"]) else None,
            row["Beneficiary"] if pd.notna(row["Beneficiary"]) else None,
            row["Beneficiary Country"] if pd.notna(row["Beneficiary Country"]) else None
        )

        cursor.execute(
            "INSERT INTO Transaction (transaction_id, transaction_date, transaction_type, customer_id, account_id, incoming_outgoing, amount, originator, originator_country, beneficiary, beneficiary_country) "
            "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
            "ON CONFLICT (transaction_id) DO NOTHING",
            transaction_values
        )

conn.commit()
print("Transaction data import completed!")

# Close the database connection
cursor.close()
conn.close()
print("\nAll data has been successfully imported into the database!")

Starting import of Customer data...
Customer data import completed!

Start importing Account data...
Account data import completed!

Start importing Alert data...
Alert data import completed!

Start importing transaction data...
Transaction data import completed!

All data has been successfully imported into the database!


### Confirming data was successfully imported

In [21]:
from sqlalchemy import create_engine
#import pandas as pd

# Creating a SQLAlchemy Engine
db_user = "postgres"
db_password = "yourpassword"  # Replace with your password
db_host = "localhost"
db_name = "aml_database"

engine = create_engine(f"postgresql+psycopg2://{db_user}:{db_password}@{db_host}/{db_name}")

In [23]:
# ------------------- 1. View the Customer table ------------------- #
print("\nViewing data from the Customer table...")
query = "SELECT * FROM Customer LIMIT 10;"
df_customer = pd.read_sql(query, engine)
display(df_customer)


Viewing data from the Customer table...


Unnamed: 0,customer_id,customer_type,customer_name,customer_line_of_business,customer_expected_products,customer_expected_geographies,customer_incorporation_residence_country
0,C-1,Individual,John Diamond,Manufacturing,ACH; Wire,US,US
1,C-2,Business,RDF Plumbing,Plumbing Services,ACH; Wire; Cash Deposit; Internal Transfer,US,US
2,C-3,Individual,Kyle Strong,Service Industry,ACH; Wire; Cash Deposit; Internal Transfer,US; HK,HK
3,C-4,Business,JDF Industries,Oil refinement,ACH; Wire,US; SA,US


In [26]:
# ------------------- 2. View the Account table ------------------- #
print("\nViewing data from the Account table...")
query = "SELECT * FROM Account LIMIT 10;"
df_account = pd.read_sql(query, engine)
display(df_account)


Viewing data from the Account table...


Unnamed: 0,account_id,customer_id,date_of_opening,expected_incoming_activity,expected_outgoing_activity
0,ACC-1,C-1,1980-03-03,100000.0,10000.0
1,ACC-2,C-2,2010-01-03,200000.0,200000.0
2,ACC-3,C-2,2024-02-17,200000.0,200000.0
3,ACC-4,C-3,2024-09-03,2000.0,2000.0
4,ACC-5,C-4,2007-07-04,10000000.0,10000000.0


In [28]:
# ------------------- 3. View the Alert table ------------------- #
print("\nViewing data from the Alert table...")
query = "SELECT * FROM Alert LIMIT 10;"
df_alert = pd.read_sql(query, engine)
display(df_alert)


Viewing data from the Alert table...


Unnamed: 0,detection_id,alert_id,alert_date,customer_id,rule_name,alerted_transactions,false_positive_true_positive,alert_narrative
0,A-1-1,A-1,2024-10-01,C-1,Cash Structuring $10k,T-1,True Positive,No reasonable explanation for customer activit...
1,A-1-1,A-1,2024-10-01,C-1,Cash Structuring $10k,T-2,True Positive,No reasonable explanation for customer activit...
2,A-1-1,A-1,2024-10-01,C-1,Cash Structuring $10k,T-3,True Positive,No reasonable explanation for customer activit...
3,A-1-1,A-1,2024-10-01,C-1,Cash Structuring $10k,T-4,True Positive,No reasonable explanation for customer activit...
4,A-1-1,A-1,2024-10-01,C-1,Cash Structuring $10k,T-5,True Positive,No reasonable explanation for customer activit...
5,A-1-1,A-1,2024-10-01,C-1,Cash Structuring $10k,T-6,True Positive,No reasonable explanation for customer activit...
6,A-1-1,A-1,2024-10-01,C-1,Cash Structuring $10k,T-7,True Positive,No reasonable explanation for customer activit...
7,A-1-2,A-1,2024-10-01,C-1,Cash Structuring $10k,T-7,True Positive,No reasonable explanation for customer activit...
8,A-1-2,A-1,2024-10-01,C-1,Cash Structuring $10k,T-8,True Positive,No reasonable explanation for customer activit...
9,A-1-2,A-1,2024-10-01,C-1,Cash Structuring $10k,T-9,True Positive,No reasonable explanation for customer activit...


In [30]:
# ------------------- 4. View the Transaction table ------------------- #
print("\nViewing data of Transaction table...")
query = "SELECT * FROM Transaction LIMIT 10;"
df_transaction = pd.read_sql(query, engine)
display(df_transaction)

print("\nThe data of all tables has been successfully displayed!")


Viewing data of Transaction table...


Unnamed: 0,transaction_id,transaction_date,transaction_type,customer_id,account_id,incoming_outgoing,amount,originator,originator_country,beneficiary,beneficiary_country
0,T-1,2024-09-02,Cash Deposit,C-1,ACC-1,Incoming,9000.0,John Diamond,US,John Diamond,US
1,T-2,2024-09-03,Cash Deposit,C-1,ACC-1,Incoming,9000.0,John Diamond,US,John Diamond,US
2,T-3,2024-09-04,Cash Deposit,C-1,ACC-1,Incoming,9000.0,John Diamond,US,John Diamond,US
3,T-4,2024-09-05,Cash Deposit,C-1,ACC-1,Incoming,9000.0,John Diamond,US,John Diamond,US
4,T-5,2024-09-06,Cash Deposit,C-1,ACC-1,Incoming,9000.0,John Diamond,US,John Diamond,US
5,T-6,2024-09-07,Cash Deposit,C-1,ACC-1,Incoming,9000.0,John Diamond,US,John Diamond,US
6,T-7,2024-09-08,Cash Deposit,C-1,ACC-1,Incoming,9000.0,John Diamond,US,John Diamond,US
7,T-8,2024-09-09,Cash Deposit,C-1,ACC-1,Incoming,9000.0,John Diamond,US,John Diamond,US
8,T-9,2024-09-10,Cash Deposit,C-1,ACC-1,Incoming,9000.0,John Diamond,US,John Diamond,US
9,T-10,2024-09-11,Cash Deposit,C-1,ACC-1,Incoming,9000.0,John Diamond,US,John Diamond,US



The data of all tables has been successfully displayed!


In [32]:
engine.dispose()
print("\nDatabase connection closed.")


Database connection closed.


## Static Part Generation

### Transaction Header - Section 1 

In [36]:
#Defining Alert Narrative File Path
#This can be updated as needed to test out other alerts
alert_file_path = "/Users/moria/Documents/Mirands/ABC/Knowledge/ExampleInput1.txt"

In [38]:
import re

#Function to read alert narrative and extract customer ID from a specific alert narrative
def extract_customer_id(alert_file_path):
    with open(alert_file_path, 'r', encoding='utf-8') as file:
        content = file.read()
    
    #Using regex to find "CIN: C-<number>" format
    match = re.search(r'CIN:\s*(C-\d+)', content)
    if match:
        return match.group(1)  
    else:
        return None

In [40]:
import psycopg2

#Function to fetch SAR data from database
def fetch_sar_data(SAR_reference):
    conn = psycopg2.connect(
        dbname="aml_database",
        user="postgres",
        password="yourpassword",
        host="localhost",
        port="5432"
    )
    cursor = conn.cursor()

    query = """
    WITH filtered_transactions AS (
        SELECT DISTINCT t.transaction_id, t.amount, t.transaction_date
        FROM Transaction t
        WHERE t.transaction_id IN (
            SELECT DISTINCT UNNEST(string_to_array(a.alerted_transactions, ',')) 
            FROM Alert a
            WHERE a.customer_id = %s
            AND a.false_positive_true_positive = 'True Positive'
        )
    )
    SELECT 
        COUNT(transaction_id) AS transaction_count,  
        SUM(amount) AS total_transaction_amount,     
        MIN(transaction_date) AS start_date,         
        MAX(transaction_date) AS end_date            
    FROM filtered_transactions;
    """
    
    cursor.execute(query, (SAR_reference,))
    result = cursor.fetchone()
    
    conn.close()
    
    if result:
        return {
            "transaction_count": result[0],
            "total_transaction_amount": result[1],
            "start_date": result[2],
            "end_date": result[3],
        }
    else:
        return None

#Function to generate transaction header with formatted values
def generate_transaction_header(sar_data):
    
    # Format transaction amount as "$XXX,XXX.XX"
    formatted_amount = f"${sar_data['total_transaction_amount']:,.2f}"

    # Format dates to "MM/DD/YYYY"
    formatted_start_date = sar_data["start_date"].strftime('%m/%d/%Y')
    formatted_end_date = sar_data["end_date"].strftime('%m/%d/%Y')

    text = (
        'LLM Bank New York Branch ("LLM NY") is a wholesale branch of LLM Bank Ltd. ("LLM"), '
        'a commercial bank located in mainland China. LLM NY is filing this Suspicious Activity Report ("SAR") '
        '(Internal SAR Reference Number 2025-0001) to report {transaction_count} transactions totaling '
        '{total_transaction_amount} and sent between {start_date} and {end_date}.'
    )

    return text.format(
        transaction_count=sar_data["transaction_count"],
        total_transaction_amount=formatted_amount,
        start_date=formatted_start_date,
        end_date=formatted_end_date
    )

In [42]:
#Extract customer ID from the alert narrative
customer_id = extract_customer_id(alert_file_path)

if customer_id:
    print(f"Extracted Client ID: {customer_id}\n")  # Adds a blank line after output

    sar_data = fetch_sar_data(customer_id)

    if sar_data:
        transaction_header = generate_transaction_header(sar_data)
        print(transaction_header)
    else:
        print("No data found for the given customer ID.")
else:
    print("Customer ID not found in the alert narrative.")

Extracted Client ID: C-1

LLM Bank New York Branch ("LLM NY") is a wholesale branch of LLM Bank Ltd. ("LLM"), a commercial bank located in mainland China. LLM NY is filing this Suspicious Activity Report ("SAR") (Internal SAR Reference Number 2025-0001) to report 13 transactions totaling $213,000.00 and sent between 09/02/2024 and 09/14/2024.


**For now have manually input the SAR reference but we need a new table for this**

### Legal Contact Information - Section 3 

**NEED TO UPDATE BASED ON SAR REFERENCE TABLE**

In [47]:
#Generating Section 3 - Legal Contact Information
def generate_legal_contact_information2(SAR_ref):
    text = (
        "This SAR pertains to LLM NY Case No. {SAR_ref}. For inquiries, please contact Donald J. Orange, "
        "Chief Compliance Officer and Chief BSA/AML Officer (646-555-5555 or donaldjorange@llmbank.com) or "
        "Alyn Mask, General Counsel (646-666-6666 or alynmask@llmbank.com). All supporting documentation is "
        "maintained by the Financial Crime Compliance Department at LLM NY."
    ).format(SAR_ref=SAR_ref)
    return text

### With database - for later use

In [49]:
import psycopg2

#Function to fetch SAR data from database
def fetch_sar_data(case_number):
    conn = psycopg2.connect(
        dbname="aml_database",
        user="postgres",
        password="yourpassword",
        host="localhost",
        port="5432"
    )
    cursor = conn.cursor()

    #UPDATE TO JUST PULL THE SAR Reference Number - Need to make a new table for this
    query = """
    SELECT (SAR_reference) AS SAR_ref
    FROM ________
    """
    
    cursor.execute(query, (case_number,))
    result = cursor.fetchone()
    
    conn.close()
    
    if result:
        return {
            "SAR_ref": result[0],
        }
    else:
        return None

#Function to generate transaction header with formatted values
def generate_legal_contact_information(sar_data):
    text = (
        "This SAR pertains to LLM NY Case No. {SAR_ref}. For inquiries, please contact Donald J. Orange, "
        "Chief Compliance Officer and Chief BSA/AML Officer (646-555-5555 or donaldjorange@llmbank.com) or "
        "Alyn Mask, General Counsel (646-666-6666 or alynmask@llmbank.com). All supporting documentation is "
        "maintained by the Financial Crime Compliance Department at LLM NY."
    )

    return text.format(
        SAR_ref=sar_data["SAR_ref"],
    )

## RAG - Dynamic Part Generation

### Including the knowledge for the RAG

In [53]:
#Loading RAG Training Documents
def load_text_file(filepath):
    """Reads a text file and returns its content as a string."""
    with open(filepath, 'r', encoding='utf-8') as file:
        return file.read()

#Loading reference documents for RAG training
reference_docs = {
    "ExampleInput2": load_text_file("/Users/moria/Documents/Mirands/ABC/Knowledge/ExampleInput2.txt"),
    "ExampleInput3": load_text_file("/Users/moria/Documents/Mirands/ABC/Knowledge/ExampleInput3.txt"),
    "ExampleSAR2": load_text_file("/Users/moria/Documents/Mirands/ABC/Knowledge/ExampleSAR2.txt"),
    "ExampleSAR3": load_text_file("/Users/moria/Documents/Mirands/ABC/Knowledge/ExampleSAR3.txt"),
    "Introduction": load_text_file("/Users/moria/Documents/Mirands/ABC/Knowledge/Introduction.txt")}

In [55]:
import pandas as pd
import psycopg2
import json

#Detailing the input alert file
example_input1_text = load_text_file("/Users/moria/Documents/Mirands/ABC/Knowledge/ExampleInput1.txt")

#Constructing the Final Input for Bedrock
final_input = f"""
The system has access to the following reference documents, which should be used to understand how an alert narrative transforms into a SAR:

- ExampleInput2.txt → Corresponds to ExampleSAR2.txt
- ExampleInput3.txt → Corresponds to ExampleSAR3.txt
- Introduction.txt → Provides additional context

The current alert narrative that needs to be processed:

{example_input1_text}

{"""
Objective 
Based on the provided input, generate a formal, structured, and human-readable section of a Suspicious Activity Report (SAR) containing the following three sections: 

Suspicious Transactions
-	Detail only the transactions classified as suspicious. 
-	Transactions explicitly determined to be reasonable should be excluded from this section. 
-	Ensure that details are given about all transactions classified as suspicious. 
-	Do not include any information regarding KYC - Know Your Customer details in this section. 

Customer Information (KYC - Know Your Customer Details)
-	Provide key client details such as DOB, SSN, address, occupation, and connections to involved companies. 
-	Also, include any external research findings about the lines of business of companies involved in the transfers. 

Reason for Reporting 
Begin with the exact text: "This transaction is being reported due to the following:" followed by a numbered list (1), (2), (3), … (n) outlining the specific reasons for the report. 

The generated text must follow the formal style and structure observed in the example SARs and should be presented as a continuous narrative without section headings, but with proper paragraph separation. 

Input 
The input will be extracted from a document titled "Alert Narrative." as well as data from a query of all client data for the customer ID detailed in the "Alert Narrative". 

The "Alert Narrative" file and the query data, saved under the variable "db_data" contain the necessary details for constructing the SAR. 

The agent should extract and analyze: 

Case Summary 
The timeframe of the reviewed transactions. 
A general explanation of the suspicious activity. 

Suspicious Transactions 
Extract only transactions flagged as suspicious. 

Each transaction must include: 
Date 
Amount 
Transaction type (e.g., wire transfer, cash deposit) 
Counterparty details (jurisdiction if relevant) 
Account numbers (when applicable) 
A specific description of the involved accounts and transactions, identifying if known, both the origination and application of funds (usually identified in chronological order by date and amount); 
Breaking out larger volumes of financial activity into categories of credits and debits, and by date and amount; 
Transactor and beneficiary information, providing as much detail as possible, including the name and location of any involved domestic and/or international financial institution(s); names, addresses, account numbers, and any other available identifiers of originator and beneficiary transactor(s) and/or third parties or business entities on whose behalf the conductor was acting; the date(s) of the transaction(s); and amount(s) 
Specific details on cash transactions that identify the branch(es) where the transaction(s) occurred, the type of transaction(s), and how the transaction(s) occurred (e.g., night deposit, on-line banking, ATM, etc.); and 

Customer Information (KYC Details) 
Extract client’s full name, DOB, SSN, address, and occupation. 
Identify if there are connections or lack thereof between the client and counterparties. 
Extract and summarize any external research findings regarding the companies involved in transfers.
An explanation of any observed relationships among the transactors (e.g., shared accounts, addresses, employment, known or suspected business relationships and/or frequency of transactions occurring amongst them; appearing together at the institution and/or counter) 

Reasons for Reporting 
Extract and format the specific reasons for the SAR in a numbered list (1), (2), (3), … (n). 

Relationships Between Files 
The following relationships exist between the alert files and their corresponding SAR narratives: 
ExampleInput1.txt → corresponds to ExampleSAR1.txt 
ExampleInput2.txt → corresponds to ExampleSAR2.txt 
ExampleInput3.txt → corresponds to ExampleSAR3.txt 

These files are included in the knowledge base of the RAG, and the agent should use them to understand how an alert transforms into a SAR narrative. 

Expected Output Format 
The SAR narrative should be a continuous, well-structured response with paragraph separation but without section headings. The output should look like the reference documents ExampleSAR. 

Exclusions 
Do NOT include section headings (e.g., "Suspicious Transactions," "Customer Information," or "Reason for Reporting"). 
Do NOT include transactions that were determined to be reasonable.
Do NOT reference supporting documents—only summarize relevant details. 

Stylistic Considerations 
Maintain a formal, structured, and objective tone as seen in example SARs. 
Use precise language without speculation. 
Ensure clarity and readability with smooth transitions and proper paragraph spacing.
"""}
"""

In [57]:
#Calling predefined AWS Bedrock LLM function to run the prompt
dynamic_sar_output = call_llm(final_input)

In [66]:
#Extracting the SAR narrative from the JSON response
if isinstance(dynamic_sar_output, dict) and "generation" in dynamic_sar_output:
    sar_text = dynamic_sar_output["generation"].strip()

    #First removing only the first line if it contains the unwanted phrase
    unwanted_first_line = "Here is the work product resulting from detailed instruction analysis:"
    sar_lines = sar_text.split("\n")  # Convert text into list of lines

    if sar_lines[0].strip() == unwanted_first_line:
        sar_lines.pop(0)  # Remove the first line

    #Joining the remaining lines back into a single text
    sar_text = "\n".join(sar_lines).strip()

    #Removing any extra metadata from the response
    stop_phrases = [
        "Note: the variable \"db_data\" contains the following:",
        "Based on the provided information and the knowledge base, generate a SAR narrative",
        "Using the Alert Narrative and the db_data as a reference",
        "Generate the SAR narrative:",
        "In constructing this SAR narrative, the agent should ensure that details from the alert narrative are accurately captured and translated into the SAR format.",
        "The SAR narrative should maintain objectivity and only include relevant information from the alert narrative and client data.",
        "The final SAR narrative should be constructed based on the provided input, and the agent should verify that all required information is included and presented in a clear and concise manner.",
        "The generated SAR narrative will be reviewed for accuracy, completeness, and adherence to the specified format and stylistic considerations.",
        "It is essential to ensure that the SAR narrative is well-structured, objective, and provides a clear summary of the suspicious activity, customer information, and reasons for reporting.",
        "The SAR narrative should provide sufficient detail to facilitate effective review and analysis by the relevant authorities.",
        "To achieve this, the agent should carefully review the alert narrative and client data, extracting and analyzing the necessary information to construct a comprehensive and accurate SAR narrative.",
        "The final output should be a well-structured and formal SAR narrative that meets the specified requirements and stylistic considerations.",
        "By following these guidelines and ensuring attention to detail, the agent can produce a high-quality SAR narrative that effectively communicates the suspicious activity and supports the reporting requirements.",
    ]
    
    for phrase in stop_phrases:
        if phrase in sar_text:
            sar_text = sar_text.split(phrase)[0]  # Keep only the relevant SAR portion
    
    #Displaying the cleaned SAR narrative
    print("Generated SAR Dynamic Narrative Section:\n")
    print(sar_text.strip())

else:
    print("Error: SAR output could not be processed correctly.")


Generated SAR Dynamic Narrative Section:

The 12 cash deposits made by the customer, John Diamond, between 9/2/2024 and 9/13/2024, totaling $108,000.00, are considered suspicious due to their structured nature and the lack of a legitimate source of funds. The deposits were made in consecutive days, with each deposit amounting to $9,000.00. Additionally, a wire transfer of $105,000.00 was sent to ACME Investment Management in the Cayman Islands on 9/14/2024, which raises concerns due to the customer's lack of apparent connection to the company or the jurisdiction. The transactions involved the customer's account, with the cash deposits being made at various branches and the wire transfer being initiated online.

John Diamond, born on an unknown date, with an SSN of 123-45-6789, resides at an unknown address and is employed in the manufacturing industry. External research findings indicate that ACME Investment Management is a financial services company, but there is no apparent connectio

### Database option - For later

In [None]:
# ----------------- Step 3: Query Database for CIN-Related Data -----------------
def fetch_cin_data(cin):
    """Fetches all database records related to a specific CIN."""
    conn = psycopg2.connect(
        dbname="aml_database",
        user="postgres",
        password="yourpassword",
        host="localhost",
        port="5432"
    )
    cursor = conn.cursor()

    query = """
    SELECT 
        c.customer_id, c.customer_name, c.customer_line_of_business, 
        c.customer_expected_products, c.customer_expected_geographies, 
        a.account_id, a.date_of_opening, a.expected_incoming_activity, 
        a.expected_outgoing_activity, t.transaction_id, t.transaction_date, 
        t.transaction_type, t.amount, t.originator, t.originator_country, 
        t.beneficiary, t.beneficiary_country
    FROM Customer c
    LEFT JOIN Account a ON c.customer_id = a.customer_id
    LEFT JOIN Transaction t ON a.account_id = t.account_id
    WHERE c.customer_id = %s;
    """

    cursor.execute(query, (cin,))
    result = cursor.fetchall()
    conn.close()

    # Format results into a readable text format
    if not result:
        return "No related CIN data found."
    
    formatted_results = []
    for row in result:
        formatted_results.append(f"""
        Customer Name: {row[1]}
        Line of Business: {row[2]}
        Expected Products: {row[3]}
        Expected Geographies: {row[4]}
        Account ID: {row[5]}
        Date of Opening: {row[6]}
        Expected Incoming: {row[7]}
        Expected Outgoing: {row[8]}
        Transaction ID: {row[9]}
        Transaction Date: {row[10]}
        Type: {row[11]}
        Amount: ${row[12]:,.2f}
        Originator: {row[13]} ({row[14]})
        Beneficiary: {row[15]} ({row[16]})
        """)
    
    return "\n".join(formatted_results)

# Fetch CIN-related data
cin_data = fetch_cin_data(cin)
print("\nDatabase Query Results:\n", cin_data)

# ----------------- Step 4: Construct the Final Input for Bedrock -----------------
final_input = f"""
{example_input1_text}

Additional Data from Transaction Database:
{cin_data}

## SAR Output

### Combining all three ouputs into one

In [63]:
#Printing Section 1 
transaction_header = generate_transaction_header(sar_data)
print(transaction_header)
print()

#Printing Section 2
print(sar_text.strip())

#Printing Section 3
legal_contact_information = generate_legal_contact_information2("2025-0001")
print()
print(legal_contact_information)

LLM Bank New York Branch ("LLM NY") is a wholesale branch of LLM Bank Ltd. ("LLM"), a commercial bank located in mainland China. LLM NY is filing this Suspicious Activity Report ("SAR") (Internal SAR Reference Number 2025-0001) to report 13 transactions totaling $213,000.00 and sent between 09/02/2024 and 09/14/2024.

The 12 cash deposits made by the customer, John Diamond, between 9/2/2024 and 9/13/2024, totaling $108,000.00, are considered suspicious due to their structured nature and the lack of a legitimate source of funds. The deposits were made in consecutive days, with each deposit amounting to $9,000.00. Additionally, a wire transfer of $105,000.00 was sent to ACME Investment Management in the Cayman Islands on 9/14/2024, which raises concerns due to the customer's lack of apparent connection to the company or the jurisdiction. The transactions involved the customer's account, with the cash deposits being made at various branches and the wire transfer being initiated online.

J

# To Do:

1. Modify the static formatting to ensure that the number of transactions if below 10 are represented in writing and the number in parenthesis.
2. Review what we want to do with the case number - do we make a table? What is the local process for this? 
3. Make a separate entity for individuals and businesses?
4. Attach database to dynamic part.