# Approach 1 - Using gpt-4o-mini (with gpt-4o for narrative extraction)

In [29]:
import os
from dotenv import load_dotenv
load_dotenv()
# Read the key under "OPENAI_API_KEY", the same variable name the autogen
# config in this notebook maps both models to; "OPEN_API_KEY" was a typo.
api_key = os.getenv("OPENAI_API_KEY")

In [30]:
# Model selections shared by the agents: a small model for most agents and a
# larger one for the agent that needs it.
llm_config_mini = dict(model="gpt-4o-mini")
llm_config_large = dict(model="gpt-4o")

In [31]:
import autogen
# Build an autogen config list from the project-level .env file.
# Both model names are mapped to the OPENAI_API_KEY variable, and filter_dict
# restricts the resulting list to the gpt-4o-mini entry.
# NOTE(review): the agents in this section are constructed with
# llm_config_mini / llm_config_large rather than config_list; confirm whether
# config_list is still needed.
config_list = autogen.config_list_from_dotenv(
    dotenv_file_path="../.env",
    model_api_key_map={
        "gpt-4o": "OPENAI_API_KEY", 
        "gpt-4o-mini": "OPENAI_API_KEY"
    },
    filter_dict={"model":["gpt-4o-mini"]}
)

## Phase 1 - Extract Narratives and other relevant information

### Define Agents

In [32]:
from autogen import ConversableAgent

In [33]:
# Agent that extracts entities, account IDs, and the account->FI / account->customer
# mappings from a SAR narrative. The system message is a four-step instruction
# followed by one worked example. The example's dictionaries are kept
# syntactically valid and use the same keys as the instructions
# ("Financial_Institutions"), since the model tends to imitate the example.
entity_extraction_agent = ConversableAgent(
    name="Entity_Extraction_Agent",
    system_message='''You are an Entity Extraction Agent, an AI Assistant tasked with extracting entitites from SAR Narratives.
      
      Using the SAR narrative shared by the user, do the following:

      Step 1) Extract the main entities described. Entities could be Individuals or Organizations or Financial Institutions. Return this in the form of a dictionary called Entities
              Entities = { "Individuals" : <List of Individuals> , "Organizations": <List of Organizations>, "Financial_Institutions": <List of Financial Instituions >} 

      Step 2) Extract the Account IDs described. Account IDs could be numeric or alphanumeric.If an Account ID is missing, create a Dummy account ID with the prefix "Dummy_Acct_".
              e.g. Dummy_Acct_1, Dummy_Acct_2 and so on. Put this in a list called Account_IDs

      Step 3) Map the extracted Account IDs to Financial institutions where the account is held. If the name of the institution is not specified, create a Dummy institution name e.g Dummy_Bank_1, Dummy_Bank_2 etc.
              Return this in the form of a dictionary  called Acct_to_FI E.g. {<Account_ID> : <Financial Instituion>}

      Step 4) Map the extracted account IDs to Individuals or Organizations who own the account. Return this in the form of a dictionary called Acct_to_Cust E.g. {<Account_ID>: <Entity_Name>}

      

      An example is given below.

      Narrative:

      John deposited $5000 in Cash into Acct #345723 at Bank of America. John sends $3000 to Jill's account at  Chase. Jill deposited $3000 in Cash into her Acct at Chase Bank.John and Jill own a business Acme Inc that has a  Business account, Account #98765 . John sends $2000 from Acct #345723 to Account #98765. Jill sends $1000 from her Acct at Chase Bank to Acct #98765.

      Step 1) Extract the main entities described. Identify the Individuals , Organizations and Financial Instituions mentioned. 
      Individuals and Organizations are entities that usually conduct transactions through Financial institutions. Here, John and Jill are Individuals. Acme Inc is an Organization.
      Financial institutions are institutions such as banks or insurance companies that offer financial services to customers. Hence Bank of America and Chase Bank are Financial institutions.

      Record this as a dictionary named Entities.

      Entities = {"Individuals": ["John", "Jill"],"Organizations":["Acme Inc"],"Financial_Institutions":["Chase Bank","Bank of America"]}

      Step 2) Extract the account IDs described. Accounts mentioned in the narrative are #345723,#98765. There is also an account at Chase Bank that is missing. Assign a dummy account ID ("Dummy_Acct_1")
      to this account.

      Record this in a list named Account_IDs
      Account_IDs = ["345723","98765","Dummy_Acct_1"]

      Step 3) Map the Account IDs extracted in Step 2 to Financial Instititions where they are held. Account ID #345723 is held at Bank of America. Account ID Dummy_Acct_1 is held at Chase Bank.
      Account ID #98765 is referenced but the Financial instituion where it is held is not specified. So it can be assumed to be held at "Dummy_Bank_1".

      Multiple Accounts could be held at the same Financial Institution.

      Record this in a dictionary named Acct_to_FI.
      Acct_to_FI = {"345723":"Bank of America","Dummy_Acct_1":"Chase Bank", "98765":"Dummy_Bank_1" }

      
      Step 4) Map the extracted Account IDs extracted in Step 2 to Individuals or Organizations that are customers of the Financial Institutions. 
      Account ID #345723 belongs to John. 
      Account ID #98765 belongs to Acme Inc. 
      Account ID Dummy_Acct_1 belongs to Jill.
      

      Record this in a dictionary named Acct_to_Cust

      Acct_to_Cust = {"345723": "John","Dummy_Acct_1" : "Jill","98765": "Acme Inc"}   
    
    ''',
    llm_config=llm_config_mini,
    code_execution_config=False,
    human_input_mode="NEVER",
)





In [34]:
# Agent that assigns per-FI customer IDs (CUST_001, ...) and maps each account
# to its customer ID. The system message lists the inputs, four steps, and one
# worked example delimited by ----. The example dictionaries are kept
# syntactically valid and use the same dummy-bank name ("Dummy_Bank_1")
# throughout, and the prompt contains no stray markup tokens.
entity_resolution_agent = ConversableAgent(
    name="entity_resolution_agent",
    system_message='''
    You are an AI Agent tasked with assigning Customer IDs to entities identified in a SAR and mapping Account IDs to these customer IDs 
      You will have the following four pieces of information.

      1) List of Account IDs given by the list Account_IDs.
      2) The mapping  from Account IDs to Entities (Individuals, Organizations and Financial Instituions), given by the dictionary Acct_to_Cust.
      3) The mapping from Accts to Financial Institutions given by the dictionary Acct_to_FI. 
      4) SAR narrative 

      Using this information, do the following:

      Step 1)  For each Account_ID in in the list , identify the owner of the account from Acct_to_Cust

      Step 2)  For each such account, identify the FI where the account is held from Acct_to_FI.Multiple Account IDs can be held at the same FI. If the Financial institution for certain accounts is not specified, a dummy financial institution e.g. "Dummy_Bank_1" should be used for them.

      Step 3) For each FI, identify customers that hold accounts at tht FI

      Step 4) Assign  customer ID (E.g. CUST_001, CUST_002) for each unique customer at an FI and map the Account ID to the Customer IDs
      If multiple accounts are owned by the same Individual or Organization at the same FI, map them to the same Customer ID

      Return this final mapping between Customer IDs and Account IDs in the form of a Python Dictionary

      An example is given below, demarcated by the delimiter ----.

      ----

      1) Account_IDs = ["345723","98765","12345","99999","Dummy_Acct_1"]
      
      2) Acct_to_Cust =  {"345723": "John", "99999":"John", "12345":"Jill", "Dummy_Acct_1" : "Jill","98765": "Acme Inc"}

      3) Acct_to_FI =  {"345723":"Bank of America","99999":"Bank of America","12345":"Bank of America","Dummy_Acct_1":"Chase Bank", "98765":"Dummy_Bank_1" }

      4) Narrative:
      John deposited $5000 each in Cash into Acct #345723 and Acct #99999, both of which are at Bank of America. John sends $4000  from Acct #345723 to Jill's account at  Chase. Jill deposited $3000 in Cash into her Acct at Chase Bank and wired $2000 to her Acct #12345 at Bank of America .John and Jill own a business Acme Inc that has a  Business account, Account #98765 . John sends $2000 from Acct #99999 to Account #98765. Jill sends $1000 from her Acct at Chase Bank to Acct #98765.

  
      Step 1) For each Account ID in the list Account_IDs,  identify the owner of the account from Acct_to_Cust. 

      Account #345723 is owned by John. 
      Account #99999 is owned by John
      Account #98765 is owned by Acme Inc.
      Account #12345 is owned by Jill.
      Account #Dummy_Acct_1 is owned by Jill.

      Step 2) For each Account ID, identify the FI where it is held from Acct_to_FI

      Account #345723 is held at Bank of America.
      Account #99999 is held at Bank of America.
      Account #12345 is held at Bank of America
      Account #98765 is held at Dummy_Bank_1.
      Account #Dummy_Acct_1 is held at Chase Bank.
      
      Step 3) For each FI, identify customers that hold accounts at that FI

      There are a total of three distinct FIs in Acct_to_FI: Bank of America, Dummy_Bank_1, Chase Bank

      From Acct_to_FI and Acct_to_Cust,
      Bank of America has three accounts - #345723, #99999 and #12345. 
      #345723 and #99999 is owned by John and #12345 is owned by Jill. So customers at Bank of America are John and Jill.

      Chase Bank has one account - #Dummy_Acct_1 which is owned by Jill.  So customers at Chase Bank is only Jill

      Dummy_Bank_1 has one account -  #98765 owned by Acme Inc.  So customers at Dummy_Bank_1 is only Acme Inc

      Step 4) Assign  customer ID (E.g. CUST_001, CUST_002) for each unique customer at an FI and map the Account IDs at that FI to the Customer IDs

      At Bank of America, there are two unique customers - John and Jill,  
      So John can be assiged the customer ID CUST_001 and Jill can be assiged the customer ID CUST_002.

      The Account #345723 owned by John can be mapped to CUST_001.
      The Account #99999 owned by John can also be mapped to CUST_001
      The Account #12345 owned by Jill can be mapped to CUST_002

      "Bank of America": {"345723":"CUST_001","99999":"CUST_001","12345":"CUST_002"}

      At Chase Bank , there is only one customer Jill. The account Dummy_Acct_1 at Chase Bank is owned by Jill. 
      So Jill can be assiged the customer ID CUST_003.

      The account Dummy_Acct_1 can be mapped to CUST_003

      "Chase Bank": {"Dummy_Acct_1":"CUST_003"}

      At Dummy_Bank_1, there is only one customer Acme Inc. The account #98765 at Dummy_Bank_1 is owned by Acme Inc.
      So Acme Inc can be assigned the customer ID CUST_004.

      The account #98765 can be mapped to CUST_004
      
      "Dummy_Bank_1": {"98765":"CUST_004"}

      Return this information  as Python Dictionary. Return only the final Python dictionary. Do not include any extra commentary, code fences, or text outside the dictionary.

      {"Bank of America": {"345723":"CUST_001","99999":"CUST_001","12345":"CUST_002"}, 
       "Chase Bank": {"Dummy_Acct_1":"CUST_003"},
       "Dummy_Bank_1": {"98765":"CUST_004"} }
      
      ----


    ''',
    llm_config=llm_config_mini,
    code_execution_config=False,
    human_input_mode="NEVER",
)





In [35]:
# Agent that produces, for every account ID, the part of the SAR narrative that
# describes that account's activity. It runs on the larger model.
# The worked example follows the prompt's own Step 4 rule: each per-account
# excerpt includes both the inflows and the outflows mentioned in the sample
# narrative (e.g. both transfers into #98765, and the cash deposit into #99999).
narrative_extraction_agent = ConversableAgent(
    name="narrative_extraction_agent",
    system_message='''
    You are an AI Agent tasked with extracting or summarizing  parts of a narrative that describe activity conducted by certain accounts.

      You will have the following four pieces of information.

      1)  SAR narrative

      2)  The mapping between Customers referenced in the narrative to Account IDs which is given by the dictionary Acct_to_Cust

      3)  The mapping between Financial Institutions referenced in the narrative to Account IDs which is given by the dictionary Acct_to_FI

      4)  List of Account IDs given by a list Account_IDs
      
      Using this information, Do the following:

      Step 1) Identify an Account ID from the list of Account IDs.Note that account IDs starting with the prefix "Dummy_" are placeholders for account IDs that have not been explicitly described in the narrative.

      Step 2)  From the dictionary Acct_to_Cust, Identify the customer who owns the account.

      Step 3) From the dictionary Acct_to_FI, Identify the FI where the account is held. Note that FIs starting with the prefix "Dummy_" are placeholders for FIs that have not been explicitly described in the narrative.

      Step 4) Extract or summarize the SAR to produce a narrative describing transactions involving this account. If the narrative describes withdrawals from an account, ensure the deposits and beneficiary accounts are also captured where possible. If the narrative describes deposits into an account, ensure the originators or source of funds are also captured where possible.

      An example is given below, demarcated by the delimiter ----.

      ----

      1) SAR Narrative:
          John deposited $5000 each in Cash into Acct #345723 and Acct #99999, both of which are at Bank of America. John sends $4000  from Acct #345723 to Jill's account at Chase Bank. Jill deposited $3000 in Cash into her Acct at Chase Bank and  then wired $2000 from that account to her Acct #12345 at Bank of America .John and Jill own a business Acme Inc that has a  Business account, Account #98765 . John sends $2000 from Acct #99999 to Account #98765. Jill sends $1000 from her Acct at Chase Bank to Acct #98765 by Wire.
      
      2) Acct_to_Cust =  {"345723": "John", "99999":"John", "12345":"Jill", "Dummy_Acct_1" : "Jill","98765": "Acme Inc"}

      3) Acct_to_FI = {"345723":"Bank of America","99999":"Bank of America","12345":"Bank of America","Dummy_Acct_1":"Chase Bank", "98765":"Dummy_Bank_1" }

      4) Account_IDs = ["345723","98765","12345","99999","Dummy_Acct_1"]

      
      Step 1) The first account ID  is 345723

      Step 2) This Account ID is owned by John.

      Step 3) This Account ID is held at Bank of America

      Step 4) Extract or Summarize  the SAR to produce a narrative that references  Account 345723 capturing both deposits and withdrawals.

      {"345723": "John deposited $5000 in Cash into Acct #345723 at Bank of America. John sends $4000  from Acct #345723 to Jill's account at  Chase." }

      
      Now repeat the process for the second account ID

      Step 1) The second  account ID  is 98765

      Step 2) This Account ID is owned by Acme, Inc.

      Step 3) The Account ID is held at Dummy_Bank_1. Given this FI starts with Dummy_, there won't be an expplicit reference to this FI in the SAR narrative.

      Step 4) Extract or Summarize  the SAR to produce a narrative that references the Account 98765 capturing both deposits and withdrawals.

      {"98765": "John sends $2000 from Acct #99999 to Account #98765. Jill sends $1000 from her Acct at Chase Bank to Acct #98765 by Wire." }

      Now repeat the process for the third account ID

      Step 1) The third account ID  is 12345

      Step 2) This Account ID is owned by Jill.

      Step 3) This account is held at Bank of America

      Step 4) Extract the part of the narrative that references the Account 12345 capturing both deposits and withdrawals.

      {"12345": "Jill deposited $3000 in Cash into her Acct at Chase Bank and then wired $2000 to her Acct #12345 at Bank of America" }

      Now repeat the process for the fourth account ID

      Step 1) The fourth account ID  is 99999

      Step 2) This Account ID is owned by John.

      Step 3) This Account ID is held at Bank of America

      Step 4) Extract or Summarize  the SAR to produce a narrative  that references the Account 99999 capturing both deposits and withdrawals.

      {"99999": "John deposited $5000 in Cash into Acct #99999 at Bank of America. John sends $2000 from Acct #99999 to Account #98765" }


      Now repeat the process for the fifth Account ID.

      Step 1) The fifth account ID  is Dummy_Acct_1. Given the accunt ID starts with Dummy_ , there won't be direct references to this account ID in the narrative. 

      Step 2) This Account ID is owned by Jill. Given the Account ID is dummy, pay attention to transactions made by Jill,the owner of the dummy account.

      Step 3) The Account ID is held at Chase Bank. Pay attention to transactions involvinng Chase Bank

      Step 4) Extract or Summarize  the SAR to produce a narrative that references transactions made by Jill from the Dummy_Acct_1. Include both deposits and withdrawals where possible.

      {"Dummy_Acct_1": "John sends $4000  from Acct #345723 to Jill's account at  Chase Bank. Jill deposited $3000 in Cash into her Acct at Chase Bank and  then  wired $2000 from that account to her Acct #12345 at Bank of America. Jill sends $1000 from her Acct at Chase Bank to Acct #98765 by Wire." }

      Consolidate narratives for all accounts. Return this information  as Python Dictionary. Return only the final Python dictionary. Do not include any extra commentary, code fences, or text outside the dictionary.

      Narratives = {"345723": "John deposited $5000 in Cash into Acct #345723 at Bank of America. John sends $4000  from Acct #345723 to Jill's account at  Chase.",
                   "98765": "John sends $2000 from Acct #99999 to Account #98765. Jill sends $1000 from her Acct at Chase Bank to Acct #98765 by Wire.",
                   "12345": "Jill deposited $3000 in Cash into her Acct at Chase Bank and then wired $2000 to her Acct #12345 at Bank of America",
                   "99999": "John deposited $5000 in Cash into Acct #99999 at Bank of America. John sends $2000 from Acct #99999 to Account #98765" ,
                   "Dummy_Acct_1": "John sends $4000  from Acct #345723 to Jill's account at  Chase Bank. Jill deposited $3000 in Cash into her Acct at Chase Bank and  then  wired $2000 from that account to her Acct #12345 at Bank of America. Jill sends $1000 from her Acct at Chase Bank to Acct #98765 by Wire."
                        }


      ----



    ''',
    llm_config=llm_config_large,
    code_execution_config=False,
    human_input_mode="NEVER",
)





In [36]:
# Initiating agent: it holds the SAR and hands it to the extraction agents.
# It never asks for human input and uses the small model.
sar_agent = ConversableAgent(
    human_input_mode="NEVER",
    llm_config=llm_config_mini,
    system_message=(
        "You are assigned a SAR from which to extract transactions. "
        "You do this by sharing this SAR with other assistant agents who "
        "work on the SAR and extract useful information.      "
    ),
    name="SAR_Agent",
)





Read in the training SAR files

In [42]:
import os

# Load every training SAR narrative (files named *train*.txt) into a list.
train_sars = []
data_dir = "../data/input"

# os.listdir() returns names in arbitrary order, so sort them: otherwise
# train_sars[0] could be a different file from one run or machine to the next.
for filename in sorted(os.listdir(data_dir)):
    if 'train' in filename and filename.endswith('.txt'):
        file_path = os.path.join(data_dir, filename)
        # Decode explicitly instead of relying on the platform default encoding.
        # NOTE(review): assumes the SAR files are UTF-8 encoded - confirm.
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()
            train_sars.append(content)

In [44]:
len(train_sars)

4

In [45]:
# Use the first loaded training SAR as the narrative sent to each agent.
message = train_sars[0]

In [52]:
# Summary prompt for the entity extraction chat: asks for a single Python
# dictionary with the keys Entities, Account_IDs, Acct_to_FI and Acct_to_Cust.
# The template itself is kept well-formed (every key quoted on both sides)
# because the reply is later parsed as a dictionary literal.
ee_agent_summary_prompt = '''
 Return the extracted entity information including Dummy Account IDs and Dummy Entities as a Python dictionary only.Do not include any extra commentary, code fences, or text outside the dictionary.
        {
          "Entities": {
            "Individuals": <List of Individuals>,
            "Organizations": <List of Organizations>,
            "Financial_Institutions": <List of Financial Institutions>
                      },
          "Account_IDs": <List of Account IDs>,
          "Acct_to_FI": {
            "<Acct_ID_1>" : "<Financial_Institution_1>",
            "<Acct_ID_2>" : "<Financial_Institution_2>",
            "<Acct_ID_3>" : "<Financial_Institution_3>"
          },
          "Acct_to_Cust": {
            "<Acct_ID_1>": "<Individual_1>",
            "<Acct_ID_2>": "<Individual_2>",
            "<Acct_ID_3>": "<Organization_1>"
          }
        }
'''

In [55]:
# Summary prompt for the entity resolution chat: asks for one Python dictionary
# keyed by "FI_to_Acct_to_Cust", mapping each FI to {account: customer ID}.
# The two FI entries in the template are separated by a comma so the template
# has the shape of a valid dictionary.
er_agent_summary_prompt = '''
                    Return the extracted information as a Python dictionary only. Do not include any extra commentary, code fences, or text outside the dictionary.
                  { "FI_to_Acct_to_Cust" :{<Financial Institution 1>: { <ACCT_1> : <CUST_ID_1> , <ACCT_2> : <CUST_ID_2>, <ACCT_3> : <CUST_ID_3> },
                                           <Financial Institution 2>: { <ACCT_4> : <CUST_ID_4> , <ACCT_5> : <CUST_ID_5>} } }
                          '''

In [56]:
# Summary prompt for the narrative extraction chat. The reply is parsed as a
# dictionary literal and indexed with the "Narrative" key further down, so the
# template shows a complete, braced dictionary with a double-quoted key and
# carries the same "no commentary / no code fences" instruction as the other
# summary prompts.
ne_agent_summary_prompt = ''' Return the extracted  excerpts from the SAR narrative as a Python dictionary only. Do not include any extra commentary, code fences, or text outside the dictionary.
                                  { "Narrative": {<Acct_1>: <Excerpt relevant to Acct_1>,
                                                  <Acct_2>: <Excerpt relevant to Acct_2>,
                                                  <Acct_3>: <Excerpt relevant to Acct_3> } }
                            '''

Combine to create a sequential chat

In [57]:
# Send the same SAR narrative to each extraction agent in turn. Every chat is
# limited to one turn and is summarized with "reflection_with_llm" using the
# summary prompt that belongs to that agent.
chat_results = sar_agent.initiate_chats(
    [
        {
            "recipient": recipient,
            "message": message,
            "max_turns": 1,
            "summary_method": "reflection_with_llm",
            "summary_args": {"summary_prompt": summary_prompt},
        }
        for recipient, summary_prompt in (
            (entity_extraction_agent, ee_agent_summary_prompt),
            (entity_resolution_agent, er_agent_summary_prompt),
            (narrative_extraction_agent, ne_agent_summary_prompt),
        )
    ]
)

[34m
********************************************************************************[0m
[34mStarting a new chat....[0m
[34m
********************************************************************************[0m
[33mSAR_Agent[0m (to Entity_Extraction_Agent):

Investigation case number: A5678910. The customer, a grocery store and  its owner, are suspected of intentionally structuring cash deposits to  circumvent federal reporting requirements.  The customer is also  engaged in activity indicative of an informal value transfer operation: deposits of bulk cash, third party out of state personal checks and money 
orders, and engaging in aggregate wire transfers to Dubai, UAE.  The type and volume of activity observed is non-commensurate with the customer’s expected business volume and deviates from the normal volume of similar types of businesses located in the same area as the customer.  Investigative activities are continuing. Our bank has elected to 
directly contact law enforcemen

In [58]:
# Show each chat's summary, followed by blank lines as a visual separator.
for chat_result in chat_results:
    print(chat_result.summary, "\n", sep="\n")

{
  "Entities": {
    "Individuals": ["John Doe"],
    "Organizations": ["Acme, Inc.", "Kulkutta Building Supply Company"],
    "Financial_Institutions": ["Bank of Anan"]
  },
  "Account_IDs": ["12345-6789", "23456-7891", "3489728"],
  "Acct_to_FI": {
    "12345-6789": "Dummy_Bank_1",
    "23456-7891": "Dummy_Bank_1",
    "3489728": "Bank of Anan"
  },
  "Acct_to_Cust": {
    "12345-6789": "John Doe",
    "23456-7891": "Acme, Inc.",
    "3489728": "Kulkutta Building Supply Company"
  }
}


{
  "FI_to_Acct_to_Cust": {
    "Dummy_Bank_1": {
      "12345-6789": "CUST_001",
      "23456-7891": "CUST_002"
    },
    "Bank of Anan": {
      "3489728": "CUST_003"
    }
  }
}


{
  "Narrative": {
    "12345-6789": "John Doe opened a personal checking account, #12345-6789, in March of 1994. Between January 2 and March 17, 2003, 13 deposits totaling approximately $50,000 were posted to the account, consisting of cash, checks, and money orders, with amounts ranging from $1,500 to $9,500. Third-pa

Extract results for consumption by trxn generation model

In [61]:
# Pull the summary text of the first three chats (entity extraction,
# entity resolution, narrative extraction) into separate variables.
results0, results1, results2 = (chat_results[i].summary for i in range(3))

In [66]:
import ast
import json


def _parse_llm_dict(text):
    """Parse a dictionary returned as text by an LLM.

    The summaries are model-generated, so they may be wrapped in a Markdown
    code fence or use JSON literals (true / false / null) that
    ``ast.literal_eval`` rejects. The text is first stripped of an enclosing
    fence, then parsed as a Python literal, falling back to JSON.

    Args:
        text: The raw summary string.

    Returns:
        The parsed Python object (a dict for the prompts used here).

    Raises:
        ValueError: If the text is neither a Python literal nor valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    cleaned = text.strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line (```, ```json, ```python, ...).
        first_newline = cleaned.find("\n")
        cleaned = cleaned[first_newline + 1:] if first_newline != -1 else cleaned[3:]
        cleaned = cleaned.rstrip()
        # Drop the closing fence if present.
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        cleaned = cleaned.strip()
    try:
        return ast.literal_eval(cleaned)
    except (ValueError, SyntaxError):
        return json.loads(cleaned)


results0_dict = _parse_llm_dict(results0)
results1_dict = _parse_llm_dict(results1)
results2_dict = _parse_llm_dict(results2)

In [68]:
results0_dict, results1_dict,results2_dict

({'Entities': {'Individuals': ['John Doe'],
   'Organizations': ['Acme, Inc.', 'Kulkutta Building Supply Company'],
   'Financial_Institutions': ['Bank of Anan']},
  'Account_IDs': ['12345-6789', '23456-7891', '3489728'],
  'Acct_to_FI': {'12345-6789': 'Dummy_Bank_1',
   '23456-7891': 'Dummy_Bank_1',
   '3489728': 'Bank of Anan'},
  'Acct_to_Cust': {'12345-6789': 'John Doe',
   '23456-7891': 'Acme, Inc.',
   '3489728': 'Kulkutta Building Supply Company'}},
 {'FI_to_Acct_to_Cust': {'Dummy_Bank_1': {'12345-6789': 'CUST_001',
    '23456-7891': 'CUST_002'},
   'Bank of Anan': {'3489728': 'CUST_003'}}},
 {'Narrative': {'12345-6789': 'John Doe opened a personal checking account, #12345-6789, in March of 1994. Between January 2 and March 17, 2003, 13 deposits totaling approximately $50,000 were posted to the account, consisting of cash, checks, and money orders, with amounts ranging from $1,500 to $9,500. Third-party out of state checks and money orders were also deposited. Between January 17

In [91]:
import copy
# Shallow copy: a new top-level object is created, but nested values (such as
# the "Narrative" mapping) are still shared with results2_dict.
results2_dict_ = copy.copy(results2_dict)

Retain only narrative for account 3489728 for simplicity.

In [92]:
# Keep only the narrative entries whose account ID is listed in keys_to_keep.
keys_to_keep = {'3489728'}
results2_dict_new = {k: v for k, v in results2_dict["Narrative"].items() if k in keys_to_keep}
print(results2_dict_new)  # filtered {account_id: narrative} mapping

{'3489728': "Nine wire transfers totaling $225,000 were sent from John Doe's personal account #12345-6789 at Dummy_Bank_1 to Kulkutta Building Supply Company, account #3489728 at the Bank of Anan in Dubai, UAE, between January 17, 2003, and March 21, 2003."}


In [93]:
# Point the copy's "Narrative" key at the filtered mapping. Only the copy's
# key is rebound here; results2_dict is not assigned to in this cell.
results2_dict_["Narrative"] = results2_dict_new
results2_dict_

{'Narrative': {'3489728': "Nine wire transfers totaling $225,000 were sent from John Doe's personal account #12345-6789 at Dummy_Bank_1 to Kulkutta Building Supply Company, account #3489728 at the Bank of Anan in Dubai, UAE, between January 17, 2003, and March 21, 2003."}}

In [94]:
# Merge the three parsed results into one dictionary. With ** unpacking, a key
# that appears in more than one input takes the value from the right-most one.
combined_dict = {**results0_dict,**results1_dict,**results2_dict_}
combined_dict

{'Entities': {'Individuals': ['John Doe'],
  'Organizations': ['Acme, Inc.', 'Kulkutta Building Supply Company'],
  'Financial_Institutions': ['Bank of Anan']},
 'Account_IDs': ['12345-6789', '23456-7891', '3489728'],
 'Acct_to_FI': {'12345-6789': 'Dummy_Bank_1',
  '23456-7891': 'Dummy_Bank_1',
  '3489728': 'Bank of Anan'},
 'Acct_to_Cust': {'12345-6789': 'John Doe',
  '23456-7891': 'Acme, Inc.',
  '3489728': 'Kulkutta Building Supply Company'},
 'FI_to_Acct_to_Cust': {'Dummy_Bank_1': {'12345-6789': 'CUST_001',
   '23456-7891': 'CUST_002'},
  'Bank of Anan': {'3489728': 'CUST_003'}},
 'Narrative': {'3489728': "Nine wire transfers totaling $225,000 were sent from John Doe's personal account #12345-6789 at Dummy_Bank_1 to Kulkutta Building Supply Company, account #3489728 at the Bank of Anan in Dubai, UAE, between January 17, 2003, and March 21, 2003."}}

Convert back to a string

In [95]:
import json
# Serialize the merged dictionary to an indented JSON string.
trxn_extraction_inputs = json.dumps(combined_dict,indent=2)
trxn_extraction_inputs

'{\n  "Entities": {\n    "Individuals": [\n      "John Doe"\n    ],\n    "Organizations": [\n      "Acme, Inc.",\n      "Kulkutta Building Supply Company"\n    ],\n    "Financial_Institutions": [\n      "Bank of Anan"\n    ]\n  },\n  "Account_IDs": [\n    "12345-6789",\n    "23456-7891",\n    "3489728"\n  ],\n  "Acct_to_FI": {\n    "12345-6789": "Dummy_Bank_1",\n    "23456-7891": "Dummy_Bank_1",\n    "3489728": "Bank of Anan"\n  },\n  "Acct_to_Cust": {\n    "12345-6789": "John Doe",\n    "23456-7891": "Acme, Inc.",\n    "3489728": "Kulkutta Building Supply Company"\n  },\n  "FI_to_Acct_to_Cust": {\n    "Dummy_Bank_1": {\n      "12345-6789": "CUST_001",\n      "23456-7891": "CUST_002"\n    },\n    "Bank of Anan": {\n      "3489728": "CUST_003"\n    }\n  },\n  "Narrative": {\n    "3489728": "Nine wire transfers totaling $225,000 were sent from John Doe\'s personal account #12345-6789 at Dummy_Bank_1 to Kulkutta Building Supply Company, account #3489728 at the Bank of Anan in Dubai, UAE, 

In [96]:
print(trxn_extraction_inputs)

{
  "Entities": {
    "Individuals": [
      "John Doe"
    ],
    "Organizations": [
      "Acme, Inc.",
      "Kulkutta Building Supply Company"
    ],
    "Financial_Institutions": [
      "Bank of Anan"
    ]
  },
  "Account_IDs": [
    "12345-6789",
    "23456-7891",
    "3489728"
  ],
  "Acct_to_FI": {
    "12345-6789": "Dummy_Bank_1",
    "23456-7891": "Dummy_Bank_1",
    "3489728": "Bank of Anan"
  },
  "Acct_to_Cust": {
    "12345-6789": "John Doe",
    "23456-7891": "Acme, Inc.",
    "3489728": "Kulkutta Building Supply Company"
  },
  "FI_to_Acct_to_Cust": {
    "Dummy_Bank_1": {
      "12345-6789": "CUST_001",
      "23456-7891": "CUST_002"
    },
    "Bank of Anan": {
      "3489728": "CUST_003"
    }
  },
  "Narrative": {
    "3489728": "Nine wire transfers totaling $225,000 were sent from John Doe's personal account #12345-6789 at Dummy_Bank_1 to Kulkutta Building Supply Company, account #3489728 at the Bank of Anan in Dubai, UAE, between January 17, 2003, and March 21, 

Construct the prompt to pass to the trxn generation agent

# Phase 2 - Convert Narratives to Trxns

Agent that synthesizes transactions when there are only a few trxns to synthesize.

In [75]:
# Agent that turns per-account narratives into individual transaction records.
# The system message lists the inputs, four steps, the required output fields
# and one worked example delimited by ----. The example is kept internally
# consistent: it refers to the mapping by the name it is given
# (FI_to_Cust_to_Acct), describes the three transactions with their real
# amounts, fills the cash deposit's originator as its own reasoning says, and
# ends with a balanced dictionary.
# NOTE(review): the inputs are described here as Customers_to_Accts,
# FIs_to_Accts and FI_to_Cust_to_Acct, while the Phase 1 output in this
# notebook uses Acct_to_Cust, Acct_to_FI and FI_to_Acct_to_Cust - confirm how
# the Phase 1 output is reshaped before it is passed to this agent.
trxn_generation_agent = ConversableAgent(
    name="trxn_generation_agent",
    system_message='''
    You are an AI Agent tasked with synthesizing transactions from a narrative you are provided. Your strength is synthesizing a small number of transactions, especially when 
    transaction amounts and dates are explicitly listed.

    You will have the following four pieces of information.

    
    1) A JSON called Narrative with Account IDs as key  and  narratives potentially describing transactions made by that account as the values.
    2) The mapping between Individuals and Organizations referenced in the narrative to Account IDs, given by the dictionary Customers_to_Accts. Note, account IDs starting with the prefix "Dummy_" 
    are placeholders for account IDs that have not been identified in the narrative.
    3) The mapping between Financial Institutions to Account IDs given by the dictionary FIs_to_Accts. If the Financial institution for certain accounts are not specified in the narrative,
      a dummy financial institution such as "Dummy_Bank_1" will be used for them.
    4) A mapping between Financial institution, Customer IDs and Accounts owned by those Customer IDs given by the dictionary FI_to_Cust_to_Acct
    
    For the narrative corresponding  to each account ID, do the following:

    Step 1) Identify the number of transactions described in the narrative. Note this in a scratch pad for verification later.

    
    Step 2)For each transaction involving the account ID,  

    a)  Identify the transaction channel or product used. This could be Wire, Cash, Checks or something similar. If this is missing, you can make a reasonable assumption.

    b) Identify the Originator Account ID, the Customer ID corresponding to the account, the Originating_Name which is the Individual Or Organization originating
    the transaction.

    c)  Identify the Beneficiary Account ID, the Customer ID corresponding to the account, the beneficiary_Name which is the Individual Or Organization which is the 
    Beneficiary of the transaction. Note that for cash transactions, if only information on the originator or benficary are available, you can assume the originator and beneficiary are the same.

    d) Identify the date and amount of the transaction. If this is missing, you can make reasonable assumptions.

    e) If this is a Cash or Check or similar transaction, determine the Branch or ATM Location where the transaction was conducted. If this is not specified, indicate it as "Missing"

    Ensure the following are extracted.

    -Originator_Name
    -Originator_Account_ID
    -Originator_Customer_ID
    -Beneficiary_Name
    -Beneficiary_Account_ID
    -Beneficiary_Customer_ID
    -Trxn_Channel
    -Trxn_Date in yyyy-mm-dd format
    -Trxn_Amount
    -Branch or ATM Location

    If any information is missing and can't be reasonably assumed, use an empty string "" to indicate it is missing.

    Step 3) Ensure the number of transactions extracted in Step 2 are the same as that noted in Step 1. if this is not the case, go back to Step 2.

    Step 4) Return the transactions as a JSON file with the key being a Trxn ID and value being the transaction attributes

    An example is given below, demarcated by the delimiter ----..

    ----

    Narrative = {"345723": "John deposited $5000 in Cash into Acct #345723 at the Main Road, NY Branch of Bank of America on Jan 4, 2024.
                  John sends $3000 to Acme Inc's account at Bank of America by Wire on Jan 6, 2024 . John  wrote a check to Jill from Acct #345723 on Jan 8,2024 for $1,000"}

                  Customers_to_Accts = {"John":["345723"],"Jill":["Dummy_001"],"Acme Inc":["98765"]}
                  FIs_to_Accts = {"Bank of America":["345723","98765"],"Chase Bank":["Dummy_001"]}
                  FI_to_Cust_to_Acct = {"Bank of America": {"CUST_001": ["345723"], "CUST_002":["98765"]}, "Chase Bank": {"CUST_003":["Dummy_001"]}}

    
    Step 1) Identify the number of transactions described in the narrative. Note this in a scratch pad for verification later.
      There are three trxns described in the narrative. A $5000 cash deposit, a $3000 wire transfer and a $1000 check.

   Step 2) For each transaction, identify the required attributes.

   For the first transaction, the Beneficiary_Name is John as the money is being deposited into John's account. The Beneficiary_Account_ID is #345723. From FI_to_Cust_to_Acct, 
   The Beneficiary Customer ID is CUST_001.

   The Originator_Name is John, but this is a cash deposit , so there is no Account ID specified from which the cash originates.
   Given this is a cash deposit and information on the Originator is unavailable, it can be assumed to be the same the Beneficary Information.

   The Trxn Channel is Cash as it is a cash deposit.
   The Trxn Date is  2024-01-04.
   The Trxn Amount is $5,000.
   The Branch was Main Road , NY

   For the second transaction, the Originator_Name is John. Although no account ID is specified, it is a wire transaction following a cash transaction, 
   so it has to come from John's account #345723.The Originator_Account_ID is #345723. The Originator Customer ID is CUST_001 as per FI_to_Cust_to_Acct.

   The Beneficiary_Name is Acme Inc. The Beneficiary_Account_ID is #98765 as this account belongs to Acme Inc at Bank of America.
   From FI_to_Cust_to_Acct, The Beneficiary Customer ID is CUST_002.


    The Trxn Channel is Wire.
    The Trxn Date is  2024-01-06.
    The Trxn Amount is $3,000.
    The Branch is Missing as it is a wire transaction.

    For the third transaction, the Originator_Name is John. The Originator_Account_ID is 345723 . The Originator Customer ID is CUST_001 as per FI_to_Cust_to_Acct.

    The Beneficiary_Name is Jill. The Beneficiary_Account_ID must be #Dummy_001 as the information provided indicated that this is Jill's account at Chase Bank.
    From FI_to_Cust_to_Acct, The Beneficiary Customer ID is CUST_003.


    The Trxn Channel is Check
    The Trxn Date is  2024-01-08.
    The Trxn Amount is $1,000.
    The Branch is Missing as it is a Check.

    Step 3) Ensure the number of transactions extracted in Step 2 are the same as that noted in Step 1. if this is not the case, go back to Step 2.

    Three transactions have been identified, which is the same as the number of transaction in scratch pad from Step 1)

    Step 4) Return the transactions as a JSON file with the key being a Trxn ID and value being the transaction attributes

    Now the transactions can be combined into a JSON file with a key being a Trxn ID staring from 1.

    Trxns = {"345723": {1 : {"Originator_Name": "John", "Originator_Account_ID": "345723", "Originator_Customer_ID":"CUST_001","Beneficiary_Name": "John","Beneficiary_Account_ID":"345723",
              "Beneficiary_Customer_ID":"CUST_001" ,"Trxn_Channel":"Cash", "Trxn_Date":"2024-01-04", "Trxn_Amount":5000, "Branch or ATM Location": "Main Road, NY"
            },
              2: {"Originator_Name": "John", "Originator_Account_ID": "345723", "Originator_Customer_ID":"CUST_001","Beneficiary_Name": "Acme Inc","Beneficiary_Account_ID":"98765",
              "Beneficiary_Customer_ID":"CUST_002" ,"Trxn_Channel":"Wire", "Trxn_Date":"2024-01-06", "Trxn_Amount":3000, "Branch or ATM Location": ""
            },
            3: {"Originator_Name": "John", "Originator_Account_ID": "345723", "Originator_Customer_ID":"CUST_001","Beneficiary_Name": "Jill","Beneficiary_Account_ID":"Dummy_001",
              "Beneficiary_Customer_ID":"CUST_003" ,"Trxn_Channel":"Check", "Trxn_Date":"2024-01-08", "Trxn_Amount":1000, "Branch or ATM Location": ""
            }
        }
    }

    ----



    ''',
    llm_config=llm_config_mini,
    code_execution_config=False,
    human_input_mode="NEVER",
)





Create a tool to help synthesize transactions

In [76]:
from typing import List, Literal
from typing_extensions import Annotated

import numpy as np
from datetime import datetime, timedelta
import random

# Transaction channels the tool accepts; used to constrain the Trxn_Channel argument in the tool schema.
Channels_allowed = Literal["Wire","Cash","Check"]
def generate_transactions(
        Originator_Name:Annotated[str, "Entity or Customer originating the transactions"],
        Originator_Account_ID:Annotated[str, "Account  of Entity or Customer originating the transactions"],
        Originator_Customer_ID:Annotated[str, "Customer ID of Entity or Customer originating the transactions"],
        Beneficiary_Name:Annotated[str, "Entity or Customer receiving the transactions"],
        Beneficiary_Account_ID:Annotated[str, "Account of Entity or Customer  receiving the transactions"],
        Beneficiary_Customer_ID:Annotated[str, "Customer ID of Entity or Customer receiving the transactions"],
        Trxn_Channel:Annotated[List[Channels_allowed], "Transaction Channels used to make the transactions."],
        Start_Date:Annotated[str, "Date on which the first transaction was made"],
        End_Date:Annotated[str, "Date on which the last transaction was made"],
        Min_Ind_Trxn_Amt:Annotated[float, "The smallest transaction amount"],
        Max_Ind_Trxn_Amt:Annotated[float, "The largest transaction amount"],
        Branch_or_ATM_Location:Annotated[str, "The location where transaction was originated or received"],
        N_transactions:Annotated[int, "The number of transactions made between the Originator and Beneficary"]) -> dict:
    """Synthesize random transactions between one originator and one beneficiary.

    Every transaction shares the originator, beneficiary and location passed in.
    The channel, date and amount are sampled independently per transaction:
    the channel from ``Trxn_Channel``, the date uniformly from the inclusive
    range ``Start_Date``..``End_Date``, and the amount uniformly between
    ``Min_Ind_Trxn_Amt`` and ``Max_Ind_Trxn_Amt`` (rounded to 2 decimals).

    Args:
        Start_Date, End_Date: Dates in ``YYYY-MM-DD`` format. They may be equal.
        N_transactions: Number of transactions to generate; 0 yields ``{}``.
        (All other arguments are copied verbatim into each transaction.)

    Returns:
        dict mapping ``"Trxn_1"`` .. ``"Trxn_N"`` to a dict with the keys
        Originator_Name, Originator_Account_ID, Originator_Customer_ID,
        Beneficiary_Name, Beneficiary_Account_ID, Beneficiary_Customer_ID,
        Trxn_Channel, Trxn_Date (``YYYY-MM-DD`` string), Trxn_Amount (float)
        and Branch_or_ATM_Location.

    Raises:
        ValueError: if a date does not match ``YYYY-MM-DD``, if
            ``N_transactions`` is negative, or if ``End_Date`` precedes
            ``Start_Date`` while transactions are requested.
        IndexError: if ``Trxn_Channel`` is empty while transactions are requested.
    """
    first_day = datetime.strptime(Start_Date,"%Y-%m-%d")
    last_day = datetime.strptime(End_Date,"%Y-%m-%d")

    if N_transactions < 0:
        raise ValueError("N_transactions must be zero or a positive integer")
    if N_transactions == 0:
        return {}

    span_days = (last_day - first_day).days
    if span_days < 0:
        raise ValueError("End_Date must not be earlier than Start_Date")

    trxn_channels = random.choices(Trxn_Channel, k = N_transactions)

    # span_days + 1 makes End_Date itself reachable and keeps a same-day range
    # (Start_Date == End_Date) from producing an empty population.
    sample_deltas = random.choices(range(span_days + 1), k = N_transactions)
    trxn_dates = [(first_day + timedelta(days=delta)).strftime("%Y-%m-%d") for delta in sample_deltas]

    # .tolist() converts numpy scalars to plain Python floats so the result
    # serializes and prints as ordinary numbers.
    trxn_amounts = np.round(
        np.random.uniform(low=Min_Ind_Trxn_Amt, high=Max_Ind_Trxn_Amt, size=N_transactions), 2
    ).tolist()

    trxns = {}  # Dictionary to hold transactions, keyed "Trxn_<n>"
    for i in range(N_transactions):
        trxns[f"Trxn_{i+1}"] = {
            "Originator_Name": Originator_Name,
            "Originator_Account_ID": Originator_Account_ID,
            "Originator_Customer_ID": Originator_Customer_ID,
            "Beneficiary_Name": Beneficiary_Name,
            "Beneficiary_Account_ID": Beneficiary_Account_ID,
            "Beneficiary_Customer_ID": Beneficiary_Customer_ID,
            "Trxn_Channel": trxn_channels[i],
            "Trxn_Date": trxn_dates[i],
            "Trxn_Amount": trxn_amounts[i],
            "Branch_or_ATM_Location": Branch_or_ATM_Location,
        }

    return trxns

In [77]:
from autogen.agentchat.contrib.gpt_assistant_agent import GPTAssistantAgent
from autogen.function_utils import get_function_schema

# Build the tool schema for `generate_transactions` from its signature and Annotated
# parameter descriptions, so it can be handed to the assistant as a callable tool.
generate_transactions_schema = get_function_schema(
    generate_transactions,
    description=" A function for generating transactions when a large number of transactions have to be synthesizes",
    name="generate_transactions",
)

Create an agent that can use this tool.

In [78]:
# Assistant-API backed agent that turns a narrative describing many similar transactions
# (a count, a date range and an amount range) into individual transactions by calling the
# `generate_transactions` tool. The worked example at the end of the instructions must stay
# consistent with its own narrative (10 transfers of $10,000) and must name the tool exactly
# as registered, otherwise the model is shown a contradictory demonstration.
trxn_generation_agent_gpt = GPTAssistantAgent(
    name = "trxn_generation_agent_gpt",
    instructions = """
    You are an AI Agent tasked with synthesizing transactions from a narrative you are provided. Your strength is synthesizing transactions when a larger number of transactions
    are made between two entities  when a range of trxn amounts and dates are specified. When invoked, use the provided function to generate transactions.

    You will have the following four pieces of information.

    
    1) A JSON called Narrative with Account IDs as key  and  narratives potentially describing transactions made by that account as the values. </n>
    2) The mapping between Individuals and Organizations referenced in the narrative to Account IDs, given by the dictionary Customers_to_Accts. Note thataccount IDs starting with the prefix "Dummy_" 
    are placeholders for account IDs that have not been identified in the narrative. </n>
    3) The mapping between Financial Institutions to Account IDs given by the dictionary FIs_to_Accts. If the Financial institution for certain accounts are not specified in the narrative,
      a dummy financial institution "Dummy_Bank" will be used for them.</n>
    4) A mapping between Financial institution, Customer IDs and Accounts owned by those Customer IDs given by the dictionary FI_to_Cust_to_Acct
    
    For the narrative corresponding  to each account ID, do the following:

    Step 1) Identify the number of transactions described in the narrative. Note this in a scratch pad for verification later.

    
    Step 2) Identify the transaction channels or product used. This could be Wire, Cash, Checks or something similar. If this is missing, you can make a reasonable assumption.

    Step 3) Identify the Beneficiary Account ID, the Customer ID corresponding to the account, the beneficiary_Name which is the Individual Or Organization which is the 
    Beneficiary of the transaction. Note that for cash transactions, if only information on the originator or benficary are available, you can assume the originator and beneficiary are the same.

    Step 4) Identify the Originator Account ID, the Customer ID corresponding to the account, the Originating_Name which is the Individual Or Organization originating
    the transaction.

    Step 5) Identify the first and last dates the transactions. If this is missing, you can make reasonable assumptions.

    Step 6) Identify the minimum and maximum amount of the transactions.

    Step 7) If this is a Cash or Check or similar transaction, determine the Branches or ATM Locations where the transaction was conducted. If this is not specified, indicate it as "Missing"

    Ensure the following are extracted.

    -Originator_Name
    -Originator_Account_ID
    -Originator_Customer_ID
    -Beneficiary_Name
    -Beneficiary_Account_ID
    -Beneficiary_Customer_ID
    -Trxn_Channel or List of Trxn Channels
    -First Trxn_Date and Last Trxn Date in yyyy-mm-dd format
    -Min Trxn_Amount and Max Trxn_Amount
    -Branch or ATM Location or List of such locations

    If any information is missing and can't be reasonably assumed, use an empty string "" to indicate it is missing.

    Step 8) Use the function `generate_transactions` that returns a dictionary with the specified number of transactions and attributes.

    An example is given below, demarcated by the delimiter ----..

    ----

    Narrative = {"345723": "John transferred a total of $100,000 from Acct #345723 at Bank of America  to Jill between Jan 1, 2024 and July 4,2024.
                             There were 10 trns, each transactions was $10,000"}

                  Customers_to_Accts = {"John":["345723"],"Jill":["Dummy_001"],"Acme Inc":"98765"}
                  FIs_to_Accts = {"Bank of America":["345723","98765"],"Chase Bank":["Dummy_001"]}
                  FI_to_Cust_to_Acct = {"Bank of America": {"CUST_001": ["345723"], "CUST_002":["98765"]}, "Chase Bank": {"CUST_003":["Dummy_001"]}}

    
   Step 1) Identify the number of transactions described in the narrative. Note this is a scratch pad for verification later.
      There are 10 trxns described in the narrative, each for $10,000, totaling $100,000.

   Step 2) Identify the transaction channels or product used.
      The narrative describes transfers between accounts without naming a channel, so it is reasonable to assume all 10 trxns are Wires.


   Step 3) The Beneficary of the trxns is Jill.The Beneficary_Name is Jill. From Customers_to_Accts, it is noted that Jill's account is Dummy_001. From FIs_to_Accts, this account is at Chase Bank.
           From FI_to_Cust_to_Acct, the Customer ID corresponding to this account is CUST_003
   

   Step 4) The Originator of the trxns is John. rom the narrative, the Originator name is John and the originating account is 345723. From FIs_to_Accts, this account is at Bank of America.
           From FI_to_Cust_to_Acct, the Customer ID corresponding to this account is CUST_001
           
   Step 5) The first transaction was made on Jan 1,2024 and last transaction was made on July 4, 2024.

   Step 6) The mininmum and maximum transaction amount is $10,000

   Step 7) Since the transactions are Wire transactions, Branch or ATM Location can be considered Missing. All the required attributes have been gathered.

   Step 8) Call the function generate_transactions with the collected arguments. Return results as JSON only.

    ----


    """,
    # NOTE(review): presumably these two flags make the instructions/tools given here replace
    # whatever an already-existing assistant has stored - confirm against the AutoGen docs.
    overwrite_instructions=True,
    overwrite_tools=True,
    llm_config= {
        "config_list":config_list,
        "tools":[generate_transactions_schema]

    }
)









    You are an AI Agent tasked with synthesizing transactions from a narrative you are provided. Your strength is synthesizing transactions when a larger number of transactions
    are made between two entities  when a range of trxn amounts and dates are specified. When invoked, use the provided function to generate transactions.

    You will have the following four pieces of information.

    
    1) A JSON called Narrative with Account IDs as key  and  narratives potentially describing transactions made by that account as the values. </n>
    2) The mapping between Individuals and Organizations referenced in the narrative to Account IDs, given by the dictionary Customers_to_Accts. Note thataccount IDs starting with the prefix "Dummy_" 
    are placeholders for account IDs that have not been identified in the narrative. </n>
    3) The mapping between Financial Institutions to Account IDs given by the dictionary FIs_to_Accts. If the Financial institution for certain accounts are not 

In [79]:
# Bind the tool name advertised in the schema to the local Python callable so the agent
# can execute `generate_transactions` when the model requests it.
trxn_generation_agent_gpt.register_function(
    function_map=dict(generate_transactions=generate_transactions)
)

In [80]:
# User proxy that supplies the SAR extract to the chat; it always prompts for human input.
sar_agent = autogen.UserProxyAgent(
    name="sar_agent",
    human_input_mode="ALWAYS",
    system_message="Share the SAR extract given so that trxns can be extracted",
    # Code execution happens in the local "temp" directory without Docker.
    # Please set use_docker=True if docker is available to run the generated code.
    # Using docker is safer than running the generated code directly.
    code_execution_config={"last_n_messages": 2, "work_dir": "temp", "use_docker": False},
)

In [81]:
# Group chat between the SAR proxy and the two transaction-generation agents, capped at
# two rounds; the manager uses the gpt-4o-mini config.
groupchat = autogen.GroupChat(
    agents=[sar_agent, trxn_generation_agent, trxn_generation_agent_gpt],
    messages=[],
    max_round=2,
)
manager = autogen.GroupChatManager(
    groupchat=groupchat,
    llm_config=llm_config_mini,
)





In [106]:
# Start the group chat with the extracted SAR inputs and ask for an LLM-written summary
# that contains only the synthesized transactions. The template in summary_prompt closes
# every brace it opens so the requested structure is a well-formed nested dictionary.
chat_results = sar_agent.initiate_chat(
    manager,
    message = trxn_extraction_inputs,
    summary_method="reflection_with_llm",
    summary_args= {
        "summary_prompt": '''
                            Return the synthesized transactions in the following format as a Python Dictionary only.
                                 { <Acct_ID> : {<Trxn_ID> : 
                                                    {"Originator_Name": <Originator_Name>,
                                                     "Originator_Account_ID": <Originator_Account_ID>, 
                                                     "Originator_Customer_ID": <Originator_Customer_ID>,
                                                     "Beneficiary_Name": <Beneficiary_Name>,
                                                     "Beneficiary_Account_ID":<Beneficiary_Account_ID>, 
                                                     "Beneficiary_Customer_ID": <Beneficiary_Customer_ID> ,
                                                     "Trxn_Channel": <Trxn_Channel>, 
                                                     "Trxn_Date":<Trxn_Date>, 
                                                     "Trxn_Amount":<Trxn_Amount>, 
                                                     "Branch_or_ATM_Location": <Branch_or_ATM_Location> }
                                               }
                                 }

                          '''
                   }
    )


[33msar_agent[0m (to chat_manager):

{
  "Entities": {
    "Individuals": [
      "John Doe"
    ],
    "Organizations": [
      "Acme, Inc.",
      "Kulkutta Building Supply Company"
    ],
    "Financial_Institutions": [
      "Bank of Anan"
    ]
  },
  "Account_IDs": [
    "12345-6789",
    "23456-7891",
    "3489728"
  ],
  "Acct_to_FI": {
    "12345-6789": "Dummy_Bank_1",
    "23456-7891": "Dummy_Bank_1",
    "3489728": "Bank of Anan"
  },
  "Acct_to_Cust": {
    "12345-6789": "John Doe",
    "23456-7891": "Acme, Inc.",
    "3489728": "Kulkutta Building Supply Company"
  },
  "FI_to_Acct_to_Cust": {
    "Dummy_Bank_1": {
      "12345-6789": "CUST_001",
      "23456-7891": "CUST_002"
    },
    "Bank of Anan": {
      "3489728": "CUST_003"
    }
  },
  "Narrative": {
    "3489728": "Nine wire transfers totaling $225,000 were sent from John Doe's personal account #12345-6789 at Dummy_Bank_1 to Kulkutta Building Supply Company, account #3489728 at the Bank of Anan in Dubai, UAE, b



[32m
Next speaker: trxn_generation_agent
[0m
[33mtrxn_generation_agent[0m (to chat_manager):

Step 1) Identify the number of transactions described in the narrative. There are nine wire transactions totaling $225,000.

Step 2) For each transaction, identify the required attributes.

Given that the narrative describes multiple transactions collectively, I will divide the total amount among the nine wire transfers equally. This gives:

- Transaction amount per wire transfer = $225,000 / 9 = $25,000.

The details for each transaction can be derived as follows:

1. **Originator_Name**: John Doe
2. **Originator_Account_ID**: 12345-6789
3. **Originator_Customer_ID**: CUST_001 (from FI_to_Acct_to_Cust mapping)
4. **Beneficiary_Name**: Kulkutta Building Supply Company
5. **Beneficiary_Account_ID**: 3489728
6. **Beneficiary_Customer_ID**: CUST_003 (from FI_to_Acct_to_Cust mapping)
7. **Trxn_Channel**: Wire (as the narrative specifies wire transfers)
8. **Trxn_Date**: The transactions occurr

In [107]:
# Show the raw summary string; in the recorded run it is wrapped in a ```python code fence.
chat_results.summary

'```python\n{\n    "3489728": {\n        "1": {\n            "Originator_Name": "John Doe",\n            "Originator_Account_ID": "12345-6789",\n            "Originator_Customer_ID": "CUST_001",\n            "Beneficiary_Name": "Kulkutta Building Supply Company",\n            "Beneficiary_Account_ID": "3489728",\n            "Beneficiary_Customer_ID": "CUST_003",\n            "Trxn_Channel": "Wire",\n            "Trxn_Date": "2003-01-17",\n            "Trxn_Amount": 25000,\n            "Branch_or_ATM_Location": ""\n        },\n        "2": {\n            "Originator_Name": "John Doe",\n            "Originator_Account_ID": "12345-6789",\n            "Originator_Customer_ID": "CUST_001",\n            "Beneficiary_Name": "Kulkutta Building Supply Company",\n            "Beneficiary_Account_ID": "3489728",\n            "Beneficiary_Customer_ID": "CUST_003",\n            "Trxn_Channel": "Wire",\n            "Trxn_Date": "2003-01-24",\n            "Trxn_Amount": 25000,\n            "Branch_o

In [108]:
import re

# Pull the dictionary literal out of the summary. The summary normally arrives inside a
# Markdown code fence with an optional language tag (e.g. ```python ... ```).
# str.strip('```python') would strip a *set of characters* rather than a prefix, so the
# fence is matched explicitly instead; a missing closing fence or a summary with no fence
# at all falls back gracefully rather than raising IndexError.
fence_match = re.search(r"```[\w+-]*\s*(.*?)(?:```|\Z)", chat_results.summary, flags=re.DOTALL)
cleaned_trxns = fence_match.group(1) if fence_match else chat_results.summary
trxns = cleaned_trxns.strip()

In [109]:
# Print (rather than echo) the extracted text so its newlines render as real line breaks.
print(trxns)


{
    "3489728": {
        "1": {
            "Originator_Name": "John Doe",
            "Originator_Account_ID": "12345-6789",
            "Originator_Customer_ID": "CUST_001",
            "Beneficiary_Name": "Kulkutta Building Supply Company",
            "Beneficiary_Account_ID": "3489728",
            "Beneficiary_Customer_ID": "CUST_003",
            "Trxn_Channel": "Wire",
            "Trxn_Date": "2003-01-17",
            "Trxn_Amount": 25000,
            "Branch_or_ATM_Location": ""
        },
        "2": {
            "Originator_Name": "John Doe",
            "Originator_Account_ID": "12345-6789",
            "Originator_Customer_ID": "CUST_001",
            "Beneficiary_Name": "Kulkutta Building Supply Company",
            "Beneficiary_Account_ID": "3489728",
            "Beneficiary_Customer_ID": "CUST_003",
            "Trxn_Channel": "Wire",
            "Trxn_Date": "2003-01-24",
            "Trxn_Amount": 25000,
            "Branch_or_ATM_Location": ""
        },
    

In [110]:
import ast
# Parse the extracted text into a Python dict. literal_eval only accepts Python literals,
# so it is a safer choice than eval() for text that was written by an LLM.
trxns_dict = ast.literal_eval(trxns)

In [111]:
# Inspect the parsed result: account ID -> transaction ID -> transaction attributes.
trxns_dict

{'3489728': {'1': {'Originator_Name': 'John Doe',
   'Originator_Account_ID': '12345-6789',
   'Originator_Customer_ID': 'CUST_001',
   'Beneficiary_Name': 'Kulkutta Building Supply Company',
   'Beneficiary_Account_ID': '3489728',
   'Beneficiary_Customer_ID': 'CUST_003',
   'Trxn_Channel': 'Wire',
   'Trxn_Date': '2003-01-17',
   'Trxn_Amount': 25000,
   'Branch_or_ATM_Location': ''},
  '2': {'Originator_Name': 'John Doe',
   'Originator_Account_ID': '12345-6789',
   'Originator_Customer_ID': 'CUST_001',
   'Beneficiary_Name': 'Kulkutta Building Supply Company',
   'Beneficiary_Account_ID': '3489728',
   'Beneficiary_Customer_ID': 'CUST_003',
   'Trxn_Channel': 'Wire',
   'Trxn_Date': '2003-01-24',
   'Trxn_Amount': 25000,
   'Branch_or_ATM_Location': ''},
  '3': {'Originator_Name': 'John Doe',
   'Originator_Account_ID': '12345-6789',
   'Originator_Customer_ID': 'CUST_001',
   'Beneficiary_Name': 'Kulkutta Building Supply Company',
   'Beneficiary_Account_ID': '3489728',
   'Benefi

In [112]:
# Look at the transactions stored under account ID '3489728'.
trxns_dict['3489728']

{'1': {'Originator_Name': 'John Doe',
  'Originator_Account_ID': '12345-6789',
  'Originator_Customer_ID': 'CUST_001',
  'Beneficiary_Name': 'Kulkutta Building Supply Company',
  'Beneficiary_Account_ID': '3489728',
  'Beneficiary_Customer_ID': 'CUST_003',
  'Trxn_Channel': 'Wire',
  'Trxn_Date': '2003-01-17',
  'Trxn_Amount': 25000,
  'Branch_or_ATM_Location': ''},
 '2': {'Originator_Name': 'John Doe',
  'Originator_Account_ID': '12345-6789',
  'Originator_Customer_ID': 'CUST_001',
  'Beneficiary_Name': 'Kulkutta Building Supply Company',
  'Beneficiary_Account_ID': '3489728',
  'Beneficiary_Customer_ID': 'CUST_003',
  'Trxn_Channel': 'Wire',
  'Trxn_Date': '2003-01-24',
  'Trxn_Amount': 25000,
  'Branch_or_ATM_Location': ''},
 '3': {'Originator_Name': 'John Doe',
  'Originator_Account_ID': '12345-6789',
  'Originator_Customer_ID': 'CUST_001',
  'Beneficiary_Name': 'Kulkutta Building Supply Company',
  'Beneficiary_Account_ID': '3489728',
  'Beneficiary_Customer_ID': 'CUST_003',
  'Tr

In [114]:
import pandas as pd
# Flatten the nested {account_id: {transaction_id: details}} dictionary into a list of rows.
# Each row is a copy of the transaction details, so trxns_dict itself is not modified and
# re-running earlier inspection cells still shows the data exactly as parsed.
flattened_data = []

for key, inner_dict in trxns_dict.items():
    for transaction_id, transaction_details in inner_dict.items():
        row = dict(transaction_details)
        # Add the transaction ID and account ID to the copied details
        row['Transaction_ID'] = transaction_id
        row['Account_ID'] = key
        flattened_data.append(row)

# Convert the flattened data to a DataFrame (one row per transaction)
df = pd.DataFrame(flattened_data)

In [115]:
# Move the two identifier columns to the front; all remaining columns keep their relative order.
column_order = ['Transaction_ID', 'Account_ID']
column_order = column_order + [col for col in df.columns if col not in column_order]
df = df[column_order]

In [116]:
# Display the flattened transactions table (one row per synthesized transaction).
df

Unnamed: 0,Transaction_ID,Account_ID,Originator_Name,Originator_Account_ID,Originator_Customer_ID,Beneficiary_Name,Beneficiary_Account_ID,Beneficiary_Customer_ID,Trxn_Channel,Trxn_Date,Trxn_Amount,Branch_or_ATM_Location
0,1,3489728,John Doe,12345-6789,CUST_001,Kulkutta Building Supply Company,3489728,CUST_003,Wire,2003-01-17,25000,
1,2,3489728,John Doe,12345-6789,CUST_001,Kulkutta Building Supply Company,3489728,CUST_003,Wire,2003-01-24,25000,
2,3,3489728,John Doe,12345-6789,CUST_001,Kulkutta Building Supply Company,3489728,CUST_003,Wire,2003-01-31,25000,
3,4,3489728,John Doe,12345-6789,CUST_001,Kulkutta Building Supply Company,3489728,CUST_003,Wire,2003-02-07,25000,
4,5,3489728,John Doe,12345-6789,CUST_001,Kulkutta Building Supply Company,3489728,CUST_003,Wire,2003-02-14,25000,
5,6,3489728,John Doe,12345-6789,CUST_001,Kulkutta Building Supply Company,3489728,CUST_003,Wire,2003-02-21,25000,
6,7,3489728,John Doe,12345-6789,CUST_001,Kulkutta Building Supply Company,3489728,CUST_003,Wire,2003-02-28,25000,
7,8,3489728,John Doe,12345-6789,CUST_001,Kulkutta Building Supply Company,3489728,CUST_003,Wire,2003-03-07,25000,
8,9,3489728,John Doe,12345-6789,CUST_001,Kulkutta Building Supply Company,3489728,CUST_003,Wire,2003-03-21,25000,
