# Approach 1 - Using gpt-4o-mini

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()
# Read the OpenAI key under the same variable name that the autogen key map
# in this notebook uses ("OPENAI_API_KEY"); the previous lookup name was missing "AI".
api_key = os.getenv("OPENAI_API_KEY")

In [3]:
# Two LLM configurations; each agent picks one through its `llm_config=` argument.
llm_config_mini = dict(model="gpt-4o-mini")
llm_config_large = dict(model="gpt-4o")

In [4]:
import autogen
# Build an autogen config list from ../.env. Both models are mapped to the
# OPENAI_API_KEY variable, and the filter keeps only the gpt-4o-mini entry.
# NOTE(review): config_list is not referenced by any later cell visible here;
# the agents are given llm_config_mini / llm_config_large instead.
config_list = autogen.config_list_from_dotenv(
    dotenv_file_path="../.env",
    model_api_key_map=dict.fromkeys(("gpt-4o", "gpt-4o-mini"), "OPENAI_API_KEY"),
    filter_dict=dict(model=["gpt-4o-mini"]),
)



## Phase 1 - Extract Narratives and other relevant information

### Define Agents

In [5]:
from autogen import ConversableAgent

In [12]:
# Agent that extracts entities, account IDs, and the account->FI and
# account->owner mappings from a SAR narrative, guided by a worked example.
# It never asks for human input and has code execution disabled.
#
# The example dictionaries in the prompt are kept syntactically valid and use
# the same key the instructions request ("Financial_Institutions"), so that the
# worked example and the requested output format do not contradict each other.
entity_extraction_agent = ConversableAgent(
    name="Entity_Extraction_Agent",
    system_message='''You are an Entity Extraction Agent, an AI Assistant tasked with extracting entitites from SAR Narratives.
      
      Using the SAR narrative shared by the user, do the following:

      Step 1) Extract the main entities described. Entities could be Individuals or Organizations or Financial Institutions. Return this in the form of a dictionary called Entities
              Entities = { "Individuals" : <List of Individuals> , "Organizations": <List of Organizations>, "Financial_Institutions": <List of Financial Instituions >} 

      Step 2) Extract the Account IDs described. Account IDs could be numeric or alphanumeric.If an Account ID is missing, create a Dummy account ID with the prefix "Dummy_Acct_".
              e.g. Dummy_Acct_1, Dummy_Acct_2 and so on. Put this in a list called Account_IDs

      Step 3) Map the extracted Account IDs to Financial institutions where the account is held. If the name of the institution is not specified, create a Dummy institution name e.g Dummy_Bank_1, Dummy_Bank_2 etc.
              Return this in the form of a dictionary  called Acct_to_FI E.g. {<Account_ID> : <Financial Instituion>}

      Step 4) Map the extracted account IDs to Individuals or Organizations who own the account. Return this in the form of a dictionary called Acct_to_Cust E.g. {<Account_ID>: <Entity_Name>}

      

      An example is given below.

      Narrative:

      John deposited $5000 in Cash into Acct #345723 at Bank of America. John sends $3000 to Jill's account at  Chase. Jill deposited $3000 in Cash into her Acct at Chase Bank.John and Jill own a business Acme Inc that has a  Business account, Account #98765 . John sends $2000 from Acct #345723 to Account #98765. Jill sends $1000 from her Acct at Chase Bank to Acct #98765.

      Step 1) Extract the main entities described. Identify the Individuals , Organizations and Financial Instituions mentioned. 
      Individuals and Organizations are entities that usually conduct transactions through Financial institutions. Here, John and Jill are Individuals. Acme Inc is an Organization.
      Financial institutions are institutions such as banks or insurance companies that offer financial services to customers. Hence Bank of America and Chase Bank are Financial institutions.

      Record this as a dictionary named Entities.

      Entities = {"Individuals": ["John", "Jill"],"Organizations":["Acme Inc"],"Financial_Institutions":["Chase Bank","Bank of America"]}

      Step 2) Extract the account IDs described. Accounts mentioned in the narrative are #345723,#98765. There is also an account at Chase Bank that is missing. Assign a dummy account ID ("Dummy_Acct_1")
      to this account.

      Record this in a list named Account_IDs
      Account_IDs = ["345723","98765","Dummy_Acct_1"]

      Step 3) Map the Account IDs extracted in Step 2 to Financial Instititions where they are held. Account ID #345723 is held at Bank of America. Account ID Dummy_Acct_1 is held at Chase Bank.
      Account ID #98765 is referenced but the Financial instituion where it is held is not specified. So it can be assumed to be held at "Dummy_Bank_1".

      Multiple Accounts could be held at the same Financial Institution.

      Record this in a dictionary named Acct_to_FI.
      Acct_to_FI = {"345723":"Bank of America","Dummy_Acct_1":"Chase Bank", "98765":"Dummy_Bank_1" }

      
      Step 4) Map the extracted Account IDs extracted in Step 2 to Individuals or Organizations that are customers of the Financial Institutions. 
      Account ID #345723 belongs to John. 
      Account ID #98765 belongs to Acme Inc. 
      Account ID Dummy_Acct_1 belongs to Jill.
      

      Record this in a dictionary named Acct_to_Cust

      Acct_to_Cust = {"345723": "John","Dummy_Acct_1" : "Jill","98765": "Acme Inc"}   
    
    ''',
    llm_config=llm_config_mini,
    code_execution_config=False,
    human_input_mode="NEVER",
)





In [22]:
# Agent that assigns per-FI customer IDs (CUST_001, ...) to account owners and
# maps each account ID to its customer ID, guided by a worked example.
# It never asks for human input and has code execution disabled.
#
# The worked example is kept self-consistent: the Acct_to_Cust input is a valid
# dictionary, the FI list names "Dummy_Bank_1" exactly as it appears in
# Acct_to_FI, and Acct_to_Cust is described as mapping accounts to their owners
# (Individuals or Organizations), which is what the example data contains.
entity_resolution_agent = ConversableAgent(
    name="entity_resolution_agent",
    system_message='''
    You are an AI Agent tasked with assigning Customer IDs to entities identified in a SAR and mapping Account IDs to these customer IDs 
      You will have the following four pieces of information.

      1) List of Account IDs given by the list Account_IDs. </n>
      2) The mapping  from Account IDs to the Entities that own them (Individuals or Organizations), given by the dictionary Acct_to_Cust.  </n>
      3) The mapping from Accts to Financial Institutions given by the dictionary Acct_to_FI. 
      4) SAR narrative 

      Using this information, do the following:

      Step 1)  For each Account_ID in in the list , identify the owner of the account from Acct_to_Cust

      Step 2)  For each such account, identify the FI where the account is held from Acct_to_FI.Multiple Account IDs can be held at the same FI. If the Financial institution for certain accounts is not specified, a dummy financial institution e.g. "Dummy_Bank_1" should be used for them.</n> 

      Step 3) For each FI, identify customers that hold accounts at tht FI

      Step 4) Assign  customer ID (E.g. CUST_001, CUST_002) for each unique customer at an FI and map the Account ID to the Customer IDs
      If multiple accounts are owned by the same Individual or Organization at the same FI, map them to the same Customer ID

      Return this final mapping between Customer IDs and Account IDs in the form of a Python Dictionary

      An example is given below, demarcated by the delimiter ----.

      ----

      1) Account_IDs = ["345723","98765","12345","99999","Dummy_Acct_1"]
      
      2) Acct_to_Cust =  {"345723": "John", "99999":"John", "12345":"Jill", "Dummy_Acct_1" : "Jill","98765": "Acme Inc"}

      3) Acct_to_FI =  {"345723":"Bank of America","99999":"Bank of America","12345":"Bank of America","Dummy_Acct_1":"Chase Bank", "98765":"Dummy_Bank_1" }

      4) Narrative: </n>
      John deposited $5000 each in Cash into Acct #345723 and Acct #99999, both of which are at Bank of America. John sends $4000  from Acct #345723 to Jill's account at  Chase. Jill deposited $3000 in Cash into her Acct at Chase Bank and wired $2000 to her Acct #12345 at Bank of America .John and Jill own a business Acme Inc that has a  Business account, Account #98765 . John sends $2000 from Acct #99999 to Account #98765. Jill sends $1000 from her Acct at Chase Bank to Acct #98765.

  
      Step 1) For each Account ID in the list Account_IDs,  identify the owner of the account from Acct_to_Cust. 

      Account #345723 is owned by John. 
      Account #99999 is owned by John
      Account #98765 is owned by Acme Inc.
      Account #12345 is owned by Jill.
      Account #Dummy_Acct_1 is owned by Jill.

      Step 2) For each Account ID, identify the FI where it is held from Acct_to_FI

      Account #345723 is held at Bank of America.
      Account #99999 is held at Bank of America.
      Account #12345 is held at Bank of America
      Account #98765 is held at Dummy_Bank_1.
      Account #Dummy_Acct_1 is held at Chase Bank.
      
      Step 3) For each FI, identify customers that hold accounts at that FI

      There are a total of three distinct FIs in Acct_to_FI: Bank of America, Dummy_Bank_1, Chase Bank

      From Acct_to_FI and Acct_to_Cust,
      Bank of America has three accounts - #345723, #99999 and #12345. 
      #345723 and #99999 is owned by John and #12345 is owned by Jill. So customers at Bank of America are John and Jill.

      Chase Bank has one account - #Dummy_Acct_1 which is owned by Jill.  So customers at Chase Bank is only Jill

      Dummy_Bank_1 has one account -  #98765 owned by Acme Inc.  So customers at Dummy_Bank_1 is only Acme Inc

      Step 4) Assign  customer ID (E.g. CUST_001, CUST_002) for each unique customer at an FI and map the Account IDs at that FI to the Customer IDs

      At Bank of America, there are two unique customers - John and Jill,  
      So John can be assiged the customer ID CUST_001 and Jill can be assiged the customer ID CUST_002.

      The Account #345723 owned by John can be mapped to CUST_001.
      The Account #99999 owned by John can also be mapped to CUST_001
      The Account #12345 owned by Jill can be mapped to CUST_002

      "Bank of America": {"345723":"CUST_001","99999":"CUST_001","12345":"CUST_002"}

      At Chase Bank , there is only one customer Jill. The account Dummy_Acct_1 at Chase Bank is owned by Jill. 
      So Jill can be assiged the customer ID CUST_003.

      The account Dummy_Acct_1 can be mapped to CUST_003

      "Chase Bank": {"Dummy_Acct_1":"CUST_003"}

      At Dummy_Bank_1, there is only one customer Acme Inc. The account #98765 at Dummy_Bank_1 is owned by Acme Inc.
      So Acme Inc can be assigned the customer ID CUST_004.

      The account #98765 can be mapped to CUST_004
      
      "Dummy_Bank_1": {"98765":"CUST_004"}

      Return this information  as Python Dictionary. Return only the final Python dictionary. Do not include any extra commentary, code fences, or text outside the dictionary.

      {"Bank of America": {"345723":"CUST_001","99999":"CUST_001","12345":"CUST_002"}, 
       "Chase Bank": {"Dummy_Acct_1":"CUST_003"},
       "Dummy_Bank_1": {"98765":"CUST_004"} }
      
      ----


    ''',
    llm_config=llm_config_mini,
    code_execution_config=False,
    human_input_mode="NEVER",
)





In [13]:
# Agent that, for each account ID, extracts the part of the SAR narrative that
# describes that account's activity and splits it into transaction sets.
# It uses llm_config_large, never asks for human input, and has code execution
# disabled.
#
# The worked example is kept internally consistent, because its dictionaries
# show the output shape the agent is asked to return:
#   * every example dictionary is balanced and correctly quoted;
#   * transaction-set keys are uniformly spelled "Trxn_Set_<n>";
#   * dates and account numbers match the example narrative (the cash deposits
#     are dated Jan 1, 2025; the Bank of America account is #12345);
#   * the consolidated Narratives dictionary contains the same transaction sets
#     as the per-account steps (both sets for account 99999).
# NOTE(review): the example for account 98765 lists only John's Feb 1 transfer,
# although the example narrative also has Jill wiring $1000 to #98765 on Feb 7.
# Left as authored; confirm whether that omission is intentional.
narrative_extraction_agent = ConversableAgent(
    name="narrative_extraction_agent",
    system_message='''
    You are an AI Agent tasked with extracting or summarizing  parts of a narrative that describe activity conducted by certain accounts.

      You will have the following four pieces of information.

      1)  SAR narrative

      2)  The mapping between Customers referenced in the narrative to Account IDs which is given by the dictionary Acct_to_Cust

      3)  The mapping between Financial Institutions referenced in the narrative to Account IDs which is given by the dictionary Acct_to_FI

      4)  List of Account IDs given by a list Account_IDs
      
      Using this information, Do the following:

      Step 1) Identify an Account ID from the list of Account IDs. Note that account IDs starting with the prefix "Dummy_" are placeholders for account IDs that have not been explicitly described in the narrative.

      Step 2)  From the dictionary Acct_to_Cust, Identify the customer who owns the account.

      Step 3) From the dictionary Acct_to_FI, Identify the FI where the account is held. Note that FIs starting with the prefix "Dummy_" are placeholders for FIs that have not been explicitly described in the narrative.

      Step 4) Extract or summarize the SAR to produce a narrative describing transactions involving this account.Ensure key details such as dates of trxns and trxn channels are also captured if available. Also ensure that for the trxns described, both the originator and beneficary of the trxn has been captured. 

      Step 5) Determine if the summary generated in Step 4) describes multiple sets of Trxns.A set of trxns should fully describe the originator and beneficary of the trxns. If the narrative describes multiple sets of trxns, create multiple narratives describing each set of trxns.

      An example is given below, demarcated by the delimiter ----.

      ----

      1) SAR Narrative:
          John deposited $5000 each in Cash into Acct #345723 and Acct #99999, both of which are at Bank of America on Jan 1, 2025 . John sends $4000  from Acct #345723 to Jill's account at Chase Bank on Jan 15,2025. Jill deposited $3000 in Cash into her Acct at Chase Bank on Jan 17,2025  and  then wired $2000 from that account to her Acct #12345 at Bank of America on Jan 19,2025 .John and Jill own a business Acme Inc that has a  Business account, Account #98765 . John sends $2000 from Acct #99999 to Account #98765 on Feb 1,2025. Jill sends $1000 from her Acct at Chase Bank to Acct #98765 by Wire on Feb 7,2025.
      
      2) Acct_to_Cust =  {"345723": "John", "99999":"John", "12345":"Jill", "Dummy_Acct_1" : "Jill","98765": "Acme Inc"}

      3) Acct_to_FI = {"345723":"Bank of America","99999":"Bank of America","12345":"Bank of America","Dummy_Acct_1":"Chase Bank", "98765":"Dummy_Bank_1" }

      4) Account_IDs = ["345723","98765","12345","99999","Dummy_Acct_1"]

      
      Step 1) The first account ID  is 345723

      Step 2) This Account ID is owned by John.

      Step 3) This Account ID is held at Bank of America

      Step 4) Extract or Summarize  the SAR to produce a narrative that references  Account 345723 capturing  both originators and ebenficaries of relevant trxns.

      {"345723": "John deposited $5000 each in Cash into Acct #345723 at Bank of America on Jan 1, 2025. John sends $4000  from Acct #345723 to Jill's account at  Chase on Jan 15,2025" }

      Step 5) The narrative above describes two fully specified, yet distinct set of trxns. The first repersents deposits into Acct #345723.
              The second repreents transfers between Acct #345723 and Jill's Acct at Chase. So they should be separated into distinct narratives.

      {"345723": 
          {"Trxn_Set_1": "John deposited $5000 each in Cash into Acct #345723 at Bank of America on Jan 1, 2025.",
           "Trxn_Set_2": "John sends $4000  from Acct #345723 to Jill's account at  Chase on Jan 15,2025" } }

      
      Now repeat the process for the second account ID

      Step 1) The second  account ID  is 98765

      Step 2) This Account ID is owned by Acme, Inc.

      Step 3) The Account ID is held at Dummy_Bank_1. Given this FI starts with Dummy_, there won't be an expplicit reference to this FI in the SAR narrative.

      Step 4) Extract or Summarize  the SAR to produce a narrative that references the Account 98765 capturing both deposits and withdrawals.

      {"98765": "John sends $2000 from Acct #99999 to Account #98765 on Feb 1,2025" }

      Step 5) The narrative above describes only one set of trxns. So they can be included under one Trxn Set.
      {"98765": 
          {"Trxn_Set_1": "John sends $2000 from Acct #99999 to Account #98765 on Feb 1,2025" } }

      Now repeat the process for the third account ID

      Step 1) The third account ID  is 12345

      Step 2) This Account ID is owned by Jill.

      Step 3) This account is held at Bank of America

      Step 4) Extract the part of the narrative that references the Account 12345 capturing both deposits and withdrawals.

      {"12345": "Jill wired $2000 from her Acct at Chase Bank to her Acct #12345 at Bank of America on Jan 19,2025" }

      Step 5) The narrative above describes only one set of trxns. So they can be included under one Trxn Set.
      {"12345": 
          {"Trxn_Set_1": "Jill wired $2000 from her Acct at Chase Bank to her Acct #12345 at Bank of America on Jan 19,2025" } }

      Now repeat the process for the fourth account ID

      Step 1) The fourth account ID  is 99999

      Step 2) This Account ID is owned by John.

      Step 3) This Account ID is held at Bank of America

      Step 4) Extract or Summarize  the SAR to produce a narrative  that references the Account 99999 capturing both deposits and withdrawals.

      {"99999": "John deposited $5000 each in Cash into  Acct #99999 at Bank of America on Jan 1, 2025. John sends $2000 from Acct #99999 to Account #98765 on Feb 1,2025. " }

      Step 5) The narrative above describes two sets of trxns that are fully specified. So they should be included under two Trxn Sets.
      {"99999": 
          {"Trxn_Set_1": "John deposited $5000 each in Cash into  Acct #99999 at Bank of America on Jan 1, 2025",
           "Trxn_Set_2": "John sends $2000 from Acct #99999 to Account #98765 on Feb 1,2025."} }

      Now repeat the process for the fifth Account ID.

      Step 1) The fifth account ID  is Dummy_Acct_1. Given the accunt ID starts with Dummy_ , there won't be direct references to this account ID in the narrative. 

      Step 2) This Account ID is owned by Jill. Given the Account ID is dummy, pay attention to transactions made by Jill,the owner of the dummy account.

      Step 3) The Account ID is held at Chase Bank. Pay attention to transactions involving Chase Bank

      Step 4) Extract or Summarize  the SAR to produce a narrative that references transactions made by Jill from the Dummy_Acct_1. Include both deposits and withdrawals where possible.

      {"Dummy_Acct_1": "John sends $4000  from Acct #345723 to Jill's account at  Chase Bank on Jan 15,2025. Jill deposited $3000 in Cash into her Acct at Chase Bank on Jan 17,2025  and  then wired $2000 from that account to her Acct #12345 at Bank of America on Jan 19,2025. Jill sends $1000 from her Acct at Chase Bank to Acct #98765 by Wire on Feb 7,2025." }

      Step 5) The narrative above describes four set of Trxns that are fully specified. So they should be included under four trxn sets.

      The first set is the $4000 transfer from  Acct #345723 to Jill's Account at Chase Bank on Jan 15,2025.
      The second set is the $3000 cash deposit Jill made into her acct at Chase Bank on Jan 17,2025.
      The third set is the $2000 wire transfer from  Jill's Acct at Chase Bank to Acct #12345 at bank of America on jan 19,2025
      The fourth set is the $1000 wire from Jill's Acct at Chase Bank to Acct #98765 on Feb 7,2025 

      {"Dummy_Acct_1": 
          {"Trxn_Set_1": "John sends $4000  from Acct #345723 to Jill's account at  Chase Bank on Jan 15,2025",
           "Trxn_Set_2": "Jill deposited $3000 in Cash into her Acct at Chase Bank on Jan 17,2025" ,
           "Trxn_Set_3": "Jill wired $2000 from her account at Chase Bank  to her Acct #12345 at Bank of America on Jan 19,2025" ,
           "Trxn_Set_4": "Jill sends $1000 from her Acct at Chase Bank to Acct #98765 by Wire on Feb 7,2025." } }

      Consolidate narratives for all accounts. Return this information  as Python Dictionary. Return only the final Python dictionary. Do not include any extra commentary, code fences, or text outside the dictionary.

      Narratives = {"345723": 
                       {"Trxn_Set_1": "John deposited $5000 each in Cash into Acct #345723 at Bank of America on Jan 1, 2025.",
                        "Trxn_Set_2": "John sends $4000  from Acct #345723 to Jill's account at  Chase on Jan 15,2025" },
                   "98765": 
                       {"Trxn_Set_1": "John sends $2000 from Acct #99999 to Account #98765 on Feb 1,2025" } ,
                   "12345": 
                        {"Trxn_Set_1": "Jill wired $2000 from her Acct at Chase Bank to her Acct #12345 at Bank of America on Jan 19,2025" },
                   "99999": 
                        {"Trxn_Set_1": "John deposited $5000 each in Cash into  Acct #99999 at Bank of America on Jan 1, 2025",
                         "Trxn_Set_2": "John sends $2000 from Acct #99999 to Account #98765 on Feb 1,2025." },
                   "Dummy_Acct_1":
                       {"Trxn_Set_1": "John sends $4000  from Acct #345723 to Jill's account at  Chase Bank on Jan 15,2025",
                        "Trxn_Set_2": "Jill deposited $3000 in Cash into her Acct at Chase Bank on Jan 17,2025 " ,
                        "Trxn_Set_3": "Jill wired $2000 from her account at Chase Bank  to her Acct #12345 at Bank of America on Jan 19,2025"  ,
                        "Trxn_Set_4": "Jill sends $1000 from her Acct at Chase Bank to Acct #98765 by Wire on Feb 7,2025." 
                        }
                   }


      ----


    ''',
    llm_config=llm_config_large,
    code_execution_config=False,
    human_input_mode="NEVER",
)





In [14]:
# Initiating agent: it is the sender in the sequential chats and passes the
# SAR text to each of the extraction agents.
sar_agent = ConversableAgent(
    name="SAR_Agent",
    llm_config=llm_config_mini,
    human_input_mode="NEVER",
    system_message=(
        "You are assigned a SAR from which to extract transactions. You do this by sharing this SAR with other assistant agents who "
        "work on the SAR and extract useful information.      "
    ),
)





Read in the training SAR files

In [15]:
import os
# Collect the text of every training SAR file (name contains 'train' and ends
# with '.txt') found directly inside data_dir.
train_sars = []
data_dir = "../data/input"

# os.listdir returns entries in arbitrary order, so sort the names: this makes
# train_sars[0], which is used as the working example, the same file on every run.
# NOTE(review): files are opened with the platform default encoding, as before.
for filename in sorted(os.listdir(data_dir)):
    if 'train' in filename and filename.endswith('.txt'):
        file_path = os.path.join(data_dir, filename)
        with open(file_path, 'r') as file:
            content = file.read()
            train_sars.append(content)

In [16]:
# Sanity check: number of training SAR files that were loaded.
len(train_sars)

4

In [17]:
# Use the first loaded SAR as the message sent to every agent in the chat sequence.
message = train_sars[0]

In [18]:
# Summary prompt for the entity-extraction chat: asks for a single Python
# dictionary with the keys Entities, Account_IDs, Acct_to_FI and Acct_to_Cust.
# Every placeholder key in the template is fully quoted so the template itself
# is a well-formed dictionary skeleton.
ee_agent_summary_prompt = '''
 Return the extracted entity information including Dummy Account IDs and Dummy Entities as a Python dictionary only.Do not include any extra commentary, code fences, or text outside the dictionary.
        {
          "Entities": {
            "Individuals": <List of Individuals>,
            "Organizations": <List of Organizations>,
            "Financial_Institutions": <List of Financial Institutions>
                      },
          "Account_IDs": <List of Account IDs>,
          "Acct_to_FI": {
            "<Acct_ID_1>" : "<Financial_Institution_1>",
            "<Acct_ID_2>" : "<Financial_Institution_2>",
            "<Acct_ID_3>" : "<Financial_Institution_3>"
          },
          "Acct_to_Cust": {
            "<Acct_ID_1>": "<Individual_1>",
            "<Acct_ID_2>": "<Individual_2>",
            "<Acct_ID_3>": "<Organization_1>"
          }
        }
'''

In [19]:
# Summary prompt for the entity-resolution chat: asks for a single Python
# dictionary keyed by "FI_to_Acct_to_Cust", mapping each financial institution
# to its {account: customer ID} dictionary. The two institution entries in the
# template are separated by a comma so the skeleton is well-formed.
er_agent_summary_prompt = '''
                    Return the extracted information as a Python dictionary only. Do not include any extra commentary, code fences, or text outside the dictionary.
                  { "FI_to_Acct_to_Cust" :{<Financial Institution 1>: { <ACCT_1> : <CUST_ID_1> , <ACCT_2> : <CUST_ID_2>, <ACCT_3> : <CUST_ID_3> },
                                           <Financial Institution 2>: { <ACCT_4> : <CUST_ID_4> , <ACCT_5> : <CUST_ID_5>} } }
                          '''

In [20]:
# Summary prompt for the narrative-extraction chat: asks for a single Python
# dictionary keyed by "Narratives", mapping each account to its
# {transaction-set ID: excerpt} dictionary. Braces and angle-bracket
# placeholders in the template are balanced so the skeleton is well-formed.
ne_agent_summary_prompt = ''' Return the extracted information as a Python dictionary only. Do not include any extra commentary, code fences, or text outside the dictionary.
                             {"Narratives": {<Acct_1>: {<Trxn_Set_ID> : <Excerpt relevant to Trxn Set of Acct_1>},
                                             <Acct_2>: {<Trxn_Set_ID> : <Excerpt relevant to Trxn Set of Acct_2>},
                                             <Acct_3>: {<Trxn_Set_ID> : <Excerpt relevant to Trxn Set of Acct_3>} } }
                            '''

Combine to create a sequential chat

In [23]:
# Run the three extraction agents one after another on the same SAR message.
# Each chat is a single turn and is summarised by an LLM reflection that uses
# the agent-specific summary prompt.
chat_results = sar_agent.initiate_chats(
    [
        {
            "recipient": recipient,
            "message": message,
            "max_turns": 1,
            "summary_method": "reflection_with_llm",
            "summary_args": {"summary_prompt": summary_prompt},
        }
        for recipient, summary_prompt in (
            (entity_extraction_agent, ee_agent_summary_prompt),
            (entity_resolution_agent, er_agent_summary_prompt),
            (narrative_extraction_agent, ne_agent_summary_prompt),
        )
    ]
)

[34m
********************************************************************************[0m
[34mStarting a new chat....[0m
[34m
********************************************************************************[0m
[33mSAR_Agent[0m (to Entity_Extraction_Agent):

Investigation case number: A5678910. The customer, a grocery store and  its owner, are suspected of intentionally structuring cash deposits to  circumvent federal reporting requirements.  The customer is also  engaged in activity indicative of an informal value transfer operation: deposits of bulk cash, third party out of state personal checks and money 
orders, and engaging in aggregate wire transfers to Dubai, UAE.  The type and volume of activity observed is non-commensurate with the customer’s expected business volume and deviates from the normal volume of similar types of businesses located in the same area as the customer.  Investigative activities are continuing. Our bank has elected to 
directly contact law enforcemen

In [24]:
# Show each chat's summary, followed by two blank lines as a visual separator.
for chat_result in chat_results:
    print(chat_result.summary, end="\n\n\n")

{
  "Entities": {
    "Individuals": ["John Doe"],
    "Organizations": ["Acme, Inc.", "Kulkutta Building Supply Company"],
    "Financial_Institutions": ["Bank of Anan"]
  },
  "Account_IDs": ["12345-6789", "23456-7891", "3489728", "Dummy_Acct_1"],
  "Acct_to_FI": {
    "12345-6789": "Dummy_Bank_1",
    "23456-7891": "Dummy_Bank_1",
    "3489728": "Bank of Anan",
    "Dummy_Acct_1": "Dummy_Bank_1"
  },
  "Acct_to_Cust": {
    "12345-6789": "John Doe",
    "23456-7891": "Acme, Inc.",
    "3489728": "Kulkutta Building Supply Company",
    "Dummy_Acct_1": "Dummy_Customer"
  }
}


{
  "FI_to_Acct_to_Cust": {
    "Dummy_Bank_1": {
      "12345-6789": "CUST_001",
      "23456-7891": "CUST_002",
      "Dummy_Acct_1": "CUST_003"
    },
    "Bank of Anan": {
      "3489728": "CUST_004"
    }
  }
}


{
  "Narratives": {
    "12345-6789": {
      "Trxn_Set_1": "Between January 2 and March 17, 2003, 13 deposits consisting of cash, checks, and money orders were made into John Doe's personal accoun

Extract the results for consumption by the transaction-generation model

In [25]:
# Print the summary of the third chat (the narrative-extraction agent).
print(chat_results[2].summary)

{
  "Narratives": {
    "12345-6789": {
      "Trxn_Set_1": "Between January 2 and March 17, 2003, 13 deposits consisting of cash, checks, and money orders were made into John Doe's personal account #12345-6789 totaling approximately $50,000. Individual amounts ranged between $1,500 and $9,500, often occurring on consecutive business days. A number of third-party out-of-state checks and money orders were also deposited into the account."
    },
    "23456-7891": {
      "Trxn_Set_1": "Between January 17 and March 21, 2003, John Doe originated nine wires totaling $225,000 from the business checking account #23456-7891. Each wire was $25,000, conducted at the end of each week, and remitted to the Bank of Anan in Dubai, UAE, benefiting Kulkutta Building Supply Company, account #3489728.",
      "Trxn_Set_2": "A review of deposit activity on the Acme, Inc. account from January 2 to March 17, 2003, revealed 33 deposits consisting of cash, checks, and money orders totaling approximately $275

In [26]:
# Summary strings of the three chats, in the order the chats were run.
results0, results1, results2 = (chat_results[idx].summary for idx in range(3))

In [27]:
import ast
# Parse each summary string as a Python literal (a dictionary is expected).
results0_dict, results1_dict, results2_dict = (
    ast.literal_eval(raw_summary) for raw_summary in (results0, results1, results2)
)

In [28]:
# Display the three parsed dictionaries side by side for inspection.
results0_dict, results1_dict,results2_dict

({'Entities': {'Individuals': ['John Doe'],
   'Organizations': ['Acme, Inc.', 'Kulkutta Building Supply Company'],
   'Financial_Institutions': ['Bank of Anan']},
  'Account_IDs': ['12345-6789', '23456-7891', '3489728', 'Dummy_Acct_1'],
  'Acct_to_FI': {'12345-6789': 'Dummy_Bank_1',
   '23456-7891': 'Dummy_Bank_1',
   '3489728': 'Bank of Anan',
   'Dummy_Acct_1': 'Dummy_Bank_1'},
  'Acct_to_Cust': {'12345-6789': 'John Doe',
   '23456-7891': 'Acme, Inc.',
   '3489728': 'Kulkutta Building Supply Company',
   'Dummy_Acct_1': 'Dummy_Customer'}},
 {'FI_to_Acct_to_Cust': {'Dummy_Bank_1': {'12345-6789': 'CUST_001',
    '23456-7891': 'CUST_002',
    'Dummy_Acct_1': 'CUST_003'},
   'Bank of Anan': {'3489728': 'CUST_004'}}},
 {'Narratives': {'12345-6789': {'Trxn_Set_1': "Between January 2 and March 17, 2003, 13 deposits consisting of cash, checks, and money orders were made into John Doe's personal account #12345-6789 totaling approximately $50,000. Individual amounts ranged between $1,500 and 

In [29]:
# Merge the three parsed dictionaries into one; if a top-level key repeats,
# the value from the later dictionary wins.
combined_dict_0 = {
    key: value
    for partial in (results0_dict, results1_dict, results2_dict)
    for key, value in partial.items()
}

In [30]:
# Display the merged dictionary.
combined_dict_0

{'Entities': {'Individuals': ['John Doe'],
  'Organizations': ['Acme, Inc.', 'Kulkutta Building Supply Company'],
  'Financial_Institutions': ['Bank of Anan']},
 'Account_IDs': ['12345-6789', '23456-7891', '3489728', 'Dummy_Acct_1'],
 'Acct_to_FI': {'12345-6789': 'Dummy_Bank_1',
  '23456-7891': 'Dummy_Bank_1',
  '3489728': 'Bank of Anan',
  'Dummy_Acct_1': 'Dummy_Bank_1'},
 'Acct_to_Cust': {'12345-6789': 'John Doe',
  '23456-7891': 'Acme, Inc.',
  '3489728': 'Kulkutta Building Supply Company',
  'Dummy_Acct_1': 'Dummy_Customer'},
 'FI_to_Acct_to_Cust': {'Dummy_Bank_1': {'12345-6789': 'CUST_001',
   '23456-7891': 'CUST_002',
   'Dummy_Acct_1': 'CUST_003'},
  'Bank of Anan': {'3489728': 'CUST_004'}},
 'Narratives': {'12345-6789': {'Trxn_Set_1': "Between January 2 and March 17, 2003, 13 deposits consisting of cash, checks, and money orders were made into John Doe's personal account #12345-6789 totaling approximately $50,000. Individual amounts ranged between $1,500 and $9,500, often occur

In [31]:
import copy
# Shallow copy, so replacing the top-level "Narratives" entry on the copy
# leaves results2_dict untouched.
results2_dict_ = results2_dict.copy()

Retain only the narrative for account 23456-7891 for simplicity.

In [55]:
# Keep only the narratives of the selected account IDs.
keys_to_keep = {'23456-7891'}
results2_dict_new = {
    acct_id: trxn_sets
    for acct_id, trxn_sets in results2_dict["Narratives"].items()
    if acct_id in keys_to_keep
}
print(results2_dict_new)

{'23456-7891': {'Trxn_Set_1': 'Between January 17 and March 21, 2003, John Doe originated nine wires totaling $225,000 from the business checking account #23456-7891. Each wire was $25,000, conducted at the end of each week, and remitted to the Bank of Anan in Dubai, UAE, benefiting Kulkutta Building Supply Company, account #3489728.', 'Trxn_Set_2': 'A review of deposit activity on the Acme, Inc. account from January 2 to March 17, 2003, revealed 33 deposits consisting of cash, checks, and money orders totaling approximately $275,000. Individual amounts ranged between $4,446 and $9,729, with 22 of 33 deposits ranging between $9,150 and $9,980. In nine of 13 instances where cash deposits were made to both accounts on the same day, combined deposits exceeded $10,000, prompting currency transaction reports to the IRS.'}}


In [56]:
# Swap the filtered narratives into the copied result, then display it.
results2_dict_.update(Narratives=results2_dict_new)
results2_dict_

{'Narratives': {'23456-7891': {'Trxn_Set_1': 'Between January 17 and March 21, 2003, John Doe originated nine wires totaling $225,000 from the business checking account #23456-7891. Each wire was $25,000, conducted at the end of each week, and remitted to the Bank of Anan in Dubai, UAE, benefiting Kulkutta Building Supply Company, account #3489728.',
   'Trxn_Set_2': 'A review of deposit activity on the Acme, Inc. account from January 2 to March 17, 2003, revealed 33 deposits consisting of cash, checks, and money orders totaling approximately $275,000. Individual amounts ranged between $4,446 and $9,729, with 22 of 33 deposits ranging between $9,150 and $9,980. In nine of 13 instances where cash deposits were made to both accounts on the same day, combined deposits exceeded $10,000, prompting currency transaction reports to the IRS.'}}}

In [57]:
# Merge entity, resolution and filtered-narrative results into one dictionary;
# if a top-level key repeats, the value from the later dictionary wins.
combined_dict = {
    key: value
    for partial in (results0_dict, results1_dict, results2_dict_)
    for key, value in partial.items()
}
combined_dict

{'Entities': {'Individuals': ['John Doe'],
  'Organizations': ['Acme, Inc.', 'Kulkutta Building Supply Company'],
  'Financial_Institutions': ['Bank of Anan']},
 'Account_IDs': ['12345-6789', '23456-7891', '3489728', 'Dummy_Acct_1'],
 'Acct_to_FI': {'12345-6789': 'Dummy_Bank_1',
  '23456-7891': 'Dummy_Bank_1',
  '3489728': 'Bank of Anan',
  'Dummy_Acct_1': 'Dummy_Bank_1'},
 'Acct_to_Cust': {'12345-6789': 'John Doe',
  '23456-7891': 'Acme, Inc.',
  '3489728': 'Kulkutta Building Supply Company',
  'Dummy_Acct_1': 'Dummy_Customer'},
 'FI_to_Acct_to_Cust': {'Dummy_Bank_1': {'12345-6789': 'CUST_001',
   '23456-7891': 'CUST_002',
   'Dummy_Acct_1': 'CUST_003'},
  'Bank of Anan': {'3489728': 'CUST_004'}},
 'Narratives': {'23456-7891': {'Trxn_Set_1': 'Between January 17 and March 21, 2003, John Doe originated nine wires totaling $225,000 from the business checking account #23456-7891. Each wire was $25,000, conducted at the end of each week, and remitted to the Bank of Anan in Dubai, UAE, bene

Convert back to a string

In [58]:
import json

# Serialise the merged dictionary as pretty-printed (2-space indented) JSON text.
trxn_extraction_inputs = json.dumps(obj=combined_dict, indent=2)
trxn_extraction_inputs

'{\n  "Entities": {\n    "Individuals": [\n      "John Doe"\n    ],\n    "Organizations": [\n      "Acme, Inc.",\n      "Kulkutta Building Supply Company"\n    ],\n    "Financial_Institutions": [\n      "Bank of Anan"\n    ]\n  },\n  "Account_IDs": [\n    "12345-6789",\n    "23456-7891",\n    "3489728",\n    "Dummy_Acct_1"\n  ],\n  "Acct_to_FI": {\n    "12345-6789": "Dummy_Bank_1",\n    "23456-7891": "Dummy_Bank_1",\n    "3489728": "Bank of Anan",\n    "Dummy_Acct_1": "Dummy_Bank_1"\n  },\n  "Acct_to_Cust": {\n    "12345-6789": "John Doe",\n    "23456-7891": "Acme, Inc.",\n    "3489728": "Kulkutta Building Supply Company",\n    "Dummy_Acct_1": "Dummy_Customer"\n  },\n  "FI_to_Acct_to_Cust": {\n    "Dummy_Bank_1": {\n      "12345-6789": "CUST_001",\n      "23456-7891": "CUST_002",\n      "Dummy_Acct_1": "CUST_003"\n    },\n    "Bank of Anan": {\n      "3489728": "CUST_004"\n    }\n  },\n  "Narratives": {\n    "23456-7891": {\n      "Trxn_Set_1": "Between January 17 and March 21, 2003, J

In [59]:
# print() renders the embedded newlines, so the JSON is shown indented instead
# of as one escaped string.
print(trxn_extraction_inputs)

{
  "Entities": {
    "Individuals": [
      "John Doe"
    ],
    "Organizations": [
      "Acme, Inc.",
      "Kulkutta Building Supply Company"
    ],
    "Financial_Institutions": [
      "Bank of Anan"
    ]
  },
  "Account_IDs": [
    "12345-6789",
    "23456-7891",
    "3489728",
    "Dummy_Acct_1"
  ],
  "Acct_to_FI": {
    "12345-6789": "Dummy_Bank_1",
    "23456-7891": "Dummy_Bank_1",
    "3489728": "Bank of Anan",
    "Dummy_Acct_1": "Dummy_Bank_1"
  },
  "Acct_to_Cust": {
    "12345-6789": "John Doe",
    "23456-7891": "Acme, Inc.",
    "3489728": "Kulkutta Building Supply Company",
    "Dummy_Acct_1": "Dummy_Customer"
  },
  "FI_to_Acct_to_Cust": {
    "Dummy_Bank_1": {
      "12345-6789": "CUST_001",
      "23456-7891": "CUST_002",
      "Dummy_Acct_1": "CUST_003"
    },
    "Bank of Anan": {
      "3489728": "CUST_004"
    }
  },
  "Narratives": {
    "23456-7891": {
      "Trxn_Set_1": "Between January 17 and March 21, 2003, John Doe originated nine wires totaling $225,

Construct the prompt to pass to the trxn generation agent

In [79]:
import os
import sys

# Resolve the directory one level above the current working directory and put
# it at the front of the import search path, unless it is already listed.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(parent_dir) == 0:
    sys.path.insert(0, parent_dir)



In [81]:
def split_dictionary_into_subnarratives(data: dict) -> list:
    """
    Split the combined extraction dictionary into one dictionary per
    (account ID, transaction set) pair.

    Each returned dictionary keeps every top-level field of ``data`` other
    than 'Narratives' (e.g. 'Entities', 'Account_IDs', 'Acct_to_FI',
    'Acct_to_Cust', 'FI_to_Acct_to_Cust'), followed by a 'Narratives' field
    narrowed down to exactly one account ID and one transaction set.

    The shared fields are deep-copied, so changing one result never changes
    ``data`` or any other result.

    :param data: The original dictionary. 'Narratives' is expected to map
                 account ID -> {transaction set label: narrative text}.
    :return: A list of dictionaries, one per (account ID, transaction set)
             pair, in the iteration order of 'Narratives'. The list is empty
             when 'Narratives' is missing, None or empty.
    """
    # Local import keeps this cell runnable on its own.
    from copy import deepcopy

    narratives = data.get("Narratives") or {}
    # Everything except the narratives is context repeated in every result.
    shared_fields = {key: value for key, value in data.items() if key != "Narratives"}

    results = []
    for acct_id, trxn_sets in narratives.items():
        for trxn_set_label, narration_text in trxn_sets.items():
            new_dict = deepcopy(shared_fields)
            # Narrow the narratives down to one (acct_id, trxn_set_label).
            new_dict["Narratives"] = {acct_id: {trxn_set_label: narration_text}}
            results.append(new_dict)

    return results

In [82]:
# The helper is defined in the cell above. The commented import is the
# alternative of loading it from a `utils` module (presumably one in the parent
# directory that was added to sys.path earlier — TODO confirm it exists).
#from utils import split_dictionary_into_subnarratives
split_dictionary_into_subnarratives(combined_dict)

[{'Entities': {'Individuals': ['John Doe'],
   'Organizations': ['Acme, Inc.', 'Kulkutta Building Supply Company'],
   'Financial_Institutions': ['Bank of Anan']},
  'Account_IDs': ['12345-6789', '23456-7891', '3489728', 'Dummy_Acct_1'],
  'Acct_to_FI': {'12345-6789': 'Dummy_Bank_1',
   '23456-7891': 'Dummy_Bank_1',
   '3489728': 'Bank of Anan',
   'Dummy_Acct_1': 'Dummy_Bank_1'},
  'Acct_to_Cust': {'12345-6789': 'John Doe',
   '23456-7891': 'Acme, Inc.',
   '3489728': 'Kulkutta Building Supply Company',
   'Dummy_Acct_1': 'Dummy_Customer'},
  'FI_to_Acct_to_Cust': {'Dummy_Bank_1': {'12345-6789': 'CUST_001',
    '23456-7891': 'CUST_002',
    'Dummy_Acct_1': 'CUST_003'},
   'Bank of Anan': {'3489728': 'CUST_004'}},
  'Narratives': {'23456-7891': {'Trxn_Set_1': 'Between January 17 and March 21, 2003, John Doe originated nine wires totaling $225,000 from the business checking account #23456-7891. Each wire was $25,000, conducted at the end of each week, and remitted to the Bank of Anan in

In [72]:
# Look at the 'Narratives' field on its own:
# account ID -> {transaction-set label: narrative text}.
combined_dict.get("Narratives")

{'23456-7891': {'Trxn_Set_1': 'Between January 17 and March 21, 2003, John Doe originated nine wires totaling $225,000 from the business checking account #23456-7891. Each wire was $25,000, conducted at the end of each week, and remitted to the Bank of Anan in Dubai, UAE, benefiting Kulkutta Building Supply Company, account #3489728.',
  'Trxn_Set_2': 'A review of deposit activity on the Acme, Inc. account from January 2 to March 17, 2003, revealed 33 deposits consisting of cash, checks, and money orders totaling approximately $275,000. Individual amounts ranged between $4,446 and $9,729, with 22 of 33 deposits ranging between $9,150 and $9,980. In nine of 13 instances where cash deposits were made to both accounts on the same day, combined deposits exceeded $10,000, prompting currency transaction reports to the IRS.'}}

In [74]:
# Scratch walk-through of the helper's first steps: start with an empty result
# list and pull out the 'Narratives' mapping ({} if the key is absent).
results = []
original_narrative = combined_dict.get("Narratives", {})
original_narrative

{'23456-7891': {'Trxn_Set_1': 'Between January 17 and March 21, 2003, John Doe originated nine wires totaling $225,000 from the business checking account #23456-7891. Each wire was $25,000, conducted at the end of each week, and remitted to the Bank of Anan in Dubai, UAE, benefiting Kulkutta Building Supply Company, account #3489728.',
  'Trxn_Set_2': 'A review of deposit activity on the Acme, Inc. account from January 2 to March 17, 2003, revealed 33 deposits consisting of cash, checks, and money orders totaling approximately $275,000. Individual amounts ranged between $4,446 and $9,729, with 22 of 33 deposits ranging between $9,150 and $9,980. In nine of 13 instances where cash deposits were made to both accounts on the same day, combined deposits exceeded $10,000, prompting currency transaction reports to the IRS.'}}

In [75]:
# Build one input dictionary per (account ID, transaction set) pair by calling
# the helper defined earlier in this notebook rather than repeating its loop.
data = combined_dict  # `data` stays defined for any later cell that reads it
results = split_dictionary_into_subnarratives(data)

In [77]:
# First split input: the shared context fields plus only Trxn_Set_1.
results[0]

{'Entities': {'Individuals': ['John Doe'],
  'Organizations': ['Acme, Inc.', 'Kulkutta Building Supply Company'],
  'Financial_Institutions': ['Bank of Anan']},
 'Account_IDs': ['12345-6789', '23456-7891', '3489728', 'Dummy_Acct_1'],
 'Acct_to_FI': {'12345-6789': 'Dummy_Bank_1',
  '23456-7891': 'Dummy_Bank_1',
  '3489728': 'Bank of Anan',
  'Dummy_Acct_1': 'Dummy_Bank_1'},
 'Acct_to_Cust': {'12345-6789': 'John Doe',
  '23456-7891': 'Acme, Inc.',
  '3489728': 'Kulkutta Building Supply Company',
  'Dummy_Acct_1': 'Dummy_Customer'},
 'FI_to_Acct_to_Cust': {'Dummy_Bank_1': {'12345-6789': 'CUST_001',
   '23456-7891': 'CUST_002',
   'Dummy_Acct_1': 'CUST_003'},
  'Bank of Anan': {'3489728': 'CUST_004'}},
 'Narratives': {'23456-7891': {'Trxn_Set_1': 'Between January 17 and March 21, 2003, John Doe originated nine wires totaling $225,000 from the business checking account #23456-7891. Each wire was $25,000, conducted at the end of each week, and remitted to the Bank of Anan in Dubai, UAE, bene

In [78]:
# Second split input: the shared context fields plus only Trxn_Set_2.
results[1]

{'Entities': {'Individuals': ['John Doe'],
  'Organizations': ['Acme, Inc.', 'Kulkutta Building Supply Company'],
  'Financial_Institutions': ['Bank of Anan']},
 'Account_IDs': ['12345-6789', '23456-7891', '3489728', 'Dummy_Acct_1'],
 'Acct_to_FI': {'12345-6789': 'Dummy_Bank_1',
  '23456-7891': 'Dummy_Bank_1',
  '3489728': 'Bank of Anan',
  'Dummy_Acct_1': 'Dummy_Bank_1'},
 'Acct_to_Cust': {'12345-6789': 'John Doe',
  '23456-7891': 'Acme, Inc.',
  '3489728': 'Kulkutta Building Supply Company',
  'Dummy_Acct_1': 'Dummy_Customer'},
 'FI_to_Acct_to_Cust': {'Dummy_Bank_1': {'12345-6789': 'CUST_001',
   '23456-7891': 'CUST_002',
   'Dummy_Acct_1': 'CUST_003'},
  'Bank of Anan': {'3489728': 'CUST_004'}},
 'Narratives': {'23456-7891': {'Trxn_Set_2': 'A review of deposit activity on the Acme, Inc. account from January 2 to March 17, 2003, revealed 33 deposits consisting of cash, checks, and money orders totaling approximately $275,000. Individual amounts ranged between $4,446 and $9,729, with 

In [84]:
# Pretty-print each of the two split dictionaries as its own JSON string.
trxn_extraction_inputs1, trxn_extraction_inputs2 = (
    json.dumps(sub_narrative, indent=2)
    for sub_narrative in (results[0], results[1])
)

In [85]:
# Show the JSON text built from results[0].
print(trxn_extraction_inputs1)

{
  "Entities": {
    "Individuals": [
      "John Doe"
    ],
    "Organizations": [
      "Acme, Inc.",
      "Kulkutta Building Supply Company"
    ],
    "Financial_Institutions": [
      "Bank of Anan"
    ]
  },
  "Account_IDs": [
    "12345-6789",
    "23456-7891",
    "3489728",
    "Dummy_Acct_1"
  ],
  "Acct_to_FI": {
    "12345-6789": "Dummy_Bank_1",
    "23456-7891": "Dummy_Bank_1",
    "3489728": "Bank of Anan",
    "Dummy_Acct_1": "Dummy_Bank_1"
  },
  "Acct_to_Cust": {
    "12345-6789": "John Doe",
    "23456-7891": "Acme, Inc.",
    "3489728": "Kulkutta Building Supply Company",
    "Dummy_Acct_1": "Dummy_Customer"
  },
  "FI_to_Acct_to_Cust": {
    "Dummy_Bank_1": {
      "12345-6789": "CUST_001",
      "23456-7891": "CUST_002",
      "Dummy_Acct_1": "CUST_003"
    },
    "Bank of Anan": {
      "3489728": "CUST_004"
    }
  },
  "Narratives": {
    "23456-7891": {
      "Trxn_Set_1": "Between January 17 and March 21, 2003, John Doe originated nine wires totaling $225,

In [86]:
# Show the JSON text built from results[1].
print(trxn_extraction_inputs2)

{
  "Entities": {
    "Individuals": [
      "John Doe"
    ],
    "Organizations": [
      "Acme, Inc.",
      "Kulkutta Building Supply Company"
    ],
    "Financial_Institutions": [
      "Bank of Anan"
    ]
  },
  "Account_IDs": [
    "12345-6789",
    "23456-7891",
    "3489728",
    "Dummy_Acct_1"
  ],
  "Acct_to_FI": {
    "12345-6789": "Dummy_Bank_1",
    "23456-7891": "Dummy_Bank_1",
    "3489728": "Bank of Anan",
    "Dummy_Acct_1": "Dummy_Bank_1"
  },
  "Acct_to_Cust": {
    "12345-6789": "John Doe",
    "23456-7891": "Acme, Inc.",
    "3489728": "Kulkutta Building Supply Company",
    "Dummy_Acct_1": "Dummy_Customer"
  },
  "FI_to_Acct_to_Cust": {
    "Dummy_Bank_1": {
      "12345-6789": "CUST_001",
      "23456-7891": "CUST_002",
      "Dummy_Acct_1": "CUST_003"
    },
    "Bank of Anan": {
      "3489728": "CUST_004"
    }
  },
  "Narratives": {
    "23456-7891": {
      "Trxn_Set_2": "A review of deposit activity on the Acme, Inc. account from January 2 to March 17, 2

# Phase 2 - Convert Narratives to Trxns

Agent that synthesizes transactions when the narrative describes only a few of them (typically 10 or fewer), especially when amounts and dates are listed explicitly.

In [129]:
# LLM-only agent for narratives that describe a small number of transactions.
# The system message is a step-by-step procedure followed by one worked example.
# The sub-steps of Step 2 are lettered in order (a-f), and the example output
# is a balanced dictionary that uses the same entity names as its own
# Acct_to_Cust mapping, so the model is shown a consistent, well-formed target.
trxn_generation_agent = ConversableAgent(
    name="trxn_generation_agent",
    system_message='''
    You are an AI Agent tasked with synthesizing transactions from a narrative you are provided. Your strength is synthesizing a small number of transactions,
    typically 10 or fewer transactions, especially when transaction amounts and dates are explicitly listed.

      You will have the following four pieces of information.

      
      1) A Dictionary called Narrative with Account IDs as key  and  narratives describing transactions made by that account as the values. </n>
      2) The mapping between Individuals and Organizations referenced in the narrative to Account IDs, given by the dictionary Acct_to_Cust. Note, account IDs starting with the prefix "Dummy_" 
         are placeholders for account IDs that are referenced but have not been explicitly identified in the narrative. </n>
      3) The mapping between Financial Institutions to Account IDs given by the dictionary Acct_to_FI. If the Financial institution for certain accounts are not specified in the narrative,
        a dummy financial institution starting with "Dummy_Bank_" will be used for them.</n>
      4) A mapping between Financial institution, Customer IDs and Accounts owned by those Customer IDs given by the dictionary FI_to_Acct_to_Cust
      
      For the narrative corresponding  to each account ID, do the following:

      Step 1) Identify the number of transactions described in the narrative. Note this in a scratch pad for verification later.

      Step 2) For each transaction involving the account ID,  

      a) Identify the transaction channel or product used (i.e. Trxn_Channel). This could be Wire, Cash, Checks or something similar. If this is missing, you can make a reasonable assumption.

      b) Identify the Account ID of the account sending the trxn( i.e. Originator_Account_ID) , the Customer ID corresponding to the account (i.e. Originator_Customer_ID),
         the Individual Or Organization originating the transaction (i.e. Originator_Name).

      c) Identify the Account ID of the account receiving the trxn (i.e. Beneficiary_Account_ID), the Customer ID corresponding to the account (i.e. Beneficiary_Customer_ID),
         the Beneficiary_Name which is the Individual Or Organization who is the Beneficiary of the transaction. Note that for cash transactions, if only information on the originator or benficary are available, you can assume the originator and beneficiary are the same.

      d) Identify the date of the transaction (i.e. Trxn_Date). If this is missing, you can make reasonable assumptions.

      e) Identify the amount of the transaction (i.e. Trxn_Amount)

      f) If this is a Cash or Check or similar transaction, determine the Branch or ATM Location where the transaction was conducted (i.e. Branch_or_ATM_Location). If this is not specified, it can be considered missing.

      Ensure the following are extracted.

      -Originator_Name
      -Originator_Account_ID
      -Originator_Customer_ID
      -Beneficiary_Name
      -Beneficiary_Account_ID
      -Beneficiary_Customer_ID
      -Trxn_Channel
      -Trxn_Date in yyyy-mm-dd format
      -Trxn_Amount
      -Branch_or_ATM_Location

      If any other information is missing and can't be reasonably assumed, use an empty string "" to indicate it is missing.

      Step 3) Ensure the number of transactions extracted in Step 2 are the same as that noted in Step 1. if this is not the case, repeat  Step 2.

      Step 4) Return the transactions as a Python Dictionary with the key being a Trxn ID and value being the transaction attributes

      An example is given below, demarcated by the delimiter ----..

      ----

      Narrative = {"345723": "John deposited $5000 in Cash into Acct #345723 at the Main Road, NY Branch of Bank of America on Jan 4, 2024.
                    John sends $3000 to Acme Inc's account at Bank of America by Wire on Jan 6, 2024 . John  wrote a check to Jill from Acct #345723 on Jan 8,2024 for $1,000"}

      Acct_to_Cust = {"345723": "John", "Dummy_001":"Jill", "98765":"Acme Inc"}
      Acct_to_FI = {"345723":"Bank of America","98765":"Bank of America", "Dummy_001":"Chase Bank" }     
      FI_to_Acct_to_Cust = {"Bank of America": {"345723":"CUST_001", "98765":"CUST_002"}, "Chase Bank": {"Dummy_001": "CUST_003"}}

      
      Step 1) Identify the number of transactions described in the narrative. Note this in a scratch pad for verification later.
        There are three trxns described in the narrative. A $5000 deposit, A $3000 transfer and a $1000 transfer.

      Step 2) For each transaction, identify the required attributes.

      For the first transaction, the Beneficiary_Name is John as the money is being deposited into John's account. The Beneficiary_Account_ID is #345723. From FI_to_Acct_to_Cust, 
      The Beneficiary Customer ID is CUST_001.

      The Originator_Name is John, but this is a cash deposit , so there is no Account ID specified from which the cash originates.
      Given this is a cash deposit and information on the Originator is unavailable, it can be assumed to be the same the Beneficary Information.

      The Trxn Channel is Cash as it is a cash deposit.
      The Trxn Date is  2024-01-04.
      The Trxn Amount is $5,000.
      The Branch was Main Road , NY

      For the second transaction, the Originator_Name is John. Although no account ID is specified, it is a wire transaction following a cash transaction, 
      so it has to come from John's account #345723.The Originator_Account_ID is #345723. The Originator Customer ID is CUST_001 as per FI_to_Acct_to_Cust.

      The Beneficiary_Name is Acme Inc. The Beneficiary_Account_ID is #98765 as this account belongs to Acme Inc at Bank of America.
      From FI_to_Acct_to_Cust, The Beneficiary Customer ID is CUST_002.


      The Trxn Channel is Wire.
      The Trxn Date is  2024-01-06.
      The Trxn Amount is $3,000.
      The Branch is Missing as it is a wire transaction.

      For the third transaction, the Originator_Name is John. The Originator_Account_ID is 345723 . The Originator Customer ID is CUST_001 as per FI_to_Acct_to_Cust.

      The Beneficiary_Name is Jill. The Beneficiary_Account_ID must be #Dummy_001 as the information provided in Acct_to_Cust and Acct_to_FI indicates that this is Jill's account at Chase Bank.
      From FI_to_Acct_to_Cust, The Beneficiary Customer ID is CUST_003.


      The Trxn Channel is Check
      The Trxn Date is  2024-01-08.
      The Trxn Amount is $1,000.
      The Branch is Missing as it is a Check.

      Step 3) Ensure the number of transactions extracted in Step 2 are the same as that noted in Step 1. if this is not the case, go back to Step 2.

      Three transactions have been identified, which is the same as the number of transaction in scratch pad from Step 1)

      Step 4) Return the transactions as a Python Dictionary with the key being a Trxn ID and value being the transaction attributes

      Now the transactions can be combined into a Python Dictionary with a key being a Trxn ID staring from 1.

      Trxns = {"345723": {1 : {"Originator_Name": "John", "Originator_Account_ID": "345723", "Originator_Customer_ID":"CUST_001","Beneficiary_Name": "John","Beneficiary_Account_ID":"345723",
                "Beneficiary_Customer_ID":"CUST_001" ,"Trxn_Channel":"Cash", "Trxn_Date":"2024-01-04", "Trxn_Amount":5000, "Branch_or_ATM_Location": "Main Road,NY"
              },
                2: {"Originator_Name": "John", "Originator_Account_ID": "345723", "Originator_Customer_ID":"CUST_001","Beneficiary_Name": "Acme Inc","Beneficiary_Account_ID":"98765",
                "Beneficiary_Customer_ID":"CUST_002" ,"Trxn_Channel":"Wire", "Trxn_Date":"2024-01-06", "Trxn_Amount":3000, "Branch_or_ATM_Location": ""
              },
              3: {"Originator_Name": "John", "Originator_Account_ID": "345723", "Originator_Customer_ID":"CUST_001","Beneficiary_Name": "Jill","Beneficiary_Account_ID":"Dummy_001",
                "Beneficiary_Customer_ID":"CUST_003" ,"Trxn_Channel":"Check", "Trxn_Date":"2024-01-08", "Trxn_Amount":1000, "Branch_or_ATM_Location": ""
              }
          }
      }

      ----


    ''',
    llm_config=llm_config_mini,
    code_execution_config=False,
    # The description uses the same "10 or fewer" threshold as the system
    # message, so it does not leave a gap next to the "more than 10" agent.
    description = "An AI Assistant who is good at generating a small number of transactions, typically 10 or fewer trxns, "
    "especially when the precise trxn amount  and date of each trxn is known",
    human_input_mode="NEVER",
)





Create a tool to help synthesize transactions

In [130]:
from typing import List, Literal
from typing_extensions import Annotated

import numpy as np
from datetime import datetime, timedelta
import random

Channels_allowed = Literal["Wire","Cash","Check"]
def generate_transactions(
        Originator_Name:Annotated[str, "Entity or Customer originating the transactions"],
        Originator_Account_ID:Annotated[str, "Account  of Entity or Customer originating the transactions"],
        Originator_Customer_ID:Annotated[str, "Customer ID of Entity or Customer originating the transactions"],
        Beneficiary_Name:Annotated[str, "Entity or Customer receiving the transactions"], 
        Beneficiary_Account_ID:Annotated[str, "Account of Entity or Customer  receiving the transactions"],
        Beneficiary_Customer_ID:Annotated[str, "Customer ID of Entity or Customer receiving the transactions"],
        Trxn_Channel:Annotated[List[Channels_allowed], "Transaction Channels used to make the transactions."],
        Start_Date:Annotated[str, "Date on which the first transaction was made"], 
        End_Date:Annotated[str, "Date on which the last transaction was made"],
        Min_Ind_Trxn_Amt:Annotated[float, "The smallest transaction amount"],
        Max_Ind_Trxn_Amt:Annotated[float, "The largest transaction amount"],
        Branch_or_ATM_Location:Annotated[str, "The location where transaction was originated or received"],
        N_transactions:Annotated[int, "The number of transactions made between the Originator and Beneficary"]) -> dict:
    """
    Synthesize ``N_transactions`` random transactions between one originator
    and one beneficiary.

    Each transaction gets a channel drawn at random from ``Trxn_Channel``, a
    date drawn at random between ``Start_Date`` and ``End_Date`` (both
    inclusive, "%Y-%m-%d" strings) and an amount drawn uniformly between
    ``Min_Ind_Trxn_Amt`` and ``Max_Ind_Trxn_Amt``, rounded to 2 decimals. The
    party and location fields are copied unchanged into every transaction.

    :return: Dictionary keyed by "1", "2", ... "N" (strings); each value is a
             dictionary with the keys Originator_Name, Originator_Account_ID,
             Originator_Customer_ID, Beneficiary_Name, Beneficiary_Account_ID,
             Beneficiary_Customer_ID, Trxn_Channel, Trxn_Date (yyyy-mm-dd),
             Trxn_Amount (plain Python float) and Branch_or_ATM_Location.
             Empty when ``N_transactions`` is 0.
    :raises ValueError: if a date does not match "%Y-%m-%d", if End_Date is
             earlier than Start_Date, or if N_transactions is negative.
    """
    if N_transactions < 0:
        raise ValueError("N_transactions must not be negative")

    # A single channel may arrive as a bare string; wrap it so that whole
    # channel names are sampled rather than individual characters.
    if isinstance(Trxn_Channel, str):
        Trxn_Channel = [Trxn_Channel]

    first_day = datetime.strptime(Start_Date,"%Y-%m-%d")
    last_day = datetime.strptime(End_Date,"%Y-%m-%d")
    span_days = (last_day - first_day).days
    if span_days < 0:
        raise ValueError(f"End_Date {End_Date} is earlier than Start_Date {Start_Date}")

    trxn_channels = random.choices(Trxn_Channel, k = N_transactions)

    # "+ 1" makes End_Date itself reachable and keeps the range non-empty when
    # every transaction happens on a single day (Start_Date == End_Date).
    # TO DO (from the original author): guarantee that Start_Date and End_Date
    # themselves appear among the generated dates.
    day_offsets = random.choices(range(span_days + 1), k = N_transactions)
    trxn_dates = [(first_day + timedelta(days=offset)).strftime("%Y-%m-%d") for offset in day_offsets]

    # .tolist() turns numpy scalars into plain Python floats so the result
    # stringifies and serialises as ordinary numbers.
    trxn_amounts = np.round(
        np.random.uniform(low=Min_Ind_Trxn_Amt, high=Max_Ind_Trxn_Amt, size=N_transactions), 2
    ).tolist()

    trxns = {} #Dictionary to hold transactions
    for i in range(N_transactions):
        trxns[f"{i+1}"] = {"Originator_Name": Originator_Name , "Originator_Account_ID": Originator_Account_ID,"Originator_Customer_ID": Originator_Customer_ID,
                            "Beneficiary_Name": Beneficiary_Name, "Beneficiary_Account_ID": Beneficiary_Account_ID,"Beneficiary_Customer_ID": Beneficiary_Customer_ID,
                             "Trxn_Channel": trxn_channels[i], "Trxn_Date": trxn_dates[i], "Trxn_Amount":trxn_amounts[i],
                              "Branch_or_ATM_Location": Branch_or_ATM_Location }

    return trxns

In [131]:
from autogen.agentchat.contrib.gpt_assistant_agent import GPTAssistantAgent
from autogen.function_utils import get_function_schema

# Build the tool schema for `generate_transactions`; it is handed to the
# Assistants-API based agent through the "tools" entry of its llm_config.
generate_transactions_schema = get_function_schema(
    generate_transactions,
    description=" A function for generating transactions when a large number of transactions have to be synthesizes",
    name="generate_transactions",
)

Create an agent that can use this tool.

In [132]:
# Tool-using agent for narratives that describe many transactions: it collects
# the arguments step by step and then calls `generate_transactions`.
# The worked example refers to the mapping by its real name, FI_to_Acct_to_Cust,
# which is the key present in the input.
# NOTE(review): overwrite_instructions / overwrite_tools presumably replace the
# instructions and tools stored on an already existing assistant — confirm
# against the AutoGen GPTAssistantAgent documentation.
trxn_generation_agent_gpt = GPTAssistantAgent(
    name = "trxn_generation_agent_gpt",
    instructions = """
    You are an AI Agent tasked with synthesizing transactions from a narrative you are provided. Your strength is synthesizing transactions when a 
    larger number of transactions; typically more than 10 transactions; are made between two entities, especially 
    when a range of trxn amounts and dates are specified. 
    When invoked, use the provided function to generate transactions.

      You will have the following four pieces of information.

      
      1) A Dictionary called Narrative with Account IDs as key  and  narratives describing transactions made by that account as the values. </n>
      2) The mapping between Individuals and Organizations referenced in the narrative to Account IDs, given by the dictionary Acct_to_Cust. Note, account IDs starting with the prefix "Dummy_" 
         are placeholders for account IDs that are referenced but have not been explicitly identified in the narrative. </n>
      3) The mapping between Financial Institutions to Account IDs given by the dictionary Acct_to_FI. If the Financial institution for certain accounts are not specified in the narrative,
        a dummy financial institution starting with "Dummy_Bank_" will be used for them.</n>
      4) A mapping between Financial institution, Customer IDs and Accounts owned by those Customer IDs given by the dictionary FI_to_Acct_to_Cust
      
      For the narrative corresponding  to each account ID, do the following:

      Step 1) Identify the number of transactions described in the narrative. This is N_transactions.

      Step 2) Identify the transaction channels or product used. This could be Wire, Cash, Checks or something similar.Record this as a list. If this is missing, you can make a reasonable assumption.

      Step 3) Identify the Beneficiary Account ID, the Customer ID corresponding to the account, the Beneficiary_Name which is the Individual Or Organization which is the 
      Beneficiary of the transaction. Note that for cash transactions, if only information on the originator or beneficary are available, you can assume the originator and beneficiary are the same.

      Step 4) Identify the Originator Account ID, the Customer ID corresponding to the account, the Originating_Name which is the Individual Or Organization originating
      the transaction.

      Step 5) Identify the first and last dates the transactions. If this is missing, you can make reasonable assumptions.

      Step 6) Identify the minimum and maximum amount of the transactions.

      Step 7) If this is a Cash or Check or similar transaction, determine the Branches or ATM Locations where the transaction was conducted. 

      Step 8) Ensure the following are extracted.

      -N_transactions
      -Originator_Name
      -Originator_Account_ID
      -Originator_Customer_ID
      -Beneficiary_Name
      -Beneficiary_Account_ID
      -Beneficiary_Customer_ID
      -Trxn_Channel or List of Trxn Channels
      -First Trxn_Date and Last Trxn Date in yyyy-mm-dd format
      -Min Trxn_Amount and Max Trxn_Amount
      -Branch or ATM Location or List of such locations

      If any information is missing and can't be reasonably assumed, use an empty string "" to indicate it is missing.

      Step 9) Call the function `generate_transactions`  by passing the specified number of transactions and attributes collected above..

      An example is given below, demarcated by the delimiter ----..

      ----

      Narrative = {"345723": "John transferred a total of $100,000 from Acct #345723 at Bank of America  to Jill between Jan 1, 2024 and July 4,2024.
                              There were 10 trns, a mix of Wire and Checks, each transactions was $10,000"}

                    Acct_to_Cust = {"345723": "John", "Dummy_001":"Jill", "98765":"Acme Inc"}
                    Acct_to_FI = {"345723":"Bank of America","98765":"Bank of America", "Dummy_001":"Chase Bank" } 
                    FI_to_Acct_to_Cust =  {"Bank of America": {"345723":"CUST_001", "98765":"CUST_002"}, "Chase Bank": {"Dummy_001": "CUST_003"}}

      
        Step 1) Identify the number of transactions described in the narrative. This is N_transactions.
            There are a total of 10 transactions described, so N_transactions = 10.

        Step 2) Identify the transaction channels or product used.
            The transactions are a mix of Wires and Checks. So create a list ["Wire","Check"]


        Step 3) The Beneficary of the trxns is Jill.From the narrative, the Beneficary_Name is Jill. The Beneficiary Account ID is not specified.However, From Acct_to_Cust, it is noted that Jill's account is Dummy_001. From Acct_to_FI, this account is at Chase Bank. From FI_to_Acct_to_Cust, the Customer ID corresponding to this account is CUST_003
        

        Step 4) The Originator of the trxns is John. From the narrative, the Originator name is John and the originating account is 345723. From Acct_to_FI, this account is at Bank of America. From FI_to_Acct_to_Cust, the Customer ID corresponding to this account is CUST_001
                
        Step 5) The first transaction was made on Jan 1,2024 and last transaction was made on July 4, 2024.

        Step 6) The mininmum and maximum transaction amount is $10,000

        Step 7) Since the transactions are Wire or Chek transactions, Branch or ATM Location can be considered Missing. 
        
        Step 8) All the required attributes have been gathered.

        Step 9) Call the function generate_transactions with the collected arguments. Return results as a Python Dictionary only.

      ----

    """,
    description = "An AI Assistant who is good at generating a large number of transactions, typically more than 10,"
    "especially when the precise trxn amount or dates of each trxn is NOT known", 
    overwrite_instructions=True,
    overwrite_tools=True,
    llm_config= {
        "config_list":config_list,
        "tools":[generate_transactions_schema]

    }
)







    You are an AI Agent tasked with synthesizing transactions from a narrative you are provided. Your strength is synthesizing transactions when a larger number of transactions are made between two entities  when a range of trxn amounts and dates are specified. When invoked, use the provided function to generate transactions.

      You will have the following four pieces of information.


      1) A Dictionary called Narrative with Account IDs as key  and  narratives describing transactions made by that account as the values. </n>
      2) The mapping between Individuals and Organizations referenced in the narrative to Account IDs, given by the dictionary Acct_to_Cust. Note, account IDs starting with the prefix "Dummy_" 
         are placeholders for account IDs that are referenced but have not been explicitly identified in the narrative. </n>
      3) The mapping between Financial Institutions to Account IDs given by the dictionary Acct_to_FI. If the Financial institution for certain

In [133]:
# Map the tool name used in the schema to the Python callable that runs it.
tool_implementations = {"generate_transactions": generate_transactions}
trxn_generation_agent_gpt.register_function(function_map=tool_implementations)

In [90]:
import copy  # imported here so the cell also runs on a fresh kernel

# NOTE(review): copy.copy is a shallow copy, so the duplicate presumably shares
# its underlying state with the original agent — confirm before putting both
# into the same chat.
trxn_generation_agent_gpt2 = copy.copy(trxn_generation_agent_gpt)


In [134]:
# User-side proxy that starts the chats and hands over the SAR extract.
# human_input_mode="ALWAYS" means the chat asks for human input as it runs.
sar_agent = autogen.UserProxyAgent(
    name="sar_agent",
    human_input_mode="ALWAYS",
    system_message="Share the SAR extract given so that trxns can be extracted",
    # Please set use_docker=True if docker is available to run the generated
    # code. Using docker is safer than running the generated code directly.
    code_execution_config=dict(last_n_messages=2, work_dir="temp", use_docker=False),
)

In [98]:
# Group chat holding the two transaction-generation agents, starting with an
# empty history and capped at two rounds; the manager runs on llm_config_mini.
groupchat = autogen.GroupChat(
    agents=[trxn_generation_agent, trxn_generation_agent_gpt],
    messages=[],
    max_round=2,
    allow_repeat_speaker=False,
)
manager = autogen.GroupChatManager(
    groupchat=groupchat,
    llm_config=llm_config_mini,
)





In [99]:
# Send the first split input (Trxn_Set_1) to the group chat and ask for an
# LLM-written summary in a fixed dictionary format. The format template closes
# every brace it opens, so the requested shape is a well-formed nested
# dictionary: account ID -> transaction ID -> attributes.
chat_results1 = sar_agent.initiate_chat(
    manager,
    message = trxn_extraction_inputs1,
    summary_method="reflection_with_llm",
    summary_args= {
        "summary_prompt": '''
                            Return the synthesized transactions in the following format as a Python Dictionary only.
                                 { <Acct_ID> : {<Trxn_ID> : 
                                                    {"Originator_Name": <Originator_Name>,
                                                     "Originator_Account_ID": <Originator_Account_ID>, 
                                                     "Originator_Customer_ID": <Originator_Customer_ID>,
                                                     "Beneficiary_Name": <Beneficiary_Name>,
                                                     "Beneficiary_Account_ID":<Beneficiary_Account_ID>, 
                                                     "Beneficiary_Customer_ID": <Beneficiary_Customer_ID> ,
                                                     "Trxn_Channel": <Trxn_Channel>, 
                                                     "Trxn_Date":<Trxn_Date>, 
                                                     "Trxn_Amount":<Trxn_Amount>, 
                                                     "Branch_or_ATM_Location": <Branch_or_ATM_Location> }
                                               }
                                 }



                          '''
                   }
    )


[33msar_agent[0m (to chat_manager):

{
  "Entities": {
    "Individuals": [
      "John Doe"
    ],
    "Organizations": [
      "Acme, Inc.",
      "Kulkutta Building Supply Company"
    ],
    "Financial_Institutions": [
      "Bank of Anan"
    ]
  },
  "Account_IDs": [
    "12345-6789",
    "23456-7891",
    "3489728",
    "Dummy_Acct_1"
  ],
  "Acct_to_FI": {
    "12345-6789": "Dummy_Bank_1",
    "23456-7891": "Dummy_Bank_1",
    "3489728": "Bank of Anan",
    "Dummy_Acct_1": "Dummy_Bank_1"
  },
  "Acct_to_Cust": {
    "12345-6789": "John Doe",
    "23456-7891": "Acme, Inc.",
    "3489728": "Kulkutta Building Supply Company",
    "Dummy_Acct_1": "Dummy_Customer"
  },
  "FI_to_Acct_to_Cust": {
    "Dummy_Bank_1": {
      "12345-6789": "CUST_001",
      "23456-7891": "CUST_002",
      "Dummy_Acct_1": "CUST_003"
    },
    "Bank of Anan": {
      "3489728": "CUST_004"
    }
  },
  "Narratives": {
    "23456-7891": {
      "Trxn_Set_1": "Between January 17 and March 21, 2003, John D



[32m
Next speaker: trxn_generation_agent
[0m
[33mtrxn_generation_agent[0m (to chat_manager):

Step 1) Identify the number of transactions described in the narrative. 
There are nine transactions described in the narrative, as John Doe originated nine wires.

Step 2) For each transaction, identify the required attributes.

Given the description that all transactions occurred at the end of each week from January 17 to March 21, 2003, the approximate transaction dates can be listed as follows for each $25,000 wire:

1. January 17, 2003
2. January 24, 2003
3. January 31, 2003
4. February 7, 2003
5. February 14, 2003
6. February 21, 2003
7. February 28, 2003
8. March 7, 2003
9. March 14, 2003

For each wire transaction:

- Transaction Channel: Wire
- Originator Name: John Doe
- Originator Account ID: 23456-7891
- Originator Customer ID: CUST_002 (from FI_to_Acct_to_Cust)
- Beneficiary Name: Kulkutta Building Supply Company
- Beneficiary Account ID: 3489728
- Beneficiary Customer ID: CUS

In [138]:
# Two-agent group chat for transaction generation: the prompt-driven
# trxn_generation_agent and the function-calling trxn_generation_agent_gpt.
# max_round=2 together with allow_repeat_speaker=False keeps the conversation
# to the initial message plus a single agent reply.
groupchat = autogen.GroupChat(
    agents=[trxn_generation_agent, trxn_generation_agent_gpt],
    messages=[],
    max_round=2,
    allow_repeat_speaker=False,
)

# The manager that routes messages between the agents runs on llm_config_large.
manager = autogen.GroupChatManager(
    groupchat=groupchat,
    llm_config=llm_config_large,
)






In [139]:
# Run the conversation for the second transaction set: sar_agent sends
# trxn_extraction_inputs2 to the group chat manager, and the chat is then
# summarised with an LLM reflection step driven by the prompt below.
# Later cells read chat_results2.summary and parse it into a dictionary, so
# the template shown to the model must itself be a well-formed nested dict:
# the two outer braces are closed explicitly here.
chat_results2 = sar_agent.initiate_chat(
    manager,
    message = trxn_extraction_inputs2,
    summary_method="reflection_with_llm",
    summary_args= {
        "summary_prompt": '''
                            Return the synthesized transactions in the following format as a Python Dictionary only.
                                 { <Acct_ID> : {<Trxn_ID> :
                                                    {"Originator_Name": <Originator_Name>,
                                                     "Originator_Account_ID": <Originator_Account_ID>, 
                                                     "Originator_Customer_ID": <Originator_Customer_ID>,
                                                     "Beneficiary_Name": <Beneficiary_Name>,
                                                     "Beneficiary_Account_ID":<Beneficiary_Account_ID>, 
                                                     "Beneficiary_Customer_ID": <Beneficiary_Customer_ID> ,
                                                     "Trxn_Channel": <Trxn_Channel>, 
                                                     "Trxn_Date":<Trxn_Date>, 
                                                     "Trxn_Amount":<Trxn_Amount>, 
                                                     "Branch_or_ATM_Location": <Branch_or_ATM_Location> }
                                               }
                                 }



                          '''
                   }
    )


[33msar_agent[0m (to chat_manager):

{
  "Entities": {
    "Individuals": [
      "John Doe"
    ],
    "Organizations": [
      "Acme, Inc.",
      "Kulkutta Building Supply Company"
    ],
    "Financial_Institutions": [
      "Bank of Anan"
    ]
  },
  "Account_IDs": [
    "12345-6789",
    "23456-7891",
    "3489728",
    "Dummy_Acct_1"
  ],
  "Acct_to_FI": {
    "12345-6789": "Dummy_Bank_1",
    "23456-7891": "Dummy_Bank_1",
    "3489728": "Bank of Anan",
    "Dummy_Acct_1": "Dummy_Bank_1"
  },
  "Acct_to_Cust": {
    "12345-6789": "John Doe",
    "23456-7891": "Acme, Inc.",
    "3489728": "Kulkutta Building Supply Company",
    "Dummy_Acct_1": "Dummy_Customer"
  },
  "FI_to_Acct_to_Cust": {
    "Dummy_Bank_1": {
      "12345-6789": "CUST_001",
      "23456-7891": "CUST_002",
      "Dummy_Acct_1": "CUST_003"
    },
    "Bank of Anan": {
      "3489728": "CUST_004"
    }
  },
  "Narratives": {
    "23456-7891": {
      "Trxn_Set_2": "A review of deposit activity on the Acme, Inc



[32m
Next speaker: trxn_generation_agent_gpt
[0m
[35m
>>>>>>>> EXECUTING FUNCTION generate_transactions...[0m


INFO:autogen.agentchat.contrib.gpt_assistant_agent:Intermediate executing(generate_transactions, Success: True) : {'1': {'Originator_Name': 'John Doe', 'Originator_Account_ID': '23456-7891', 'Originator_Customer_ID': 'CUST_002', 'Beneficiary_Name': 'Acme, Inc.', 'Beneficiary_Account_ID': '23456-7891', 'Beneficiary_Customer_ID': 'CUST_002', 'Trxn_Channel': 'Check', 'Trxn_Date': '2003-01-22', 'Trxn_Amount': 5649.0, 'Branch_or_ATM_Location': ''}, '2': {'Originator_Name': 'John Doe', 'Originator_Account_ID': '23456-7891', 'Originator_Customer_ID': 'CUST_002', 'Beneficiary_Name': 'Acme, Inc.', 'Beneficiary_Account_ID': '23456-7891', 'Beneficiary_Customer_ID': 'CUST_002', 'Trxn_Channel': 'Check', 'Trxn_Date': '2003-03-06', 'Trxn_Amount': 9415.55, 'Branch_or_ATM_Location': ''}, '3': {'Originator_Name': 'John Doe', 'Originator_Account_ID': '23456-7891', 'Originator_Customer_ID': 'CUST_002', 'Beneficiary_Name': 'Acme, Inc.', 'Beneficiary_Account_ID': '23456-7891', 'Beneficiary_Customer_ID': 'CU

[33mtrxn_generation_agent_gpt[0m (to chat_manager):

```json
{
  "1": {
    "Originator_Name": "John Doe",
    "Originator_Account_ID": "23456-7891",
    "Originator_Customer_ID": "CUST_002",
    "Beneficiary_Name": "Acme, Inc.",
    "Beneficiary_Account_ID": "23456-7891",
    "Beneficiary_Customer_ID": "CUST_002",
    "Trxn_Channel": "Check",
    "Trxn_Date": "2003-01-22",
    "Trxn_Amount": 5649.0,
    "Branch_or_ATM_Location": ""
  },
  "2": {
    "Originator_Name": "John Doe",
    "Originator_Account_ID": "23456-7891",
    "Originator_Customer_ID": "CUST_002",
    "Beneficiary_Name": "Acme, Inc.",
    "Beneficiary_Account_ID": "23456-7891",
    "Beneficiary_Customer_ID": "CUST_002",
    "Trxn_Channel": "Check",
    "Trxn_Date": "2003-03-06",
    "Trxn_Amount": 9415.55,
    "Branch_or_ATM_Location": ""
  },
  "3": {
    "Originator_Name": "John Doe",
    "Originator_Account_ID": "23456-7891",
    "Originator_Customer_ID": "CUST_002",
    "Beneficiary_Name": "Acme, Inc.",
    "Bene

In [140]:
# Show the raw reflection summary of the first chat; as the output below shows,
# it is a string whose dictionary payload is wrapped in a Markdown code fence.
chat_results1.summary

'```python\n{\n    "23456-7891": {\n        "1": {\n            "Originator_Name": "John Doe",\n            "Originator_Account_ID": "23456-7891",\n            "Originator_Customer_ID": "CUST_002",\n            "Beneficiary_Name": "Kulkutta Building Supply Company",\n            "Beneficiary_Account_ID": "3489728",\n            "Beneficiary_Customer_ID": "CUST_004",\n            "Trxn_Channel": "Wire",\n            "Trxn_Date": "2003-01-17",\n            "Trxn_Amount": 25000,\n            "Branch_or_ATM_Location": ""\n        },\n        "2": {\n            "Originator_Name": "John Doe",\n            "Originator_Account_ID": "23456-7891",\n            "Originator_Customer_ID": "CUST_002",\n            "Beneficiary_Name": "Kulkutta Building Supply Company",\n            "Beneficiary_Account_ID": "3489728",\n            "Beneficiary_Customer_ID": "CUST_004",\n            "Trxn_Channel": "Wire",\n            "Trxn_Date": "2003-01-24",\n            "Trxn_Amount": 25000,\n            "Branc

In [141]:
# Show the raw reflection summary of the second chat; as the output below shows,
# it is a string whose dictionary payload is wrapped in a Markdown code fence.
chat_results2.summary

'```python\n{\n  "23456-7891": {\n    "1": {\n      "Originator_Name": "John Doe",\n      "Originator_Account_ID": "23456-7891",\n      "Originator_Customer_ID": "CUST_002",\n      "Beneficiary_Name": "Acme, Inc.",\n      "Beneficiary_Account_ID": "23456-7891",\n      "Beneficiary_Customer_ID": "CUST_002",\n      "Trxn_Channel": "Check",\n      "Trxn_Date": "2003-01-22",\n      "Trxn_Amount": 5649.0,\n      "Branch_or_ATM_Location": ""\n    },\n    "2": {\n      "Originator_Name": "John Doe",\n      "Originator_Account_ID": "23456-7891",\n      "Originator_Customer_ID": "CUST_002",\n      "Beneficiary_Name": "Acme, Inc.",\n      "Beneficiary_Account_ID": "23456-7891",\n      "Beneficiary_Customer_ID": "CUST_002",\n      "Trxn_Channel": "Check",\n      "Trxn_Date": "2003-03-06",\n      "Trxn_Amount": 9415.55,\n      "Branch_or_ATM_Location": ""\n    },\n    "3": {\n      "Originator_Name": "John Doe",\n      "Originator_Account_ID": "23456-7891",\n      "Originator_Customer_ID": "CUST_0

In [101]:
import re

# Pull the dictionary text out of the Markdown code fence in the LLM summary.
# str.strip('```python') would strip a *set of characters*, not a prefix, so it
# only worked when the payload happened to start/end with a newline and the
# fence tag was exactly "python". The regex below instead:
#   * skips an optional language tag (python, json, ...) on the opening fence,
#   * captures everything up to the closing fence (or end of text if the
#     closing fence is missing),
#   * falls back to the whole summary when there is no fence at all, rather
#     than raising IndexError.
_fenced_trxns1 = re.search(
    r"```(?:[\w+-]+[ \t]*\n)?(.*?)(?:```|\Z)",
    chat_results1.summary,
    flags=re.DOTALL,
)
cleaned_trxns1 = _fenced_trxns1.group(1) if _fenced_trxns1 else chat_results1.summary

# Surrounding whitespace is irrelevant to the later literal parsing step.
trxns1 = cleaned_trxns1.strip()

In [94]:
import re

# Pull the dictionary text out of the Markdown code fence in the LLM summary.
# str.strip('```python') would strip a *set of characters*, not a prefix, so it
# only worked when the payload happened to start/end with a newline and the
# fence tag was exactly "python". The regex below instead:
#   * skips an optional language tag (python, json, ...) on the opening fence,
#   * captures everything up to the closing fence (or end of text if the
#     closing fence is missing),
#   * falls back to the whole summary when there is no fence at all, rather
#     than raising IndexError.
# NOTE(review): the saved execution count of this cell is lower than that of
# the cell producing chat_results2, and the printed trxns2 further down does
# not match the summary displayed above -- re-run this cell after the chat so
# trxns2 reflects the current chat_results2.
_fenced_trxns2 = re.search(
    r"```(?:[\w+-]+[ \t]*\n)?(.*?)(?:```|\Z)",
    chat_results2.summary,
    flags=re.DOTALL,
)
cleaned_trxns2 = _fenced_trxns2.group(1) if _fenced_trxns2 else chat_results2.summary

# Surrounding whitespace is irrelevant to the later literal parsing step.
trxns2 = cleaned_trxns2.strip()

In [102]:
# Print the extracted dictionary text (still a string at this point) so its
# nesting can be checked by eye before it is parsed.
print(trxns1)


{
    "23456-7891": {
        "1": {
            "Originator_Name": "John Doe",
            "Originator_Account_ID": "23456-7891",
            "Originator_Customer_ID": "CUST_002",
            "Beneficiary_Name": "Kulkutta Building Supply Company",
            "Beneficiary_Account_ID": "3489728",
            "Beneficiary_Customer_ID": "CUST_004",
            "Trxn_Channel": "Wire",
            "Trxn_Date": "2003-01-17",
            "Trxn_Amount": 25000,
            "Branch_or_ATM_Location": ""
        },
        "2": {
            "Originator_Name": "John Doe",
            "Originator_Account_ID": "23456-7891",
            "Originator_Customer_ID": "CUST_002",
            "Beneficiary_Name": "Kulkutta Building Supply Company",
            "Beneficiary_Account_ID": "3489728",
            "Beneficiary_Customer_ID": "CUST_004",
            "Trxn_Channel": "Wire",
            "Trxn_Date": "2003-01-24",
            "Trxn_Amount": 25000,
            "Branch_or_ATM_Location": ""
        },
 

In [95]:
# Print the extracted dictionary text (still a string at this point) so its
# nesting can be checked by eye before it is parsed.
# NOTE(review): the saved output below uses "Trxn_N" keys and different values
# than the chat_results2.summary displayed earlier -- it looks like it was
# produced from a previous run; confirm by re-running after the chat cell.
print(trxns2)


{
    "23456-7891": {
        "Trxn_1": {
            "Originator_Name": "Acme, Inc.",
            "Originator_Account_ID": "23456-7891",
            "Originator_Customer_ID": "CUST_002",
            "Beneficiary_Name": "Acme, Inc.",
            "Beneficiary_Account_ID": "23456-7891",
            "Beneficiary_Customer_ID": "CUST_002",
            "Trxn_Channel": "Check",
            "Trxn_Date": "2003-01-31",
            "Trxn_Amount": 5332.58,
            "Branch_or_ATM_Location": ""
        },
        "Trxn_2": {
            "Originator_Name": "Acme, Inc.",
            "Originator_Account_ID": "23456-7891",
            "Originator_Customer_ID": "CUST_002",
            "Beneficiary_Name": "Acme, Inc.",
            "Beneficiary_Account_ID": "23456-7891",
            "Beneficiary_Customer_ID": "CUST_002",
            "Trxn_Channel": "Cash",
            "Trxn_Date": "2003-01-17",
            "Trxn_Amount": 6752.8,
            "Branch_or_ATM_Location": ""
        },
        "Trxn_3": {
 

In [166]:
import ast

# Turn the two dictionary strings into real Python dicts. literal_eval only
# accepts Python literals, so nothing in the LLM-produced text gets executed.
trxns_dict1, trxns_dict2 = (
    ast.literal_eval(trxns1),
    ast.literal_eval(trxns2),
)

In [167]:
# Inspect the parsed result: a dict keyed by account ID whose values map a
# transaction key to that transaction's attribute dict (see output below).
trxns_dict1

{'23456-7891': {'1': {'Originator_Name': 'John Doe',
   'Originator_Account_ID': '23456-7891',
   'Originator_Customer_ID': 'CUST_002',
   'Beneficiary_Name': 'Kulkutta Building Supply Company',
   'Beneficiary_Account_ID': '3489728',
   'Beneficiary_Customer_ID': 'CUST_004',
   'Trxn_Channel': 'Wire',
   'Trxn_Date': '2003-01-17',
   'Trxn_Amount': 25000,
   'Branch_or_ATM_Location': ''},
  '2': {'Originator_Name': 'John Doe',
   'Originator_Account_ID': '23456-7891',
   'Originator_Customer_ID': 'CUST_002',
   'Beneficiary_Name': 'Kulkutta Building Supply Company',
   'Beneficiary_Account_ID': '3489728',
   'Beneficiary_Customer_ID': 'CUST_004',
   'Trxn_Channel': 'Wire',
   'Trxn_Date': '2003-01-24',
   'Trxn_Amount': 25000,
   'Branch_or_ATM_Location': ''},
  '3': {'Originator_Name': 'John Doe',
   'Originator_Account_ID': '23456-7891',
   'Originator_Customer_ID': 'CUST_002',
   'Beneficiary_Name': 'Kulkutta Building Supply Company',
   'Beneficiary_Account_ID': '3489728',
   'Ben

In [168]:
import pandas as pd

# Flatten {account_id: {transaction_key: details}} into one row per transaction.
# Every row is a fresh dict built from the transaction's details plus two extra
# fields, so the nested dicts inside trxns_dict1 are not modified and the cell
# can be re-run safely:
#   Transaction_Set - which narrative transaction set the row came from
#   Account_ID      - the outer key of trxns_dict1
# The per-account transaction key itself is not carried into the row.
trxn_set = 1
flattened_data = [
    {**transaction_details, 'Transaction_Set': trxn_set, 'Account_ID': key}
    for key, inner_dict in trxns_dict1.items()
    for transaction_details in inner_dict.values()
]

# Convert the flattened rows to a DataFrame
df = pd.DataFrame(flattened_data)

In [169]:
# Display the flattened first transaction set (one row per transaction).
df

Unnamed: 0,Originator_Name,Originator_Account_ID,Originator_Customer_ID,Beneficiary_Name,Beneficiary_Account_ID,Beneficiary_Customer_ID,Trxn_Channel,Trxn_Date,Trxn_Amount,Branch_or_ATM_Location,Transaction_Set,Account_ID
0,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-01-17,25000,,1,23456-7891
1,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-01-24,25000,,1,23456-7891
2,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-01-31,25000,,1,23456-7891
3,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-02-07,25000,,1,23456-7891
4,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-02-14,25000,,1,23456-7891
5,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-02-21,25000,,1,23456-7891
6,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-02-28,25000,,1,23456-7891
7,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-03-07,25000,,1,23456-7891
8,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-03-14,25000,,1,23456-7891


In [170]:
# Move Transaction_Set and Account_ID to the front; every other column keeps
# its existing relative order.
column_order = ['Transaction_Set', 'Account_ID']
column_order += [col for col in df.columns if col not in ['Transaction_Set', 'Account_ID']]
df = df.loc[:, column_order]

In [171]:
# Display the first transaction set again, now with Transaction_Set and
# Account_ID as the leading columns.
df

Unnamed: 0,Transaction_Set,Account_ID,Originator_Name,Originator_Account_ID,Originator_Customer_ID,Beneficiary_Name,Beneficiary_Account_ID,Beneficiary_Customer_ID,Trxn_Channel,Trxn_Date,Trxn_Amount,Branch_or_ATM_Location
0,1,23456-7891,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-01-17,25000,
1,1,23456-7891,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-01-24,25000,
2,1,23456-7891,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-01-31,25000,
3,1,23456-7891,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-02-07,25000,
4,1,23456-7891,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-02-14,25000,
5,1,23456-7891,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-02-21,25000,
6,1,23456-7891,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-02-28,25000,
7,1,23456-7891,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-03-07,25000,
8,1,23456-7891,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-03-14,25000,


In [172]:
import pandas as pd

# Flatten {account_id: {transaction_key: details}} into one row per transaction.
# Every row is a fresh dict built from the transaction's details plus two extra
# fields, so the nested dicts inside trxns_dict2 are not modified and the cell
# can be re-run safely:
#   Transaction_Set - which narrative transaction set the row came from
#   Account_ID      - the outer key of trxns_dict2
# The per-account transaction key itself is not carried into the row.
trxn_set = 2
flattened_data = [
    {**transaction_details, 'Transaction_Set': trxn_set, 'Account_ID': key}
    for key, inner_dict in trxns_dict2.items()
    for transaction_details in inner_dict.values()
]

# Convert the flattened rows to a DataFrame
df2 = pd.DataFrame(flattened_data)

In [173]:
# Move Transaction_Set and Account_ID to the front of df2; every other column
# keeps its existing relative order. The order is derived from df2's own
# columns (not df's), so this cell neither depends on the df cells having run
# nor fails or drops columns when the two frames differ.
column_order = ['Transaction_Set', 'Account_ID'] + [col for col in df2.columns if col not in ['Transaction_Set', 'Account_ID']]
df2 = df2[column_order]


In [174]:
# Display the flattened second transaction set with Transaction_Set and
# Account_ID as the leading columns.
df2

Unnamed: 0,Transaction_Set,Account_ID,Originator_Name,Originator_Account_ID,Originator_Customer_ID,Beneficiary_Name,Beneficiary_Account_ID,Beneficiary_Customer_ID,Trxn_Channel,Trxn_Date,Trxn_Amount,Branch_or_ATM_Location
0,2,23456-7891,"Acme, Inc.",23456-7891,CUST_002,"Acme, Inc.",23456-7891,CUST_002,Check,2003-01-31,5332.58,
1,2,23456-7891,"Acme, Inc.",23456-7891,CUST_002,"Acme, Inc.",23456-7891,CUST_002,Cash,2003-01-17,6752.8,
2,2,23456-7891,"Acme, Inc.",23456-7891,CUST_002,"Acme, Inc.",23456-7891,CUST_002,Check,2003-03-04,7762.77,
3,2,23456-7891,"Acme, Inc.",23456-7891,CUST_002,"Acme, Inc.",23456-7891,CUST_002,Cash,2003-01-29,8457.15,
4,2,23456-7891,"Acme, Inc.",23456-7891,CUST_002,"Acme, Inc.",23456-7891,CUST_002,Cash,2003-02-09,4597.04,
5,2,23456-7891,"Acme, Inc.",23456-7891,CUST_002,"Acme, Inc.",23456-7891,CUST_002,Cash,2003-03-04,5605.05,
6,2,23456-7891,"Acme, Inc.",23456-7891,CUST_002,"Acme, Inc.",23456-7891,CUST_002,Check,2003-01-29,7761.26,
7,2,23456-7891,"Acme, Inc.",23456-7891,CUST_002,"Acme, Inc.",23456-7891,CUST_002,Cash,2003-02-20,6341.96,
8,2,23456-7891,"Acme, Inc.",23456-7891,CUST_002,"Acme, Inc.",23456-7891,CUST_002,Money Order,2003-02-22,9106.82,
9,2,23456-7891,"Acme, Inc.",23456-7891,CUST_002,"Acme, Inc.",23456-7891,CUST_002,Money Order,2003-01-24,4863.2,


In [175]:
# Stack both transaction sets into one frame. ignore_index=True gives the
# combined frame a fresh 0..n-1 index; without it each input keeps its own
# labels and the result has duplicate index values (0, 1, ... appear twice),
# which makes label-based lookups ambiguous.
df_final = pd.concat([df, df2], ignore_index=True)

# Sequential 1-based identifier across the combined transactions.
df_final["Transaction_ID"] = range(1, len(df_final) + 1)

In [176]:
# Display the combined transactions from both sets, including the sequential
# Transaction_ID column added above.
df_final

Unnamed: 0,Transaction_Set,Account_ID,Originator_Name,Originator_Account_ID,Originator_Customer_ID,Beneficiary_Name,Beneficiary_Account_ID,Beneficiary_Customer_ID,Trxn_Channel,Trxn_Date,Trxn_Amount,Branch_or_ATM_Location,Transaction_ID
0,1,23456-7891,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-01-17,25000.0,,1
1,1,23456-7891,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-01-24,25000.0,,2
2,1,23456-7891,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-01-31,25000.0,,3
3,1,23456-7891,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-02-07,25000.0,,4
4,1,23456-7891,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-02-14,25000.0,,5
5,1,23456-7891,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-02-21,25000.0,,6
6,1,23456-7891,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-02-28,25000.0,,7
7,1,23456-7891,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-03-07,25000.0,,8
8,1,23456-7891,John Doe,23456-7891,CUST_002,Kulkutta Building Supply Company,3489728,CUST_004,Wire,2003-03-14,25000.0,,9
0,2,23456-7891,"Acme, Inc.",23456-7891,CUST_002,"Acme, Inc.",23456-7891,CUST_002,Check,2003-01-31,5332.58,,10
