# Approach 1 - Using gpt-4o-mini

In [316]:
import os
import re

from dotenv import load_dotenv
load_dotenv()
# Read the OpenAI key under the same variable name that the autogen config in
# this notebook maps the models to ("OPENAI_API_KEY"); the earlier
# "OPEN_API_KEY" spelling does not match that name.
api_key = os.getenv("OPENAI_API_KEY")

In [317]:
# Model selections referenced by the agent definitions: one config for the
# small model and one for the large model.
llm_config_mini = dict(model="gpt-4o-mini")
llm_config_large = dict(model="gpt-4o")

In [318]:
import autogen
# Build the autogen config list from ../.env. Both models are mapped to the
# OPENAI_API_KEY variable and the result is filtered down to gpt-4o-mini.
config_list = autogen.config_list_from_dotenv(
    dotenv_file_path="../.env",
    model_api_key_map={
        model_name: "OPENAI_API_KEY" for model_name in ("gpt-4o", "gpt-4o-mini")
    },
    filter_dict={"model": ["gpt-4o-mini"]},
)

## Phase 1 - Extract Narratives and other relevant information

### Define Agents

In [320]:
from autogen import ConversableAgent

In [321]:
# System prompt for the Entity Extraction Agent: four extraction steps followed
# by one worked example. The instruction section and the worked example use the
# same dictionary names (Entities, Account_IDs, FIs_to_Accts, Acct_to_Cust,
# Reason_for_Dummy_Accounts, Reason_for_Dummy_Banks) and the same dummy-ID
# scheme (Dummy_Acct_N / Dummy_Bank_N) so the model is not given conflicting
# targets.
entity_extraction_message = \
'''You are an Entity Extraction Agent, an AI Assistant tasked with extracting entitites from SAR Narratives.
    
    Using the SAR NARRATIVE provided, do the following:

    Step 1) Extract the main entities described. Entities could be Individuals or Organizations or Financial Institutions. Return this in the form of a dictionary called Entities
            Entities = { "Individuals" : <List of Individuals> , "Organizations": <List of Organizations>, "Financial_Institutions": <List of Financial Instituions >} 

    Step 2) Extract the account IDs described. Account IDs could be numeric or alphanumeric.If an Account ID is missing, create a Dummy account ID with the prefix "Dummy_".
            e.g. Dummy_Acct_1, Dummy_Acct_2 and so on. Put this in a list called Account_IDs.
            Note the reason for creating a dummy account in the dictionary Reason_for_Dummy_Accounts

    Step 3) Map the extracted Account IDs to Financial institutions where the account is held. If the name of the institution is not specified, create a Dummy institution name e.g Dummy_Bank_1, Dummy_Bank_2 etc.
            Return this in the form of a dictionary  called FIs_to_Accts E.g. {<Financial Instituion> : [<Account_ID>, ...]}
            Add the reason for creating a dummy financial institution to the dictionary Reason_for_Dummy_Banks

    Step 4) Map the extracted account IDs to Individuals or Organizations who own the account. Return this in the form of a dictionary called Acct_to_Cust E.g. {<Account_ID>: <Entity_Name>}

    An example is given below.

    Narrative:

    John deposited $5000 in Cash into Acct #345723 at Bank of America. John sends $3000 to Jill's account at  Chase. Jill deposited $3000 in Cash into her Acct at Chase Bank.
    John and Jill own a business Acme Inc that has a  Business account, Account #98765 . John sends $2000 from Acct #345723 to Account #98765. Jill sends $1000 from 
    her Acct at Chase Bank to Acct #98765.

    Step 1) Extract the main entities described. Identify the Individuals , Organizations and Financial Instituions mentioned. 
    Individuals and Organizations are entities that usually conduct transactions through Financial institutions. Here, John and Jill are Individuals. Acme Inc is an Organization.
    Financial institutions are institutions such as banks or insurance companies that offer financial services to customers. Hence Bank of America and Chase Bank are Financial institutions.

    Record this as a dictionary named Entities.

    Entities = {"Individuals": ["John", "Jill"],"Organizations":["Acme Inc"],"Financial_Institutions":["Chase Bank","Bank of America"]}

    Step 2) Extract the account IDs described. Accounts mentioned in the narrative are #345723,#98765. There is also an account at Chase Bank belonging to Jill where
    she deposted $3000 that is missing an account ID. Assign a dummy account ID Dummy_Acct_1 to this account.

    Record this in a list named Account_IDs
    Account_IDs = ["345723","98765","Dummy_Acct_1"]

    Note the reason for creating the Dummy Account in a dictionary Reason_for_Dummy_Accounts
    Reason_for_Dummy_Accounts  =  {"Dummy_Acct_1": "There is  an account at Chase Bank belonging to Jill where she deposted $3000 that is missing an account ID" }

    Step 3) Map the Account IDs extracted in Step 2 to Financial Instititions where they are held. Account ID #345723 is held at bank of America.Account ID Dummy_Acct_1 is held at Chase Bank.
    Account ID #98765 is referenced but the Financial instituion where it is held is not specified. So it can be assumed to be held at "Dummy_Bank_1"

    Record this in a dictionary named FIs_to_Accts.
    FIs_to_Accts = {"Bank of America":["345723"],"Chase Bank":["Dummy_Acct_1"],"Dummy_Bank_1":["98765"]}

    Note the reason for creating the Dummy Bank in a dictionary Reason_for_Dummy_Banks.
    Reason_for_Dummy_Banks  =  {"Dummy_Bank_1": "Account ID #98765 is referenced but the Financial instituion where it is held is not specified" }

    
    Step 4) Map the extracted Account IDs extracted in Step 2 to Individuals or Organizations that are customers of the Financial Institutions. 
    Account ID #345723 belongs to John. 
    Account ID #98765 belongs to Acme Inc. 
    Account ID Dummy_Acct_1 belongs to Jill.
    

    Record this in  a dictionary named  Acct_to_Cust

    Acct_to_Cust = {"345723": "John", "Dummy_Acct_1" : "Jill","98765": "Acme Inc"}

    '''

In [351]:
# Worker agent that runs the entity-extraction prompt on the small model.
# Human input is never requested and code execution is switched off.
entity_extraction_agent = ConversableAgent(
    name="Entity_Extraction_Agent",
    llm_config=llm_config_mini,
    system_message=entity_extraction_message,
    human_input_mode="NEVER",
    code_execution_config=False,
)





In [352]:
# System prompt for the entity-extraction reviewer. The extraction agent's own
# instructions are interpolated so the review is limited to what was asked,
# and a per-step SATISFACTORY verdict is requested.
entity_extraction_reflection_message = f"""
    Determine if the steps carried out by the entity extraction agent are consistent with the instructions below. Your feedback should consider everything 
    in the instructions given below  and should not include any comments not explicitly covered in instructions below: 
    
    INSTRUCTIONS: \n {entity_extraction_message}

    
    More specifically, consider the following while reviewing each step.

    Review Step 1)

        Ensure that all Individuals, Organizations and Financial Institutions mentioned in the SAR narrative have been identified. If all Individuals, Organizations and
        Financial Institutions have been identified, the feedback should simply be: SATISFACTORY. No other comment should be added.

    Review Step 2)

        Ensure that all the Account IDs mentioned in the SAR narrative have been identified. Review the reason for creating Dummy Accounts in Reason_for_Dummy_Accounts
        If all Account IDs have been correctly identified, the feedback should simply be: SATISFACTORY.  No other comment should be added.


    Review Step 3)

        Ensure that all Account IDs identified  in the list Account_IDs have been mapped to a Financial Institution (FI). Review the reason for creating Dummy Banks in Reason_for_Dummy_Banks
        If the mapping is accurate, the feedback should simply be: SATISFACTORY.  No other comment should be added.

                        
    Review Step 4)

        Enure that all Account IDs identified in the list Account_IDs have been mapped to Individuals or Organizations .
        If the mapping is accurate, the feedback should simply be: SATISFACTORY.  No other comment should be added.
    
    Provide feedback for each step as a JSON only.

        Step 1: <Feedback for Step 1>,
        Step 2: <Feedback for Step 2> ,
        Step 3: <Feedback for Step 3>,
        Step 4: <Feedback for Step 4>
    



"""

In [353]:
# Reviewer for the Entity Extraction Agent's output; its system prompt is the
# reflection message that embeds the extraction instructions.
reflection_agent_ee = ConversableAgent(
    name="reflection_agent_ee",
    llm_config=llm_config_mini,
    system_message=entity_extraction_reflection_message,
    description='''
                     This agent should reflect on the work and output of the Entity Extraction Agent . It will review each of the steps carried out by the Entity Extraction
                     Agent and provide specific feedback for improvement if any.   
                  ''',
)





In [354]:
# Earlier draft of the entity-extraction reviewer. It reuses the agent name
# "reflection_agent_ee" and is not part of the reflection group chat defined
# in the visible part of this notebook.
reflection_agent_ee_old2 = ConversableAgent(
    name="reflection_agent_ee",
    llm_config=llm_config_mini,
    description='''
                     This agent should reflect on the information shared by the Entity Extraction Agent only . It will review each of the steps carried out by the Entity Extraction
                     Agent and provide specific feedback for improvement if any.   
                  ''',
    system_message='''
                        Review each of the steps carried out by the Entity Extraction Agent in the conext of the provided SAR NARRATIVE. 
                        Note that Dummy Banks e.g. Dummy_Bank_1 and Dummy Accounts e.g. Dummy_Acct_1 are expected in the response from the Entity Extraction Agent.They are placeholders 
                        for account IDs or Financial Institutions not explicitly mentioned in the narrative.

                        Review Step 1)

                            Ensure that all Individuals, Organizations and Financial Institutions mentioned in the SAR narrative have been identified. If all Individuals, Organizations and
                            Financial Institutions have been identified, the feedback should simply be: SATISFACTORY. No other comment should be added.

                        Review Step 2)

                            Ensure that all the Account IDs mentioned in the SAR narrative have been identified. Ensure that any Dummy Accounts identified are relevant and not duplicative.
                            If all Account IDs have been correctly identified, the feedback should simply be: SATISFACTORY.  No other comment should be added.


                        Review Step 3)

                            Ensure that all Account IDs identified have been mapped to a Financial Institution. 
                            Ensure that if the FI where an account is held is not mentioned, it is identified as a Dummy_Bank e.g. Dummy_Bank_1.
                            Ensure that if multiple Dummy_Banks have been identified, the SAR narrative refers to different institutions where the accounts
                            are held. If the mapping is accurate, the feedback should simply be: SATISFACTORY.  No other comment should be added.

                        
                        Review Step 4)

                            Enure that all Account IDs have been mapped to Individuals or Organizations referenced in the SAR narrative.
                            If the mapping is accurate, the feedback should simply be: SATISFACTORY.  No other comment should be added.
                        
                        Provide feedback for each step as a JSON.

                        {
                            Step 1: <Feedback>,
                            Step 2: <Feedback> ,
                            Step 3: <Feedback>,
                            Step 4: <Feedback>
                        }

                    ''',
)





In [81]:
# Oldest draft of the entity-extraction reviewer. It reuses the agent name
# "reflection_agent_ee" and is not part of the reflection group chat defined
# in the visible part of this notebook.
reflection_agent_ee_old = ConversableAgent(
    name="reflection_agent_ee",
    llm_config=llm_config_mini,
    description='''
                     This agent should reflect on the information shared by the Entity Extraction Agent only . It can review Account IDs,
                     mapping between Financial institution to Account IDs and mapping between Customers (Individuals or Organizations)
                     to Account IDs.   
                  ''',
    system_message='''
                        You receive the following information to review:
                            "Account IDs" : List of Account IDs mentioned in the Suspicious Aactivity Repot (SAR)
                            "FIs_to_Accts" : Mapping between above Account IDs and Financial instituions(FI) they belong to. If the name of the FI is not explicitly mentioned, it will
                                             be identified as a Dummy_Bank. 
                            "Customers_to_Accts": Mapping between customer names and accounts they own.


                        You will ONLY review the above three pieces of information for accuracy in the context of the provided SAR and provide feedback if any information is missing or incorrect. 
                        This includes
                        1) Ensure all Account IDs mentioned in the SAR Narrative have been captured. Dummy Accounts have been created only where necessary. If the same account is being referenced
                        at multiple points, ensure unnecessary Dummy Accounts have not been created. 

                        2) Ensure Account IDs have been mapped to the correct FI. Ensure Dummy_Banks are created only where needed. If multiple accounts are referenced, they might belong to
                        the same FI.

                        3) Ensure that the Account ID is mapped to the right Individual or Organization who owns the account.
                        
                        If the information is satisfactory, just return the word SATISFACTORY.

                    ''',
)





In [355]:
# Worker agent that assigns customer IDs and maps account IDs to them.
#
# Termination: the chat ends when an incoming message contains the whole word
# "satisfactory" or "accurate". A plain substring test would also fire on
# "unsatisfactory" / "inaccurate" and end the chat on negative feedback, so a
# word-boundary regex is used. Messages whose content is None are treated as
# empty text instead of raising.
entity_resolution_agent = ConversableAgent(
    name="entity_resolution_agent",
    system_message='''
    You are an AI Agent tasked with assigning Customer IDs to entities described in a SAR and mapping Account IDs to these customer IDs.

    You will have the following four pieces of information.

    
    1) List of Account IDs given by the list Account_IDs. </n>
    2) The mapping between Individuals and Organizations to Account IDs , given by the dictionary Customers_to_Accts. Note account IDs starting with the prefix "Dummy_" </n>
    3) The mapping between Financial Institutions to Account IDs given by the dictionary FIs_to_Accts. If the Financial institution for certain accounts are not specified, a dummy financial institution
      "Dummy_Bank" will be used for them.</n>
    4) SAR narrative 
    
    Using this information, do the following:

    Step 1)  For each Financial institution in FIs_to_Accts, identify the accounts at that institution. Note, account IDs starting with the prefix "Dummy_" are placeholders for account IDs that
             have not been identified in the narrative while FIs starting with the prefix Dummy e.g. Dummy_Bank 1 are placeholers for Is whose name is not mentioned in the SAR narrative.

    Step 2)  Identify the owners of the accounts from  Customers_to_Accts 

    Step 3)  If an Individual is a beneficial owner of an Organization and the Organizations's account, note this in a scratch pad.

    Step 4)  For each Financial institution, map each account ID to a Customer ID (E.g. CUST_001, CUST_002). If multiple accounts are owned by the same Individual or Organization, map them to the same Customer ID

    Return this final mapping between Customer IDs and Account IDs in the form of a JSON

    An example is given below, demarcated by the delimiter ----.

    ----

    1) Account_IDs = ["345723","98765","Dummy_001"]
    
    2) Customers_to_Accts = {"John":["345723"],"Jill":["Dummy_001"],"Acme Inc":"98765"}

    3) FIs_to_Accts =  {"Bank of America":["345723","98765"],"Chase Bank":["Dummy_001"]}

    4) Narrative: </n>
    John deposited $5000 in Cash into Acct #345723 at Bank of America. John sends $3000 to Jill's account at Chase. Jill deposited $3000 in Cash into her Acct at Chase Bank.
    John and Jill own a business Acme Inc that has a  Business account, Account #98765 at Bank of America. John sends $2000 from Acct #345723 to Account #98765. Jill sends $1000 from 
    her Acct at Chase Bank to Acct #98765.

    
    Step 1) For each Financial institution in FIs_to_Accts, identify the accounts at that institution. 

    At Bank of America, there are two accounts #345723 and #98765
    At Chase Bank, there is one account #Dummy_001

    Step 2) Identify the owners of the accounts from Customers_to_Accts 

    Account  #345723 is owned by John
    Account #Dummy_001 is owned by Jill.
    Account #98765 is owned by Acme Inc

    Step 3)If an Individual is a beneficial owner of an Organization and the Organizations's account, note this in a scratch pad. If two individuals are join owners of an account, note this in a scratch pad.

    John and Jill are co-owners of the business Acme Inc and therefore its accounts.

    Step 4) For each Financial institution, map each account ID to a Customer ID (E.g. CUST_001, CUST_002). If multiple accounts are owned by the same Individual or Organizations, map them to the
    same Customer ID

    At Bank of America, the account #345723 is owned by John, the account 98765 is owned by Acme Inc. So these two account IDs can be mapped to two customer IDs. The Account Dummy_001 
    is an account at Chase as per FIs_to_Accts and is owned by Jill as per Customers_to_Accts. This can be assigned to a different customer ID

    {"Bank of America": {"CUST_001": ["345723"], "CUST_002":["98765"]},
    "Chase Bank": {"CUST_003":["Dummy_001"]}}
    
    Return this as a JSON.
    ----



    ''',
    llm_config=llm_config_mini,
    code_execution_config=False,
    human_input_mode="NEVER",
    is_termination_msg=lambda msg: re.search(
        r"\b(?:satisfactory|accurate)\b", (msg.get("content") or "").lower()
    ) is not None,
)





In [356]:
# Reviewer for the Entity Resolution Agent's customer-ID assignments.
reflection_agent_er = ConversableAgent(
    name="reflection_agent_er",
    llm_config=llm_config_mini,
    description='''
                    This agent should reflect on the information shared by the Entity Resolution Agent only. It can specifically review 
                    FI_to_Customer_to_Acct mapping to ensure Customer IDs have been assigned correctly. 
                  ''',
    system_message='''
                        You receive the following information to review:
                            "Financial Institutions(FI) to Accts" : Mapping between FIs to accounts at that instituion
                            "Customers_to_Accts": Mapping between customer names and accounts they own.
                            "FI_to_Customer_to_Acct": Mapping between Financial Instituion to Customer ID and accounts to Customer ID

                        You will review this information for correctness in the context of the provided SAR and provide feedback if any information is missing or incorrect.
                        You will specifically review FI_to_Customer_to_Acct to examine whether Customer IDs have been assigned correctly to customers, and accounts have been mapped
                        correctly to Customer IDs. Customer IDs should be unique to each customer at an institution. Ensure all the mappings provided are consistent.

                        If the information is  correct ot satisfactory, just return the word SATISFACTORY.

                    ''',
)





In [357]:
# Worker agent (large model) that extracts, per account, the parts of the
# narrative describing that account's activity.
#
# Two fixes relative to the earlier version of this cell:
#   * The worked example now lists Jill's $1000 transfer under both accounts
#     it touches (98765 and Dummy_001); previously the example itself skipped
#     that transaction.
#   * Termination matches the whole word "satisfactory" so that
#     "unsatisfactory" feedback does not end the chat, and a message whose
#     content is None is treated as empty text instead of raising.
narrative_extraction_agent = ConversableAgent(
    name="narrative_extraction_agent",
    system_message='''
    You are an AI Agent tasked with extracting or summarizing  parts of a narrative that describe activity conducted by certain accounts.

    You will have the following three pieces of information.

    1)  SAR narrative

    2)  The mapping between Individuals and Organizations referenced in the narrative to Account IDs which is given by the dictionary Customers_to_Accts

    3) List of Account IDs given by a list Account_IDs
    
    Using the SAR narrative shared by the user, Do the following:

    Step 1) Identify an Account ID from the list of Account IDs.Note that account IDs starting with the prefix "Dummy_" are placeholders for account IDs that have not been explicitly described in the narrative.

    Step 2)  From the dictionary Customers_to_Accts, Identify the  individual and Organization  who owns the account.

    Step 3) Extract or summarize the narrative which describes transactions involving this account.

    An example is given below, demarcated by the delimiter ----.

    ----

    1) SAR Narrative:
        John deposited $5000 in Cash into Acct #345723 at Bank of America. John sends $3000 to Jill's account at Chase. Jill deposited $3000 in Cash into her Acct at Chase Bank.
        John and Jill own a business Acme Inc that has a  Business account, Account #98765 at Bank of America. John sends $2000 from Acct #345723 to Account #98765. Jill sends $1000 from 
        her Acct at Chase Bank to Acct #98765.
    
    2) Customers_to_Accts = {"John":["345723"],"Jill":["Dummy_001"],"Acme Inc":"98765"}

    3) Account_IDs = ["345723","98765","Dummy_001"]

    
    Step 1) The first account ID  is 345723

    Step 2) This Account ID is owned by John.

    Step 3)Extract the part of the narrative that references the Account 345723

    {"345723": "John deposited $5000 in Cash into Acct #345723 at Bank of America. John sends $3000 to Jill's account at Chase.John sends $2000 from Acct #345723 to Account #98765" }

    
    Now repeat the process for the second account ID

    Step 1) The second  account ID  is 98765

    Step 2) This Account ID is owned by Acme, Inc.

    Step 3)Extract the part of the narrative that references the Account 98765

    {"98765": "John sends $2000 from Acct #345723 to Account #98765. Jill sends $1000 from her Acct at Chase Bank to Acct #98765." }

    Now repeat the process for the third Account ID.

    Step 1) The third  account ID  is Dummy_001. Given the acocunt ID starts with Dummy_ , there won't be direct references to this account ID in the narrative. 

    Step 2) This Account ID is owned by Jill. Given the Account ID is dummy, pay attention to transactions made by Jill,the owner of the dummy account.

    Step 3) Extract the part of the narrative that references the Account Dummy_001

    {"Dummy_001": "John sends $3000 to Jill's account at Chase. Jill deposited $3000 in Cash into her Acct at Chase Bank. Jill sends $1000 from her Acct at Chase Bank to Acct #98765." }

    Consolidate narratives for all accounts.

     Narrative : {"345723": "John deposited $5000 in Cash into Acct #345723 at Bank of America. John sends $3000 to Jill's account at Chase.John sends $2000 from Acct #345723 to Account #98765",
      "98765": "John sends $2000 from Acct #345723 to Account #98765. Jill sends $1000 from her Acct at Chase Bank to Acct #98765.",
      "Dummy_001": "John sends $3000 to Jill's account at Chase. Jill deposited $3000 in Cash into her Acct at Chase Bank. Jill sends $1000 from her Acct at Chase Bank to Acct #98765." 
    }


    ----



    ''',
    llm_config=llm_config_large,
    code_execution_config=False,
    human_input_mode="NEVER",
    is_termination_msg=lambda msg: re.search(
        r"\bsatisfactory\b", (msg.get("content") or "").lower()
    ) is not None,
)





In [358]:
# Reviewer for the Narrative Extraction Agent's per-account excerpts.
reflection_agent_ne = ConversableAgent(
    name="reflection_agent_ne",
    llm_config=llm_config_mini,
    description='''
                    This agent should reflect on the Output of the Narrative Extaction Agent only. It can deermine whether Narratives
                    have been correctly extracted for each account. 
                  ''',
    system_message='''
                        You receive the following information to review:
                            "Account ID" : Account ID of interest
                            "Narrative": Summary or Extract of the narrative describing the transactional activity associated with the account
                    

                        You will review this information for correctness in the context of the provided SAR and provide feedback if any information is missing or incorrect.
                        Particulary, look for any descriptions of tranasctional activity that might have been missed for the account in question.
                        If the information is  correct or satisfactory, just return the word SATISFACTORY.

                    ''',
)





In [359]:
# Coordinator agent: it starts the chats with the worker agents and is the
# agent the reflection nested chat is registered on.
sar_agent = ConversableAgent(
    name="SAR_Agent",
    llm_config=llm_config_mini,
    human_input_mode="NEVER",
    system_message=(
        "You are assigned a SAR from which to extract transactions. "
        "You do this by sharing this SAR with other assistant agents who work on the SAR and extract useful information. "
        "You also pass this extracted information to a set of reflection agents. "
        "You then relay the feedback to the assistant agents  "
    ),
)





## Create a Group Chat between Reflection Agents

This group chat is registered below as a nested chat on the SAR agent.

In [360]:
from autogen import GroupChat , GroupChatManager

In [361]:
# Group chat holding the three reviewers; the manager picks which one speaks.
# NOTE(review): in AutoGen the message that opens a group chat counts toward
# max_round, so max_round=1 ends the chat before any reviewer replies. Two
# rounds give the opening message plus exactly one reviewer response --
# confirm against the installed AutoGen version.
group_chat_with_reflection_agents = GroupChat(
    agents=[reflection_agent_ee, reflection_agent_er, reflection_agent_ne],
    messages=[],
    speaker_selection_method="auto",
    send_introductions=True,
    max_round=2,
)

In [362]:
# Manager that drives the reflection group chat using the small model.
group_chat_manager_reflection = GroupChatManager(
    llm_config=llm_config_mini,
    groupchat=group_chat_with_reflection_agents,
)





In [363]:
def reflection_message(recipient,messages,sender,config):
    """Build the review request that opens the reflection nested chat.

    The text contains the sender's name, the content of the last message
    returned by ``recipient.chat_messages_for_summary(sender)`` and the content
    of the first entry in ``messages`` (labelled as the SAR narrative).
    ``config`` is accepted but not used. Prints "Reflecting..." when called.
    """
    print ("Reflecting...")
    sender_name = sender.name
    latest_output = recipient.chat_messages_for_summary(sender)[-1]['content']
    sar_narrative = messages[0]['content']
    return f''' Review that the information below is  accurate within the context of the SAR narrative provided below.   \n
                SENDER: {sender_name} \n
                INFORMATION TO REVIEW: \n {latest_output} \n
                SAR NARATIVE: \n {sar_narrative}

            '''

In [364]:
# Single nested-chat entry: send the review request to the reflection group
# chat manager for one turn and summarise the result step by step with an LLM.
nested_chat_reflection = [
    dict(
        recipient=group_chat_manager_reflection,
        message=reflection_message,
        summary_method="reflection_with_llm",
        summary_args=dict(
            summary_prompt="Return review of each step as a JSON object only:"
            + "".join(
                f"{{'Step {step}': < Review of Step {step} >}} " for step in range(1, 5)
            )
        ),
        max_turns=1,
    )
]

In [365]:
# Trigger the reflection nested chat whenever any of the three worker agents
# replies to the SAR agent. The trigger list previously named
# entity_extraction_agent twice, so replies from entity_resolution_agent were
# never reviewed.
sar_agent.register_nested_chats(
    nested_chat_reflection,
    trigger=lambda sender: sender in [
        entity_extraction_agent,
        entity_resolution_agent,
        narrative_extraction_agent,
    ],
)

Read in the training SAR files.

In [266]:
import os

# Load the text of every "*train*.txt" file under ../data/input.
# os.listdir returns names in arbitrary order, so the names are sorted to make
# train_sars[0] refer to the same file on every run.
train_sars = []
for filename in sorted(os.listdir("../data/input")):
    if 'train' in filename and filename.endswith('.txt'):
        file_path = os.path.join("../data/input", filename)
        with open(file_path,'r') as file:
            train_sars.append(file.read())

In [267]:
# Use the first loaded training narrative as the message sent to the agents.
message = train_sars[0]

Combine the agents into a sequential chat.

In [366]:
# Run the entity-extraction chat only and ask for the result as one JSON
# object. The template is written as a single well-formed object (commas
# between members, every value quoted, balanced braces, "Entities" spelled
# correctly) so the summariser is not shown a malformed target.
chat_results = sar_agent.initiate_chats(
    [
        {
            "recipient": entity_extraction_agent,
            "message": message,
            "max_turns": 2,
            "summary_method": "reflection_with_llm",
            "summary_args": {
                "summary_prompt": "Return the extracted entity information as JSON object only: "
                                  "{'Entities': {'Individuals':['Individual_1','Individual_2'],'Organizations':['Organization_1'],'Financial_Institutions':['FI_1','FI_2']},"
                                  " 'Account IDs': ['Acct_ID_1','Acct_ID_2','Acct_ID_3'],"
                                  " 'FI_to_Acct' : {'FI_1': ['Acct_ID_1'],'FI_2': ['Acct_ID_2','Acct_ID_3']},"
                                  " 'Acct_to_Cust' : { 'Acct_ID_1': 'Individual_1','Acct_ID_2': 'Individual_2','Acct_ID_3': 'Organization_1'} }"
            },
        },
    ]
)

[34m
********************************************************************************[0m
[34mStarting a new chat....[0m
[34m
********************************************************************************[0m
[33mSAR_Agent[0m (to Entity_Extraction_Agent):

Investigation case number: A5678910. The customer, a grocery store and  its owner, are suspected of intentionally structuring cash deposits to  circumvent federal reporting requirements.  The customer is also  engaged in activity indicative of an informal value transfer operation: deposits of bulk cash, third party out of state personal checks and money 
orders, and engaging in aggregate wire transfers to Dubai, UAE.  The type and volume of activity observed is non-commensurate with the customer’s expected business volume and deviates from the normal volume of similar types of businesses located in the same area as the customer.  Investigative activities are continuing. Our bank has elected to 
directly contact law enforcemen

In [367]:
# Print every chat summary, each followed by blank lines.
for chat_result in chat_results:
    print(chat_result.summary, "\n", sep="\n")

```json
{
    "Entities": {
        "Individuals": ["John Doe"],
        "Organizations": ["Acme, Inc.", "Kulkutta Building Supply Company"],
        "Financial_Institutions": ["Bank of Anan"]
    },
    "Account_IDs": ["12345-6789", "23456-7891", "3489728"],
    "FI_to_Acct": {
        "Bank of Anan": ["3489728"],
        "Unknown Bank": ["12345-6789", "23456-7891"]
    },
    "Acct_to_Cust": {
        "12345-6789": "John Doe",
        "23456-7891": "Acme, Inc.",
        "3489728": "Kulkutta Building Supply Company"
    }
}
```




In [94]:
# Run the three worker chats in sequence (entity extraction, entity
# resolution, narrative extraction); each chat is summarised as JSON.
# The first template is one well-formed object with distinct placeholder
# names for banks (FI_n) and customers (Customer_n); it previously had an
# unterminated quote and reused 'Entity2' for both a bank and a customer.
chat_results = sar_agent.initiate_chats(
      [
        {
            "recipient": entity_extraction_agent,
            "message": message,
            "max_turns": 2,
            "summary_method": "reflection_with_llm",
            "summary_args": {
                "summary_prompt" : "Return the extracted entity information as JSON object only: "
                                "{'Account_IDs': ['Acct_ID_1','Acct_ID_2','Acct_ID_3'],"
                                " 'FIs_to_Accts': { 'FI_1': ['Acct_ID_1'],'FI_2':['Acct_ID_2','Acct_ID_3']},"
                                " 'Customers_to_Accts': { 'Customer_1': ['Acct_ID_1'],'Customer_2':['Acct_ID_2'],'Customer_3':['Acct_ID_3']} }",
            },
        },
        {
            "recipient": entity_resolution_agent,
            "message": message,
            "max_turns": 2,
            "summary_method": "reflection_with_llm",
            "summary_args": {
                "summary_prompt" : '''Return the resolved entites in the SAR narrative as JSON object only: 
                                {'FI_to_Cust_to_Acct':{<Financial Institution 1>: { <CUST_ID_1>: [<ACCT_1>,<ACCT_2>], <CUST_ID_2>:[<ACCT_3>]},
                                                      <Financial Institution 2>: { <CUST_ID_3>: [<ACCT_4>,<ACCT_5>], <CUST_ID_4>:[<ACCT_6>]} } 
                                                      }
                                '''
                                },
        },
        {
            "recipient": narrative_extraction_agent,
            "message": message,
            "max_turns": 2,
            "summary_method": "reflection_with_llm",
            "summary_args": {
                "summary_prompt" : '''Return the extracted  excerpts from the SAR narrative as JSON object only: 
                                    'Narrative': {<Acct_1>: <Excerpt relevant to Acct_1>,
                                                  <Acct_2>: <Excerpt relevant to Acct_2>,
                                                  <Acct_3>: <Excerpt relevant to Acct_3> } 
                                   '''
                                },
        },

    ]


)

[34m
********************************************************************************[0m
[34mStarting a new chat....[0m
[34m
********************************************************************************[0m
[33mSAR_Agent[0m (to Entity_Extraction_Agent):

Investigation case number: A5678910. The customer, a grocery store and  its owner, are suspected of intentionally structuring cash deposits to  circumvent federal reporting requirements.  The customer is also  engaged in activity indicative of an informal value transfer operation: deposits of bulk cash, third party out of state personal checks and money 
orders, and engaging in aggregate wire transfers to Dubai, UAE.  The type and volume of activity observed is non-commensurate with the customer’s expected business volume and deviates from the normal volume of similar types of businesses located in the same area as the customer.  Investigative activities are continuing. Our bank has elected to 
directly contact law enforcemen

In [95]:
# Display the message history of the first chat in chat_results.
chat_results[0].chat_history

[{'content': "Investigation case number: A5678910. The customer, a grocery store and  its owner, are suspected of intentionally structuring cash deposits to  circumvent federal reporting requirements.  The customer is also  engaged in activity indicative of an informal value transfer operation: deposits of bulk cash, third party out of state personal checks and money \norders, and engaging in aggregate wire transfers to Dubai, UAE.  The type and volume of activity observed is non-commensurate with the customer’s expected business volume and deviates from the normal volume of similar types of businesses located in the same area as the customer.  Investigative activities are continuing. Our bank has elected to \ndirectly contact law enforcement concerning this matter along with filing this SAR. \n\nJohn Doe opened a personal checking account, #12345-6789, in March of 1994.  Doe indicated that he was born in Yemen, presented a Virginia driver's license as identification, and claimed he wa

In [96]:
# Print every chat summary, each followed by blank lines.
for chat_result in chat_results:
    print(chat_result.summary, "\n", sep="\n")

```json
{
  "Account_IDs": [
    "12345-6789",
    "23456-7891",
    "3489728"
  ],
  "FIs_to_Accts": {
    "Bank of Anan": [
      "3489728"
    ]
  },
  "Customers_to_Accts": {
    "John Doe": [
      "12345-6789"
    ],
    "Acme, Inc.": [
      "23456-7891"
    ],
    "Kulkutta Building Supply Company": [
      "3489728"
    ]
  }
}
```


```json
{
  "FI_to_Cust_to_Acct": {
    "Bank of Anan": {
      "Kulkutta Building Supply Company": ["3489728"]
    },
    "Other Financial Institutions": {
      "John Doe": ["12345-6789"],
      "Acme, Inc.": ["23456-7891"]
    }
  }
}
```


```json
{
  "Narrative": {
    "12345-6789": "Reviews covering the period between January 2 and March 17, 2003, revealed that 13 deposits (consisting of cash, checks, money orders) totaling approximately $50,000 posted to the personal account. Individual amounts ranged between $1,500 and $9,500 and occurred on consecutive business days in several instances. A number of third-party out-of-state checks and mon

Extract results for consumption by the transaction-generation model.

In [60]:
# Take the text inside the first Markdown code fence of the summary (an
# optional "json" language tag is skipped). If the summary has no fence the
# whole summary is used instead of raising IndexError.
# str.strip('```json') was avoided because it strips a *set of characters*
# from both ends rather than a prefix.
_summary0 = chat_results[0].summary
_fenced0 = re.search(r"```(?:json)?\s*(.*?)```", _summary0, flags=re.DOTALL | re.IGNORECASE)
results0 = (_fenced0.group(1) if _fenced0 else _summary0).strip()

In [61]:
# Show the current value of results0.
print(results0)


{
    "Account IDs": ["12345-6789", "23456-7891", "3489728"],
    "FIs_to_Accts": {
        "Dummy_Bank": ["12345-6789", "23456-7891"],
        "Bank of Anan": ["3489728"]
    },
    "Customers_to_Accts": {
        "John Doe": ["12345-6789"],
        "Acme, Inc.": ["23456-7891"],
        "Kulkutta Building Supply Company": ["3489728"]
    }
}



In [62]:
import json

# Parse the extracted JSON text into a Python dict.
# This rebinds results0 from str to dict, so re-running this cell without first
# re-running the extraction cell raises TypeError.
results0 = json.loads(results0)

In [63]:

# Remove the account-ID list from results0 and keep it separately in accts.
# The extraction agent is not consistent about the key spelling: the captured outputs in
# this notebook show both "Account IDs" and "Account_IDs", so accept either one.
# NOTE: pop() mutates results0, so this cell can only be run once per extraction.
accts = results0.pop('Account IDs' if 'Account IDs' in results0 else 'Account_IDs')
results0

{'FIs_to_Accts': {'Dummy_Bank': ['12345-6789', '23456-7891'],
  'Bank of Anan': ['3489728']},
 'Customers_to_Accts': {'John Doe': ['12345-6789'],
  'Acme, Inc.': ['23456-7891'],
  'Kulkutta Building Supply Company': ['3489728']}}

In [64]:
accts

['12345-6789', '23456-7891', '3489728']

In [57]:
# Pull the JSON payload out of the second chat summary and parse it.
# str.strip('```json') strips a *set of characters* rather than a prefix, and
# split('```')[1] raises IndexError when the reply has no code fence, so the fence and
# its "json" language tag are removed explicitly.
results1 = chat_results[1].summary
if '```' in results1:
    # Keep only the body of the first fenced block.
    results1 = results1.split('```', 2)[1]
results1 = results1.strip()
if results1.startswith('json'):
    results1 = results1[len('json'):].strip()
results1 = json.loads(results1)
print(results1)

{'FI_to_Cust_to_Acct': {'Dummy_Bank': {'CUST_001': ['12345-6789'], 'CUST_002': ['23456-7891']}, 'Bank of Anan': {'CUST_003': ['3489728']}}}


In [58]:
# Pull the JSON payload out of the third chat summary and parse it.
# str.strip('```json') strips a *set of characters* rather than a prefix, and
# split('```')[1] raises IndexError when the reply has no code fence, so the fence and
# its "json" language tag are removed explicitly.
results2 = chat_results[2].summary
if '```' in results2:
    # Keep only the body of the first fenced block.
    results2 = results2.split('```', 2)[1]
results2 = results2.strip()
if results2.startswith('json'):
    results2 = results2[len('json'):].strip()
results2 = json.loads(results2)
print(results2)

{'Narrative': {'12345-6789': 'John Doe opened a personal checking account, #12345-6789, in March of 1994. Between January 2 and March 17, 2003, 13 deposits (consisting of cash, checks, money orders) totaling approximately $50,000 posted to the personal account. Individual amounts ranged between $1,500 and $9,500 and occurred on consecutive business days in several instances. A number of third-party out of state checks and money orders were also deposited into the account. In nine of 13 instances in which cash deposits were made to both accounts on the same day, the combined deposits of cash exceeded $10,000.', '23456-7891': 'A business checking account, #23456-7891, was opened in January of 1998 for Acme, Inc. A review of deposit activity on the Acme, Inc. account from January 2 to March 17, 2003, revealed 33 deposits (consisting of cash, checks, money orders) totaling approximately $275,000. Individual amounts ranged between $4,446 and $9,729; however, 22 of 33 deposits ranged between

In [118]:
results2["Narrative"].keys()

dict_keys(['12345-6789', '23456-7891', '3489728'])

In [68]:
# Keep only the narrative of the first extracted account (accts[:1]).
# If accts is empty the loop body never runs and narratives_1 is left unassigned.
for acct in accts[:1]:
    narratives = results2['Narrative']
    # Re-wrap the single entry under 'Narrative' so it has the same shape as results2.
    narratives_1 = {'Narrative': {acct:narratives[acct]}}

In [66]:
narratives_1

{'Narrative': {'3489728': 'Between January 17, 2003, and March 21, 2003, John Doe was the originator of nine wires totaling $225,000. The wire transfers were always conducted at the end of each week in the amount of $25,000. All of the wires were remitted to the Bank of Anan in Dubai, UAE, to benefit Kulkutta Building Supply Company, account #3489728.'}}

In [72]:
# Merge the three result dicts into one; on duplicate top-level keys the later dict wins.
combined_dict = dict(results0)
combined_dict.update(results1)
combined_dict.update(narratives_1)
combined_dict

{'FIs_to_Accts': {'Dummy_Bank': ['12345-6789', '23456-7891'],
  'Bank of Anan': ['3489728']},
 'Customers_to_Accts': {'John Doe': ['12345-6789'],
  'Acme, Inc.': ['23456-7891'],
  'Kulkutta Building Supply Company': ['3489728']},
 'FI_to_Cust_to_Acct': {'Dummy_Bank': {'CUST_001': ['12345-6789'],
   'CUST_002': ['23456-7891']},
  'Bank of Anan': {'CUST_003': ['3489728']}},
 'Narrative': {'12345-6789': 'John Doe opened a personal checking account, #12345-6789, in March of 1994. Between January 2 and March 17, 2003, 13 deposits (consisting of cash, checks, money orders) totaling approximately $50,000 posted to the personal account. Individual amounts ranged between $1,500 and $9,500 and occurred on consecutive business days in several instances. A number of third-party out of state checks and money orders were also deposited into the account. In nine of 13 instances in which cash deposits were made to both accounts on the same day, the combined deposits of cash exceeded $10,000.'}}

Convert back to a JSON formatted string

In [76]:
# Serialize the merged dict to an indented JSON string; this string is later passed
# as the `message` of sar_agent.initiate_chat.
trxn_extraction_inputs = json.dumps(combined_dict,indent=2)
trxn_extraction_inputs

'{\n  "FIs_to_Accts": {\n    "Dummy_Bank": [\n      "12345-6789",\n      "23456-7891"\n    ],\n    "Bank of Anan": [\n      "3489728"\n    ]\n  },\n  "Customers_to_Accts": {\n    "John Doe": [\n      "12345-6789"\n    ],\n    "Acme, Inc.": [\n      "23456-7891"\n    ],\n    "Kulkutta Building Supply Company": [\n      "3489728"\n    ]\n  },\n  "FI_to_Cust_to_Acct": {\n    "Dummy_Bank": {\n      "CUST_001": [\n        "12345-6789"\n      ],\n      "CUST_002": [\n        "23456-7891"\n      ]\n    },\n    "Bank of Anan": {\n      "CUST_003": [\n        "3489728"\n      ]\n    }\n  },\n  "Narrative": {\n    "12345-6789": "John Doe opened a personal checking account, #12345-6789, in March of 1994. Between January 2 and March 17, 2003, 13 deposits (consisting of cash, checks, money orders) totaling approximately $50,000 posted to the personal account. Individual amounts ranged between $1,500 and $9,500 and occurred on consecutive business days in several instances. A number of third-party o

In [77]:
print(trxn_extraction_inputs)

{
  "FIs_to_Accts": {
    "Dummy_Bank": [
      "12345-6789",
      "23456-7891"
    ],
    "Bank of Anan": [
      "3489728"
    ]
  },
  "Customers_to_Accts": {
    "John Doe": [
      "12345-6789"
    ],
    "Acme, Inc.": [
      "23456-7891"
    ],
    "Kulkutta Building Supply Company": [
      "3489728"
    ]
  },
  "FI_to_Cust_to_Acct": {
    "Dummy_Bank": {
      "CUST_001": [
        "12345-6789"
      ],
      "CUST_002": [
        "23456-7891"
      ]
    },
    "Bank of Anan": {
      "CUST_003": [
        "3489728"
      ]
    }
  },
  "Narrative": {
    "12345-6789": "John Doe opened a personal checking account, #12345-6789, in March of 1994. Between January 2 and March 17, 2003, 13 deposits (consisting of cash, checks, money orders) totaling approximately $50,000 posted to the personal account. Individual amounts ranged between $1,500 and $9,500 and occurred on consecutive business days in several instances. A number of third-party out of state checks and money orders we

Construct the prompt to pass to the trxn generation agent

# Phase 2 - Convert Narratives to Trxns

Agent that synthesizes transactions when there are only a few trxns to synthesize.

In [78]:
# Agent for narratives that describe only a few, explicitly listed transactions.
# The system message gives a four-step extraction procedure followed by one worked example.
# The worked example is kept internally consistent on purpose (amounts, dictionary names,
# originator of the cash deposit, quoted Trxn IDs, balanced braces): the model imitates it.
trxn_generation_agent = ConversableAgent(
    name="trxn_generation_agent",
    system_message='''
    You are an AI Agent tasked with synthesizing transactions from a narrative you are provided. Your strength is synthesizing a small number of transactions, especially when 
    transaction amounts and dates are explicitly listed.

    You will have the following four pieces of information.

    
    1) A JSON called Narrative with Account IDs as key  and  narratives potentially describing transactions made by that account as the values. </n>
    2) The mapping between Individuals and Organizations referenced in the narrative to Account IDs, given by the dictionary Customers_to_Accts. Note, account IDs starting with the prefix "Dummy_" 
    are placeholders for account IDs that have not been identified in the narrative. </n>
    3) The mapping between Financial Institutions to Account IDs given by the dictionary FIs_to_Accts. If the Financial institution for certain accounts are not specified in the narrative,
      a dummy financial institution "Dummy_Bank" will be used for them.</n>
    4) A mapping between Financial institution, Customer IDs and Accounts owned by those Customer IDs given by the dictionary FI_to_Cust_to_Acct
    
    For the narrative corresponding  to each account ID, do the following:

    Step 1) Identify the number of transactions described in the narrative. Note this in a scratch pad for verification later.

    
    Step 2)For each transaction involving the account ID,  

    a)  Identify the transaction channel or product used. This could be Wire, Cash, Checks or something similar. If this is missing, you can make a reasonable assumption.

    b) Identify the Originator Account ID, the Customer ID corresponding to the account, the Originating_Name which is the Individual Or Organization originating
    the transaction.

    c)  Identify the Beneficiary Account ID, the Customer ID corresponding to the account, the beneficiary_Name which is the Individual Or Organization which is the 
    Beneficiary of the transaction. Note that for cash transactions, if only information on the originator or beneficiary are available, you can assume the originator and beneficiary are the same.

    d) Identify the date and amount of the transaction. If this is missing, you can make reasonable assumptions.

    e) If this is a Cash or Check or similar transaction, determine the Branch or ATM Location where the transaction was conducted. If this is not specified, indicate it as "Missing"

    Ensure the following are extracted.

    -Originator_Name
    -Originator_Account_ID
    -Originator_Customer_ID
    -Beneficiary_Name
    -Beneficiary_Account_ID
    -Beneficiary_Customer_ID
    -Trxn_Channel
    -Trxn_Date in yyyy-mm-dd format
    -Trxn_Amount
    -Branch or ATM Location

    If any information is missing and can't be reasonably assumed, use an empty string "" to indicate it is missing.

    Step 3) Ensure the number of transactions extracted in Step 2 are the same as that noted in Step 1. if this is not the case, go back to Step 2.

    Step 4) Return the transactions as a JSON file with the key being a Trxn ID and value being the transaction attributes

    An example is given below, demarcated by the delimiter ----..

    ----

    Narrative = {"345723": "John deposited $5000 in Cash into Acct #345723 at the Main Road, NY Branch of Bank of America on Jan 4, 2024.
                  John sends $3000 to Acme Inc's account at Bank of America by Wire on Jan 6, 2024 . John  wrote a check to Jill from Acct #345723 on Jan 8,2024 for $1,000"}

                  Customers_to_Accts = {"John":["345723"],"Jill":["Dummy_001"],"Acme Inc":["98765"]}
                  FIs_to_Accts = {"Bank of America":["345723","98765"],"Chase Bank":["Dummy_001"]}
                  FI_to_Cust_to_Acct = {"Bank of America": {"CUST_001": ["345723"], "CUST_002":["98765"]}, "Chase Bank": {"CUST_003":["Dummy_001"]}}

    
    Step 1) Identify the number of transactions described in the narrative. Note this in a scratch pad for verification later.
      There are three trxns described in the narrative. A $5000 cash deposit, a $3000 wire transfer and a $1000 check.

   Step 2) For each transaction, identify the required attributes.

   For the first transaction, the Beneficiary_Name is John as the money is being deposited into John's account. The Beneficiary_Account_ID is #345723. From FI_to_Cust_to_Acct, 
   The Beneficiary Customer ID is CUST_001.

   The Originator_Name is John, but this is a cash deposit , so there is no Account ID specified from which the cash originates.
   Given this is a cash deposit and information on the Originator is unavailable, it can be assumed to be the same as the Beneficiary information.

   The Trxn Channel is Cash as it is a cash deposit.
   The Trxn Date is  2024-01-04.
   The Trxn Amount is $5,000.
   The Branch was Main Road , NY

   For the second transaction, the Originator_Name is John. Although no account ID is specified, it is a wire transaction following a cash transaction, 
   so it has to come from John's account #345723.The Originator_Account_ID is #345723. The Originator Customer ID is CUST_001 as per FI_to_Cust_to_Acct.

   The Beneficiary_Name is Acme Inc. The Beneficiary_Account_ID is #98765 as this account belongs to Acme Inc at Bank of America.
   From FI_to_Cust_to_Acct, The Beneficiary Customer ID is CUST_002.


    The Trxn Channel is Wire.
    The Trxn Date is  2024-01-06.
    The Trxn Amount is $3,000.
    The Branch is Missing as it is a wire transaction.

    For the third transaction, the Originator_Name is John. The Originator_Account_ID is 345723 . The Originator Customer ID is CUST_001 as per FI_to_Cust_to_Acct.

    The Beneficiary_Name is Jill. The Beneficiary_Account_ID must be #Dummy_001 as the information provided indicated that this is Jill's account at Chase Bank.
    From FI_to_Cust_to_Acct, The Beneficiary Customer ID is CUST_003.


    The Trxn Channel is Check
    The Trxn Date is  2024-01-08.
    The Trxn Amount is $1,000.
    The Branch is Missing as it is a Check.

    Step 3) Ensure the number of transactions extracted in Step 2 are the same as that noted in Step 1. if this is not the case, go back to Step 2.

    Three transactions have been identified, which is the same as the number of transaction in scratch pad from Step 1)

    Step 4) Return the transactions as a JSON file with the key being a Trxn ID and value being the transaction attributes

    Now the transactions can be combined into a JSON file with a key being a Trxn ID starting from 1.

    Trxns = {"345723": {"1" : {"Originator_Name": "John", "Originator_Account_ID": "345723", "Originator_Customer_ID":"CUST_001","Beneficiary_Name": "John","Beneficiary_Account_ID":"345723",
              "Beneficiary_Customer_ID":"CUST_001" ,"Trxn_Channel":"Cash", "Trxn_Date":"2024-01-04", "Trxn_Amount":5000, "Branch or ATM Location": "Main Road,NY"
            },
              "2": {"Originator_Name": "John", "Originator_Account_ID": "345723", "Originator_Customer_ID":"CUST_001","Beneficiary_Name": "Acme Inc","Beneficiary_Account_ID":"98765",
              "Beneficiary_Customer_ID":"CUST_002" ,"Trxn_Channel":"Wire", "Trxn_Date":"2024-01-06", "Trxn_Amount":3000, "Branch or ATM Location": ""
            },
            "3": {"Originator_Name": "John", "Originator_Account_ID": "345723", "Originator_Customer_ID":"CUST_001","Beneficiary_Name": "Jill","Beneficiary_Account_ID":"Dummy_001",
              "Beneficiary_Customer_ID":"CUST_003" ,"Trxn_Channel":"Check", "Trxn_Date":"2024-01-08", "Trxn_Amount":1000, "Branch or ATM Location": ""
            }
        }
    }

    ----



    ''',
    llm_config=llm_config_mini,
    code_execution_config=False,
    human_input_mode="NEVER",
)



Create a tool to help synthesize transactions

In [81]:
from typing import List, Literal
from typing_extensions import Annotated

import numpy as np
from datetime import datetime, timedelta
import random

Channels_allowed = Literal["Wire","Cash","Check"]
def generate_transactions(
        Originator_Name:Annotated[str, "Entity or Customer originating the transactions"],
        Originator_Account_ID:Annotated[str, "Account  of Entity or Customer originating the transactions"],
        Originator_Customer_ID:Annotated[str, "Customer ID of Entity or Customer originating the transactions"],
        Beneficiary_Name:Annotated[str, "Entity or Customer receiving the transactions"], 
        Beneficiary_Account_ID:Annotated[str, "Account of Entity or Customer  receiving the transactions"],
        Beneficiary_Customer_ID:Annotated[str, "Customer ID of Entity or Customer receiving the transactions"],
        Trxn_Channel:Annotated[List[Channels_allowed], "Transaction Channels used to make the transactions."],
        Start_Date:Annotated[str, "Date on which the first transaction was made"], 
        End_Date:Annotated[str, "Date on which the last transaction was made"],
        Min_Ind_Trxn_Amt:Annotated[float, "The smallest transaction"],
        Max_Ind_Trxn_Amt:Annotated[float, "The largest transaction"],
        Branch_or_ATM_Location:Annotated[str, "The location where transaction was originated or received"],
        N_transactions:Annotated[int, "The number of transactions made between the Originator and Beneficary"]) -> dict:
    """Synthesize ``N_transactions`` random transactions between one originator and one beneficiary.

    Each transaction gets a channel drawn (with replacement) from ``Trxn_Channel``, a date drawn
    uniformly from the days between ``Start_Date`` and ``End_Date`` (both inclusive), and an
    amount drawn uniformly between ``Min_Ind_Trxn_Amt`` and ``Max_Ind_Trxn_Amt``, rounded to 2 decimals.
    The party fields and ``Branch_or_ATM_Location`` are copied unchanged into every transaction.

    Args:
        Start_Date, End_Date: dates in ``yyyy-mm-dd`` format.
        N_transactions: number of transactions to generate; 0 yields an empty dict.

    Returns:
        dict mapping ``"Trxn_1"`` .. ``"Trxn_N"`` to a dict of transaction attributes.
        Dates are ``yyyy-mm-dd`` strings and amounts are plain Python floats.

    Raises:
        ValueError: if a date does not match ``yyyy-mm-dd``, if ``End_Date`` is before
            ``Start_Date``, or if ``N_transactions`` is negative.
    """
    first_day = datetime.strptime(Start_Date,"%Y-%m-%d")
    last_day = datetime.strptime(End_Date,"%Y-%m-%d")
    span_days = (last_day - first_day).days
    if span_days < 0:
        raise ValueError(f"End_Date {End_Date} is earlier than Start_Date {Start_Date}")
    if N_transactions < 0:
        raise ValueError(f"N_transactions must be non-negative, got {N_transactions}")

    trxn_channels = random.choices(Trxn_Channel, k = N_transactions)

    # span_days + 1 makes End_Date itself reachable and keeps the population non-empty
    # when Start_Date == End_Date (an empty range makes random.choices raise IndexError).
    sample_deltas = random.choices(range(span_days + 1), k = N_transactions)
    trxn_dates = [(first_day + timedelta(days=delta)).strftime("%Y-%m-%d") for delta in sample_deltas]

    # .tolist() converts numpy.float64 to built-in float so the result can be JSON-serialized.
    trxn_amounts = np.round(
        np.random.uniform(low=Min_Ind_Trxn_Amt, high=Max_Ind_Trxn_Amt, size=N_transactions), 2
    ).tolist()

    trxns = {}  # Trxn ID -> transaction attributes
    for i in range(N_transactions):
        trxns[f"Trxn_{i+1}"] = {"Originator_Name": Originator_Name , "Originator_Account_ID": Originator_Account_ID,"Originator_Customer_ID": Originator_Customer_ID,
                            "Beneficiary_Name": Beneficiary_Name, "Beneficiary_Account_ID": Beneficiary_Account_ID,"Beneficiary_Customer_ID": Beneficiary_Customer_ID,
                             "Trxn_Channel": trxn_channels[i], "Trxn_Date": trxn_dates[i], "Trxn_Amount":trxn_amounts[i],
                              "Branch_or_ATM_Location": Branch_or_ATM_Location }

    return trxns

In [88]:
from autogen.agentchat.contrib.gpt_assistant_agent import GPTAssistantAgent
from autogen.function_utils import get_function_schema

# Tool schema for generate_transactions, handed to the assistant agent through its "tools" config.
generate_transactions_schema = get_function_schema(
    generate_transactions,
    description = " A function for generating transactions when a large number of transactions have to be synthesizes",
    name = "generate_transactions",
)

Create an agent that can use this tool.

In [89]:
# Tool-using agent for narratives that describe many transactions between two parties as a
# range of dates and amounts. The instructions give an eight-step procedure ending in a call
# to the generate_transactions tool, followed by one worked example.
# The worked example must match its own narrative and spell the tool name exactly:
# the model imitates it.
trxn_generation_agent_gpt = GPTAssistantAgent(
    name = "trxn_generation_agent_gpt",
    instructions = """
    You are an AI Agent tasked with synthesizing transactions from a narrative you are provided. Your strength is synthesizing transactions when a larger number of transactions
    are made between two entities  when a range of trxn amounts and dates are specified. When invoked, use the provided function to generate transactions.

    You will have the following four pieces of information.

    
    1) A JSON called Narrative with Account IDs as key  and  narratives potentially describing transactions made by that account as the values. </n>
    2) The mapping between Individuals and Organizations referenced in the narrative to Account IDs, given by the dictionary Customers_to_Accts. Note that account IDs starting with the prefix "Dummy_" 
    are placeholders for account IDs that have not been identified in the narrative. </n>
    3) The mapping between Financial Institutions to Account IDs given by the dictionary FIs_to_Accts. If the Financial institution for certain accounts are not specified in the narrative,
      a dummy financial institution "Dummy_Bank" will be used for them.</n>
    4) A mapping between Financial institution, Customer IDs and Accounts owned by those Customer IDs given by the dictionary FI_to_Cust_to_Acct
    
    For the narrative corresponding  to each account ID, do the following:

    Step 1) Identify the number of transactions described in the narrative. Note this in a scratch pad for verification later.

    
    Step 2) Identify the transaction channels or product used. This could be Wire, Cash, Checks or something similar. If this is missing, you can make a reasonable assumption.

    Step 3) Identify the Beneficiary Account ID, the Customer ID corresponding to the account, the beneficiary_Name which is the Individual Or Organization which is the 
    Beneficiary of the transaction. Note that for cash transactions, if only information on the originator or beneficiary are available, you can assume the originator and beneficiary are the same.

    Step 4) Identify the Originator Account ID, the Customer ID corresponding to the account, the Originating_Name which is the Individual Or Organization originating
    the transaction.

    Step 5) Identify the first and last dates of the transactions. If this is missing, you can make reasonable assumptions.

    Step 6) Identify the minimum and maximum amount of the transactions.

    Step 7) If this is a Cash or Check or similar transaction, determine the Branches or ATM Locations where the transaction was conducted. If this is not specified, indicate it as "Missing"

    Ensure the following are extracted.

    -Originator_Name
    -Originator_Account_ID
    -Originator_Customer_ID
    -Beneficiary_Name
    -Beneficiary_Account_ID
    -Beneficiary_Customer_ID
    -Trxn_Channel or List of Trxn Channels
    -First Trxn_Date and Last Trxn Date in yyyy-mm-dd format
    -Min Trxn_Amount and Max Trxn_Amount
    -Branch or ATM Location or List of such locations

    If any information is missing and can't be reasonably assumed, use an empty string "" to indicate it is missing.

    Step 8) Use the function `generate_transactions` that returns a dictionary with the specified number of transactions and attributes.

    An example is given below, demarcated by the delimiter ----..

    ----

    Narrative = {"345723": "John transferred a total of $100,000 from Acct #345723 at Bank of America  to Jill between Jan 1, 2024 and July 4,2024.
                             There were 10 trxns, each transaction was $10,000"}

                  Customers_to_Accts = {"John":["345723"],"Jill":["Dummy_001"],"Acme Inc":["98765"]}
                  FIs_to_Accts = {"Bank of America":["345723","98765"],"Chase Bank":["Dummy_001"]}
                  FI_to_Cust_to_Acct = {"Bank of America": {"CUST_001": ["345723"], "CUST_002":["98765"]}, "Chase Bank": {"CUST_003":["Dummy_001"]}}

    
   Step 1) Identify the number of transactions described in the narrative. Note this in a scratch pad for verification later.
      There are 10 trxns described in the narrative, each a $10,000 transfer from John to Jill.

   Step 2) Identify the transaction channels or product used.
      The narrative does not state the channel. Because the transfers go from an account at Bank of America to an account at Chase Bank, Wire is a reasonable assumption.


   Step 3) The Beneficiary of the trxns is Jill.The Beneficiary_Name is Jill. From Customers_to_Accts, it is noted that Jill's account is Dummy_001. From FIs_to_Accts, this account is at Chase Bank.
           From FI_to_Cust_to_Acct, the Customer ID corresponding to this account is CUST_003
   

   Step 4) The Originator of the trxns is John. From the narrative, the Originator name is John and the originating account is 345723. From FIs_to_Accts, this account is at Bank of America.
           From FI_to_Cust_to_Acct, the Customer ID corresponding to this account is CUST_001
           
   Step 5) The first transaction was made on Jan 1,2024 and last transaction was made on July 4, 2024.

   Step 6) The minimum and maximum transaction amount is $10,000

   Step 7) Since the transactions are Wire transactions, Branch or ATM Location can be considered Missing. All the required attributes have been gathered.

   Step 8) Call the function `generate_transactions` with the collected arguments and N_transactions = 10. Return results as JSON only.

    ----


    """,
    overwrite_instructions=True,
    overwrite_tools=True,
    llm_config= {
        "config_list":config_list,
        "tools":[generate_transactions_schema]

    }
)









    You are an AI Agent tasked with synthesizing transactions from a narrative you are provided. Your strength is synthesizing transactions when a larger number of transactions
    are made between two entities across multiple dates. When invoked, use the provided function to generate transactions.

    You will have the following four pieces of information.

    
    1) A JSON called Narrative with Account IDs as key  and  narratives potentially describing transactions made by that account as the values. </n>
    2) The mapping between Individuals and Organizations referenced in the narrative to Account IDs, given by the dictionary Customers_to_Accts. Note thataccount IDs starting with the prefix "Dummy_" 
    are placeholders for account IDs that have not been identified in the narrative. </n>
    3) The mapping between Financial Institutions to Account IDs given by the dictionary FIs_to_Accts. If the Financial institution for certain accounts are not specified in the narrative,
    

In [90]:
# Bind the tool name the assistant will call to the local Python implementation.
trxn_generation_agent_gpt.register_function(
    function_map=dict(generate_transactions=generate_transactions)
)

In [86]:
# User-proxy agent that opens the chat with the SAR extract; it asks for human input on every turn.
sar_agent = autogen.UserProxyAgent(
    name="sar_agent",
    human_input_mode="ALWAYS",
    system_message="Share the SAR extract given so that trxns can be extracted",
    # Please set use_docker=True if docker is available to run the generated code.
    # Using docker is safer than running the generated code directly.
    code_execution_config=dict(
        last_n_messages=2,
        work_dir="temp",
        use_docker=False,
    ),
)

In [93]:
# Group chat between the SAR proxy and the two transaction-generation agents, capped at 2 rounds.
groupchat = autogen.GroupChat(
    agents=[sar_agent, trxn_generation_agent, trxn_generation_agent_gpt],
    max_round=2,
    messages=[],
)
# The manager uses the mini model config.
manager = autogen.GroupChatManager(llm_config=llm_config_mini, groupchat=groupchat)





In [95]:
# Send the serialized extraction results to the group chat and ask for an LLM-written
# summary in a fixed JSON layout.
# The layout is spelled out with balanced braces and an explicit "quoted keys" rule:
# without it the captured summary used bare integer Trxn IDs (`1: {...}`), which is not valid JSON.
chat_results = sar_agent.initiate_chat(
    manager,
    message = trxn_extraction_inputs,
    summary_method="reflection_with_llm",
    summary_args= {
        "summary_prompt": '''
                            Return the synthesized transactions in the following format as a JSON object only.
                            Every key, including <Acct_ID> and <Trxn_ID>, must be a double-quoted string so that the output is valid JSON.
                                 { <Acct_ID> : {<Trxn_ID> : 
                                                    {"Originator_Name": <Originator_Name>,
                                                     "Originator_Account_ID": <Originator_Account_ID>, 
                                                     "Originator_Customer_ID": <Originator_Customer_ID>,
                                                     "Beneficiary_Name": <Beneficiary_Name>,
                                                     "Beneficiary_Account_ID":<Beneficiary_Account_ID>, 
                                                     "Beneficiary_Customer_ID": <Beneficiary_Customer_ID> ,
                                                     "Trxn_Channel": <Trxn_Channel>, 
                                                     "Trxn_Date":<Trxn_Date>, 
                                                     "Trxn_Amount":<Trxn_Amount>, 
                                                     "Branch or ATM Location": <Branch or ATM Location> }
                                               }
                                 } 
                             


                          '''
                   }
    )


[33msar_agent[0m (to chat_manager):

{
  "FIs_to_Accts": {
    "Dummy_Bank": [
      "12345-6789",
      "23456-7891"
    ],
    "Bank of Anan": [
      "3489728"
    ]
  },
  "Customers_to_Accts": {
    "John Doe": [
      "12345-6789"
    ],
    "Acme, Inc.": [
      "23456-7891"
    ],
    "Kulkutta Building Supply Company": [
      "3489728"
    ]
  },
  "FI_to_Cust_to_Acct": {
    "Dummy_Bank": {
      "CUST_001": [
        "12345-6789"
      ],
      "CUST_002": [
        "23456-7891"
      ]
    },
    "Bank of Anan": {
      "CUST_003": [
        "3489728"
      ]
    }
  },
  "Narrative": {
    "12345-6789": "John Doe opened a personal checking account, #12345-6789, in March of 1994. Between January 2 and March 17, 2003, 13 deposits (consisting of cash, checks, money orders) totaling approximately $50,000 posted to the personal account. Individual amounts ranged between $1,500 and $9,500 and occurred on consecutive business days in several instances. A number of third-party 



[32m
Next speaker: trxn_generation_agent
[0m
[33mtrxn_generation_agent[0m (to chat_manager):

Step 1) Identify the number of transactions described in the narrative. There are 13 deposits mentioned in the narrative, each of which constitutes a transaction.

Step 2) For each transaction, extract the required attributes.

For the 13 transactions, the specific details are not provided, but we can assume some common characteristics based on the narrative.

- Originator_Name: John Doe (as he opened the account)
- Originator_Account_ID: 12345-6789
- Originator_Customer_ID: CUST_001 (as per the mapping)
- Beneficiary_Name: John Doe (the deposits go into his account)
- Beneficiary_Account_ID: 12345-6789 (same as the originator account)
- Beneficiary_Customer_ID: CUST_001 (same as the originator)
- Trxn_Channel: Cash/Check/Money Order (various types used)
- Trxn_Date: Various dates from January 2 to March 17, 2003. We will assume they happened on business days, but for simplicity, we can us

In [96]:
chat_results.summary

'```json\n{\n    "12345-6789": {\n        1: {\n            "Originator_Name": "John Doe",\n            "Originator_Account_ID": "12345-6789",\n            "Originator_Customer_ID": "CUST_001",\n            "Beneficiary_Name": "John Doe",\n            "Beneficiary_Account_ID": "12345-6789",\n            "Beneficiary_Customer_ID": "CUST_001",\n            "Trxn_Channel": "Cash",\n            "Trxn_Date": "2003-01-02",\n            "Trxn_Amount": 1500,\n            "Branch or ATM Location": ""\n        },\n        2: {\n            "Originator_Name": "John Doe",\n            "Originator_Account_ID": "12345-6789",\n            "Originator_Customer_ID": "CUST_001",\n            "Beneficiary_Name": "John Doe",\n            "Beneficiary_Account_ID": "12345-6789",\n            "Beneficiary_Customer_ID": "CUST_001",\n            "Trxn_Channel": "Check",\n            "Trxn_Date": "2003-01-03",\n            "Trxn_Amount": 2000,\n            "Branch or ATM Location": ""\n        },\n        3: {\

In [97]:
# Pull the transaction payload out of the chat summary.
# str.strip('```json') strips a *set of characters* rather than a prefix, and
# split('```')[1] raises IndexError when the reply has no code fence, so the fence and
# its "json" language tag are removed explicitly.
cleaned_trxns = chat_results.summary
if '```' in cleaned_trxns:
    # Keep only the body of the first fenced block.
    cleaned_trxns = cleaned_trxns.split('```', 2)[1]
trxns = cleaned_trxns.strip()
if trxns.startswith('json'):
    trxns = trxns[len('json'):].strip()

In [102]:
print(trxns)


{
    "12345-6789": {
        1: {
            "Originator_Name": "John Doe",
            "Originator_Account_ID": "12345-6789",
            "Originator_Customer_ID": "CUST_001",
            "Beneficiary_Name": "John Doe",
            "Beneficiary_Account_ID": "12345-6789",
            "Beneficiary_Customer_ID": "CUST_001",
            "Trxn_Channel": "Cash",
            "Trxn_Date": "2003-01-02",
            "Trxn_Amount": 1500,
            "Branch or ATM Location": ""
        },
        2: {
            "Originator_Name": "John Doe",
            "Originator_Account_ID": "12345-6789",
            "Originator_Customer_ID": "CUST_001",
            "Beneficiary_Name": "John Doe",
            "Beneficiary_Account_ID": "12345-6789",
            "Beneficiary_Customer_ID": "CUST_001",
            "Trxn_Channel": "Check",
            "Trxn_Date": "2003-01-03",
            "Trxn_Amount": 2000,
            "Branch or ATM Location": ""
        },
        3: {
            "Originator_Name": "Joh

In [109]:
import ast
# Parse with ast.literal_eval rather than json.loads: the printed summary uses bare integer
# transaction IDs (e.g. `1: {...}`), which is a valid Python literal but not valid JSON.
trxns_dict = ast.literal_eval(trxns)

In [110]:
trxns_dict

{'12345-6789': {1: {'Originator_Name': 'John Doe',
   'Originator_Account_ID': '12345-6789',
   'Originator_Customer_ID': 'CUST_001',
   'Beneficiary_Name': 'John Doe',
   'Beneficiary_Account_ID': '12345-6789',
   'Beneficiary_Customer_ID': 'CUST_001',
   'Trxn_Channel': 'Cash',
   'Trxn_Date': '2003-01-02',
   'Trxn_Amount': 1500,
   'Branch or ATM Location': ''},
  2: {'Originator_Name': 'John Doe',
   'Originator_Account_ID': '12345-6789',
   'Originator_Customer_ID': 'CUST_001',
   'Beneficiary_Name': 'John Doe',
   'Beneficiary_Account_ID': '12345-6789',
   'Beneficiary_Customer_ID': 'CUST_001',
   'Trxn_Channel': 'Check',
   'Trxn_Date': '2003-01-03',
   'Trxn_Amount': 2000,
   'Branch or ATM Location': ''},
  3: {'Originator_Name': 'John Doe',
   'Originator_Account_ID': '12345-6789',
   'Originator_Customer_ID': 'CUST_001',
   'Beneficiary_Name': 'John Doe',
   'Beneficiary_Account_ID': '12345-6789',
   'Beneficiary_Customer_ID': 'CUST_001',
   'Trxn_Channel': 'Money Order',
 

In [111]:
trxns_dict['12345-6789']

{1: {'Originator_Name': 'John Doe',
  'Originator_Account_ID': '12345-6789',
  'Originator_Customer_ID': 'CUST_001',
  'Beneficiary_Name': 'John Doe',
  'Beneficiary_Account_ID': '12345-6789',
  'Beneficiary_Customer_ID': 'CUST_001',
  'Trxn_Channel': 'Cash',
  'Trxn_Date': '2003-01-02',
  'Trxn_Amount': 1500,
  'Branch or ATM Location': ''},
 2: {'Originator_Name': 'John Doe',
  'Originator_Account_ID': '12345-6789',
  'Originator_Customer_ID': 'CUST_001',
  'Beneficiary_Name': 'John Doe',
  'Beneficiary_Account_ID': '12345-6789',
  'Beneficiary_Customer_ID': 'CUST_001',
  'Trxn_Channel': 'Check',
  'Trxn_Date': '2003-01-03',
  'Trxn_Amount': 2000,
  'Branch or ATM Location': ''},
 3: {'Originator_Name': 'John Doe',
  'Originator_Account_ID': '12345-6789',
  'Originator_Customer_ID': 'CUST_001',
  'Beneficiary_Name': 'John Doe',
  'Beneficiary_Account_ID': '12345-6789',
  'Beneficiary_Customer_ID': 'CUST_001',
  'Trxn_Channel': 'Money Order',
  'Trxn_Date': '2003-01-04',
  'Trxn_Amoun

In [113]:
import pandas as pd
# Flatten the nested {account_id: {transaction_id: attributes}} mapping into one row per transaction.
flattened_data = []

for key, inner_dict in trxns_dict.items():
    for transaction_id, transaction_details in inner_dict.items():
        # Build a fresh row rather than writing the ID fields into transaction_details:
        # that dict lives inside trxns_dict, and mutating it would silently change the parsed data.
        # The two ID fields go last, so the resulting column order is the same as before.
        flattened_data.append(
            {**transaction_details, 'Transaction_ID': transaction_id, 'Account_ID': key}
        )

# Convert the flattened rows to a DataFrame
df = pd.DataFrame(flattened_data)

In [116]:
# Put Transaction_ID and Account_ID first; every other column keeps its relative order.
column_order = ['Transaction_ID', 'Account_ID']
column_order += [col for col in df.columns if col not in column_order]
df = df[column_order]

In [117]:
df

Unnamed: 0,Transaction_ID,Account_ID,Originator_Name,Originator_Account_ID,Originator_Customer_ID,Beneficiary_Name,Beneficiary_Account_ID,Beneficiary_Customer_ID,Trxn_Channel,Trxn_Date,Trxn_Amount,Branch or ATM Location
0,1,12345-6789,John Doe,12345-6789,CUST_001,John Doe,12345-6789,CUST_001,Cash,2003-01-02,1500,
1,2,12345-6789,John Doe,12345-6789,CUST_001,John Doe,12345-6789,CUST_001,Check,2003-01-03,2000,
2,3,12345-6789,John Doe,12345-6789,CUST_001,John Doe,12345-6789,CUST_001,Money Order,2003-01-04,3000,
3,4,12345-6789,John Doe,12345-6789,CUST_001,John Doe,12345-6789,CUST_001,Cash,2003-01-05,4000,
4,5,12345-6789,John Doe,12345-6789,CUST_001,John Doe,12345-6789,CUST_001,Check,2003-01-06,5000,
5,6,12345-6789,John Doe,12345-6789,CUST_001,John Doe,12345-6789,CUST_001,Money Order,2003-01-07,6000,
6,7,12345-6789,John Doe,12345-6789,CUST_001,John Doe,12345-6789,CUST_001,Cash,2003-01-08,7000,
7,8,12345-6789,John Doe,12345-6789,CUST_001,John Doe,12345-6789,CUST_001,Check,2003-01-09,8000,
8,9,12345-6789,John Doe,12345-6789,CUST_001,John Doe,12345-6789,CUST_001,Money Order,2003-01-10,9000,
9,10,12345-6789,John Doe,12345-6789,CUST_001,John Doe,12345-6789,CUST_001,Cash,2003-01-11,8500,
